author    Stephen Hines <srhines@google.com>   2014-07-21 00:45:20 -0700
committer Stephen Hines <srhines@google.com>   2014-07-21 00:45:20 -0700
commit    c6a4f5e819217e1e12c458aed8e7b122e23a3a58 (patch)
tree      81b7dd2bb4370a392f31d332a566c903b5744764 /lib
parent    19c6fbb3e8aaf74093afa08013134b61fa08f245 (diff)
Update LLVM for rebase to r212749.
Includes a cherry-pick of:
r212948 - fixes a small issue with atomic calls
Change-Id: Ib97bd980b59f18142a69506400911a6009d9df18
Diffstat (limited to 'lib')
715 files changed, 34625 insertions, 17381 deletions
diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp
index 57237e5..5cde979 100644
--- a/lib/Analysis/AliasAnalysis.cpp
+++ b/lib/Analysis/AliasAnalysis.cpp
@@ -60,6 +60,13 @@ bool AliasAnalysis::pointsToConstantMemory(const Location &Loc,
   return AA->pointsToConstantMemory(Loc, OrLocal);
 }
 
+AliasAnalysis::Location
+AliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
+                              AliasAnalysis::ModRefResult &Mask) {
+  assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
+  return AA->getArgLocation(CS, ArgIdx, Mask);
+}
+
 void AliasAnalysis::deleteValue(Value *V) {
   assert(AA && "AA didn't call InitializeAliasAnalysis in its run method!");
   AA->deleteValue(V);
@@ -91,22 +98,26 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   if (onlyAccessesArgPointees(MRB)) {
     bool doesAlias = false;
+    ModRefResult AllArgsMask = NoModRef;
     if (doesAccessArgPointees(MRB)) {
-      MDNode *CSTag = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
       for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), AE = CS.arg_end();
            AI != AE; ++AI) {
         const Value *Arg = *AI;
         if (!Arg->getType()->isPointerTy())
           continue;
-        Location CSLoc(Arg, UnknownSize, CSTag);
+        ModRefResult ArgMask;
+        Location CSLoc =
+          getArgLocation(CS, (unsigned) std::distance(CS.arg_begin(), AI),
+                         ArgMask);
         if (!isNoAlias(CSLoc, Loc)) {
           doesAlias = true;
-          break;
+          AllArgsMask = ModRefResult(AllArgsMask | ArgMask);
         }
       }
     }
     if (!doesAlias)
       return NoModRef;
+    Mask = ModRefResult(Mask & AllArgsMask);
   }
 
   // If Loc is a constant memory location, the call definitely could not
@@ -150,14 +161,23 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
   if (onlyAccessesArgPointees(CS2B)) {
     AliasAnalysis::ModRefResult R = NoModRef;
     if (doesAccessArgPointees(CS2B)) {
-      MDNode *CS2Tag = CS2.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
       for (ImmutableCallSite::arg_iterator
            I = CS2.arg_begin(), E = CS2.arg_end(); I != E; ++I) {
         const Value *Arg = *I;
         if (!Arg->getType()->isPointerTy())
           continue;
-        Location CS2Loc(Arg, UnknownSize, CS2Tag);
-        R = ModRefResult((R | getModRefInfo(CS1, CS2Loc)) & Mask);
+        ModRefResult ArgMask;
+        Location CS2Loc =
+          getArgLocation(CS2, (unsigned) std::distance(CS2.arg_begin(), I),
+                         ArgMask);
+        // ArgMask indicates what CS2 might do to CS2Loc, and the dependence of
+        // CS1 on that location is the inverse.
+        if (ArgMask == Mod)
+          ArgMask = ModRef;
+        else if (ArgMask == Ref)
+          ArgMask = Mod;
+
+        R = ModRefResult((R | (getModRefInfo(CS1, CS2Loc) & ArgMask)) & Mask);
         if (R == Mask)
           break;
       }
@@ -170,14 +190,16 @@ AliasAnalysis::getModRefInfo(ImmutableCallSite CS1, ImmutableCallSite CS2) {
   if (onlyAccessesArgPointees(CS1B)) {
     AliasAnalysis::ModRefResult R = NoModRef;
     if (doesAccessArgPointees(CS1B)) {
-      MDNode *CS1Tag = CS1.getInstruction()->getMetadata(LLVMContext::MD_tbaa);
       for (ImmutableCallSite::arg_iterator
            I = CS1.arg_begin(), E = CS1.arg_end(); I != E; ++I) {
         const Value *Arg = *I;
         if (!Arg->getType()->isPointerTy())
           continue;
-        Location CS1Loc(Arg, UnknownSize, CS1Tag);
-        if (getModRefInfo(CS2, CS1Loc) != NoModRef) {
+        ModRefResult ArgMask;
+        Location CS1Loc =
+          getArgLocation(CS1, (unsigned) std::distance(CS1.arg_begin(), I),
+                         ArgMask);
+        if ((getModRefInfo(CS2, CS1Loc) & ArgMask) != NoModRef) {
           R = Mask;
           break;
         }
diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp
index 01c1c7e..ade940a 100644
--- a/lib/Analysis/Analysis.cpp
+++ b/lib/Analysis/Analysis.cpp
@@ -48,6 +48,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeIVUsersPass(Registry);
   initializeInstCountPass(Registry);
   initializeIntervalPartitionPass(Registry);
+  initializeJumpInstrTableInfoPass(Registry);
   initializeLazyValueInfoPass(Registry);
   initializeLibCallAliasAnalysisPass(Registry);
   initializeLintPass(Registry);
diff --git a/lib/Analysis/Android.mk b/lib/Analysis/Android.mk
index bca673e..4e435a1 100644
--- a/lib/Analysis/Android.mk
+++ b/lib/Analysis/Android.mk
@@ -27,6 +27,7 @@ analysis_SRC_FILES := \
   InstructionSimplify.cpp \
   Interval.cpp \
   IntervalPartition.cpp \
+  JumpInstrTableInfo.cpp \
   LazyCallGraph.cpp \
   LazyValueInfo.cpp \
   LibCallAliasAnalysis.cpp \
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index fe90b84..c50dd4a 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -490,6 +490,10 @@ namespace {
     /// global) or not.
     bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override;
 
+    /// Get the location associated with a pointer argument of a callsite.
+    Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
+                            ModRefResult &Mask) override;
+
     /// getModRefBehavior - Return the behavior when calling the given
     /// call site.
     ModRefBehavior getModRefBehavior(ImmutableCallSite CS) override;
@@ -653,6 +657,21 @@ BasicAliasAnalysis::pointsToConstantMemory(const Location &Loc, bool OrLocal) {
   return Worklist.empty();
 }
 
+static bool isMemsetPattern16(const Function *MS,
+                              const TargetLibraryInfo &TLI) {
+  if (TLI.has(LibFunc::memset_pattern16) &&
+      MS->getName() == "memset_pattern16") {
+    FunctionType *MemsetType = MS->getFunctionType();
+    if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
+        isa<PointerType>(MemsetType->getParamType(0)) &&
+        isa<PointerType>(MemsetType->getParamType(1)) &&
+        isa<IntegerType>(MemsetType->getParamType(2)))
+      return true;
+  }
+
+  return false;
+}
+
 /// getModRefBehavior - Return the behavior when calling the given call site.
 AliasAnalysis::ModRefBehavior
 BasicAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) {
@@ -692,10 +711,93 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) {
   if (F->onlyReadsMemory())
     Min = OnlyReadsMemory;
 
+  const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+  if (isMemsetPattern16(F, TLI))
+    Min = OnlyAccessesArgumentPointees;
+
   // Otherwise be conservative.
   return ModRefBehavior(AliasAnalysis::getModRefBehavior(F) & Min);
 }
 
+AliasAnalysis::Location
+BasicAliasAnalysis::getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
+                                   ModRefResult &Mask) {
+  Location Loc = AliasAnalysis::getArgLocation(CS, ArgIdx, Mask);
+  const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
+  const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
+  if (II != nullptr)
+    switch (II->getIntrinsicID()) {
+    default: break;
+    case Intrinsic::memset:
+    case Intrinsic::memcpy:
+    case Intrinsic::memmove: {
+      assert((ArgIdx == 0 || ArgIdx == 1) &&
+             "Invalid argument index for memory intrinsic");
+      if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
+        Loc.Size = LenCI->getZExtValue();
+      assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
+             "Memory intrinsic location pointer not argument?");
+      Mask = ArgIdx ? Ref : Mod;
+      break;
+    }
+    case Intrinsic::lifetime_start:
+    case Intrinsic::lifetime_end:
+    case Intrinsic::invariant_start: {
+      assert(ArgIdx == 1 && "Invalid argument index");
+      assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
+             "Intrinsic location pointer not argument?");
+      Loc.Size = cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
+      break;
+    }
+    case Intrinsic::invariant_end: {
+      assert(ArgIdx == 2 && "Invalid argument index");
+      assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
+             "Intrinsic location pointer not argument?");
+      Loc.Size = cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
+      break;
+    }
+    case Intrinsic::arm_neon_vld1: {
+      assert(ArgIdx == 0 && "Invalid argument index");
+      assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
+             "Intrinsic location pointer not argument?");
+      // LLVM's vld1 and vst1 intrinsics currently only support a single
+      // vector register.
+      if (DL)
+        Loc.Size = DL->getTypeStoreSize(II->getType());
+      break;
+    }
+    case Intrinsic::arm_neon_vst1: {
+      assert(ArgIdx == 0 && "Invalid argument index");
+      assert(Loc.Ptr == II->getArgOperand(ArgIdx) &&
+             "Intrinsic location pointer not argument?");
+      if (DL)
+        Loc.Size = DL->getTypeStoreSize(II->getArgOperand(1)->getType());
+      break;
+    }
+    }
+
+  // We can bound the aliasing properties of memset_pattern16 just as we can
+  // for memcpy/memset.  This is particularly important because the
+  // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
+  // whenever possible.
+  else if (CS.getCalledFunction() &&
+           isMemsetPattern16(CS.getCalledFunction(), TLI)) {
+    assert((ArgIdx == 0 || ArgIdx == 1) &&
+           "Invalid argument index for memset_pattern16");
+    if (ArgIdx == 1)
+      Loc.Size = 16;
+    else if (const ConstantInt *LenCI =
+             dyn_cast<ConstantInt>(CS.getArgument(2)))
+      Loc.Size = LenCI->getZExtValue();
+    assert(Loc.Ptr == CS.getArgument(ArgIdx) &&
+           "memset_pattern16 location pointer not argument?");
+    Mask = ArgIdx ? Ref : Mod;
+  }
+  // FIXME: Handle memset_pattern4 and memset_pattern8 also.
+
+  return Loc;
+}
+
 /// getModRefInfo - Check to see if the specified callsite can clobber the
 /// specified memory object.  Since we only look at local properties of this
 /// function, we really can't say much about this query.  We do, however, use
@@ -748,124 +850,8 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
     return NoModRef;
   }
 
-  const TargetLibraryInfo &TLI = getAnalysis<TargetLibraryInfo>();
-  ModRefResult Min = ModRef;
-
-  // Finally, handle specific knowledge of intrinsics.
-  const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction());
-  if (II != nullptr)
-    switch (II->getIntrinsicID()) {
-    default: break;
-    case Intrinsic::memcpy:
-    case Intrinsic::memmove: {
-      uint64_t Len = UnknownSize;
-      if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2)))
-        Len = LenCI->getZExtValue();
-      Value *Dest = II->getArgOperand(0);
-      Value *Src = II->getArgOperand(1);
-      // If it can't overlap the source dest, then it doesn't modref the loc.
-      if (isNoAlias(Location(Dest, Len), Loc)) {
-        if (isNoAlias(Location(Src, Len), Loc))
-          return NoModRef;
-        // If it can't overlap the dest, then worst case it reads the loc.
-        Min = Ref;
-      } else if (isNoAlias(Location(Src, Len), Loc)) {
-        // If it can't overlap the source, then worst case it mutates the loc.
-        Min = Mod;
-      }
-      break;
-    }
-    case Intrinsic::memset:
-      // Since memset is 'accesses arguments' only, the AliasAnalysis base class
-      // will handle it for the variable length case.
-      if (ConstantInt *LenCI = dyn_cast<ConstantInt>(II->getArgOperand(2))) {
-        uint64_t Len = LenCI->getZExtValue();
-        Value *Dest = II->getArgOperand(0);
-        if (isNoAlias(Location(Dest, Len), Loc))
-          return NoModRef;
-      }
-      // We know that memset doesn't load anything.
-      Min = Mod;
-      break;
-    case Intrinsic::lifetime_start:
-    case Intrinsic::lifetime_end:
-    case Intrinsic::invariant_start: {
-      uint64_t PtrSize =
-        cast<ConstantInt>(II->getArgOperand(0))->getZExtValue();
-      if (isNoAlias(Location(II->getArgOperand(1),
-                             PtrSize,
-                             II->getMetadata(LLVMContext::MD_tbaa)),
-                    Loc))
-        return NoModRef;
-      break;
-    }
-    case Intrinsic::invariant_end: {
-      uint64_t PtrSize =
-        cast<ConstantInt>(II->getArgOperand(1))->getZExtValue();
-      if (isNoAlias(Location(II->getArgOperand(2),
-                             PtrSize,
-                             II->getMetadata(LLVMContext::MD_tbaa)),
-                    Loc))
-        return NoModRef;
-      break;
-    }
-    case Intrinsic::arm_neon_vld1: {
-      // LLVM's vld1 and vst1 intrinsics currently only support a single
-      // vector register.
-      uint64_t Size =
-        DL ? DL->getTypeStoreSize(II->getType()) : UnknownSize;
-      if (isNoAlias(Location(II->getArgOperand(0), Size,
-                             II->getMetadata(LLVMContext::MD_tbaa)),
-                    Loc))
-        return NoModRef;
-      break;
-    }
-    case Intrinsic::arm_neon_vst1: {
-      uint64_t Size =
-        DL ? DL->getTypeStoreSize(II->getArgOperand(1)->getType()) : UnknownSize;
-      if (isNoAlias(Location(II->getArgOperand(0), Size,
-                             II->getMetadata(LLVMContext::MD_tbaa)),
-                    Loc))
-        return NoModRef;
-      break;
-    }
-    }
-
-  // We can bound the aliasing properties of memset_pattern16 just as we can
-  // for memcpy/memset.  This is particularly important because the
-  // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
-  // whenever possible.
-  else if (TLI.has(LibFunc::memset_pattern16) &&
-           CS.getCalledFunction() &&
-           CS.getCalledFunction()->getName() == "memset_pattern16") {
-    const Function *MS = CS.getCalledFunction();
-    FunctionType *MemsetType = MS->getFunctionType();
-    if (!MemsetType->isVarArg() && MemsetType->getNumParams() == 3 &&
-        isa<PointerType>(MemsetType->getParamType(0)) &&
-        isa<PointerType>(MemsetType->getParamType(1)) &&
-        isa<IntegerType>(MemsetType->getParamType(2))) {
-      uint64_t Len = UnknownSize;
-      if (const ConstantInt *LenCI = dyn_cast<ConstantInt>(CS.getArgument(2)))
-        Len = LenCI->getZExtValue();
-      const Value *Dest = CS.getArgument(0);
-      const Value *Src = CS.getArgument(1);
-      // If it can't overlap the source dest, then it doesn't modref the loc.
-      if (isNoAlias(Location(Dest, Len), Loc)) {
-        // Always reads 16 bytes of the source.
-        if (isNoAlias(Location(Src, 16), Loc))
-          return NoModRef;
-        // If it can't overlap the dest, then worst case it reads the loc.
-        Min = Ref;
-        // Always reads 16 bytes of the source.
-      } else if (isNoAlias(Location(Src, 16), Loc)) {
-        // If it can't overlap the source, then worst case it mutates the loc.
-        Min = Mod;
-      }
-    }
-  }
-
   // The AliasAnalysis base class has some smarts, lets use them.
-  return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
+  return AliasAnalysis::getModRefInfo(CS, Loc);
 }
 
 /// aliasGEP - Provide a bunch of ad-hoc rules to disambiguate a GEP instruction
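Aside: the Mod/Ref inversion in the AliasAnalysis hunk above is easy to misread. A minimal standalone C++ sketch of the mask algebra (illustrative only, not LLVM code):

  // Standalone sketch of the mask inversion used in getModRefInfo(CS1, CS2).
  #include <cassert>

  enum ModRefResult { NoModRef = 0, Ref = 1, Mod = 2, ModRef = Ref | Mod };

  // If CS2 only writes a location (Mod), CS1 depends on CS2 whether CS1
  // reads it (read-after-write) or writes it (write-after-write), so the
  // mask applied to getModRefInfo(CS1, Loc) must be ModRef. If CS2 only
  // reads it (Ref), only a write in CS1 matters (write-after-read), so the
  // mask becomes Mod.
  inline ModRefResult invertForCS1(ModRefResult ArgMask) {
    if (ArgMask == Mod)
      return ModRef;
    if (ArgMask == Ref)
      return Mod;
    return ArgMask;
  }

  int main() {
    assert(invertForCS1(Mod) == ModRef); // RAW and WAW both matter.
    assert(invertForCS1(Ref) == Mod);    // only WAR matters.
    return 0;
  }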
diff --git a/lib/Analysis/BlockFrequencyInfoImpl.cpp b/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 87d93a4..4fd2c11 100644
--- a/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -12,7 +12,6 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Analysis/BlockFrequencyInfoImpl.h"
-#include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SCCIterator.h"
 #include "llvm/Support/raw_ostream.h"
 #include <deque>
@@ -24,298 +23,13 @@ using namespace llvm::bfi_detail;
 
 //===----------------------------------------------------------------------===//
 //
-// UnsignedFloat implementation.
-//
-//===----------------------------------------------------------------------===//
-#ifndef _MSC_VER
-const int32_t UnsignedFloatBase::MaxExponent;
-const int32_t UnsignedFloatBase::MinExponent;
-#endif
-
-static void appendDigit(std::string &Str, unsigned D) {
-  assert(D < 10);
-  Str += '0' + D % 10;
-}
-
-static void appendNumber(std::string &Str, uint64_t N) {
-  while (N) {
-    appendDigit(Str, N % 10);
-    N /= 10;
-  }
-}
-
-static bool doesRoundUp(char Digit) {
-  switch (Digit) {
-  case '5':
-  case '6':
-  case '7':
-  case '8':
-  case '9':
-    return true;
-  default:
-    return false;
-  }
-}
-
-static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) {
-  assert(E >= UnsignedFloatBase::MinExponent);
-  assert(E <= UnsignedFloatBase::MaxExponent);
-
-  // Find a new E, but don't let it increase past MaxExponent.
-  int LeadingZeros = UnsignedFloatBase::countLeadingZeros64(D);
-  int NewE = std::min(UnsignedFloatBase::MaxExponent, E + 63 - LeadingZeros);
-  int Shift = 63 - (NewE - E);
-  assert(Shift <= LeadingZeros);
-  assert(Shift == LeadingZeros || NewE == UnsignedFloatBase::MaxExponent);
-  D <<= Shift;
-  E = NewE;
-
-  // Check for a denormal.
-  unsigned AdjustedE = E + 16383;
-  if (!(D >> 63)) {
-    assert(E == UnsignedFloatBase::MaxExponent);
-    AdjustedE = 0;
-  }
-
-  // Build the float and print it.
-  uint64_t RawBits[2] = {D, AdjustedE};
-  APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits));
-  SmallVector<char, 24> Chars;
-  Float.toString(Chars, Precision, 0);
-  return std::string(Chars.begin(), Chars.end());
-}
-
-static std::string stripTrailingZeros(const std::string &Float) {
-  size_t NonZero = Float.find_last_not_of('0');
-  assert(NonZero != std::string::npos && "no . in floating point string");
-
-  if (Float[NonZero] == '.')
-    ++NonZero;
-
-  return Float.substr(0, NonZero + 1);
-}
-
-std::string UnsignedFloatBase::toString(uint64_t D, int16_t E, int Width,
-                                        unsigned Precision) {
-  if (!D)
-    return "0.0";
-
-  // Canonicalize exponent and digits.
-  uint64_t Above0 = 0;
-  uint64_t Below0 = 0;
-  uint64_t Extra = 0;
-  int ExtraShift = 0;
-  if (E == 0) {
-    Above0 = D;
-  } else if (E > 0) {
-    if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) {
-      D <<= Shift;
-      E -= Shift;
-
-      if (!E)
-        Above0 = D;
-    }
-  } else if (E > -64) {
-    Above0 = D >> -E;
-    Below0 = D << (64 + E);
-  } else if (E > -120) {
-    Below0 = D >> (-E - 64);
-    Extra = D << (128 + E);
-    ExtraShift = -64 - E;
-  }
-
-  // Fall back on APFloat for very small and very large numbers.
-  if (!Above0 && !Below0)
-    return toStringAPFloat(D, E, Precision);
-
-  // Append the digits before the decimal.
-  std::string Str;
-  size_t DigitsOut = 0;
-  if (Above0) {
-    appendNumber(Str, Above0);
-    DigitsOut = Str.size();
-  } else
-    appendDigit(Str, 0);
-  std::reverse(Str.begin(), Str.end());
-
-  // Return early if there's nothing after the decimal.
-  if (!Below0)
-    return Str + ".0";
-
-  // Append the decimal and beyond.
-  Str += '.';
-  uint64_t Error = UINT64_C(1) << (64 - Width);
-
-  // We need to shift Below0 to the right to make space for calculating
-  // digits.  Save the precision we're losing in Extra.
-  Extra = (Below0 & 0xf) << 56 | (Extra >> 8);
-  Below0 >>= 4;
-  size_t SinceDot = 0;
-  size_t AfterDot = Str.size();
-  do {
-    if (ExtraShift) {
-      --ExtraShift;
-      Error *= 5;
-    } else
-      Error *= 10;
-
-    Below0 *= 10;
-    Extra *= 10;
-    Below0 += (Extra >> 60);
-    Extra = Extra & (UINT64_MAX >> 4);
-    appendDigit(Str, Below0 >> 60);
-    Below0 = Below0 & (UINT64_MAX >> 4);
-    if (DigitsOut || Str.back() != '0')
-      ++DigitsOut;
-    ++SinceDot;
-  } while (Error && (Below0 << 4 | Extra >> 60) >= Error / 2 &&
-           (!Precision || DigitsOut <= Precision || SinceDot < 2));
-
-  // Return early for maximum precision.
-  if (!Precision || DigitsOut <= Precision)
-    return stripTrailingZeros(Str);
-
-  // Find where to truncate.
-  size_t Truncate =
-      std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1);
-
-  // Check if there's anything to truncate.
-  if (Truncate >= Str.size())
-    return stripTrailingZeros(Str);
-
-  bool Carry = doesRoundUp(Str[Truncate]);
-  if (!Carry)
-    return stripTrailingZeros(Str.substr(0, Truncate));
-
-  // Round with the first truncated digit.
-  for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend();
-       I != E; ++I) {
-    if (*I == '.')
-      continue;
-    if (*I == '9') {
-      *I = '0';
-      continue;
-    }
-
-    ++*I;
-    Carry = false;
-    break;
-  }
-
-  // Add "1" in front if we still need to carry.
-  return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate));
-}
-
-raw_ostream &UnsignedFloatBase::print(raw_ostream &OS, uint64_t D, int16_t E,
-                                      int Width, unsigned Precision) {
-  return OS << toString(D, E, Width, Precision);
-}
-
-void UnsignedFloatBase::dump(uint64_t D, int16_t E, int Width) {
-  print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E
-                                << "]";
-}
-
-static std::pair<uint64_t, int16_t>
-getRoundedFloat(uint64_t N, bool ShouldRound, int64_t Shift) {
-  if (ShouldRound)
-    if (!++N)
-      // Rounding caused an overflow.
-      return std::make_pair(UINT64_C(1), Shift + 64);
-  return std::make_pair(N, Shift);
-}
-
-std::pair<uint64_t, int16_t> UnsignedFloatBase::divide64(uint64_t Dividend,
-                                                         uint64_t Divisor) {
-  // Input should be sanitized.
-  assert(Divisor);
-  assert(Dividend);
-
-  // Minimize size of divisor.
-  int16_t Shift = 0;
-  if (int Zeros = countTrailingZeros(Divisor)) {
-    Shift -= Zeros;
-    Divisor >>= Zeros;
-  }
-
-  // Check for powers of two.
-  if (Divisor == 1)
-    return std::make_pair(Dividend, Shift);
-
-  // Maximize size of dividend.
-  if (int Zeros = countLeadingZeros64(Dividend)) {
-    Shift -= Zeros;
-    Dividend <<= Zeros;
-  }
-
-  // Start with the result of a divide.
-  uint64_t Quotient = Dividend / Divisor;
-  Dividend %= Divisor;
-
-  // Continue building the quotient with long division.
-  //
-  // TODO: continue with largers digits.
-  while (!(Quotient >> 63) && Dividend) {
-    // Shift Dividend, and check for overflow.
-    bool IsOverflow = Dividend >> 63;
-    Dividend <<= 1;
-    --Shift;
-
-    // Divide.
-    bool DoesDivide = IsOverflow || Divisor <= Dividend;
-    Quotient = (Quotient << 1) | uint64_t(DoesDivide);
-    Dividend -= DoesDivide ? Divisor : 0;
-  }
-
-  // Round.
-  return getRoundedFloat(Quotient, Dividend >= getHalf(Divisor), Shift);
-}
-
-std::pair<uint64_t, int16_t> UnsignedFloatBase::multiply64(uint64_t L,
-                                                           uint64_t R) {
-  // Separate into two 32-bit digits (U.L).
-  uint64_t UL = L >> 32, LL = L & UINT32_MAX, UR = R >> 32, LR = R & UINT32_MAX;
-
-  // Compute cross products.
-  uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR;
-
-  // Sum into two 64-bit digits.
-  uint64_t Upper = P1, Lower = P4;
-  auto addWithCarry = [&](uint64_t N) {
-    uint64_t NewLower = Lower + (N << 32);
-    Upper += (N >> 32) + (NewLower < Lower);
-    Lower = NewLower;
-  };
-  addWithCarry(P2);
-  addWithCarry(P3);
-
-  // Check whether the upper digit is empty.
-  if (!Upper)
-    return std::make_pair(Lower, 0);
-
-  // Shift as little as possible to maximize precision.
-  unsigned LeadingZeros = countLeadingZeros64(Upper);
-  int16_t Shift = 64 - LeadingZeros;
-  if (LeadingZeros)
-    Upper = Upper << LeadingZeros | Lower >> Shift;
-  bool ShouldRound = Shift && (Lower & UINT64_C(1) << (Shift - 1));
-  return getRoundedFloat(Upper, ShouldRound, Shift);
-}
-
-//===----------------------------------------------------------------------===//
-//
 // BlockMass implementation.
 //
 //===----------------------------------------------------------------------===//
-UnsignedFloat<uint64_t> BlockMass::toFloat() const {
+ScaledNumber<uint64_t> BlockMass::toScaled() const {
   if (isFull())
-    return UnsignedFloat<uint64_t>(1, 0);
-  return UnsignedFloat<uint64_t>(getMass() + 1, -64);
+    return ScaledNumber<uint64_t>(1, 0);
+  return ScaledNumber<uint64_t>(getMass() + 1, -64);
 }
 
 void BlockMass::dump() const { print(dbgs()); }
@@ -342,7 +56,7 @@ namespace {
 typedef BlockFrequencyInfoImplBase::BlockNode BlockNode;
 typedef BlockFrequencyInfoImplBase::Distribution Distribution;
 typedef BlockFrequencyInfoImplBase::Distribution::WeightList WeightList;
-typedef BlockFrequencyInfoImplBase::Float Float;
+typedef BlockFrequencyInfoImplBase::Scaled64 Scaled64;
 typedef BlockFrequencyInfoImplBase::LoopData LoopData;
 typedef BlockFrequencyInfoImplBase::Weight Weight;
 typedef BlockFrequencyInfoImplBase::FrequencyData FrequencyData;
@@ -622,7 +336,7 @@ bool BlockFrequencyInfoImplBase::addLoopSuccessorsToDist(
 ///
 /// Gives the maximum number of estimated iterations allowed for a loop.  Very
 /// large numbers cause problems downstream (even within 64-bits).
-static Float getMaxLoopScale() { return Float(1, 12); }
+static Scaled64 getMaxLoopScale() { return Scaled64(1, 12); }
 
 /// \brief Compute the loop scale for a loop.
 void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
@@ -634,7 +348,7 @@ void BlockFrequencyInfoImplBase::computeLoopScale(LoopData &Loop) {
   BlockMass ExitMass = BlockMass::getFull() - Loop.BackedgeMass;
 
   // Block scale stores the inverse of the scale.
-  Loop.Scale = ExitMass.toFloat().inverse();
+  Loop.Scale = ExitMass.toScaled().inverse();
 
   DEBUG(dbgs() << " - exit-mass = " << ExitMass << " (" << BlockMass::getFull()
                << " - " << Loop.BackedgeMass << ")\n"
@@ -708,15 +422,16 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
 }
 
 static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
-                                     const Float &Min, const Float &Max) {
+                                     const Scaled64 &Min, const Scaled64 &Max) {
   // Scale the Factor to a size that creates integers.  Ideally, integers would
   // be scaled so that Max == UINT64_MAX so that they can be best
   // differentiated.  However, the register allocator currently deals poorly
   // with large numbers.  Instead, push Min up a little from 1 to give some
   // room to differentiate small, unequal numbers.
   //
-  // TODO: fix issues downstream so that ScalingFactor can be Float(1,64)/Max.
-  Float ScalingFactor = Min.inverse();
+  // TODO: fix issues downstream so that ScalingFactor can be
+  // Scaled64(1,64)/Max.
+  Scaled64 ScalingFactor = Min.inverse();
   if ((Max / Min).lg() < 60)
     ScalingFactor <<= 3;
 
@@ -724,10 +439,10 @@ static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
   DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
                << ", factor = " << ScalingFactor << "\n");
   for (size_t Index = 0; Index < BFI.Freqs.size(); ++Index) {
-    Float Scaled = BFI.Freqs[Index].Floating * ScalingFactor;
+    Scaled64 Scaled = BFI.Freqs[Index].Scaled * ScalingFactor;
     BFI.Freqs[Index].Integer = std::max(UINT64_C(1), Scaled.toInt<uint64_t>());
     DEBUG(dbgs() << " - " << BFI.getBlockName(Index) << ": float = "
-                 << BFI.Freqs[Index].Floating << ", scaled = " << Scaled
+                 << BFI.Freqs[Index].Scaled << ", scaled = " << Scaled
                  << ", int = " << BFI.Freqs[Index].Integer << "\n");
   }
 }
@@ -740,7 +455,7 @@ static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
   DEBUG(dbgs() << "unwrap-loop-package: " << BFI.getLoopName(Loop)
                << ": mass = " << Loop.Mass << ", scale = " << Loop.Scale
                << "\n");
-  Loop.Scale *= Loop.Mass.toFloat();
+  Loop.Scale *= Loop.Mass.toScaled();
   Loop.IsPackaged = false;
   DEBUG(dbgs() << "  => combined-scale = " << Loop.Scale << "\n");
 
@@ -749,9 +464,9 @@ static void unwrapLoop(BlockFrequencyInfoImplBase &BFI, LoopData &Loop) {
   // final head scale will be used for updated the rest of the members.
   for (const BlockNode &N : Loop.Nodes) {
     const auto &Working = BFI.Working[N.Index];
-    Float &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
-                                    : BFI.Freqs[N.Index].Floating;
-    Float New = Loop.Scale * F;
+    Scaled64 &F = Working.isAPackage() ? Working.getPackagedLoop()->Scale
+                                       : BFI.Freqs[N.Index].Scaled;
+    Scaled64 New = Loop.Scale * F;
     DEBUG(dbgs() << " - " << BFI.getBlockName(N) << ": " << F << " => " << New
                  << "\n");
     F = New;
@@ -761,7 +476,7 @@ void BlockFrequencyInfoImplBase::unwrapLoops() {
   // Set initial frequencies from loop-local masses.
   for (size_t Index = 0; Index < Working.size(); ++Index)
-    Freqs[Index].Floating = Working[Index].Mass.toFloat();
+    Freqs[Index].Scaled = Working[Index].Mass.toScaled();
 
   for (LoopData &Loop : Loops)
     unwrapLoop(*this, Loop);
@@ -770,12 +485,12 @@ void BlockFrequencyInfoImplBase::unwrapLoops() {
 void BlockFrequencyInfoImplBase::finalizeMetrics() {
   // Unwrap loop packages in reverse post-order, tracking min and max
   // frequencies.
-  auto Min = Float::getLargest();
-  auto Max = Float::getZero();
+  auto Min = Scaled64::getLargest();
+  auto Max = Scaled64::getZero();
   for (size_t Index = 0; Index < Working.size(); ++Index) {
     // Update min/max scale.
-    Min = std::min(Min, Freqs[Index].Floating);
-    Max = std::max(Max, Freqs[Index].Floating);
+    Min = std::min(Min, Freqs[Index].Scaled);
+    Max = std::max(Max, Freqs[Index].Scaled);
   }
 
   // Convert to integers.
@@ -794,11 +509,11 @@ BlockFrequencyInfoImplBase::getBlockFreq(const BlockNode &Node) const {
     return 0;
   return Freqs[Node.Index].Integer;
 }
-Float
+Scaled64
 BlockFrequencyInfoImplBase::getFloatingBlockFreq(const BlockNode &Node) const {
   if (!Node.isValid())
-    return Float::getZero();
-  return Freqs[Node.Index].Floating;
+    return Scaled64::getZero();
+  return Freqs[Node.Index].Scaled;
 }
 
 std::string
@@ -819,8 +534,8 @@ BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
 raw_ostream &
 BlockFrequencyInfoImplBase::printBlockFreq(raw_ostream &OS,
                                            const BlockFrequency &Freq) const {
-  Float Block(Freq.getFrequency(), 0);
-  Float Entry(getEntryFreq(), 0);
+  Scaled64 Block(Freq.getFrequency(), 0);
+  Scaled64 Entry(getEntryFreq(), 0);
 
   return OS << Block / Entry;
 }
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index b546789..d1632fd 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -25,6 +25,7 @@ add_llvm_library(LLVMAnalysis
   InstructionSimplify.cpp
   Interval.cpp
   IntervalPartition.cpp
+  JumpInstrTableInfo.cpp
   LazyCallGraph.cpp
   LazyValueInfo.cpp
   LibCallAliasAnalysis.cpp
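Aside, for readers tracking the UnsignedFloat -> ScaledNumber rename above: a ScaledNumber<uint64_t>(D, E) represents D * 2^E. A small sketch modeling the conversions in this hunk with plain doubles (illustrative only, not LLVM code):

  // Sketch of the BlockMass -> ScaledNumber mapping, modeled with doubles.
  #include <cassert>
  #include <cmath>
  #include <cstdint>

  // ScaledNumber<uint64_t>(D, E) represents D * 2^E.
  double scaled(uint64_t D, int E) { return double(D) * std::ldexp(1.0, E); }

  int main() {
    // A full mass converts to ScaledNumber(1, 0) == 1.0.
    assert(scaled(1, 0) == 1.0);
    // A half mass M = 2^63 - 1 converts to (M + 1) * 2^-64 == 0.5.
    assert(scaled(UINT64_C(1) << 63, -64) == 0.5);
    // getMaxLoopScale() == ScaledNumber(1, 12): at most 2^12 = 4096 iterations.
    assert(scaled(1, 12) == 4096.0);
    return 0;
  }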
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 0ac1cb5..eb3e2c6 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Config/config.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
@@ -31,11 +32,15 @@
 #include "llvm/IR/Intrinsics.h"
 #include "llvm/IR/Operator.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/FEnv.h"
 #include "llvm/Support/MathExtras.h"
 #include "llvm/Target/TargetLibraryInfo.h"
 #include <cerrno>
 #include <cmath>
+
+#ifdef HAVE_FENV_H
+#include <fenv.h>
+#endif
+
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -706,7 +711,7 @@ static Constant *CastGEPIndices(ArrayRef<Constant *> Ops,
 static Constant* StripPtrCastKeepAS(Constant* Ptr) {
   assert(Ptr->getType()->isPointerTy() && "Not a pointer type");
   PointerType *OldPtrTy = cast<PointerType>(Ptr->getType());
-  Ptr = cast<Constant>(Ptr->stripPointerCasts());
+  Ptr = Ptr->stripPointerCasts();
   PointerType *NewPtrTy = cast<PointerType>(Ptr->getType());
 
   // Preserve the address space number of the pointer.
@@ -1314,12 +1319,34 @@ static Constant *GetConstantFoldFPValue(double V, Type *Ty) {
 }
 
+namespace {
+/// llvm_fenv_clearexcept - Clear the floating-point exception state.
+static inline void llvm_fenv_clearexcept() {
+#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT
+  feclearexcept(FE_ALL_EXCEPT);
+#endif
+  errno = 0;
+}
+
+/// llvm_fenv_testexcept - Test if a floating-point exception was raised.
+static inline bool llvm_fenv_testexcept() {
+  int errno_val = errno;
+  if (errno_val == ERANGE || errno_val == EDOM)
+    return true;
+#if defined(HAVE_FENV_H) && HAVE_DECL_FE_ALL_EXCEPT && HAVE_DECL_FE_INEXACT
+  if (fetestexcept(FE_ALL_EXCEPT & ~FE_INEXACT))
+    return true;
+#endif
+  return false;
+}
+} // End namespace
+
 static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
                                 Type *Ty) {
-  sys::llvm_fenv_clearexcept();
+  llvm_fenv_clearexcept();
   V = NativeFP(V);
-  if (sys::llvm_fenv_testexcept()) {
-    sys::llvm_fenv_clearexcept();
+  if (llvm_fenv_testexcept()) {
+    llvm_fenv_clearexcept();
     return nullptr;
   }
 
@@ -1328,10 +1355,10 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
 
 static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
                                       double V, double W, Type *Ty) {
-  sys::llvm_fenv_clearexcept();
+  llvm_fenv_clearexcept();
   V = NativeFP(V, W);
-  if (sys::llvm_fenv_testexcept()) {
-    sys::llvm_fenv_clearexcept();
+  if (llvm_fenv_testexcept()) {
+    llvm_fenv_clearexcept();
     return nullptr;
   }
diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp
index 780b1aa..1b74f8c 100644
--- a/lib/Analysis/CostModel.cpp
+++ b/lib/Analysis/CostModel.cpp
@@ -95,6 +95,31 @@ static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) {
   return true;
 }
 
+static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) {
+  bool isAlternate = true;
+  unsigned MaskSize = Mask.size();
+
+  // Example: shufflevector A, B, <0,5,2,7>
+  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
+    if (Mask[i] < 0)
+      continue;
+    isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
+  }
+
+  if (isAlternate)
+    return true;
+
+  isAlternate = true;
+  // Example: shufflevector A, B, <4,1,6,3>
+  for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
+    if (Mask[i] < 0)
+      continue;
+    isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
+  }
+
+  return isAlternate;
+}
+
 static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
   TargetTransformInfo::OperandValueKind OpInfo =
     TargetTransformInfo::OK_AnyValue;
@@ -466,9 +491,15 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
     unsigned NumVecElems = VecTypOp0->getVectorNumElements();
     SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
 
-    if (NumVecElems == Mask.size() && isReverseVectorMask(Mask))
-      return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0,
-                                 nullptr);
+    if (NumVecElems == Mask.size()) {
+      if (isReverseVectorMask(Mask))
+        return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
+                                   0, nullptr);
+      if (isAlternateVectorMask(Mask))
+        return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
+                                   VecTypOp0, 0, nullptr);
+    }
+
     return -1;
   }
   case Instruction::Call:
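Aside: a standalone sketch of the alternating shuffle masks the new CostModel check accepts. For 4-wide vectors these are <0,5,2,7> (even lanes from the first operand, odd from the second) and <4,1,6,3> (the converse), with negative entries treated as undef. This is a simplified re-implementation, not the LLVM code:

  #include <cassert>
  #include <vector>

  // Element i must select lane i from one vector or the other (mask value i
  // or Size + i), with the source strictly alternating by position.
  static bool isAlternate(const std::vector<int> &Mask) {
    unsigned Size = Mask.size();
    auto matches = [&](bool OddLanesFromB) {
      for (unsigned i = 0; i < Size; ++i) {
        if (Mask[i] < 0)
          continue; // negative entries are undef and match anything
        bool FromB = OddLanesFromB ? (i & 1) != 0 : (i & 1) == 0;
        if (Mask[i] != (FromB ? int(Size + i) : int(i)))
          return false;
      }
      return true;
    };
    return matches(true) || matches(false);
  }

  int main() {
    assert(isAlternate({0, 5, 2, 7}));  // A0 B1 A2 B3
    assert(isAlternate({4, 1, 6, 3}));  // B0 A1 B2 A3
    assert(isAlternate({0, -1, 2, 7})); // undef lane is permissive
    assert(!isAlternate({0, 1, 2, 3})); // identity is not alternating
    return 0;
  }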
diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp
index bfab744..c27edbf 100644
--- a/lib/Analysis/IPA/CallGraphSCCPass.cpp
+++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp
@@ -602,8 +602,12 @@ namespace {
     bool runOnSCC(CallGraphSCC &SCC) override {
       Out << Banner;
-      for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I)
-        (*I)->getFunction()->print(Out);
+      for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
+        if ((*I)->getFunction())
+          (*I)->getFunction()->print(Out);
+        else
+          Out << "\nPrinting <null> Function\n";
+      }
       return false;
     }
   };
diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp
index 66f3f8e..8807529 100644
--- a/lib/Analysis/IPA/InlineCost.cpp
+++ b/lib/Analysis/IPA/InlineCost.cpp
@@ -841,10 +841,7 @@ bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) {
   // original function which is extremely undefined behavior.
   // FIXME: This logic isn't really right; we can safely inline functions with
   // indirectbr's as long as no other function or global references the
-  // blockaddress of a block within the current function.  And as a QOI issue,
-  // if someone is using a blockaddress without an indirectbr, and that
-  // reference somehow ends up in another function or global, we probably don't
-  // want to inline this function.
+  // blockaddress of a block within the current function.
   HasIndirectBr = true;
   return false;
 }
@@ -1121,6 +1118,15 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
     if (BB->empty())
       continue;
 
+    // Disallow inlining a blockaddress. A blockaddress only has defined
+    // behavior for an indirect branch in the same function, and we do not
+    // currently support inlining indirect branches. But, the inliner may not
+    // see an indirect branch that ends up being dead code at a particular call
+    // site. If the blockaddress escapes the function, e.g., via a global
+    // variable, inlining may lead to an invalid cross-function reference.
+    if (BB->hasAddressTaken())
+      return false;
+
     // Analyze the cost of this block. If we blow through the threshold, this
     // returns false, and we can bail on out.
     if (!analyzeBlock(BB)) {
@@ -1303,8 +1309,9 @@ bool InlineCostAnalysis::isInlineViable(Function &F) {
       F.getAttributes().hasAttribute(AttributeSet::FunctionIndex,
                                      Attribute::ReturnsTwice);
   for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
-    // Disallow inlining of functions which contain an indirect branch.
-    if (isa<IndirectBrInst>(BI->getTerminator()))
+    // Disallow inlining of functions which contain indirect branches or
+    // blockaddresses.
+    if (isa<IndirectBrInst>(BI->getTerminator()) || BI->hasAddressTaken())
       return false;
 
     for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index c819bd3..24655aa 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -287,7 +287,10 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
       OS << ")";
     }
     OS << " in ";
-    UI->getUser()->print(OS);
+    if (UI->getUser())
+      UI->getUser()->print(OS);
+    else
+      OS << "Printing <null> User";
     OS << '\n';
   }
 }
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index 3684fda..bd42af1 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -39,7 +39,6 @@ using namespace llvm::PatternMatch;
 enum { RecursionLimit = 3 };
 
 STATISTIC(NumExpand,  "Number of expansions");
-STATISTIC(NumFactor , "Number of factorizations");
 STATISTIC(NumReassoc, "Number of reassociations");
 
 struct Query {
@@ -183,78 +182,6 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
   return nullptr;
 }
 
-/// FactorizeBinOp - Simplify "LHS Opcode RHS" by factorizing out a common term
-/// using the operation OpCodeToExtract.  For example, when Opcode is Add and
-/// OpCodeToExtract is Mul then this tries to turn "(A*B)+(A*C)" into "A*(B+C)".
-/// Returns the simplified value, or null if no simplification was performed.
-static Value *FactorizeBinOp(unsigned Opcode, Value *LHS, Value *RHS,
-                             unsigned OpcToExtract, const Query &Q,
-                             unsigned MaxRecurse) {
-  Instruction::BinaryOps OpcodeToExtract = (Instruction::BinaryOps)OpcToExtract;
-  // Recursion is always used, so bail out at once if we already hit the limit.
-  if (!MaxRecurse--)
-    return nullptr;
-
-  BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS);
-  BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS);
-
-  if (!Op0 || Op0->getOpcode() != OpcodeToExtract ||
-      !Op1 || Op1->getOpcode() != OpcodeToExtract)
-    return nullptr;
-
-  // The expression has the form "(A op' B) op (C op' D)".
-  Value *A = Op0->getOperand(0), *B = Op0->getOperand(1);
-  Value *C = Op1->getOperand(0), *D = Op1->getOperand(1);
-
-  // Use left distributivity, i.e. "X op' (Y op Z) = (X op' Y) op (X op' Z)".
-  // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
-  // commutative case, "(A op' B) op (C op' A)"?
-  if (A == C || (Instruction::isCommutative(OpcodeToExtract) && A == D)) {
-    Value *DD = A == C ? D : C;
-    // Form "A op' (B op DD)" if it simplifies completely.
-    // Does "B op DD" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, B, DD, Q, MaxRecurse)) {
-      // It does!  Return "A op' V" if it simplifies or is already available.
-      // If V equals B then "A op' V" is just the LHS.  If V equals DD then
-      // "A op' V" is just the RHS.
-      if (V == B || V == DD) {
-        ++NumFactor;
-        return V == B ? LHS : RHS;
-      }
-      // Otherwise return "A op' V" if it simplifies.
-      if (Value *W = SimplifyBinOp(OpcodeToExtract, A, V, Q, MaxRecurse)) {
-        ++NumFactor;
-        return W;
-      }
-    }
-  }
-
-  // Use right distributivity, i.e. "(X op Y) op' Z = (X op' Z) op (Y op' Z)".
-  // Does the instruction have the form "(A op' B) op (C op' B)" or, in the
-  // commutative case, "(A op' B) op (B op' D)"?
-  if (B == D || (Instruction::isCommutative(OpcodeToExtract) && B == C)) {
-    Value *CC = B == D ? C : D;
-    // Form "(A op CC) op' B" if it simplifies completely..
-    // Does "A op CC" simplify?
-    if (Value *V = SimplifyBinOp(Opcode, A, CC, Q, MaxRecurse)) {
-      // It does!  Return "V op' B" if it simplifies or is already available.
-      // If V equals A then "V op' B" is just the LHS.  If V equals CC then
-      // "V op' B" is just the RHS.
-      if (V == A || V == CC) {
-        ++NumFactor;
-        return V == A ? LHS : RHS;
-      }
-      // Otherwise return "V op' B" if it simplifies.
-      if (Value *W = SimplifyBinOp(OpcodeToExtract, V, B, Q, MaxRecurse)) {
-        ++NumFactor;
-        return W;
-      }
-    }
-  }
-
-  return nullptr;
-}
-
 /// SimplifyAssociativeBinOp - Generic simplifications for associative binary
 /// operations.  Returns the simpler value, or null if none was found.
 static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
@@ -634,11 +561,6 @@ static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
                               MaxRecurse))
     return V;
 
-  // Mul distributes over Add.  Try some generic simplifications based on this.
-  if (Value *V = FactorizeBinOp(Instruction::Add, Op0, Op1, Instruction::Mul,
-                                Q, MaxRecurse))
-    return V;
-
   // Threading Add over selects and phi nodes is pointless, so don't bother.
   // Threading over the select in "A + select(cond, B, C)" means evaluating
   // "A+B" and "A+C" and seeing if they are equal; but they are equal if and
@@ -754,16 +676,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   if (Op0 == Op1)
     return Constant::getNullValue(Op0->getType());
 
-  // (X*2) - X -> X
-  // (X<<1) - X -> X
-  Value *X = nullptr;
-  if (match(Op0, m_Mul(m_Specific(Op1), m_ConstantInt<2>())) ||
-      match(Op0, m_Shl(m_Specific(Op1), m_One())))
-    return Op1;
-
   // (X + Y) - Z -> X + (Y - Z) or Y + (X - Z) if everything simplifies.
   // For example, (X + Y) - Y -> X; (Y + X) - Y -> X
-  Value *Y = nullptr, *Z = Op1;
+  Value *X = nullptr, *Y = nullptr, *Z = Op1;
   if (MaxRecurse && match(Op0, m_Add(m_Value(X), m_Value(Y)))) { // (X + Y) - Z
     // See if "V === Y - Z" simplifies.
     if (Value *V = SimplifyBinOp(Instruction::Sub, Y, Z, Q, MaxRecurse-1))
@@ -835,11 +750,6 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
   if (Constant *Result = computePointerDifference(Q.DL, X, Y))
     return ConstantExpr::getIntegerCast(Result, Op0->getType(), true);
 
-  // Mul distributes over Sub.  Try some generic simplifications based on this.
-  if (Value *V = FactorizeBinOp(Instruction::Sub, Op0, Op1, Instruction::Mul,
-                                Q, MaxRecurse))
-    return V;
-
   // i1 sub -> xor.
   if (MaxRecurse && Op0->getType()->isIntegerTy(1))
     if (Value *V = SimplifyXorInst(Op0, Op1, Q, MaxRecurse-1))
@@ -1518,11 +1428,6 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
                                           Q, MaxRecurse))
     return V;
 
-  // Or distributes over And.  Try some generic simplifications based on this.
-  if (Value *V = FactorizeBinOp(Instruction::And, Op0, Op1, Instruction::Or,
-                                Q, MaxRecurse))
-    return V;
-
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
@@ -1613,11 +1518,6 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
                              MaxRecurse))
     return V;
 
-  // And distributes over Or.  Try some generic simplifications based on this.
-  if (Value *V = FactorizeBinOp(Instruction::Or, Op0, Op1, Instruction::And,
-                                Q, MaxRecurse))
-    return V;
-
   // If the operation is with the result of a select instruction, check whether
   // operating on either branch of the select always yields the same value.
   if (isa<SelectInst>(Op0) || isa<SelectInst>(Op1))
@@ -1625,6 +1525,38 @@ static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
                                          MaxRecurse))
       return V;
 
+  // (A & C)|(B & D)
+  Value *C = nullptr, *D = nullptr;
+  if (match(Op0, m_And(m_Value(A), m_Value(C))) &&
+      match(Op1, m_And(m_Value(B), m_Value(D)))) {
+    ConstantInt *C1 = dyn_cast<ConstantInt>(C);
+    ConstantInt *C2 = dyn_cast<ConstantInt>(D);
+    if (C1 && C2 && (C1->getValue() == ~C2->getValue())) {
+      // (A & C1)|(B & C2)
+      // If we have: ((V + N) & C1) | (V & C2)
+      // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
+      // replace with V+N.
+      Value *V1, *V2;
+      if ((C2->getValue() & (C2->getValue() + 1)) == 0 && // C2 == 0+1+
+          match(A, m_Add(m_Value(V1), m_Value(V2)))) {
+        // Add commutes, try both ways.
+        if (V1 == B && MaskedValueIsZero(V2, C2->getValue()))
+          return A;
+        if (V2 == B && MaskedValueIsZero(V1, C2->getValue()))
+          return A;
+      }
+      // Or commutes, try both ways.
+      if ((C1->getValue() & (C1->getValue() + 1)) == 0 &&
+          match(B, m_Add(m_Value(V1), m_Value(V2)))) {
+        // Add commutes, try both ways.
+        if (V1 == A && MaskedValueIsZero(V2, C1->getValue()))
+          return B;
+        if (V2 == A && MaskedValueIsZero(V1, C1->getValue()))
+          return B;
+      }
+    }
+  }
+
   // If the operation is with the result of a phi instruction, check whether
   // operating on all incoming values of the phi always yields the same value.
   if (isa<PHINode>(Op0) || isa<PHINode>(Op1))
@@ -1677,11 +1609,6 @@ static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
                                           MaxRecurse))
     return V;
 
-  // And distributes over Xor.  Try some generic simplifications based on this.
-  if (Value *V = FactorizeBinOp(Instruction::Xor, Op0, Op1, Instruction::And,
-                                Q, MaxRecurse))
-    return V;
-
   // Threading Xor over selects and phi nodes is pointless, so don't bother.
   // Threading over the select in "A ^ select(cond, B, C)" means evaluating
   // "A^B" and "A^C" and seeing if they are equal; but they are equal if and
@@ -2021,9 +1948,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
       if (!CI2->isZero())
         Upper = NegOne.udiv(CI2->getValue()) + 1;
     } else if (match(LHS, m_SDiv(m_ConstantInt(CI2), m_Value()))) {
-      // 'sdiv CI2, x' produces [-|CI2|, |CI2|].
-      Upper = CI2->getValue().abs() + 1;
-      Lower = (-Upper) + 1;
+      if (CI2->isMinSignedValue()) {
+        // 'sdiv INT_MIN, x' produces [INT_MIN, INT_MIN / -2].
+        Lower = CI2->getValue();
+        Upper = Lower.lshr(1) + 1;
+      } else {
+        // 'sdiv CI2, x' produces [-|CI2|, |CI2|].
+        Upper = CI2->getValue().abs() + 1;
+        Lower = (-Upper) + 1;
+      }
     } else if (match(LHS, m_SDiv(m_Value(), m_ConstantInt(CI2)))) {
       // 'sdiv x, CI2' produces [INT_MIN / CI2, INT_MAX / CI2].
       APInt IntMin = APInt::getSignedMinValue(Width);
@@ -2241,6 +2174,25 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
     }
   }
 
+  // If a bit is known to be zero for A and known to be one for B,
+  // then A and B cannot be equal.
+  if (ICmpInst::isEquality(Pred)) {
+    if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+      uint32_t BitWidth = CI->getBitWidth();
+      APInt LHSKnownZero(BitWidth, 0);
+      APInt LHSKnownOne(BitWidth, 0);
+      computeKnownBits(LHS, LHSKnownZero, LHSKnownOne);
+      APInt RHSKnownZero(BitWidth, 0);
+      APInt RHSKnownOne(BitWidth, 0);
+      computeKnownBits(RHS, RHSKnownZero, RHSKnownOne);
+      if (((LHSKnownOne & RHSKnownZero) != 0) ||
+          ((LHSKnownZero & RHSKnownOne) != 0))
+        return (Pred == ICmpInst::ICMP_EQ)
+                   ? ConstantInt::getFalse(CI->getContext())
+                   : ConstantInt::getTrue(CI->getContext());
+    }
+  }
+
   // Special logic for binary operators.
   BinaryOperator *LBO = dyn_cast<BinaryOperator>(LHS);
   BinaryOperator *RBO = dyn_cast<BinaryOperator>(RHS);
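Aside: the new (A & C1)|(B & C2) fold in SimplifyOrInst can be sanity-checked exhaustively at 8 bits. The constants below are hypothetical; the fold requires C2 to be a low-bit run mask (0+1+), C1 == ~C2, and N to have no bits inside C2:

  #include <cassert>
  #include <cstdint>

  int main() {
    // C2 = 0x0F is of the form 0+1+ and C1 = ~C2 = 0xF0.
    // For any V and any N that is a multiple of 16 (so N & C2 == 0):
    //   ((V + N) & C1) | (V & C2) == V + N
    // because the low bits of V + N equal the low bits of V.
    const uint8_t C2 = 0x0F, C1 = 0xF0;
    for (unsigned v = 0; v < 256; ++v)
      for (unsigned n = 0; n < 256; n += 16) {
        uint8_t V = uint8_t(v), N = uint8_t(n);
        uint8_t A = uint8_t(V + N); // the "(V + N)" operand
        assert(uint8_t((A & C1) | (V & C2)) == A);
      }
    return 0;
  }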
diff --git a/lib/Analysis/JumpInstrTableInfo.cpp b/lib/Analysis/JumpInstrTableInfo.cpp
new file mode 100644
index 0000000..b5b4265
--- /dev/null
+++ b/lib/Analysis/JumpInstrTableInfo.cpp
@@ -0,0 +1,40 @@
+//===-- JumpInstrTableInfo.cpp: Info for Jump-Instruction Tables ----------===//
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief Information about jump-instruction tables that have been created by
+/// JumpInstrTables pass.
+///
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "jiti"
+
+#include "llvm/Analysis/JumpInstrTableInfo.h"
+#include "llvm/Analysis/Passes.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Type.h"
+
+using namespace llvm;
+
+INITIALIZE_PASS(JumpInstrTableInfo, "jump-instr-table-info",
+                "Jump-Instruction Table Info", true, true)
+char JumpInstrTableInfo::ID = 0;
+
+ImmutablePass *llvm::createJumpInstrTableInfoPass() {
+  return new JumpInstrTableInfo();
+}
+
+JumpInstrTableInfo::JumpInstrTableInfo() : ImmutablePass(ID), Tables() {
+  initializeJumpInstrTableInfoPass(*PassRegistry::getPassRegistry());
+}
+
+JumpInstrTableInfo::~JumpInstrTableInfo() {}
+
+void JumpInstrTableInfo::insertEntry(FunctionType *TableFunTy, Function *Target,
+                                     Function *Jump) {
+  Tables[TableFunTy].push_back(JumpPair(Target, Jump));
+}
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index 8df18e7..7bd866e 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -45,7 +45,10 @@ public:
     for (Loop::block_iterator b = L->block_begin(), be = L->block_end();
          b != be;
          ++b) {
-      (*b)->print(Out);
+      if (*b)
+        (*b)->print(Out);
+      else
+        Out << "Printing <null> block";
     }
     return false;
   }
diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp
index 4e11e50..139fa38 100644
--- a/lib/Analysis/NoAliasAnalysis.cpp
+++ b/lib/Analysis/NoAliasAnalysis.cpp
@@ -15,6 +15,7 @@
 #include "llvm/Analysis/Passes.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/IR/DataLayout.h"
+#include "llvm/IR/LLVMContext.h"
 #include "llvm/Pass.h"
 using namespace llvm;
 
@@ -53,6 +54,13 @@ namespace {
     bool pointsToConstantMemory(const Location &Loc, bool OrLocal) override {
       return false;
     }
+    Location getArgLocation(ImmutableCallSite CS, unsigned ArgIdx,
+                            ModRefResult &Mask) override {
+      Mask = ModRef;
+      return Location(CS.getArgument(ArgIdx), UnknownSize,
+                      CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa));
+    }
+
     ModRefResult getModRefInfo(ImmutableCallSite CS,
                                const Location &Loc) override {
       return ModRef;
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 3c7798f..71de144 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -195,8 +195,12 @@ public:
 
   bool runOnRegion(Region *R, RGPassManager &RGM) override {
     Out << Banner;
-    for (const auto &BB : R->blocks())
-      BB->print(Out);
+    for (const auto &BB : R->blocks()) {
+      if (BB)
+        BB->print(Out);
+      else
+        Out << "Printing <null> Block";
+    }
 
     return false;
   }
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 42a7aa2..06dbde5 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -7216,6 +7216,15 @@ public:
         cast<SCEVConstant>(Zero)->getValue();
     Remainder = SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
 
+    if (Remainder->isZero()) {
+      // The Quotient is obtained by replacing Denominator by 1 in Numerator.
+      RewriteMap[cast<SCEVUnknown>(Denominator)->getValue()] =
+          cast<SCEVConstant>(One)->getValue();
+      Quotient =
+          SCEVParameterRewriter::rewrite(Numerator, SE, RewriteMap, true);
+      return;
+    }
+
     // Quotient is (Numerator - Remainder) divided by Denominator.
     const SCEV *Q, *R;
     const SCEV *Diff = SE.getMinusSCEV(Numerator, Remainder);
@@ -7356,7 +7365,7 @@ const SCEV *ScalarEvolution::getElementSize(Instruction *Inst) {
   if (StoreInst *Store = dyn_cast<StoreInst>(Inst))
     Ty = Store->getValueOperand()->getType();
   else if (LoadInst *Load = dyn_cast<LoadInst>(Inst))
-    Ty = Load->getPointerOperand()->getType();
+    Ty = Load->getType();
   else
     return nullptr;
 
@@ -7370,7 +7379,7 @@ void ScalarEvolution::findArrayDimensions(SmallVectorImpl<const SCEV *> &Terms,
                                           SmallVectorImpl<const SCEV *> &Sizes,
                                           const SCEV *ElementSize) const {
 
-  if (Terms.size() < 1)
+  if (Terms.size() < 1 || !ElementSize)
     return;
 
   // Early return when Terms do not contain parameters: we do not delinearize
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index b507043..8c75b0d 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/DataLayout.h"
@@ -1706,7 +1707,7 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
 
     // Fold constant phis. They may be congruent to other constant phis and
     // would confuse the logic below that expects proper IVs.
-    if (Value *V = Phi->hasConstantValue()) {
+    if (Value *V = SimplifyInstruction(Phi, SE.DL, SE.TLI, SE.DT)) {
       Phi->replaceAllUsesWith(V);
       DeadInsts.push_back(Phi);
       ++NumElim;
diff --git a/lib/Analysis/ScalarEvolutionNormalization.cpp b/lib/Analysis/ScalarEvolutionNormalization.cpp
index e9db295..3ccefb0 100644
--- a/lib/Analysis/ScalarEvolutionNormalization.cpp
+++ b/lib/Analysis/ScalarEvolutionNormalization.cpp
@@ -241,7 +241,7 @@ TransformSubExpr(const SCEV *S, Instruction *User, Value *OperandValToReplace) {
 }
 
 /// Top level driver for transforming an expression DAG into its requested
-/// post-inc form (either "Normalized" or "Denormalized".
+/// post-inc form (either "Normalized" or "Denormalized").
 const SCEV *llvm::TransformForPostIncUse(TransformKind Kind,
                                          const SCEV *S,
                                          Instruction *User,
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 4f48753..5264745 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -188,7 +188,8 @@ static void computeKnownBitsMul(Value *Op0, Value *Op1, bool NSW,
     KnownOne.setBit(BitWidth - 1);
 }
 
-void llvm::computeKnownBitsLoad(const MDNode &Ranges, APInt &KnownZero) {
+void llvm::computeKnownBitsFromRangeMetadata(const MDNode &Ranges,
+                                             APInt &KnownZero) {
   unsigned BitWidth = KnownZero.getBitWidth();
   unsigned NumRanges = Ranges.getNumOperands() / 2;
   assert(NumRanges >= 1);
@@ -338,7 +339,7 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
   default: break;
   case Instruction::Load:
     if (MDNode *MD = cast<LoadInst>(I)->getMetadata(LLVMContext::MD_range))
-      computeKnownBitsLoad(*MD, KnownZero);
+      computeKnownBitsFromRangeMetadata(*MD, KnownZero);
     break;
   case Instruction::And: {
     // If either the LHS or the RHS are Zero, the result is zero.
@@ -733,6 +734,12 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
     break;
   }
   case Instruction::Call:
+  case Instruction::Invoke:
+    if (MDNode *MD = cast<Instruction>(I)->getMetadata(LLVMContext::MD_range))
+      computeKnownBitsFromRangeMetadata(*MD, KnownZero);
+    // If a range metadata is attached to this IntrinsicInst, intersect the
+    // explicit range specified by the metadata and the implicit range of
+    // the intrinsic.
    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      switch (II->getIntrinsicID()) {
      default: break;
@@ -742,16 +749,16 @@ void llvm::computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
         // If this call is undefined for 0, the result will be less than 2^n.
         if (II->getArgOperand(1) == ConstantInt::getTrue(II->getContext()))
           LowBits -= 1;
-        KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+        KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
         break;
       }
       case Intrinsic::ctpop: {
         unsigned LowBits = Log2_32(BitWidth)+1;
-        KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
+        KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
         break;
       }
       case Intrinsic::x86_sse42_crc32_64_64:
-        KnownZero = APInt::getHighBitsSet(64, 32);
+        KnownZero |= APInt::getHighBitsSet(64, 32);
         break;
       }
     }
@@ -1977,7 +1984,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
     return true;
   case Instruction::UDiv:
   case Instruction::URem:
-    // x / y is undefined if y == 0, but calcuations like x / 3 are safe.
+    // x / y is undefined if y == 0, but calculations like x / 3 are safe.
    return isKnownNonZero(Inst->getOperand(1), TD);
   case Instruction::SDiv:
   case Instruction::SRem: {
@@ -2000,12 +2007,12 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V,
         // Speculative load may create a race that did not exist in the source.
         LI->getParent()->getParent()->hasFnAttribute(Attribute::SanitizeThread))
       return false;
-    return LI->getPointerOperand()->isDereferenceablePointer();
+    return LI->getPointerOperand()->isDereferenceablePointer(TD);
   }
   case Instruction::Call: {
     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
       switch (II->getIntrinsicID()) {
-      // These synthetic intrinsics have no side-effects, and just mark
+      // These synthetic intrinsics have no side-effects and just mark
       // information about their operands.
       // FIXME: There are other no-op synthetic instructions that potentially
       // should be considered at least *safe* to speculate...
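Aside: the '=' to '|=' change in the ValueTracking hunk matters once Call/Invoke can also carry !range metadata: known-zero bits from the metadata and from the intrinsic's implicit range must accumulate rather than overwrite each other. A bitmask sketch with hypothetical widths (illustrative only, not LLVM code):

  #include <cassert>
  #include <cstdint>

  int main() {
    // Known-zero bits derived from !range metadata on a call returning i32,
    // say range [0, 256): the top 24 bits are known zero.
    uint32_t KnownZeroFromRange = 0xFFFFFF00u;
    // ctpop of an i32 fits in 6 bits, so bits 6..31 are known zero.
    uint32_t KnownZeroFromCtpop = 0xFFFFFFC0u;

    // '=' would discard the range-derived bits; '|=' keeps both sources.
    uint32_t KnownZero = KnownZeroFromRange;
    KnownZero |= KnownZeroFromCtpop;
    assert(KnownZero == (KnownZeroFromRange | KnownZeroFromCtpop));
    return 0;
  }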
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 44a3412..1e5bcdd 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -209,6 +209,7 @@ lltok::Kind LLLexer::LexToken() {
     return LexToken();
   case '+': return LexPositive();
   case '@': return LexAt();
+  case '$': return LexDollar();
   case '%': return LexPercent();
   case '"': return LexQuote();
   case '.':
@@ -222,13 +223,6 @@ lltok::Kind LLLexer::LexToken() {
       return lltok::dotdotdot;
     }
     return lltok::Error;
-  case '$':
-    if (const char *Ptr = isLabelTail(CurPtr)) {
-      CurPtr = Ptr;
-      StrVal.assign(TokStart, CurPtr-1);
-      return lltok::LabelStr;
-    }
-    return lltok::Error;
   case ';':
     SkipLineComment();
     return LexToken();
@@ -307,6 +301,43 @@ lltok::Kind LLLexer::LexAt() {
   return lltok::Error;
 }
 
+lltok::Kind LLLexer::LexDollar() {
+  if (const char *Ptr = isLabelTail(TokStart)) {
+    CurPtr = Ptr;
+    StrVal.assign(TokStart, CurPtr - 1);
+    return lltok::LabelStr;
+  }
+
+  // Handle DollarStringConstant: $\"[^\"]*\"
+  if (CurPtr[0] == '"') {
+    ++CurPtr;
+
+    while (1) {
+      int CurChar = getNextChar();
+
+      if (CurChar == EOF) {
+        Error("end of file in COMDAT variable name");
+        return lltok::Error;
+      }
+      if (CurChar == '"') {
+        StrVal.assign(TokStart + 2, CurPtr - 1);
+        UnEscapeLexed(StrVal);
+        if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
+          Error("Null bytes are not allowed in names");
+          return lltok::Error;
+        }
+        return lltok::ComdatVar;
+      }
+    }
+  }
+
+  // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
+  if (ReadVarName())
+    return lltok::ComdatVar;
+
+  return lltok::Error;
+}
+
 /// ReadString - Read a string until the closing quote.
 lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
   const char *Start = CurPtr;
@@ -490,7 +521,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(available_externally);
   KEYWORD(linkonce);
   KEYWORD(linkonce_odr);
-  KEYWORD(weak);
+  KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
   KEYWORD(weak_odr);
   KEYWORD(appending);
   KEYWORD(dllimport);
@@ -583,6 +614,7 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(cold);
   KEYWORD(inlinehint);
   KEYWORD(inreg);
+  KEYWORD(jumptable);
   KEYWORD(minsize);
   KEYWORD(naked);
   KEYWORD(nest);
@@ -617,6 +649,15 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(type);
   KEYWORD(opaque);
 
+  KEYWORD(comdat);
+
+  // Comdat types
+  KEYWORD(any);
+  KEYWORD(exactmatch);
+  KEYWORD(largest);
+  KEYWORD(noduplicates);
+  KEYWORD(samesize);
+
   KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
   KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
   KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
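Aside: a simplified classifier showing the three outcomes of the new LexDollar (a label, a quoted COMDAT name, or a bare COMDAT name). This is an illustrative approximation of the decision structure, not the lexer's actual logic:

  #include <cassert>
  #include <cctype>
  #include <cstring>
  #include <string>

  enum Tok { LabelStr, ComdatVarQuoted, ComdatVar, Error };

  // Hypothetical, simplified classifier for the text following a '$'.
  static Tok classifyDollar(const std::string &Rest) {
    if (!Rest.empty() && Rest.back() == ':')  // "$foo:" is a label
      return LabelStr;
    if (!Rest.empty() && Rest.front() == '"') // quoted name, odd chars allowed
      return Rest.size() >= 2 && Rest.back() == '"' ? ComdatVarQuoted : Error;
    for (char c : Rest)                       // "$foo" is a bare comdat name
      if (!(std::isalnum((unsigned char)c) || std::strchr("-$._", c)))
        return Error;
    return Rest.empty() ? Error : ComdatVar;
  }

  int main() {
    assert(classifyDollar("entry:") == LabelStr);
    assert(classifyDollar("foo") == ComdatVar);
    assert(classifyDollar("\"foo bar\"") == ComdatVarQuoted);
    return 0;
  }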
KEYWORD(weak_odr); KEYWORD(appending); KEYWORD(dllimport); @@ -583,6 +614,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(cold); KEYWORD(inlinehint); KEYWORD(inreg); + KEYWORD(jumptable); KEYWORD(minsize); KEYWORD(naked); KEYWORD(nest); @@ -617,6 +649,15 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(type); KEYWORD(opaque); + KEYWORD(comdat); + + // Comdat types + KEYWORD(any); + KEYWORD(exactmatch); + KEYWORD(largest); + KEYWORD(noduplicates); + KEYWORD(samesize); + KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle); KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge); KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole); diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h index ad11d49..d42de57 100644 --- a/lib/AsmParser/LLLexer.h +++ b/lib/AsmParser/LLLexer.h @@ -81,6 +81,7 @@ namespace llvm { lltok::Kind LexDigitOrNegative(); lltok::Kind LexPositive(); lltok::Kind LexAt(); + lltok::Kind LexDollar(); lltok::Kind LexExclaim(); lltok::Kind LexPercent(); lltok::Kind LexQuote(); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 3282e8a..be55ac6 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -163,6 +163,11 @@ bool LLParser::ValidateEndOfModule() { return Error(I->second.second, "use of undefined type named '" + I->getKey() + "'"); + if (!ForwardRefComdats.empty()) + return Error(ForwardRefComdats.begin()->second, + "use of undefined comdat '$" + + ForwardRefComdats.begin()->first + "'"); + if (!ForwardRefVals.empty()) return Error(ForwardRefVals.begin()->second.second, "use of undefined value '@" + ForwardRefVals.begin()->first + @@ -238,6 +243,7 @@ bool LLParser::ParseTopLevelEntities() { case lltok::LocalVar: if (ParseNamedType()) return true; break; case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break; case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break; + case lltok::ComdatVar: if (parseComdat()) return true; break; case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break; case lltok::MetadataVar:if (ParseNamedMetadata()) return true; break; @@ -257,33 +263,31 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_appending: // OptionalLinkage case lltok::kw_common: // OptionalLinkage case lltok::kw_extern_weak: // OptionalLinkage - case lltok::kw_external: { // OptionalLinkage + case lltok::kw_external: // OptionalLinkage + case lltok::kw_default: // OptionalVisibility + case lltok::kw_hidden: // OptionalVisibility + case lltok::kw_protected: // OptionalVisibility + case lltok::kw_dllimport: // OptionalDLLStorageClass + case lltok::kw_dllexport: // OptionalDLLStorageClass + case lltok::kw_thread_local: // OptionalThreadLocal + case lltok::kw_addrspace: // OptionalAddrSpace + case lltok::kw_constant: // GlobalType + case lltok::kw_global: { // GlobalType unsigned Linkage, Visibility, DLLStorageClass; - if (ParseOptionalLinkage(Linkage) || + bool UnnamedAddr; + GlobalVariable::ThreadLocalMode TLM; + bool HasLinkage; + if (ParseOptionalLinkage(Linkage, HasLinkage) || ParseOptionalVisibility(Visibility) || ParseOptionalDLLStorageClass(DLLStorageClass) || - ParseGlobal("", SMLoc(), Linkage, true, Visibility, DLLStorageClass)) - return true; - break; - } - case lltok::kw_default: // OptionalVisibility - case lltok::kw_hidden: // OptionalVisibility - case lltok::kw_protected: { // OptionalVisibility - unsigned Visibility, DLLStorageClass; - if (ParseOptionalVisibility(Visibility) || - ParseOptionalDLLStorageClass(DLLStorageClass) 
|| - ParseGlobal("", SMLoc(), 0, false, Visibility, DLLStorageClass)) + ParseOptionalThreadLocal(TLM) || + parseOptionalUnnamedAddr(UnnamedAddr) || + ParseGlobal("", SMLoc(), Linkage, HasLinkage, Visibility, + DLLStorageClass, TLM, UnnamedAddr)) return true; break; } - case lltok::kw_thread_local: // OptionalThreadLocal - case lltok::kw_addrspace: // OptionalAddrSpace - case lltok::kw_constant: // GlobalType - case lltok::kw_global: // GlobalType - if (ParseGlobal("", SMLoc(), 0, false, 0, 0)) return true; - break; - case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break; } } @@ -470,15 +474,20 @@ bool LLParser::ParseUnnamedGlobal() { bool HasLinkage; unsigned Linkage, Visibility, DLLStorageClass; + GlobalVariable::ThreadLocalMode TLM; + bool UnnamedAddr; if (ParseOptionalLinkage(Linkage, HasLinkage) || ParseOptionalVisibility(Visibility) || - ParseOptionalDLLStorageClass(DLLStorageClass)) + ParseOptionalDLLStorageClass(DLLStorageClass) || + ParseOptionalThreadLocal(TLM) || + parseOptionalUnnamedAddr(UnnamedAddr)) return true; if (HasLinkage || Lex.getKind() != lltok::kw_alias) return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility, - DLLStorageClass); - return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass); + DLLStorageClass, TLM, UnnamedAddr); + return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass, TLM, + UnnamedAddr); } /// ParseNamedGlobal: @@ -493,16 +502,71 @@ bool LLParser::ParseNamedGlobal() { bool HasLinkage; unsigned Linkage, Visibility, DLLStorageClass; + GlobalVariable::ThreadLocalMode TLM; + bool UnnamedAddr; if (ParseToken(lltok::equal, "expected '=' in global variable") || ParseOptionalLinkage(Linkage, HasLinkage) || ParseOptionalVisibility(Visibility) || - ParseOptionalDLLStorageClass(DLLStorageClass)) + ParseOptionalDLLStorageClass(DLLStorageClass) || + ParseOptionalThreadLocal(TLM) || + parseOptionalUnnamedAddr(UnnamedAddr)) return true; if (HasLinkage || Lex.getKind() != lltok::kw_alias) return ParseGlobal(Name, NameLoc, Linkage, HasLinkage, Visibility, - DLLStorageClass); - return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass); + DLLStorageClass, TLM, UnnamedAddr); + return ParseAlias(Name, NameLoc, Visibility, DLLStorageClass, TLM, + UnnamedAddr); +} + +bool LLParser::parseComdat() { + assert(Lex.getKind() == lltok::ComdatVar); + std::string Name = Lex.getStrVal(); + LocTy NameLoc = Lex.getLoc(); + Lex.Lex(); + + if (ParseToken(lltok::equal, "expected '=' here")) + return true; + + if (ParseToken(lltok::kw_comdat, "expected comdat keyword")) + return TokError("expected comdat type"); + + Comdat::SelectionKind SK; + switch (Lex.getKind()) { + default: + return TokError("unknown selection kind"); + case lltok::kw_any: + SK = Comdat::Any; + break; + case lltok::kw_exactmatch: + SK = Comdat::ExactMatch; + break; + case lltok::kw_largest: + SK = Comdat::Largest; + break; + case lltok::kw_noduplicates: + SK = Comdat::NoDuplicates; + break; + case lltok::kw_samesize: + SK = Comdat::SameSize; + break; + } + Lex.Lex(); + + // See if the comdat was forward referenced, if so, use the comdat. 
+ Module::ComdatSymTabType &ComdatSymTab = M->getComdatSymbolTable(); + Module::ComdatSymTabType::iterator I = ComdatSymTab.find(Name); + if (I != ComdatSymTab.end() && !ForwardRefComdats.erase(Name)) + return Error(NameLoc, "redefinition of comdat '$" + Name + "'"); + + Comdat *C; + if (I != ComdatSymTab.end()) + C = &I->second; + else + C = M->getOrInsertComdat(Name); + C->setSelectionKind(SK); + + return false; } // MDString: @@ -510,6 +574,7 @@ bool LLParser::ParseNamedGlobal() { bool LLParser::ParseMDString(MDString *&Result) { std::string Str; if (ParseStringConstant(Str)) return true; + llvm::UpgradeMDStringConstant(Str); Result = MDString::get(Context, Str); return false; } @@ -628,18 +693,19 @@ static bool isValidVisibilityForLinkage(unsigned V, unsigned L) { } /// ParseAlias: -/// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass 'alias' +/// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass +/// OptionalThreadLocal OptionalUnNammedAddr 'alias' /// OptionalLinkage Aliasee -/// ::= GlobalVar '=' OptionalVisibility OptionalDLLStorageClass 'alias' -/// OptionalLinkage OptionalAddrSpace Type, Aliasee /// /// Aliasee /// ::= TypeAndValue /// -/// Everything through DLL storage class has already been parsed. +/// Everything through OptionalUnNammedAddr has already been parsed. /// bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, - unsigned Visibility, unsigned DLLStorageClass) { + unsigned Visibility, unsigned DLLStorageClass, + GlobalVariable::ThreadLocalMode TLM, + bool UnnamedAddr) { assert(Lex.getKind() == lltok::kw_alias); Lex.Lex(); LocTy LinkageLoc = Lex.getLoc(); @@ -656,51 +722,39 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, return Error(LinkageLoc, "symbol with local linkage must have default visibility"); - bool HasAddrSpace = Lex.getKind() == lltok::kw_addrspace; - unsigned AddrSpace; - LocTy AddrSpaceLoc = Lex.getLoc(); - if (ParseOptionalAddrSpace(AddrSpace)) - return true; - - LocTy TyLoc = Lex.getLoc(); - Type *Ty = nullptr; - if (ParseType(Ty)) - return true; - - bool DifferentType = EatIfPresent(lltok::comma); - if (HasAddrSpace && !DifferentType) - return Error(AddrSpaceLoc, "A type is required if addrspace is given"); - - Type *AliaseeType = nullptr; - if (DifferentType) { - if (ParseType(AliaseeType)) + Constant *Aliasee; + LocTy AliaseeLoc = Lex.getLoc(); + if (Lex.getKind() != lltok::kw_bitcast && + Lex.getKind() != lltok::kw_getelementptr && + Lex.getKind() != lltok::kw_addrspacecast && + Lex.getKind() != lltok::kw_inttoptr) { + if (ParseGlobalTypeAndValue(Aliasee)) return true; } else { - AliaseeType = Ty; - auto *PTy = dyn_cast<PointerType>(Ty); - if (!PTy) - return Error(TyLoc, "An alias must have pointer type"); - Ty = PTy->getElementType(); - AddrSpace = PTy->getAddressSpace(); + // The bitcast dest type is not present, it is implied by the dest type. 
+ ValID ID; + if (ParseValID(ID)) + return true; + if (ID.Kind != ValID::t_Constant) + return Error(AliaseeLoc, "invalid aliasee"); + Aliasee = ID.ConstantVal; } - LocTy AliaseeLoc = Lex.getLoc(); - Constant *C; - if (ParseGlobalValue(AliaseeType, C)) - return true; - - auto *Aliasee = dyn_cast<GlobalObject>(C); - if (!Aliasee) - return Error(AliaseeLoc, "Alias must point to function or variable"); - - assert(Aliasee->getType()->isPointerTy()); + Type *AliaseeType = Aliasee->getType(); + auto *PTy = dyn_cast<PointerType>(AliaseeType); + if (!PTy) + return Error(AliaseeLoc, "An alias must have pointer type"); + Type *Ty = PTy->getElementType(); + unsigned AddrSpace = PTy->getAddressSpace(); // Okay, create the alias but do not insert it into the module yet. std::unique_ptr<GlobalAlias> GA( GlobalAlias::create(Ty, AddrSpace, (GlobalValue::LinkageTypes)Linkage, Name, Aliasee, /*Parent*/ nullptr)); + GA->setThreadLocalMode(TLM); GA->setVisibility((GlobalValue::VisibilityTypes)Visibility); GA->setDLLStorageClass((GlobalValue::DLLStorageClassTypes)DLLStorageClass); + GA->setUnnamedAddr(UnnamedAddr); // See if this value already exists in the symbol table. If so, it is either // a redefinition or a definition of a forward reference. @@ -720,11 +774,6 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, // If they agree, just RAUW the old value with the alias and remove the // forward ref info. - for (auto *User : Val->users()) { - if (auto *GA = dyn_cast<GlobalAlias>(User)) - return Error(NameLoc, "Alias is pointed by alias " + GA->getName()); - } - Val->replaceAllUsesWith(GA.get()); Val->eraseFromParent(); ForwardRefVals.erase(I); @@ -742,34 +791,31 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, /// ParseGlobal /// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalDLLStorageClass -/// OptionalThreadLocal OptionalAddrSpace OptionalUnNammedAddr +/// OptionalThreadLocal OptionalUnNammedAddr OptionalAddrSpace /// OptionalExternallyInitialized GlobalType Type Const /// ::= OptionalLinkage OptionalVisibility OptionalDLLStorageClass -/// OptionalThreadLocal OptionalAddrSpace OptionalUnNammedAddr +/// OptionalThreadLocal OptionalUnNammedAddr OptionalAddrSpace /// OptionalExternallyInitialized GlobalType Type Const /// -/// Everything up to and including OptionalDLLStorageClass has been parsed +/// Everything up to and including OptionalUnNammedAddr has been parsed /// already. 
/// bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, unsigned Linkage, bool HasLinkage, - unsigned Visibility, unsigned DLLStorageClass) { + unsigned Visibility, unsigned DLLStorageClass, + GlobalVariable::ThreadLocalMode TLM, + bool UnnamedAddr) { if (!isValidVisibilityForLinkage(Visibility, Linkage)) return Error(NameLoc, "symbol with local linkage must have default visibility"); unsigned AddrSpace; - bool IsConstant, UnnamedAddr, IsExternallyInitialized; - GlobalVariable::ThreadLocalMode TLM; - LocTy UnnamedAddrLoc; + bool IsConstant, IsExternallyInitialized; LocTy IsExternallyInitializedLoc; LocTy TyLoc; Type *Ty = nullptr; - if (ParseOptionalThreadLocal(TLM) || - ParseOptionalAddrSpace(AddrSpace) || - ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, - &UnnamedAddrLoc) || + if (ParseOptionalAddrSpace(AddrSpace) || ParseOptionalToken(lltok::kw_externally_initialized, IsExternallyInitialized, &IsExternallyInitializedLoc) || @@ -848,7 +894,13 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, if (ParseOptionalAlignment(Alignment)) return true; GV->setAlignment(Alignment); } else { - TokError("unknown global variable property!"); + Comdat *C; + if (parseOptionalComdat(C)) + return true; + if (C) + GV->setComdat(C); + else + return TokError("unknown global variable property!"); } } @@ -967,6 +1019,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break; case lltok::kw_cold: B.addAttribute(Attribute::Cold); break; case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; + case lltok::kw_jumptable: B.addAttribute(Attribute::JumpTable); break; case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break; @@ -1106,6 +1159,24 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc) { //===----------------------------------------------------------------------===// +// Comdat Reference/Resolution Routines. +//===----------------------------------------------------------------------===// + +Comdat *LLParser::getComdat(const std::string &Name, LocTy Loc) { + // Look this name up in the comdat symbol table. + Module::ComdatSymTabType &ComdatSymTab = M->getComdatSymbolTable(); + Module::ComdatSymTabType::iterator I = ComdatSymTab.find(Name); + if (I != ComdatSymTab.end()) + return &I->second; + + // Otherwise, create a new forward reference for this value and remember it. + Comdat *C = M->getOrInsertComdat(Name); + ForwardRefComdats[Name] = Loc; + return C; +} + + +//===----------------------------------------------------------------------===// // Helper Routines. 
//===----------------------------------------------------------------------===// @@ -1230,6 +1301,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_alwaysinline: case lltok::kw_builtin: case lltok::kw_inlinehint: + case lltok::kw_jumptable: case lltok::kw_minsize: case lltok::kw_naked: case lltok::kw_nobuiltin: @@ -1291,6 +1363,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_builtin: case lltok::kw_cold: case lltok::kw_inlinehint: + case lltok::kw_jumptable: case lltok::kw_minsize: case lltok::kw_naked: case lltok::kw_nobuiltin: @@ -2797,6 +2870,19 @@ bool LLParser::ParseGlobalTypeAndValue(Constant *&V) { ParseGlobalValue(Ty, V); } +bool LLParser::parseOptionalComdat(Comdat *&C) { + C = nullptr; + if (!EatIfPresent(lltok::kw_comdat)) + return false; + if (Lex.getKind() != lltok::ComdatVar) + return TokError("expected comdat variable"); + LocTy Loc = Lex.getLoc(); + StringRef Name = Lex.getStrVal(); + C = getComdat(Name, Loc); + Lex.Lex(); + return false; +} + /// ParseGlobalValueVector /// ::= /*empty*/ /// ::= TypeAndValue (',' TypeAndValue)* @@ -3097,6 +3183,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { bool UnnamedAddr; LocTy UnnamedAddrLoc; Constant *Prefix = nullptr; + Comdat *C; if (ParseArgumentList(ArgList, isVarArg) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, @@ -3105,6 +3192,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { BuiltinLoc) || (EatIfPresent(lltok::kw_section) && ParseStringConstant(Section)) || + parseOptionalComdat(C) || ParseOptionalAlignment(Alignment) || (EatIfPresent(lltok::kw_gc) && ParseStringConstant(GC)) || @@ -3207,6 +3295,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { Fn->setUnnamedAddr(UnnamedAddr); Fn->setAlignment(Alignment); Fn->setSection(Section); + Fn->setComdat(C); if (!GC.empty()) Fn->setGC(GC.c_str()); Fn->setPrefixData(Prefix); ForwardRefAttrGroups[Fn] = FwdRefAttrGrps; @@ -4011,7 +4100,8 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { else return TokError("expected 'catch' or 'filter' clause type"); - Value *V; LocTy VLoc; + Value *V; + LocTy VLoc; if (ParseTypeAndValue(V, VLoc, PFS)) { delete LP; return true; @@ -4027,7 +4117,7 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { Error(VLoc, "'filter' clause has an invalid type"); } - LP->addClause(V); + LP->addClause(cast<Constant>(V)); } Inst = LP; @@ -4263,8 +4353,8 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS) { } /// ParseCmpXchg -/// ::= 'cmpxchg' 'volatile'? TypeAndValue ',' TypeAndValue ',' TypeAndValue -/// 'singlethread'? AtomicOrdering AtomicOrdering +/// ::= 'cmpxchg' 'weak'? 'volatile'? TypeAndValue ',' TypeAndValue ',' +/// TypeAndValue 'singlethread'? 
AtomicOrdering AtomicOrdering int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { Value *Ptr, *Cmp, *New; LocTy PtrLoc, CmpLoc, NewLoc; bool AteExtraComma = false; @@ -4272,6 +4362,10 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { AtomicOrdering FailureOrdering = NotAtomic; SynchronizationScope Scope = CrossThread; bool isVolatile = false; + bool isWeak = false; + + if (EatIfPresent(lltok::kw_weak)) + isWeak = true; if (EatIfPresent(lltok::kw_volatile)) isVolatile = true; @@ -4304,9 +4398,10 @@ int LLParser::ParseCmpXchg(Instruction *&Inst, PerFunctionState &PFS) { return Error(NewLoc, "cmpxchg operand must be power-of-two byte-sized" " integer"); - AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, - FailureOrdering, Scope); + AtomicCmpXchgInst *CXI = new AtomicCmpXchgInst( + Ptr, Cmp, New, SuccessOrdering, FailureOrdering, Scope); CXI->setVolatile(isVolatile); + CXI->setWeak(isWeak); Inst = CXI; return AteExtraComma ? InstExtraComma : InstNormal; } diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index e2bf462..2efb260 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -34,6 +34,7 @@ namespace llvm { class Instruction; class Constant; class GlobalValue; + class Comdat; class MDString; class MDNode; class StructType; @@ -122,6 +123,9 @@ namespace llvm { std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs; std::vector<GlobalValue*> NumberedVals; + // Comdat forward reference information. + std::map<std::string, LocTy> ForwardRefComdats; + // References to blockaddress. The key is the function ValID, the value is // a list of references to blocks in that function. std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > > @@ -154,6 +158,10 @@ namespace llvm { GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc); GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc); + /// Get a Comdat with the specified name, creating a forward reference + /// record if needed. + Comdat *getComdat(const std::string &N, LocTy Loc); + // Helper Routines. 
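The 'weak' marker threaded through ParseCmpXchg above has a direct C++11 analogue: std::atomic's compare_exchange_weak may fail spuriously even when the comparison would succeed, which is precisely the latitude the new flag grants the backend. A runnable illustration of why weak cmpxchg results are consumed in a retry loop:

    #include <atomic>
    #include <cassert>

    int main() {
      std::atomic<int> A(5);
      int Expected = 5;
      // Like a 'cmpxchg weak', compare_exchange_weak returns a success flag
      // that may be false even on a value match, so callers retry in a
      // loop. On failure, Expected is reloaded with A's current value.
      while (!A.compare_exchange_weak(Expected, 6)) {
      }
      assert(A.load() == 6);
      return 0;
    }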
bool ParseToken(lltok::Kind T, const char *ErrMsg); bool EatIfPresent(lltok::Kind T) { @@ -197,6 +205,9 @@ namespace llvm { bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM); + bool parseOptionalUnnamedAddr(bool &UnnamedAddr) { + return ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr); + } bool ParseOptionalAddrSpace(unsigned &AddrSpace); bool ParseOptionalParamAttrs(AttrBuilder &B); bool ParseOptionalReturnAttrs(AttrBuilder &B); @@ -239,9 +250,12 @@ namespace llvm { bool ParseNamedGlobal(); bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage, bool HasLinkage, unsigned Visibility, - unsigned DLLStorageClass); + unsigned DLLStorageClass, + GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr); bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility, - unsigned DLLStorageClass); + unsigned DLLStorageClass, + GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr); + bool parseComdat(); bool ParseStandaloneMetadata(); bool ParseNamedMetadata(); bool ParseMDString(MDString *&Result); @@ -353,6 +367,7 @@ namespace llvm { bool ParseGlobalValue(Type *Ty, Constant *&V); bool ParseGlobalTypeAndValue(Constant *&V); bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts); + bool parseOptionalComdat(Comdat *&C); bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS); bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS); bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS); diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index b6b7d82..534d824 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -42,7 +42,8 @@ namespace lltok { kw_linker_private, // NOTE: deprecated, for parser compatibility kw_linker_private_weak, // NOTE: deprecated, for parser compatibility kw_linkonce, kw_linkonce_odr, - kw_weak, kw_weak_odr, kw_appending, + kw_weak, // Used as a linkage, and a modifier for "cmpxchg". + kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common, kw_available_externally, kw_default, kw_hidden, kw_protected, kw_unnamed_addr, @@ -107,6 +108,7 @@ namespace lltok { kw_cold, kw_inlinehint, kw_inreg, + kw_jumptable, kw_minsize, kw_naked, kw_nest, @@ -140,6 +142,15 @@ namespace lltok { kw_type, kw_opaque, + kw_comdat, + + // Comdat types + kw_any, + kw_exactmatch, + kw_largest, + kw_noduplicates, + kw_samesize, + kw_eq, kw_ne, kw_slt, kw_sgt, kw_sle, kw_sge, kw_ult, kw_ugt, kw_ule, kw_uge, kw_oeq, kw_one, kw_olt, kw_ogt, kw_ole, kw_oge, kw_ord, kw_uno, kw_ueq, kw_une, @@ -178,6 +189,7 @@ namespace lltok { // String valued tokens (StrVal). 
LabelStr, // foo: GlobalVar, // @foo @"foo" + ComdatVar, // $foo LocalVar, // %foo %"foo" MetadataVar, // !foo StringConstant, // "foo" diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index 2606bc2..91bb51c 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -17,8 +17,8 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" #include <cstring> +#include <system_error> using namespace llvm; Module *llvm::ParseAssembly(MemoryBuffer *F, @@ -41,21 +41,21 @@ Module *llvm::ParseAssembly(MemoryBuffer *F, Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, LLVMContext &Context) { - std::unique_ptr<MemoryBuffer> File; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = FileOrErr.getError()) { Err = SMDiagnostic(Filename, SourceMgr::DK_Error, - "Could not open input file: " + ec.message()); + "Could not open input file: " + EC.message()); return nullptr; } - return ParseAssembly(File.release(), nullptr, Err, Context); + return ParseAssembly(FileOrErr.get().release(), nullptr, Err, Context); } Module *llvm::ParseAssemblyString(const char *AsmString, Module *M, SMDiagnostic &Err, LLVMContext &Context) { MemoryBuffer *F = - MemoryBuffer::getMemBuffer(StringRef(AsmString, strlen(AsmString)), - "<string>"); + MemoryBuffer::getMemBuffer(StringRef(AsmString), "<string>"); return ParseAssembly(F, M, Err, Context); } diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index 716299f..b5886c1 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -32,7 +32,7 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, char **OutMessage) { ErrorOr<Module *> ModuleOrErr = parseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef)); - if (error_code EC = ModuleOrErr.getError()) { + if (std::error_code EC = ModuleOrErr.getError()) { if (OutMessage) *OutMessage = strdup(EC.message().c_str()); *OutModule = wrap((Module*)nullptr); @@ -54,7 +54,7 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, ErrorOr<Module *> ModuleOrErr = getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef)); - if (error_code EC = ModuleOrErr.getError()) { + if (std::error_code EC = ModuleOrErr.getError()) { *OutM = wrap((Module *)nullptr); if (OutMessage) *OutMessage = strdup(EC.message().c_str()); diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4170f98..192f753 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -39,12 +39,11 @@ void BitcodeReader::materializeForwardReferencedFunctions() { } void BitcodeReader::FreeState() { - if (BufferOwned) - delete Buffer; Buffer = nullptr; std::vector<Type*>().swap(TypeList); ValueList.clear(); MDValueList.clear(); + std::vector<Comdat *>().swap(ComdatList); std::vector<AttributeSet>().swap(MAttributes); std::vector<BasicBlock*>().swap(FunctionBBs); @@ -205,6 +204,22 @@ static SynchronizationScope GetDecodedSynchScope(unsigned Val) { } } +static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) { + switch (Val) { + default: // Map unknown selection kinds to any. 
+ case bitc::COMDAT_SELECTION_KIND_ANY: + return Comdat::Any; + case bitc::COMDAT_SELECTION_KIND_EXACT_MATCH: + return Comdat::ExactMatch; + case bitc::COMDAT_SELECTION_KIND_LARGEST: + return Comdat::Largest; + case bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES: + return Comdat::NoDuplicates; + case bitc::COMDAT_SELECTION_KIND_SAME_SIZE: + return Comdat::SameSize; + } +} + static void UpgradeDLLImportExportLinkage(llvm::GlobalValue *GV, unsigned Val) { switch (Val) { case 5: GV->setDLLStorageClass(GlobalValue::DLLImportStorageClass); break; @@ -470,7 +485,7 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B, (EncodedAttrs & 0xffff)); } -error_code BitcodeReader::ParseAttributeBlock() { +std::error_code BitcodeReader::ParseAttributeBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID)) return Error(InvalidRecord); @@ -490,7 +505,7 @@ error_code BitcodeReader::ParseAttributeBlock() { case BitstreamEntry::Error: return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return error_code::success(); + return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; @@ -549,6 +564,8 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) { return Attribute::InlineHint; case bitc::ATTR_KIND_IN_REG: return Attribute::InReg; + case bitc::ATTR_KIND_JUMP_TABLE: + return Attribute::JumpTable; case bitc::ATTR_KIND_MIN_SIZE: return Attribute::MinSize; case bitc::ATTR_KIND_NAKED: @@ -614,15 +631,15 @@ static Attribute::AttrKind GetAttrFromCode(uint64_t Code) { } } -error_code BitcodeReader::ParseAttrKind(uint64_t Code, - Attribute::AttrKind *Kind) { +std::error_code BitcodeReader::ParseAttrKind(uint64_t Code, + Attribute::AttrKind *Kind) { *Kind = GetAttrFromCode(Code); if (*Kind == Attribute::None) return Error(InvalidValue); - return error_code::success(); + return std::error_code(); } -error_code BitcodeReader::ParseAttributeGroupBlock() { +std::error_code BitcodeReader::ParseAttributeGroupBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID)) return Error(InvalidRecord); @@ -640,7 +657,7 @@ error_code BitcodeReader::ParseAttributeGroupBlock() { case BitstreamEntry::Error: return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return error_code::success(); + return std::error_code(); case BitstreamEntry::Record: // The interesting case. 
break; @@ -662,13 +679,13 @@ error_code BitcodeReader::ParseAttributeGroupBlock() { for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Record[i] == 0) { // Enum attribute Attribute::AttrKind Kind; - if (error_code EC = ParseAttrKind(Record[++i], &Kind)) + if (std::error_code EC = ParseAttrKind(Record[++i], &Kind)) return EC; B.addAttribute(Kind); } else if (Record[i] == 1) { // Align attribute Attribute::AttrKind Kind; - if (error_code EC = ParseAttrKind(Record[++i], &Kind)) + if (std::error_code EC = ParseAttrKind(Record[++i], &Kind)) return EC; if (Kind == Attribute::Alignment) B.addAlignmentAttr(Record[++i]); @@ -704,14 +721,14 @@ error_code BitcodeReader::ParseAttributeGroupBlock() { } } -error_code BitcodeReader::ParseTypeTable() { +std::error_code BitcodeReader::ParseTypeTable() { if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW)) return Error(InvalidRecord); return ParseTypeTableBody(); } -error_code BitcodeReader::ParseTypeTableBody() { +std::error_code BitcodeReader::ParseTypeTableBody() { if (!TypeList.empty()) return Error(InvalidMultipleBlocks); @@ -731,7 +748,7 @@ error_code BitcodeReader::ParseTypeTableBody() { case BitstreamEntry::EndBlock: if (NumRecords != TypeList.size()) return Error(MalformedBlock); - return error_code::success(); + return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; @@ -931,7 +948,7 @@ error_code BitcodeReader::ParseTypeTableBody() { } } -error_code BitcodeReader::ParseValueSymbolTable() { +std::error_code BitcodeReader::ParseValueSymbolTable() { if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) return Error(InvalidRecord); @@ -947,7 +964,7 @@ error_code BitcodeReader::ParseValueSymbolTable() { case BitstreamEntry::Error: return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return error_code::success(); + return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; @@ -985,7 +1002,7 @@ error_code BitcodeReader::ParseValueSymbolTable() { } } -error_code BitcodeReader::ParseMetadata() { +std::error_code BitcodeReader::ParseMetadata() { unsigned NextMDValueNo = MDValueList.size(); if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID)) @@ -1002,7 +1019,7 @@ error_code BitcodeReader::ParseMetadata() { case BitstreamEntry::Error: return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return error_code::success(); + return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; @@ -1062,7 +1079,8 @@ error_code BitcodeReader::ParseMetadata() { break; } case bitc::METADATA_STRING: { - SmallString<8> String(Record.begin(), Record.end()); + std::string String(Record.begin(), Record.end()); + llvm::UpgradeMDStringConstant(String); Value *V = MDString::get(Context, String); MDValueList.AssignValue(V, NextMDValueNo++); break; @@ -1094,31 +1112,9 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { return 1ULL << 63; } -// FIXME: Delete this in LLVM 4.0 and just assert that the aliasee is a -// GlobalObject. 
-static GlobalObject & -getGlobalObjectInExpr(const DenseMap<GlobalAlias *, Constant *> &Map, - Constant &C) { - auto *GO = dyn_cast<GlobalObject>(&C); - if (GO) - return *GO; - - auto *GA = dyn_cast<GlobalAlias>(&C); - if (GA) - return getGlobalObjectInExpr(Map, *Map.find(GA)->second); - - auto &CE = cast<ConstantExpr>(C); - assert(CE.getOpcode() == Instruction::BitCast || - CE.getOpcode() == Instruction::GetElementPtr || - CE.getOpcode() == Instruction::AddrSpaceCast); - if (CE.getOpcode() == Instruction::GetElementPtr) - assert(cast<GEPOperator>(CE).hasAllZeroIndices()); - return getGlobalObjectInExpr(Map, *CE.getOperand(0)); -} - /// ResolveGlobalAndAliasInits - Resolve all of the initializers for global /// values and aliases that we can. -error_code BitcodeReader::ResolveGlobalAndAliasInits() { +std::error_code BitcodeReader::ResolveGlobalAndAliasInits() { std::vector<std::pair<GlobalVariable*, unsigned> > GlobalInitWorklist; std::vector<std::pair<GlobalAlias*, unsigned> > AliasInitWorklist; std::vector<std::pair<Function*, unsigned> > FunctionPrefixWorklist; @@ -1141,30 +1137,19 @@ error_code BitcodeReader::ResolveGlobalAndAliasInits() { GlobalInitWorklist.pop_back(); } - // FIXME: Delete this in LLVM 4.0 - // Older versions of llvm could write an alias pointing to another. We cannot - // construct those aliases, so we first collect an alias to aliasee expression - // and then compute the actual aliasee. - DenseMap<GlobalAlias *, Constant *> AliasInit; - while (!AliasInitWorklist.empty()) { unsigned ValID = AliasInitWorklist.back().second; if (ValID >= ValueList.size()) { AliasInits.push_back(AliasInitWorklist.back()); } else { if (Constant *C = dyn_cast_or_null<Constant>(ValueList[ValID])) - AliasInit.insert(std::make_pair(AliasInitWorklist.back().first, C)); + AliasInitWorklist.back().first->setAliasee(C); else return Error(ExpectedConstant); } AliasInitWorklist.pop_back(); } - for (auto &Pair : AliasInit) { - auto &GO = getGlobalObjectInExpr(AliasInit, *Pair.second); - Pair.first->setAliasee(&GO); - } - while (!FunctionPrefixWorklist.empty()) { unsigned ValID = FunctionPrefixWorklist.back().second; if (ValID >= ValueList.size()) { @@ -1178,7 +1163,7 @@ error_code BitcodeReader::ResolveGlobalAndAliasInits() { FunctionPrefixWorklist.pop_back(); } - return error_code::success(); + return std::error_code(); } static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) { @@ -1189,7 +1174,7 @@ static APInt ReadWideAPInt(ArrayRef<uint64_t> Vals, unsigned TypeBits) { return APInt(TypeBits, Words); } -error_code BitcodeReader::ParseConstants() { +std::error_code BitcodeReader::ParseConstants() { if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID)) return Error(InvalidRecord); @@ -1212,7 +1197,7 @@ error_code BitcodeReader::ParseConstants() { // Once all the constants have been read, go through and resolve forward // references. ValueList.ResolveConstantForwardRefs(); - return error_code::success(); + return std::error_code(); case BitstreamEntry::Record: // The interesting case. 
break; @@ -1627,7 +1612,7 @@ error_code BitcodeReader::ParseConstants() { } } -error_code BitcodeReader::ParseUseLists() { +std::error_code BitcodeReader::ParseUseLists() { if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID)) return Error(InvalidRecord); @@ -1642,7 +1627,7 @@ error_code BitcodeReader::ParseUseLists() { case BitstreamEntry::Error: return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return error_code::success(); + return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; @@ -1667,7 +1652,7 @@ error_code BitcodeReader::ParseUseLists() { /// RememberAndSkipFunctionBody - When we see the block for a function body, /// remember where it is and then skip it. This lets us lazily deserialize the /// functions. -error_code BitcodeReader::RememberAndSkipFunctionBody() { +std::error_code BitcodeReader::RememberAndSkipFunctionBody() { // Get the function we are talking about. if (FunctionsWithBodies.empty()) return Error(InsufficientFunctionProtos); @@ -1682,10 +1667,10 @@ error_code BitcodeReader::RememberAndSkipFunctionBody() { // Skip over the function block for now. if (Stream.SkipBlock()) return Error(InvalidRecord); - return error_code::success(); + return std::error_code(); } -error_code BitcodeReader::GlobalCleanup() { +std::error_code BitcodeReader::GlobalCleanup() { // Patch the initializers for globals and aliases up. ResolveGlobalAndAliasInits(); if (!GlobalInits.empty() || !AliasInits.empty()) @@ -1711,10 +1696,10 @@ error_code BitcodeReader::GlobalCleanup() { // want lazy deserialization. std::vector<std::pair<GlobalVariable*, unsigned> >().swap(GlobalInits); std::vector<std::pair<GlobalAlias*, unsigned> >().swap(AliasInits); - return error_code::success(); + return std::error_code(); } -error_code BitcodeReader::ParseModule(bool Resume) { +std::error_code BitcodeReader::ParseModule(bool Resume) { if (Resume) Stream.JumpToBit(NextUnreadBit); else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) @@ -1745,30 +1730,30 @@ error_code BitcodeReader::ParseModule(bool Resume) { return Error(MalformedBlock); break; case bitc::PARAMATTR_BLOCK_ID: - if (error_code EC = ParseAttributeBlock()) + if (std::error_code EC = ParseAttributeBlock()) return EC; break; case bitc::PARAMATTR_GROUP_BLOCK_ID: - if (error_code EC = ParseAttributeGroupBlock()) + if (std::error_code EC = ParseAttributeGroupBlock()) return EC; break; case bitc::TYPE_BLOCK_ID_NEW: - if (error_code EC = ParseTypeTable()) + if (std::error_code EC = ParseTypeTable()) return EC; break; case bitc::VALUE_SYMTAB_BLOCK_ID: - if (error_code EC = ParseValueSymbolTable()) + if (std::error_code EC = ParseValueSymbolTable()) return EC; SeenValueSymbolTable = true; break; case bitc::CONSTANTS_BLOCK_ID: - if (error_code EC = ParseConstants()) + if (std::error_code EC = ParseConstants()) return EC; - if (error_code EC = ResolveGlobalAndAliasInits()) + if (std::error_code EC = ResolveGlobalAndAliasInits()) return EC; break; case bitc::METADATA_BLOCK_ID: - if (error_code EC = ParseMetadata()) + if (std::error_code EC = ParseMetadata()) return EC; break; case bitc::FUNCTION_BLOCK_ID: @@ -1776,12 +1761,12 @@ error_code BitcodeReader::ParseModule(bool Resume) { // FunctionsWithBodies list. 
if (!SeenFirstFunctionBody) { std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end()); - if (error_code EC = GlobalCleanup()) + if (std::error_code EC = GlobalCleanup()) return EC; SeenFirstFunctionBody = true; } - if (error_code EC = RememberAndSkipFunctionBody()) + if (std::error_code EC = RememberAndSkipFunctionBody()) return EC; // For streaming bitcode, suspend parsing when we reach the function // bodies. Subsequent materialization calls will resume it when @@ -1791,11 +1776,11 @@ error_code BitcodeReader::ParseModule(bool Resume) { // just finish the parse now. if (LazyStreamer && SeenValueSymbolTable) { NextUnreadBit = Stream.GetCurrentBitNo(); - return error_code::success(); + return std::error_code(); } break; case bitc::USELIST_BLOCK_ID: - if (error_code EC = ParseUseLists()) + if (std::error_code EC = ParseUseLists()) return EC; break; } @@ -1870,6 +1855,20 @@ error_code BitcodeReader::ParseModule(bool Resume) { GCTable.push_back(S); break; } + case bitc::MODULE_CODE_COMDAT: { // COMDAT: [selection_kind, name] + if (Record.size() < 2) + return Error(InvalidRecord); + Comdat::SelectionKind SK = getDecodedComdatSelectionKind(Record[0]); + unsigned ComdatNameSize = Record[1]; + std::string ComdatName; + ComdatName.reserve(ComdatNameSize); + for (unsigned i = 0; i != ComdatNameSize; ++i) + ComdatName += (char)Record[2 + i]; + Comdat *C = TheModule->getOrInsertComdat(ComdatName); + C->setSelectionKind(SK); + ComdatList.push_back(C); + break; + } // GLOBALVAR: [pointer type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, // unnamed_addr, dllstorageclass] @@ -1930,6 +1929,12 @@ error_code BitcodeReader::ParseModule(bool Resume) { // Remember which value to use for the global initializer. if (unsigned InitID = Record[2]) GlobalInits.push_back(std::make_pair(NewGV, InitID-1)); + + if (Record.size() > 11) + if (unsigned ComdatID = Record[11]) { + assert(ComdatID <= ComdatList.size()); + NewGV->setComdat(ComdatList[ComdatID - 1]); + } break; } // FUNCTION: [type, callingconv, isproto, linkage, paramattr, @@ -1983,6 +1988,12 @@ error_code BitcodeReader::ParseModule(bool Resume) { else UpgradeDLLImportExportLinkage(Func, Record[3]); + if (Record.size() > 12) + if (unsigned ComdatID = Record[12]) { + assert(ComdatID <= ComdatList.size()); + Func->setComdat(ComdatList[ComdatID - 1]); + } + ValueList.push_back(Func); // If this is a function with a body, remember the prototype we are @@ -2017,6 +2028,10 @@ error_code BitcodeReader::ParseModule(bool Resume) { NewGA->setDLLStorageClass(GetDecodedDLLStorageClass(Record[4])); else UpgradeDLLImportExportLinkage(NewGA, Record[2]); + if (Record.size() > 5) + NewGA->setThreadLocalMode(GetDecodedThreadLocalMode(Record[5])); + if (Record.size() > 6) + NewGA->setUnnamedAddr(Record[6]); ValueList.push_back(NewGA); AliasInits.push_back(std::make_pair(NewGA, Record[1])); break; @@ -2033,10 +2048,10 @@ error_code BitcodeReader::ParseModule(bool Resume) { } } -error_code BitcodeReader::ParseBitcodeInto(Module *M) { +std::error_code BitcodeReader::ParseBitcodeInto(Module *M) { TheModule = nullptr; - if (error_code EC = InitStream()) + if (std::error_code EC = InitStream()) return EC; // Sniff for the signature. @@ -2052,7 +2067,7 @@ error_code BitcodeReader::ParseBitcodeInto(Module *M) { // need to understand them all. 
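The MODULE_CODE_COMDAT case a few hunks up fixes the on-disk shape of a comdat record as [selection_kind, name_size, name chars...], and the GLOBALVAR and FUNCTION records gain a trailing 1-based index into the comdat list (0 meaning none). A self-contained sketch of that record layout, mirroring the reader above and the writer in BitcodeWriter.cpp at the end of this section:

    #include <cassert>
    #include <cstdint>
    #include <string>
    #include <vector>

    // Encode as writeComdats does: kind, then name length, then raw bytes.
    std::vector<uint64_t> encodeComdat(unsigned Kind, const std::string &Name) {
      std::vector<uint64_t> Record{Kind, Name.size()};
      for (char C : Name)
        Record.push_back((unsigned char)C);
      return Record;
    }

    // Decode as BitcodeReader::ParseModule does above.
    std::string decodeComdatName(const std::vector<uint64_t> &Record) {
      std::string Name;
      Name.reserve(Record[1]);
      for (uint64_t i = 0, e = Record[1]; i != e; ++i)
        Name += (char)Record[2 + i];
      return Name;
    }

    int main() {
      // The name round-trips regardless of which selection kind is encoded.
      assert(decodeComdatName(encodeComdat(2, "foo")) == "foo");
      return 0;
    }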
while (1) { if (Stream.AtEndOfStream()) - return error_code::success(); + return std::error_code(); BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); @@ -2061,7 +2076,7 @@ error_code BitcodeReader::ParseBitcodeInto(Module *M) { case BitstreamEntry::Error: return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return error_code::success(); + return std::error_code(); case BitstreamEntry::SubBlock: switch (Entry.ID) { @@ -2074,10 +2089,10 @@ error_code BitcodeReader::ParseBitcodeInto(Module *M) { if (TheModule) return Error(InvalidMultipleBlocks); TheModule = M; - if (error_code EC = ParseModule(false)) + if (std::error_code EC = ParseModule(false)) return EC; if (LazyStreamer) - return error_code::success(); + return std::error_code(); break; default: if (Stream.SkipBlock()) @@ -2094,19 +2109,20 @@ error_code BitcodeReader::ParseBitcodeInto(Module *M) { if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 && Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && Stream.AtEndOfStream()) - return error_code::success(); + return std::error_code(); return Error(InvalidRecord); } } } -error_code BitcodeReader::ParseModuleTriple(std::string &Triple) { +ErrorOr<std::string> BitcodeReader::parseModuleTriple() { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) return Error(InvalidRecord); SmallVector<uint64_t, 64> Record; + std::string Triple; // Read all the records for this module. while (1) { BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); @@ -2116,7 +2132,7 @@ error_code BitcodeReader::ParseModuleTriple(std::string &Triple) { case BitstreamEntry::Error: return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return error_code::success(); + return Triple; case BitstreamEntry::Record: // The interesting case. break; @@ -2135,10 +2151,11 @@ error_code BitcodeReader::ParseModuleTriple(std::string &Triple) { } Record.clear(); } + llvm_unreachable("Exit infinite loop"); } -error_code BitcodeReader::ParseTriple(std::string &Triple) { - if (error_code EC = InitStream()) +ErrorOr<std::string> BitcodeReader::parseTriple() { + if (std::error_code EC = InitStream()) return EC; // Sniff for the signature. @@ -2159,11 +2176,11 @@ error_code BitcodeReader::ParseTriple(std::string &Triple) { case BitstreamEntry::Error: return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return error_code::success(); + return std::error_code(); case BitstreamEntry::SubBlock: if (Entry.ID == bitc::MODULE_BLOCK_ID) - return ParseModuleTriple(Triple); + return parseModuleTriple(); // Ignore other sub-blocks. if (Stream.SkipBlock()) @@ -2178,7 +2195,7 @@ error_code BitcodeReader::ParseTriple(std::string &Triple) { } /// ParseMetadataAttachment - Parse metadata attachments. -error_code BitcodeReader::ParseMetadataAttachment() { +std::error_code BitcodeReader::ParseMetadataAttachment() { if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID)) return Error(InvalidRecord); @@ -2191,7 +2208,7 @@ error_code BitcodeReader::ParseMetadataAttachment() { case BitstreamEntry::Error: return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return error_code::success(); + return std::error_code(); case BitstreamEntry::Record: // The interesting case. break; @@ -2225,7 +2242,7 @@ error_code BitcodeReader::ParseMetadataAttachment() { } /// ParseFunctionBody - Lazily parse the specified function body block. 
-error_code BitcodeReader::ParseFunctionBody(Function *F) { +std::error_code BitcodeReader::ParseFunctionBody(Function *F) { if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID)) return Error(InvalidRecord); @@ -2261,20 +2278,20 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { return Error(InvalidRecord); break; case bitc::CONSTANTS_BLOCK_ID: - if (error_code EC = ParseConstants()) + if (std::error_code EC = ParseConstants()) return EC; NextValueNo = ValueList.size(); break; case bitc::VALUE_SYMTAB_BLOCK_ID: - if (error_code EC = ParseValueSymbolTable()) + if (std::error_code EC = ParseValueSymbolTable()) return EC; break; case bitc::METADATA_ATTACHMENT_ID: - if (error_code EC = ParseMetadataAttachment()) + if (std::error_code EC = ParseMetadataAttachment()) return EC; break; case bitc::METADATA_BLOCK_ID: - if (error_code EC = ParseMetadata()) + if (std::error_code EC = ParseMetadata()) return EC; break; } @@ -2857,7 +2874,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { assert((CT != LandingPadInst::Filter || isa<ArrayType>(Val->getType())) && "Filter clause has invalid type!"); - LP->addClause(Val); + LP->addClause(cast<Constant>(Val)); } I = LP; @@ -2950,7 +2967,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_CMPXCHG: { // CMPXCHG:[ptrty, ptr, cmp, new, vol, successordering, synchscope, - // failureordering] + // failureordering?, isweak?] unsigned OpNum = 0; Value *Ptr, *Cmp, *New; if (getValueTypePair(Record, OpNum, NextValueNo, Ptr) || @@ -2958,7 +2975,7 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { cast<PointerType>(Ptr->getType())->getElementType(), Cmp) || popValue(Record, OpNum, NextValueNo, cast<PointerType>(Ptr->getType())->getElementType(), New) || - (OpNum + 3 != Record.size() && OpNum + 4 != Record.size())) + (Record.size() < OpNum + 3 || Record.size() > OpNum + 5)) return Error(InvalidRecord); AtomicOrdering SuccessOrdering = GetDecodedOrdering(Record[OpNum+1]); if (SuccessOrdering == NotAtomic || SuccessOrdering == Unordered) @@ -2975,6 +2992,17 @@ error_code BitcodeReader::ParseFunctionBody(Function *F) { I = new AtomicCmpXchgInst(Ptr, Cmp, New, SuccessOrdering, FailureOrdering, SynchScope); cast<AtomicCmpXchgInst>(I)->setVolatile(Record[OpNum]); + + if (Record.size() < 8) { + // Before weak cmpxchgs existed, the instruction simply returned the + // value loaded from memory, so bitcode files from that era will be + // expecting the first component of a modern cmpxchg. + CurBB->getInstList().push_back(I); + I = ExtractValueInst::Create(I, 0); + } else { + cast<AtomicCmpXchgInst>(I)->setWeak(Record[OpNum+4]); + } + InstructionList.push_back(I); break; } @@ -3144,27 +3172,29 @@ OutOfRecordLoop: ValueList.shrinkTo(ModuleValueListSize); MDValueList.shrinkTo(ModuleMDValueListSize); std::vector<BasicBlock*>().swap(FunctionBBs); - return error_code::success(); + return std::error_code(); } /// Find the function body in the bitcode stream -error_code BitcodeReader::FindFunctionInStream(Function *F, - DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator) { +std::error_code BitcodeReader::FindFunctionInStream( + Function *F, + DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator) { while (DeferredFunctionInfoIterator->second == 0) { if (Stream.AtEndOfStream()) return Error(CouldNotFindFunctionInStream); // ParseModule will parse the next body in the stream and set its // position in the DeferredFunctionInfo map. 
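Surrounding all of this is the section's pervasive migration from llvm::error_code to std::error_code: a default-constructed std::error_code is the success value (hence every 'return error_code::success()' becoming 'return std::error_code()'), and fallible entry points return ErrorOr<T> so success and failure travel in one value. A hedged sketch of the resulting calling convention, built on getLazyBitcodeModule exactly as it appears in this file:

    #include "llvm/Bitcode/ReaderWriter.h" // getLazyBitcodeModule (3.5-era)
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/ErrorOr.h"
    #include "llvm/Support/MemoryBuffer.h"

    llvm::Module *loadLazily(llvm::MemoryBuffer *Buf, llvm::LLVMContext &Ctx) {
      llvm::ErrorOr<llvm::Module *> MOrErr =
          llvm::getLazyBitcodeModule(Buf, Ctx);
      // getError() yields a default (falsy) std::error_code on success.
      if (std::error_code EC = MOrErr.getError()) {
        // EC.message() carries the BitcodeErrorCategory text defined a
        // little further on.
        return nullptr;
      }
      return MOrErr.get();
    }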
- if (error_code EC = ParseModule(true)) + if (std::error_code EC = ParseModule(true)) return EC; } - return error_code::success(); + return std::error_code(); } //===----------------------------------------------------------------------===// // GVMaterializer implementation //===----------------------------------------------------------------------===// +void BitcodeReader::releaseBuffer() { Buffer.release(); } bool BitcodeReader::isMaterializable(const GlobalValue *GV) const { if (const Function *F = dyn_cast<Function>(GV)) { @@ -3174,24 +3204,24 @@ bool BitcodeReader::isMaterializable(const GlobalValue *GV) const { return false; } -error_code BitcodeReader::Materialize(GlobalValue *GV) { +std::error_code BitcodeReader::Materialize(GlobalValue *GV) { Function *F = dyn_cast<Function>(GV); // If it's not a function or is already material, ignore the request. if (!F || !F->isMaterializable()) - return error_code::success(); + return std::error_code(); DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F); assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); // If its position is recorded as 0, its body is somewhere in the stream // but we haven't seen it yet. if (DFII->second == 0 && LazyStreamer) - if (error_code EC = FindFunctionInStream(F, DFII)) + if (std::error_code EC = FindFunctionInStream(F, DFII)) return EC; // Move the bit stream to the saved position of the deferred function body. Stream.JumpToBit(DFII->second); - if (error_code EC = ParseFunctionBody(F)) + if (std::error_code EC = ParseFunctionBody(F)) return EC; // Upgrade any old intrinsic calls in the function. @@ -3206,7 +3236,7 @@ error_code BitcodeReader::Materialize(GlobalValue *GV) { } } - return error_code::success(); + return std::error_code(); } bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { @@ -3228,8 +3258,7 @@ void BitcodeReader::Dematerialize(GlobalValue *GV) { F->deleteBody(); } - -error_code BitcodeReader::MaterializeModule(Module *M) { +std::error_code BitcodeReader::MaterializeModule(Module *M) { assert(M == TheModule && "Can only Materialize the Module this BitcodeReader is attached to."); // Iterate over the module, deserializing any functions that are still on @@ -3237,7 +3266,7 @@ error_code BitcodeReader::MaterializeModule(Module *M) { for (Module::iterator F = TheModule->begin(), E = TheModule->end(); F != E; ++F) { if (F->isMaterializable()) { - if (error_code EC = Materialize(F)) + if (std::error_code EC = Materialize(F)) return EC; } } @@ -3270,16 +3299,16 @@ error_code BitcodeReader::MaterializeModule(Module *M) { UpgradeInstWithTBAATag(InstsWithTBAATag[I]); UpgradeDebugInfo(*M); - return error_code::success(); + return std::error_code(); } -error_code BitcodeReader::InitStream() { +std::error_code BitcodeReader::InitStream() { if (LazyStreamer) return InitLazyStream(); return InitStreamFromBuffer(); } -error_code BitcodeReader::InitStreamFromBuffer() { +std::error_code BitcodeReader::InitStreamFromBuffer() { const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart(); const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); @@ -3299,10 +3328,10 @@ error_code BitcodeReader::InitStreamFromBuffer() { StreamFile.reset(new BitstreamReader(BufPtr, BufEnd)); Stream.init(*StreamFile); - return error_code::success(); + return std::error_code(); } -error_code BitcodeReader::InitLazyStream() { +std::error_code BitcodeReader::InitLazyStream() { // Check and strip off the bitcode wrapper; BitstreamReader expects never to 
// see it. StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer); @@ -3323,12 +3352,12 @@ error_code BitcodeReader::InitLazyStream() { Bytes->dropLeadingBytes(bitcodeStart - buf); Bytes->setKnownObjectSize(bitcodeEnd - bitcodeStart); } - return error_code::success(); + return std::error_code(); } namespace { -class BitcodeErrorCategoryType : public error_category { - const char *name() const override { +class BitcodeErrorCategoryType : public std::error_category { + const char *name() const LLVM_NOEXCEPT override { return "llvm.bitcode"; } std::string message(int IE) const override { @@ -3378,7 +3407,7 @@ class BitcodeErrorCategoryType : public error_category { }; } -const error_category &BitcodeReader::BitcodeErrorCategory() { +const std::error_category &BitcodeReader::BitcodeErrorCategory() { static BitcodeErrorCategoryType O; return O; } @@ -3394,12 +3423,11 @@ ErrorOr<Module *> llvm::getLazyBitcodeModule(MemoryBuffer *Buffer, Module *M = new Module(Buffer->getBufferIdentifier(), Context); BitcodeReader *R = new BitcodeReader(Buffer, Context); M->setMaterializer(R); - if (error_code EC = R->ParseBitcodeInto(M)) { + if (std::error_code EC = R->ParseBitcodeInto(M)) { + R->releaseBuffer(); // Never take ownership on error. delete M; // Also deletes R. return EC; } - // Have the BitcodeReader dtor delete 'Buffer'. - R->setBufferOwned(true); R->materializeForwardReferencedFunctions(); @@ -3414,13 +3442,12 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name, Module *M = new Module(name, Context); BitcodeReader *R = new BitcodeReader(streamer, Context); M->setMaterializer(R); - if (error_code EC = R->ParseBitcodeInto(M)) { + if (std::error_code EC = R->ParseBitcodeInto(M)) { if (ErrMsg) *ErrMsg = EC.message(); delete M; // Also deletes R. return nullptr; } - R->setBufferOwned(false); // no buffer to delete return M; } @@ -3430,13 +3457,8 @@ ErrorOr<Module *> llvm::parseBitcodeFile(MemoryBuffer *Buffer, if (!ModuleOrErr) return ModuleOrErr; Module *M = ModuleOrErr.get(); - - // Don't let the BitcodeReader dtor delete 'Buffer', regardless of whether - // there was an error. - static_cast<BitcodeReader*>(M->getMaterializer())->setBufferOwned(false); - // Read in the entire module, and destroy the BitcodeReader. - if (error_code EC = M->materializeAllPermanently()) { + if (std::error_code EC = M->materializeAllPermanently(true)) { delete M; return EC; } @@ -3448,17 +3470,12 @@ ErrorOr<Module *> llvm::parseBitcodeFile(MemoryBuffer *Buffer, } std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer, - LLVMContext& Context, - std::string *ErrMsg) { + LLVMContext &Context) { BitcodeReader *R = new BitcodeReader(Buffer, Context); - // Don't let the BitcodeReader dtor delete 'Buffer'. 
- R->setBufferOwned(false); - - std::string Triple(""); - if (error_code EC = R->ParseTriple(Triple)) - if (ErrMsg) - *ErrMsg = EC.message(); - + ErrorOr<std::string> Triple = R->parseTriple(); + R->releaseBuffer(); delete R; - return Triple; + if (Triple.getError()) + return ""; + return Triple.get(); } diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 593d8f9..1d4869a 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -22,10 +22,11 @@ #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" #include "llvm/IR/ValueHandle.h" -#include "llvm/Support/system_error.h" +#include <system_error> #include <vector> namespace llvm { + class Comdat; class MemoryBuffer; class LLVMContext; @@ -125,8 +126,7 @@ public: class BitcodeReader : public GVMaterializer { LLVMContext &Context; Module *TheModule; - MemoryBuffer *Buffer; - bool BufferOwned; + std::unique_ptr<MemoryBuffer> Buffer; std::unique_ptr<BitstreamReader> StreamFile; BitstreamCursor Stream; DataStreamer *LazyStreamer; @@ -136,6 +136,7 @@ class BitcodeReader : public GVMaterializer { std::vector<Type*> TypeList; BitcodeReaderValueList ValueList; BitcodeReaderMDValueList MDValueList; + std::vector<Comdat *> ComdatList; SmallVector<Instruction *, 64> InstructionList; SmallVector<SmallVector<uint64_t, 64>, 64> UseListRecords; @@ -193,7 +194,7 @@ class BitcodeReader : public GVMaterializer { /// not need this flag. bool UseRelativeIDs; - static const error_category &BitcodeErrorCategory(); + static const std::error_category &BitcodeErrorCategory(); public: enum ErrorType { @@ -219,47 +220,39 @@ public: InvalidValue // Invalid version, inst number, attr number, etc }; - error_code Error(ErrorType E) { - return error_code(E, BitcodeErrorCategory()); + std::error_code Error(ErrorType E) { + return std::error_code(E, BitcodeErrorCategory()); } explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) - : Context(C), TheModule(nullptr), Buffer(buffer), BufferOwned(false), - LazyStreamer(nullptr), NextUnreadBit(0), SeenValueSymbolTable(false), - ValueList(C), MDValueList(C), - SeenFirstFunctionBody(false), UseRelativeIDs(false) { - } + : Context(C), TheModule(nullptr), Buffer(buffer), LazyStreamer(nullptr), + NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C) - : Context(C), TheModule(nullptr), Buffer(nullptr), BufferOwned(false), - LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), - ValueList(C), MDValueList(C), - SeenFirstFunctionBody(false), UseRelativeIDs(false) { - } - ~BitcodeReader() { - FreeState(); - } + : Context(C), TheModule(nullptr), Buffer(nullptr), LazyStreamer(streamer), + NextUnreadBit(0), SeenValueSymbolTable(false), ValueList(C), + MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) {} + ~BitcodeReader() { FreeState(); } void materializeForwardReferencedFunctions(); void FreeState(); - /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer - /// when the reader is destroyed. 
- void setBufferOwned(bool Owned) { BufferOwned = Owned; } + void releaseBuffer() override; bool isMaterializable(const GlobalValue *GV) const override; bool isDematerializable(const GlobalValue *GV) const override; - error_code Materialize(GlobalValue *GV) override; - error_code MaterializeModule(Module *M) override; + std::error_code Materialize(GlobalValue *GV) override; + std::error_code MaterializeModule(Module *M) override; void Dematerialize(GlobalValue *GV) override; /// @brief Main interface to parsing a bitcode buffer. /// @returns true if an error occurred. - error_code ParseBitcodeInto(Module *M); + std::error_code ParseBitcodeInto(Module *M); /// @brief Cheap mechanism to just extract module triple /// @returns true if an error occurred. - error_code ParseTriple(std::string &Triple); + ErrorOr<std::string> parseTriple(); static uint64_t decodeSignRotatedValue(uint64_t V); @@ -346,28 +339,29 @@ private: return getFnValueByID(ValNo, Ty); } - error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); - error_code ParseModule(bool Resume); - error_code ParseAttributeBlock(); - error_code ParseAttributeGroupBlock(); - error_code ParseTypeTable(); - error_code ParseTypeTableBody(); - - error_code ParseValueSymbolTable(); - error_code ParseConstants(); - error_code RememberAndSkipFunctionBody(); - error_code ParseFunctionBody(Function *F); - error_code GlobalCleanup(); - error_code ResolveGlobalAndAliasInits(); - error_code ParseMetadata(); - error_code ParseMetadataAttachment(); - error_code ParseModuleTriple(std::string &Triple); - error_code ParseUseLists(); - error_code InitStream(); - error_code InitStreamFromBuffer(); - error_code InitLazyStream(); - error_code FindFunctionInStream(Function *F, - DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator); + std::error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); + std::error_code ParseModule(bool Resume); + std::error_code ParseAttributeBlock(); + std::error_code ParseAttributeGroupBlock(); + std::error_code ParseTypeTable(); + std::error_code ParseTypeTableBody(); + + std::error_code ParseValueSymbolTable(); + std::error_code ParseConstants(); + std::error_code RememberAndSkipFunctionBody(); + std::error_code ParseFunctionBody(Function *F); + std::error_code GlobalCleanup(); + std::error_code ResolveGlobalAndAliasInits(); + std::error_code ParseMetadata(); + std::error_code ParseMetadataAttachment(); + ErrorOr<std::string> parseModuleTriple(); + std::error_code ParseUseLists(); + std::error_code InitStream(); + std::error_code InitStreamFromBuffer(); + std::error_code InitLazyStream(); + std::error_code FindFunctionInStream( + Function *F, + DenseMap<Function *, uint64_t>::iterator DeferredFunctionInfoIterator); }; } // End llvm namespace diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp index f31e1fa..72451ec 100644 --- a/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -97,7 +97,7 @@ void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op, switch (Op.getEncoding()) { case BitCodeAbbrevOp::Array: case BitCodeAbbrevOp::Blob: - assert(0 && "Should not reach here"); + llvm_unreachable("Should not reach here"); case BitCodeAbbrevOp::Fixed: Vals.push_back(Read((unsigned)Op.getEncodingData())); break; @@ -117,7 +117,7 @@ void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) { switch (Op.getEncoding()) { case BitCodeAbbrevOp::Array: case BitCodeAbbrevOp::Blob: - assert(0 && "Should not 
reach here"); + llvm_unreachable("Should not reach here"); case BitCodeAbbrevOp::Fixed: (void)Read((unsigned)Op.getEncodingData()); break; diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index cc73b84..dd9282a 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -177,6 +177,8 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_INLINE_HINT; case Attribute::InReg: return bitc::ATTR_KIND_IN_REG; + case Attribute::JumpTable: + return bitc::ATTR_KIND_JUMP_TABLE; case Attribute::MinSize: return bitc::ATTR_KIND_MIN_SIZE; case Attribute::Naked: @@ -511,7 +513,7 @@ static unsigned getEncodedDLLStorageClass(const GlobalValue &GV) { llvm_unreachable("Invalid DLL storage class"); } -static unsigned getEncodedThreadLocalMode(const GlobalVariable &GV) { +static unsigned getEncodedThreadLocalMode(const GlobalValue &GV) { switch (GV.getThreadLocalMode()) { case GlobalVariable::NotThreadLocal: return 0; case GlobalVariable::GeneralDynamicTLSModel: return 1; @@ -522,6 +524,35 @@ static unsigned getEncodedThreadLocalMode(const GlobalVariable &GV) { llvm_unreachable("Invalid TLS model"); } +static unsigned getEncodedComdatSelectionKind(const Comdat &C) { + switch (C.getSelectionKind()) { + case Comdat::Any: + return bitc::COMDAT_SELECTION_KIND_ANY; + case Comdat::ExactMatch: + return bitc::COMDAT_SELECTION_KIND_EXACT_MATCH; + case Comdat::Largest: + return bitc::COMDAT_SELECTION_KIND_LARGEST; + case Comdat::NoDuplicates: + return bitc::COMDAT_SELECTION_KIND_NO_DUPLICATES; + case Comdat::SameSize: + return bitc::COMDAT_SELECTION_KIND_SAME_SIZE; + } + llvm_unreachable("Invalid selection kind"); +} + +static void writeComdats(const ValueEnumerator &VE, BitstreamWriter &Stream) { + SmallVector<uint8_t, 64> Vals; + for (const Comdat *C : VE.getComdats()) { + // COMDAT: [selection_kind, name] + Vals.push_back(getEncodedComdatSelectionKind(*C)); + Vals.push_back(C->getName().size()); + for (char Chr : C->getName()) + Vals.push_back((unsigned char)Chr); + Stream.EmitRecord(bitc::MODULE_CODE_COMDAT, Vals, /*AbbrevToUse=*/0); + Vals.clear(); + } +} + // Emit top-level description of module, including target triple, inline asm, // descriptors for global variables, and function prototype info. static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, @@ -623,12 +654,14 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, if (GV.isThreadLocal() || GV.getVisibility() != GlobalValue::DefaultVisibility || GV.hasUnnamedAddr() || GV.isExternallyInitialized() || - GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass) { + GV.getDLLStorageClass() != GlobalValue::DefaultStorageClass || + GV.hasComdat()) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(getEncodedThreadLocalMode(GV)); Vals.push_back(GV.hasUnnamedAddr()); Vals.push_back(GV.isExternallyInitialized()); Vals.push_back(getEncodedDLLStorageClass(GV)); + Vals.push_back(GV.hasComdat() ? VE.getComdatID(GV.getComdat()) : 0); } else { AbbrevToUse = SimpleGVarAbbrev; } @@ -654,6 +687,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, Vals.push_back(F.hasPrefixData() ? (VE.getValueID(F.getPrefixData()) + 1) : 0); Vals.push_back(getEncodedDLLStorageClass(F)); + Vals.push_back(F.hasComdat() ? 
VE.getComdatID(F.getComdat()) : 0); unsigned AbbrevToUse = 0; Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse); @@ -668,6 +702,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, Vals.push_back(getEncodedLinkage(A)); Vals.push_back(getEncodedVisibility(A)); Vals.push_back(getEncodedDLLStorageClass(A)); + Vals.push_back(getEncodedThreadLocalMode(A)); + Vals.push_back(A.hasUnnamedAddr()); unsigned AbbrevToUse = 0; Stream.EmitRecord(bitc::MODULE_CODE_ALIAS, Vals, AbbrevToUse); Vals.clear(); @@ -1445,6 +1481,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, cast<AtomicCmpXchgInst>(I).getSynchScope())); Vals.push_back(GetEncodedOrdering( cast<AtomicCmpXchgInst>(I).getFailureOrdering())); + Vals.push_back(cast<AtomicCmpXchgInst>(I).isWeak()); break; case Instruction::AtomicRMW: Code = bitc::FUNC_CODE_INST_ATOMICRMW; @@ -1910,6 +1947,8 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { // Emit information describing all of the types in the module. WriteTypeTable(VE, Stream); + writeComdats(VE, Stream); + // Emit top-level description of module, including target triple, inline asm, // descriptors for global variables, and function prototype info. WriteModuleInfo(M, VE, Stream); diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 8531e76..15f8034 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -73,37 +73,34 @@ ValueEnumerator::ValueEnumerator(const Module *M) { SmallVector<std::pair<unsigned, MDNode*>, 8> MDs; // Enumerate types used by function bodies and argument lists. - for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { - - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I) - EnumerateType(I->getType()); - - for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){ - for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); - OI != E; ++OI) { - if (MDNode *MD = dyn_cast<MDNode>(*OI)) + for (const Function &F : *M) { + for (const Argument &A : F.args()) + EnumerateType(A.getType()); + + for (const BasicBlock &BB : F) + for (const Instruction &I : BB) { + for (const Use &Op : I.operands()) { + if (MDNode *MD = dyn_cast<MDNode>(&Op)) if (MD->isFunctionLocal() && MD->getFunction()) // These will get enumerated during function-incorporation. continue; - EnumerateOperandType(*OI); + EnumerateOperandType(Op); } - EnumerateType(I->getType()); - if (const CallInst *CI = dyn_cast<CallInst>(I)) + EnumerateType(I.getType()); + if (const CallInst *CI = dyn_cast<CallInst>(&I)) EnumerateAttributes(CI->getAttributes()); - else if (const InvokeInst *II = dyn_cast<InvokeInst>(I)) + else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) EnumerateAttributes(II->getAttributes()); // Enumerate metadata attached with this instruction. 
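The comdat IDs written into the records above come from a UniqueVector<const Comdat *>: insertion hands back a stable 1-based ID, so 0 stays free to mean "no comdat" in the GLOBALVAR and FUNCTION records. A standalone sketch of that interning scheme (simplified, but with the same 1-based contract LLVM's UniqueVector provides):

```cpp
// 1-based interning: insert() returns a stable ID starting at 1, and
// idFor() returns 0 for anything never inserted.
#include <cassert>
#include <iostream>
#include <map>
#include <string>
#include <vector>

template <typename T> class UniqueVectorSketch {
  std::map<T, unsigned> IDs;
  std::vector<T> Items;
public:
  unsigned insert(const T &V) {
    auto It = IDs.find(V);
    if (It != IDs.end())
      return It->second;
    Items.push_back(V);
    return IDs[V] = Items.size(); // first element gets ID 1
  }
  unsigned idFor(const T &V) const {
    auto It = IDs.find(V);
    return It == IDs.end() ? 0 : It->second; // 0 == not present
  }
};

int main() {
  UniqueVectorSketch<std::string> Comdats;
  unsigned A = Comdats.insert("foo");
  unsigned B = Comdats.insert("bar");
  assert(A == 1 && B == 2 && Comdats.insert("foo") == 1);
  // Mirrors getComdatID(): a zero ID means the comdat was never enumerated.
  assert(Comdats.idFor("baz") == 0);
  std::cout << "foo -> " << A << ", bar -> " << B << "\n";
}
```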
MDs.clear(); - I->getAllMetadataOtherThanDebugLoc(MDs); + I.getAllMetadataOtherThanDebugLoc(MDs); for (unsigned i = 0, e = MDs.size(); i != e; ++i) EnumerateMetadata(MDs[i].second); - if (!I->getDebugLoc().isUnknown()) { + if (!I.getDebugLoc().isUnknown()) { MDNode *Scope, *IA; - I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext()); + I.getDebugLoc().getScopeAndInlinedAt(Scope, IA, I.getContext()); if (Scope) EnumerateMetadata(Scope); if (IA) EnumerateMetadata(IA); } @@ -120,6 +117,12 @@ unsigned ValueEnumerator::getInstructionID(const Instruction *Inst) const { return I->second; } +unsigned ValueEnumerator::getComdatID(const Comdat *C) const { + unsigned ComdatID = Comdats.idFor(C); + assert(ComdatID && "Comdat not found!"); + return ComdatID; +} + void ValueEnumerator::setInstructionID(const Instruction *I) { InstructionMap[I] = InstructionCount++; } @@ -310,6 +313,10 @@ void ValueEnumerator::EnumerateValue(const Value *V) { return; } + if (auto *GO = dyn_cast<GlobalObject>(V)) + if (const Comdat *C = GO->getComdat()) + Comdats.insert(C); + // Enumerate the type of this value. EnumerateType(V->getType()); diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index d1ca15f..1c9f38e 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -16,6 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/UniqueVector.h" #include "llvm/IR/Attributes.h" #include <vector> @@ -25,6 +26,7 @@ class Type; class Value; class Instruction; class BasicBlock; +class Comdat; class Function; class Module; class MDNode; @@ -48,6 +50,10 @@ private: typedef DenseMap<const Value*, unsigned> ValueMapType; ValueMapType ValueMap; ValueList Values; + + typedef UniqueVector<const Comdat *> ComdatSetType; + ComdatSetType Comdats; + ValueList MDValues; SmallVector<const MDNode *, 8> FunctionLocalMDs; ValueMapType MDValueMap; @@ -139,6 +145,9 @@ public: return AttributeGroups; } + const ComdatSetType &getComdats() const { return Comdats; } + unsigned getComdatID(const Comdat *C) const; + /// getGlobalBasicBlockID - This returns the function-specific ID for the /// specified basic block. This is relatively expensive information, so it /// should only be used by rare constructs such as address-of-label. diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 6fc83a2..1bdf312 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -7,13 +7,14 @@ // //===----------------------------------------------------------------------===// // -// This file defines several CodeGen-specific LLVM IR analysis utilties. +// This file defines several CodeGen-specific LLVM IR analysis utilities. // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Analysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -474,8 +475,7 @@ static bool nextRealType(SmallVectorImpl<CompositeType *> &SubTypes, /// between it and the return. /// /// This function only tests target-independent requirements. 
-bool llvm::isInTailCallPosition(ImmutableCallSite CS, - const TargetLowering &TLI) { +bool llvm::isInTailCallPosition(ImmutableCallSite CS, const SelectionDAG &DAG) { const Instruction *I = CS.getInstruction(); const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); @@ -490,7 +490,7 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, // longjmp on x86), it can end up causing miscompilation that has not // been fully understood. if (!Ret && - (!TLI.getTargetMachine().Options.GuaranteedTailCallOpt || + (!DAG.getTarget().Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term))) return false; @@ -509,7 +509,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, return false; } - return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, TLI); + return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, + *DAG.getTarget().getTargetLowering()); } bool llvm::returnTypeIsEligibleForTailCall(const Function *F, diff --git a/lib/CodeGen/Android.mk b/lib/CodeGen/Android.mk index 7feb42c..05e5c45 100644 --- a/lib/CodeGen/Android.mk +++ b/lib/CodeGen/Android.mk @@ -24,11 +24,13 @@ codegen_SRC_FILES := \ GCMetadata.cpp \ GCMetadataPrinter.cpp \ GCStrategy.cpp \ + GlobalMerge.cpp \ IfConversion.cpp \ InlineSpiller.cpp \ InterferenceCache.cpp \ IntrinsicLowering.cpp \ JITCodeEmitter.cpp \ + JumpInstrTables.cpp \ LatencyPriorityQueue.cpp \ LexicalScopes.cpp \ LiveDebugVariables.cpp \ diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index 1cb0159..251f5ef 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -37,8 +37,7 @@ using namespace llvm; ARMException::ARMException(AsmPrinter *A) - : DwarfException(A), - shouldEmitCFI(false) {} + : EHStreamer(A), shouldEmitCFI(false) {} ARMException::~ARMException() {} @@ -100,7 +99,7 @@ void ARMException::endFunction(const MachineFunction *) { ATS.emitHandlerData(); // Emit actual exception table - EmitExceptionTable(); + emitExceptionTable(); } } @@ -108,7 +107,7 @@ void ARMException::endFunction(const MachineFunction *) { ATS.emitFnEnd(); } -void ARMException::EmitTypeInfos(unsigned TTypeEncoding) { +void ARMException::emitTypeInfos(unsigned TTypeEncoding) { const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); diff --git a/lib/CodeGen/AsmPrinter/Android.mk b/lib/CodeGen/AsmPrinter/Android.mk index f56eb6e..083cc0d 100644 --- a/lib/CodeGen/AsmPrinter/Android.mk +++ b/lib/CodeGen/AsmPrinter/Android.mk @@ -1,33 +1,33 @@ LOCAL_PATH := $(call my-dir) codegen_asmprinter_SRC_FILES := \ - AsmPrinter.cpp + AddressPool.cpp \ + ARMException.cpp \ + AsmPrinter.cpp \ + AsmPrinterDwarf.cpp \ + AsmPrinterInlineAsm.cpp \ + DbgValueHistoryCalculator.cpp \ + DIE.cpp \ + DIEHash.cpp \ + DwarfAccelTable.cpp \ + DwarfCFIException.cpp \ + DwarfDebug.cpp \ + DwarfFile.cpp \ + DwarfStringPool.cpp \ + DwarfUnit.cpp \ + EHStreamer.cpp \ + ErlangGCPrinter.cpp \ + OcamlGCPrinter.cpp \ + Win64Exception.cpp \ + WinCodeViewLineTables.cpp + + # For the host # ===================================================== include $(CLEAR_VARS) -LOCAL_SRC_FILES := \ - AddressPool.cpp \ - AsmPrinter.cpp \ - AsmPrinterDwarf.cpp \ - AsmPrinterInlineAsm.cpp \ - ARMException.cpp \ - DbgValueHistoryCalculator.cpp \ - DIE.cpp \ - DIEHash.cpp \ - DwarfAccelTable.cpp \ - DwarfCFIException.cpp \ - DwarfDebug.cpp \ - DwarfException.cpp \ - DwarfFile.cpp \ - 
DwarfStringPool.cpp \ - DwarfUnit.cpp \ - ErlangGCPrinter.cpp \ - OcamlGCPrinter.cpp \ - Win64Exception.cpp \ - WinCodeViewLineTables.cpp - +LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES) LOCAL_MODULE:= libLLVMAsmPrinter LOCAL_MODULE_TAGS := optional @@ -41,27 +41,7 @@ include $(BUILD_HOST_STATIC_LIBRARY) ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS)) include $(CLEAR_VARS) -LOCAL_SRC_FILES := \ - AddressPool.cpp \ - AsmPrinter.cpp \ - AsmPrinterDwarf.cpp \ - AsmPrinterInlineAsm.cpp \ - ARMException.cpp \ - DbgValueHistoryCalculator.cpp \ - DIE.cpp \ - DIEHash.cpp \ - DwarfAccelTable.cpp \ - DwarfCFIException.cpp \ - DwarfDebug.cpp \ - DwarfException.cpp \ - DwarfFile.cpp \ - DwarfStringPool.cpp \ - DwarfUnit.cpp \ - ErlangGCPrinter.cpp \ - OcamlGCPrinter.cpp \ - Win64Exception.cpp \ - WinCodeViewLineTables.cpp - +LOCAL_SRC_FILES := $(codegen_asmprinter_SRC_FILES) LOCAL_MODULE:= libLLVMAsmPrinter LOCAL_MODULE_TAGS := optional diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 7de9c6d..f80fdea 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/JumpInstrTableInfo.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -46,7 +47,6 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/GlobalStatus.h" @@ -232,23 +232,23 @@ bool AsmPrinter::doInitialization(Module &M) { } } - DwarfException *DE = nullptr; + EHStreamer *ES = nullptr; switch (MAI->getExceptionHandlingType()) { case ExceptionHandling::None: break; case ExceptionHandling::SjLj: case ExceptionHandling::DwarfCFI: - DE = new DwarfCFIException(this); + ES = new DwarfCFIException(this); break; case ExceptionHandling::ARM: - DE = new ARMException(this); + ES = new ARMException(this); break; - case ExceptionHandling::Win64: - DE = new Win64Exception(this); + case ExceptionHandling::WinEH: + ES = new Win64Exception(this); break; } - if (DE) - Handlers.push_back(HandlerInfo(DE, EHTimerName, DWARFGroupName)); + if (ES) + Handlers.push_back(HandlerInfo(ES, EHTimerName, DWARFGroupName)); return false; } @@ -709,13 +709,12 @@ AsmPrinter::CFIMoveType AsmPrinter::needsCFIMoves() { } bool AsmPrinter::needsSEHMoves() { - return MAI->getExceptionHandlingType() == ExceptionHandling::Win64 && + return MAI->getExceptionHandlingType() == ExceptionHandling::WinEH && MF->getFunction()->needsUnwindTableEntry(); } void AsmPrinter::emitCFIInstruction(const MachineInstr &MI) { - ExceptionHandling::ExceptionsType ExceptionHandlingType = - MAI->getExceptionHandlingType(); + ExceptionHandling ExceptionHandlingType = MAI->getExceptionHandlingType(); if (ExceptionHandlingType != ExceptionHandling::DwarfCFI && ExceptionHandlingType != ExceptionHandling::ARM) return; @@ -870,6 +869,8 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.AddBlankLine(); } +static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP); + bool AsmPrinter::doFinalization(Module &M) { // Emit global variables. 
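In the doInitialization() hunk above, the exception-handling kind from MCAsmInfo now selects among EHStreamer subclasses (DwarfCFIException, ARMException, Win64Exception), with SjLj sharing the CFI streamer and ExceptionHandling::Win64 renamed to WinEH. A standalone sketch of that selection, using stand-in classes rather than the real printers:

```cpp
// One enum drives which EH streamer is instantiated; None yields no
// handler at all. The *Sketch types are illustrative stand-ins.
#include <iostream>
#include <memory>

enum class ExceptionHandling { None, SjLj, DwarfCFI, ARM, WinEH };

struct EHStreamerSketch {
  virtual ~EHStreamerSketch() {}
  virtual const char *name() const = 0;
};
struct DwarfCFISketch : EHStreamerSketch {
  const char *name() const override { return "DwarfCFIException"; }
};
struct ARMSketch : EHStreamerSketch {
  const char *name() const override { return "ARMException"; }
};
struct Win64Sketch : EHStreamerSketch {
  const char *name() const override { return "Win64Exception"; }
};

std::unique_ptr<EHStreamerSketch> makeEHStreamer(ExceptionHandling EH) {
  switch (EH) {
  case ExceptionHandling::None:
    return nullptr;
  case ExceptionHandling::SjLj:    // SjLj shares the CFI streamer
  case ExceptionHandling::DwarfCFI:
    return std::unique_ptr<EHStreamerSketch>(new DwarfCFISketch());
  case ExceptionHandling::ARM:
    return std::unique_ptr<EHStreamerSketch>(new ARMSketch());
  case ExceptionHandling::WinEH:   // renamed from Win64 in this patch
    return std::unique_ptr<EHStreamerSketch>(new Win64Sketch());
  }
  return nullptr;
}

int main() {
  if (auto ES = makeEHStreamer(ExceptionHandling::WinEH))
    std::cout << "handler: " << ES->name() << "\n";
}
```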
for (const auto &G : M.globals()) @@ -887,6 +888,54 @@ bool AsmPrinter::doFinalization(Module &M) { EmitVisibility(Name, V, false); } + // Get information about jump-instruction tables to print. + JumpInstrTableInfo *JITI = getAnalysisIfAvailable<JumpInstrTableInfo>(); + + if (JITI && !JITI->getTables().empty()) { + unsigned Arch = Triple(getTargetTriple()).getArch(); + bool IsThumb = (Arch == Triple::thumb || Arch == Triple::thumbeb); + MCInst TrapInst; + TM.getInstrInfo()->getTrap(TrapInst); + for (const auto &KV : JITI->getTables()) { + uint64_t Count = 0; + for (const auto &FunPair : KV.second) { + // Emit the function labels to make this be a function entry point. + MCSymbol *FunSym = + OutContext.GetOrCreateSymbol(FunPair.second->getName()); + OutStreamer.EmitSymbolAttribute(FunSym, MCSA_Global); + // FIXME: JumpTableInstrInfo should store information about the required + // alignment of table entries and the size of the padding instruction. + EmitAlignment(3); + if (IsThumb) + OutStreamer.EmitThumbFunc(FunSym); + if (MAI->hasDotTypeDotSizeDirective()) + OutStreamer.EmitSymbolAttribute(FunSym, MCSA_ELF_TypeFunction); + OutStreamer.EmitLabel(FunSym); + + // Emit the jump instruction to transfer control to the original + // function. + MCInst JumpToFun; + MCSymbol *TargetSymbol = + OutContext.GetOrCreateSymbol(FunPair.first->getName()); + const MCSymbolRefExpr *TargetSymRef = + MCSymbolRefExpr::Create(TargetSymbol, MCSymbolRefExpr::VK_PLT, + OutContext); + TM.getInstrInfo()->getUnconditionalBranch(JumpToFun, TargetSymRef); + OutStreamer.EmitInstruction(JumpToFun, getSubtargetInfo()); + ++Count; + } + + // Emit enough padding instructions to fill up to the next power of two. + // This assumes that the trap instruction takes 8 bytes or fewer. + uint64_t Remaining = NextPowerOf2(Count) - Count; + for (uint64_t C = 0; C < Remaining; ++C) { + EmitAlignment(3); + OutStreamer.EmitInstruction(TrapInst, getSubtargetInfo()); + } + + } + } + // Emit module flags. SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags; M.getModuleFlagsMetadata(ModuleFlags); @@ -932,10 +981,6 @@ bool AsmPrinter::doFinalization(Module &M) { for (const auto &Alias : M.aliases()) { MCSymbol *Name = getSymbol(&Alias); - const GlobalValue *GV = Alias.getAliasee(); - assert(!GV->isDeclaration()); - MCSymbol *Target = getSymbol(GV); - if (Alias.hasExternalLinkage() || !MAI->getWeakRefDirective()) OutStreamer.EmitSymbolAttribute(Name, MCSA_Global); else if (Alias.hasWeakLinkage() || Alias.hasLinkOnceLinkage()) @@ -947,7 +992,7 @@ bool AsmPrinter::doFinalization(Module &M) { // Emit the directives as assignments aka .set: OutStreamer.EmitAssignment(Name, - MCSymbolRefExpr::Create(Target, OutContext)); + lowerConstant(Alias.getAliasee(), *this)); } } @@ -1248,7 +1293,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) { } // Ignore debug and non-emitted data. This handles llvm.compiler.used. 
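The jump-instruction-table emission above pads each table with trap instructions until the entry count reaches the next power of two; the code's own comment notes the assumption that a trap takes 8 bytes or fewer. The padding arithmetic, shown standalone with the same bit-smearing recipe as llvm::NextPowerOf2 (which returns the power of two strictly greater than its argument, so an exact power of two still doubles):

```cpp
// Padding rule for the jump-instruction tables emitted above.
#include <cstdint>
#include <iostream>

uint64_t nextPowerOf2(uint64_t A) {
  // Smear the highest set bit rightward, then add one.
  A |= (A >> 1);
  A |= (A >> 2);
  A |= (A >> 4);
  A |= (A >> 8);
  A |= (A >> 16);
  A |= (A >> 32);
  return A + 1;
}

int main() {
  for (uint64_t Count : {1u, 3u, 4u, 5u, 9u}) {
    uint64_t Remaining = nextPowerOf2(Count) - Count;
    std::cout << Count << " entries -> " << Remaining << " trap slot(s), "
              << "table size " << Count + Remaining << "\n";
  }
}
```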
- if (GV->getSection() == "llvm.metadata" || + if (StringRef(GV->getSection()) == "llvm.metadata" || GV->hasAvailableExternallyLinkage()) return true; @@ -1350,14 +1395,17 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { for (Structor &S : Structors) { const TargetLoweringObjectFile &Obj = getObjFileLowering(); const MCSymbol *KeySym = nullptr; - const MCSection *KeySec = nullptr; - if (S.ComdatKey) { - KeySym = getSymbol(S.ComdatKey); - KeySec = getObjFileLowering().SectionForGlobal(S.ComdatKey, *Mang, TM); + if (GlobalValue *GV = S.ComdatKey) { + if (GV->hasAvailableExternallyLinkage()) + // If the associated variable is available_externally, some other TU + // will provide its dynamic initializer. + continue; + + KeySym = getSymbol(GV); } const MCSection *OutputSection = - (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym, KeySec) - : Obj.getStaticDtorSection(S.Priority, KeySym, KeySec)); + (isCtor ? Obj.getStaticCtorSection(S.Priority, KeySym) + : Obj.getStaticDtorSection(S.Priority, KeySym)); OutStreamer.SwitchSection(OutputSection); if (OutStreamer.getCurrentSection() != OutStreamer.getPreviousSection()) EmitAlignment(Align); @@ -1817,7 +1865,10 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { SmallString<8> StrVal; CFP->getValueAPF().toString(StrVal); - CFP->getType()->print(AP.OutStreamer.GetCommentOS()); + if (CFP->getType()) + CFP->getType()->print(AP.OutStreamer.GetCommentOS()); + else + AP.OutStreamer.GetCommentOS() << "Printing <null> Type"; AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n'; } @@ -1830,7 +1881,8 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { // PPC's long double has odd notions of endianness compared to how LLVM // handles it: p[0] goes first for *big* endian on PPC. - if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) { + if (AP.TM.getDataLayout()->isBigEndian() && + !CFP->getType()->isPPC_FP128Ty()) { int Chunk = API.getNumWords() - 1; if (TrailingBytes) diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index b4ef185..f555f21 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -10,10 +10,10 @@ add_llvm_library(LLVMAsmPrinter DwarfAccelTable.cpp DwarfCFIException.cpp DwarfDebug.cpp - DwarfException.cpp DwarfFile.cpp DwarfStringPool.cpp DwarfUnit.cpp + EHStreamer.cpp ErlangGCPrinter.cpp OcamlGCPrinter.cpp Win64Exception.cpp diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp index 6103254..a66d08e 100644 --- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp +++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp @@ -15,6 +15,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include <algorithm> #include <map> +#include <set> #define DEBUG_TYPE "dwarfdebug" @@ -110,45 +111,73 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo, RegVars.erase(I); } -// \brief Terminate location ranges for all variables, described by registers -// clobbered by @MI. -static void clobberRegisterUses(RegDescribedVarsMap &RegVars, - const MachineInstr &MI, - const TargetRegisterInfo *TRI, - DbgValueHistoryMap &HistMap) { +// \brief Collect all registers clobbered by @MI and insert them to @Regs. 
+static void collectClobberedRegisters(const MachineInstr &MI, + const TargetRegisterInfo *TRI, + std::set<unsigned> &Regs) { for (const MachineOperand &MO : MI.operands()) { if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue; - for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); - ++AI) { - unsigned RegNo = *AI; - clobberRegisterUses(RegVars, RegNo, HistMap, MI); - } + for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) + Regs.insert(*AI); } } -// \brief Terminate the location range for all register-described variables -// by inserting @ClobberingInstr to their history. -static void clobberAllRegistersUses(RegDescribedVarsMap &RegVars, - DbgValueHistoryMap &HistMap, - const MachineInstr &ClobberingInstr) { - for (const auto &I : RegVars) - for (const auto &Var : I.second) - HistMap.endInstrRange(Var, ClobberingInstr); - RegVars.clear(); +// \brief Returns the first instruction in @MBB which corresponds to +// the function epilogue, or nullptr if @MBB doesn't contain an epilogue. +static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) { + auto LastMI = MBB.getLastNonDebugInstr(); + if (LastMI == MBB.end() || !LastMI->isReturn()) + return nullptr; + // Assume that epilogue starts with instruction having the same debug location + // as the return instruction. + DebugLoc LastLoc = LastMI->getDebugLoc(); + auto Res = LastMI; + for (MachineBasicBlock::const_reverse_iterator I(std::next(LastMI)); I != MBB.rend(); + ++I) { + if (I->getDebugLoc() != LastLoc) + return Res; + Res = std::prev(I.base()); + } + // If all instructions have the same debug location, assume whole MBB is + // an epilogue. + return MBB.begin(); +} + +// \brief Collect registers that are modified in the function body (their +// contents is changed only in the prologue and epilogue). +static void collectChangingRegs(const MachineFunction *MF, + const TargetRegisterInfo *TRI, + std::set<unsigned> &Regs) { + for (const auto &MBB : *MF) { + auto FirstEpilogueInst = getFirstEpilogueInst(MBB); + bool IsInEpilogue = false; + for (const auto &MI : MBB) { + IsInEpilogue |= &MI == FirstEpilogueInst; + if (!MI.getFlag(MachineInstr::FrameSetup) && !IsInEpilogue) + collectClobberedRegisters(MI, TRI, Regs); + } + } } void calculateDbgValueHistory(const MachineFunction *MF, const TargetRegisterInfo *TRI, DbgValueHistoryMap &Result) { - RegDescribedVarsMap RegVars; + std::set<unsigned> ChangingRegs; + collectChangingRegs(MF, TRI, ChangingRegs); + RegDescribedVarsMap RegVars; for (const auto &MBB : *MF) { for (const auto &MI : MBB) { if (!MI.isDebugValue()) { // Not a DBG_VALUE instruction. It may clobber registers which describe // some variables. - clobberRegisterUses(RegVars, MI, TRI, Result); + std::set<unsigned> MIClobberedRegs; + collectClobberedRegisters(MI, TRI, MIClobberedRegs); + for (unsigned RegNo : MIClobberedRegs) { + if (ChangingRegs.count(RegNo)) + clobberRegisterUses(RegVars, RegNo, Result, MI); + } continue; } @@ -167,8 +196,10 @@ void calculateDbgValueHistory(const MachineFunction *MF, // Make sure locations for register-described variables are valid only // until the end of the basic block (unless it's the last basic block, in // which case let their liveness run off to the end of the function). 
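getFirstEpilogueInst() above is a heuristic: walking back from a block's terminating return, instructions that share the return's debug location are assumed to belong to the epilogue, and a block where every instruction shares one location is treated as all epilogue. A standalone sketch over a flat instruction list, with illustrative types:

```cpp
// Epilogue detection by debug location, mirroring the heuristic above.
#include <cstddef>
#include <iostream>
#include <vector>

struct Inst { int DebugLoc; bool IsReturn; };

// Index of the first epilogue instruction, or -1 if the block does not
// end in a return.
int firstEpilogueInst(const std::vector<Inst> &MBB) {
  if (MBB.empty() || !MBB.back().IsReturn)
    return -1;
  int LastLoc = MBB.back().DebugLoc;
  std::size_t Res = MBB.size() - 1;
  for (std::size_t I = MBB.size() - 1; I-- > 0;) { // scan backwards
    if (MBB[I].DebugLoc != LastLoc)
      return static_cast<int>(Res);
    Res = I;
  }
  return 0; // whole block shares one location: treat it all as epilogue
}

int main() {
  //                       body         body         epilogue     return
  std::vector<Inst> MBB = {{10, false}, {10, false}, {42, false}, {42, true}};
  std::cout << "epilogue starts at index " << firstEpilogueInst(MBB) << "\n";
}
```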
- if (!MBB.empty() && &MBB != &MF->back()) - clobberAllRegistersUses(RegVars, Result, MBB.back()); + if (!MBB.empty() && &MBB != &MF->back()) { + for (unsigned RegNo : ChangingRegs) + clobberRegisterUses(RegVars, RegNo, Result, MBB.back()); + } } } diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 30312ac..74215aa 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -40,9 +40,8 @@ using namespace llvm; DwarfCFIException::DwarfCFIException(AsmPrinter *A) - : DwarfException(A), - shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false), - moveTypeModule(AsmPrinter::CFI_M_None) {} + : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false), + shouldEmitMoves(false), moveTypeModule(AsmPrinter::CFI_M_None) {} DwarfCFIException::~DwarfCFIException() {} @@ -59,26 +58,16 @@ void DwarfCFIException::endModule() { unsigned PerEncoding = TLOF.getPersonalityEncoding(); - if ((PerEncoding & 0x70) != dwarf::DW_EH_PE_pcrel) + if ((PerEncoding & 0x80) != dwarf::DW_EH_PE_indirect) return; // Emit references to all used personality functions - bool AtLeastOne = false; const std::vector<const Function*> &Personalities = MMI->getPersonalities(); for (size_t i = 0, e = Personalities.size(); i != e; ++i) { if (!Personalities[i]) continue; MCSymbol *Sym = Asm->getSymbol(Personalities[i]); TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); - AtLeastOne = true; - } - - if (AtLeastOne && !TLOF.isFunctionEHFrameSymbolPrivate()) { - // This is a temporary hack to keep sections in the same order they - // were before. This lets us produce bit identical outputs while - // transitioning to CFI. - Asm->OutStreamer.SwitchSection( - const_cast<TargetLoweringObjectFile&>(TLOF).getEHFrameSection()); } } @@ -123,9 +112,17 @@ void DwarfCFIException::beginFunction(const MachineFunction *MF) { TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); - Asm->OutStreamer.EmitDebugLabel - (Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); + MCSymbol *EHBegin = + Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber()); + if (Asm->MAI->useAssignmentForEHBegin()) { + MCContext &Ctx = Asm->OutContext; + MCSymbol *CurPos = Ctx.CreateTempSymbol(); + Asm->OutStreamer.EmitLabel(CurPos); + Asm->OutStreamer.EmitAssignment(EHBegin, + MCSymbolRefExpr::Create(CurPos, Ctx)); + } else { + Asm->OutStreamer.EmitLabel(EHBegin); + } // Provide LSDA information. if (!shouldEmitLSDA) @@ -153,5 +150,5 @@ void DwarfCFIException::endFunction(const MachineFunction *) { // Map all labels and get rid of any dead landing pads. 
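The endModule() change above swaps the personality test from the pc-relative application bits (mask 0x70) to the indirect bit (0x80) of the DWARF EH pointer encoding, so personality values are materialized exactly when the reference is indirect. The bit test, standalone, with the real DW_EH_PE constants:

```cpp
// A DWARF EH pointer-encoding byte packs a format part (low nibble) with
// application bits; 0x80 is DW_EH_PE_indirect, and the 0x70 nibble holds
// pcrel/textrel/datarel and friends.
#include <cstdio>

enum : unsigned {
  DW_EH_PE_pcrel    = 0x10,
  DW_EH_PE_indirect = 0x80,
  DW_EH_PE_sdata4   = 0x0b,
};

bool needsPersonalityValues(unsigned PerEncoding) {
  // New test: only indirect references need personality values emitted.
  return (PerEncoding & 0x80) == DW_EH_PE_indirect;
}

int main() {
  unsigned Indirect = DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
  unsigned Direct   = DW_EH_PE_pcrel | DW_EH_PE_sdata4;
  std::printf("indirect|pcrel|sdata4: %s\n",
              needsPersonalityValues(Indirect) ? "emit" : "skip");
  std::printf("pcrel|sdata4:          %s\n",
              needsPersonalityValues(Direct) ? "emit" : "skip");
}
```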
MMI->TidyLandingPads(); - EmitExceptionTable(); + emitExceptionTable(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 2a0615d..77860c0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -98,10 +98,6 @@ DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, clEnumVal(Disable, "Disabled"), clEnumValEnd), cl::init(Default)); -static cl::opt<unsigned> -DwarfVersionNumber("dwarf-version", cl::Hidden, - cl::desc("Generate DWARF for dwarf version."), cl::init(0)); - static const char *const DWARFGroupName = "DWARF Emission"; static const char *const DbgTimerName = "DWARF Debug Writer"; @@ -209,9 +205,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) else HasDwarfPubSections = DwarfPubSections == Enable; + unsigned DwarfVersionNumber = Asm->TM.Options.MCOptions.DwarfVersion; DwarfVersion = DwarfVersionNumber ? DwarfVersionNumber : MMI->getModule()->getDwarfVersion(); + Asm->OutStreamer.getContext().setDwarfVersion(DwarfVersion); + { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(); @@ -531,8 +530,7 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &TheCU, // shouldn't be found by lookup. AbsDef = &SPCU.createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, DIDescriptor()); - SPCU.applySubprogramAttributes(SP, *AbsDef); - SPCU.addGlobalName(SP.getName(), *AbsDef, resolve(SP.getContext())); + SPCU.applySubprogramAttributesToDefinition(SP, *AbsDef); SPCU.addUInt(*AbsDef, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); createAndAddScopeChildren(SPCU, Scope, *AbsDef); @@ -732,6 +730,8 @@ void DwarfDebug::beginModule() { const Module *M = MMI->getModule(); + FunctionDIs = makeSubprogramMap(*M); + // If module has named metadata anchors then use them, otherwise scan the // module using debug info finder to collect debug info. NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); @@ -784,6 +784,26 @@ void DwarfDebug::beginModule() { SectionMap[Asm->getObjFileLowering().getTextSection()]; } +void DwarfDebug::finishVariableDefinitions() { + for (const auto &Var : ConcreteVariables) { + DIE *VariableDie = Var->getDIE(); + // FIXME: There shouldn't be any variables without DIEs. + if (!VariableDie) + continue; + // FIXME: Consider the time-space tradeoff of just storing the unit pointer + // in the ConcreteVariables list, rather than looking it up again here. + // DIE::getUnit isn't simple - it walks parent pointers, etc. + DwarfCompileUnit *Unit = lookupUnit(VariableDie->getUnit()); + assert(Unit); + DbgVariable *AbsVar = getExistingAbstractVariable(Var->getVariable()); + if (AbsVar && AbsVar->getDIE()) { + Unit->addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, + *AbsVar->getDIE()); + } else + Unit->applyVariableAttributes(*Var, *VariableDie); + } +} + void DwarfDebug::finishSubprogramDefinitions() { const Module *M = MMI->getModule(); @@ -811,8 +831,7 @@ void DwarfDebug::finishSubprogramDefinitions() { // inlined versions during codegen. 
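With the -dwarf-version cl::opt removed above, the emitted DWARF version now comes from TM.Options.MCOptions.DwarfVersion, falling back to the module's recorded version when the option is unset, and is also pushed into the MCContext. The precedence itself, as a standalone sketch:

```cpp
// A nonzero MC-option value (set through the target's MC options) wins;
// otherwise use the version recorded in the module.
#include <iostream>

unsigned resolveDwarfVersion(unsigned MCOptionVersion,
                             unsigned ModuleVersion) {
  // Zero means "not specified".
  return MCOptionVersion ? MCOptionVersion : ModuleVersion;
}

int main() {
  std::cout << resolveDwarfVersion(0, 4) << "\n"; // unset option: module's 4
  std::cout << resolveDwarfVersion(5, 4) << "\n"; // explicit option: 5
}
```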
D = SPCU->getOrCreateSubprogramDIE(SP); // And attach the attributes - SPCU->applySubprogramAttributes(SP, *D); - SPCU->addGlobalName(SP.getName(), *D, resolve(SP.getContext())); + SPCU->applySubprogramAttributesToDefinition(SP, *D); } } } @@ -850,8 +869,10 @@ void DwarfDebug::collectDeadVariables() { for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { DIVariable DV(Variables.getElement(vi)); assert(DV.isVariable()); - DbgVariable NewVar(DV, nullptr, this); - SPDIE->addChild(SPCU->constructVariableDIE(NewVar)); + DbgVariable NewVar(DV, this); + auto VariableDie = SPCU->constructVariableDIE(NewVar); + SPCU->applyVariableAttributes(NewVar, *VariableDie); + SPDIE->addChild(std::move(VariableDie)); } } } @@ -861,6 +882,8 @@ void DwarfDebug::collectDeadVariables() { void DwarfDebug::finalizeModuleInfo() { finishSubprogramDefinitions(); + finishVariableDefinitions(); + // Collect info for variables that were optimized out. collectDeadVariables(); @@ -1017,9 +1040,9 @@ void DwarfDebug::endModule() { emitDebugInfoDWO(); emitDebugAbbrevDWO(); emitDebugLineDWO(); + emitDebugLocDWO(); // Emit DWO addresses. AddrPool.emit(*Asm, Asm->getObjFileLowering().getDwarfAddrSection()); - emitDebugLocDWO(); } else // Emit info into a debug loc section. emitDebugLoc(); @@ -1047,27 +1070,51 @@ void DwarfDebug::endModule() { } // Find abstract variable, if any, associated with Var. -DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, - DebugLoc ScopeLoc) { - return findAbstractVariable(DV, ScopeLoc.getScope(DV->getContext())); -} - -DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, - const MDNode *ScopeNode) { +DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV, + DIVariable &Cleansed) { LLVMContext &Ctx = DV->getContext(); // More then one inlined variable corresponds to one abstract variable. - DIVariable Var = cleanseInlinedVariable(DV, Ctx); - auto I = AbstractVariables.find(Var); + // FIXME: This duplication of variables when inlining should probably be + // removed. It's done to allow each DIVariable to describe its location + // because the DebugLoc on the dbg.value/declare isn't accurate. We should + // make it accurate then remove this duplication/cleansing stuff. 
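The refactoring here splits abstract-variable handling into lookup (getExistingAbstractVariable, which first canonicalizes the DIVariable via cleanseInlinedVariable) and creation (createAbstractVariable), with the ensure* wrappers differing only in how the scope is obtained. A standalone sketch of the memoize-on-canonical-key shape — cleanse() below is a made-up stand-in for the real cleansing:

```cpp
// All inlined copies of a variable collapse to one canonical key, so at
// most one abstract entry ever exists per source variable.
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct AbstractVar { std::string Name; };

static std::map<std::string, std::unique_ptr<AbstractVar>> AbstractVariables;

static std::string cleanse(const std::string &Var) {
  return Var.substr(0, Var.find('@')); // strip an "...@inlinedAt" suffix
}

static AbstractVar *getExistingAbstractVariable(const std::string &Var,
                                                std::string &Cleansed) {
  Cleansed = cleanse(Var);
  auto I = AbstractVariables.find(Cleansed);
  return I == AbstractVariables.end() ? nullptr : I->second.get();
}

static void ensureAbstractVariableIsCreated(const std::string &Var) {
  std::string Cleansed;
  if (getExistingAbstractVariable(Var, Cleansed))
    return; // several inlined variables map to one abstract variable
  AbstractVariables[Cleansed] =
      std::unique_ptr<AbstractVar>(new AbstractVar{Cleansed});
}

int main() {
  ensureAbstractVariableIsCreated("x@inline1");
  ensureAbstractVariableIsCreated("x@inline2"); // reuses the "x" entry
  std::cout << AbstractVariables.size() << " abstract variable(s)\n";
}
```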
+ Cleansed = cleanseInlinedVariable(DV, Ctx); + auto I = AbstractVariables.find(Cleansed); if (I != AbstractVariables.end()) return I->second.get(); + return nullptr; +} - LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode); - if (!Scope) - return nullptr; +DbgVariable *DwarfDebug::getExistingAbstractVariable(const DIVariable &DV) { + DIVariable Cleansed; + return getExistingAbstractVariable(DV, Cleansed); +} - auto AbsDbgVariable = make_unique<DbgVariable>(Var, nullptr, this); +void DwarfDebug::createAbstractVariable(const DIVariable &Var, + LexicalScope *Scope) { + auto AbsDbgVariable = make_unique<DbgVariable>(Var, this); addScopeVariable(Scope, AbsDbgVariable.get()); - return (AbstractVariables[Var] = std::move(AbsDbgVariable)).get(); + AbstractVariables[Var] = std::move(AbsDbgVariable); +} + +void DwarfDebug::ensureAbstractVariableIsCreated(const DIVariable &DV, + const MDNode *ScopeNode) { + DIVariable Cleansed = DV; + if (getExistingAbstractVariable(DV, Cleansed)) + return; + + createAbstractVariable(Cleansed, LScopes.getOrCreateAbstractScope(ScopeNode)); +} + +void +DwarfDebug::ensureAbstractVariableIsCreatedIfScoped(const DIVariable &DV, + const MDNode *ScopeNode) { + DIVariable Cleansed = DV; + if (getExistingAbstractVariable(DV, Cleansed)) + return; + + if (LexicalScope *Scope = LScopes.findAbstractScope(ScopeNode)) + createAbstractVariable(Cleansed, Scope); } // If Var is a current function argument then add it to CurrentFnArguments list. @@ -1106,11 +1153,11 @@ void DwarfDebug::collectVariableInfoFromMMITable( if (!Scope) continue; - DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VI.Loc); - DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this); + ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); + ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this)); + DbgVariable *RegVar = ConcreteVariables.back().get(); RegVar->setFrameIndex(VI.Slot); - if (!addCurrentFnArgument(RegVar, Scope)) - addScopeVariable(Scope, RegVar); + addScopeVariable(Scope, RegVar); } } @@ -1175,18 +1222,14 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) { Processed.insert(DV); const MachineInstr *MInsn = Ranges.front().first; assert(MInsn->isDebugValue() && "History must begin with debug value"); - DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc()); - DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this); - if (!addCurrentFnArgument(RegVar, Scope)) - addScopeVariable(Scope, RegVar); - if (AbsVar) - AbsVar->setMInsn(MInsn); + ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); + ConcreteVariables.push_back(make_unique<DbgVariable>(MInsn, this)); + DbgVariable *RegVar = ConcreteVariables.back().get(); + addScopeVariable(Scope, RegVar); // Check if the first DBG_VALUE is valid for the rest of the function. - if (Ranges.size() == 1 && Ranges.front().second == nullptr) { - RegVar->setMInsn(MInsn); + if (Ranges.size() == 1 && Ranges.front().second == nullptr) continue; - } // Handle multiple DBG_VALUE instructions describing one variable. 
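collectVariableInfo() above now parks every DbgVariable in ConcreteVariables, a vector of unique_ptrs, and hands only raw pointers to the per-scope maps; endFunction() later becomes a pair of clear() calls instead of the old selective deletes. A standalone sketch of that single-owner layout, with illustrative names:

```cpp
// One container owns everything; the lookup structures only borrow.
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

struct DbgVar { std::string Name; };

class FunctionDebugInfo {
  std::vector<std::unique_ptr<DbgVar>> ConcreteVariables;      // owns
  std::map<std::string, std::vector<DbgVar *>> ScopeVariables; // borrows
public:
  DbgVar *addVariable(const std::string &Scope, const std::string &Name) {
    ConcreteVariables.push_back(std::unique_ptr<DbgVar>(new DbgVar{Name}));
    DbgVar *Raw = ConcreteVariables.back().get();
    ScopeVariables[Scope].push_back(Raw);
    return Raw;
  }
  void endFunction() {
    ScopeVariables.clear();    // drop the borrowed pointers first
    ConcreteVariables.clear(); // then destroy everything in one place
  }
};

int main() {
  FunctionDebugInfo FDI;
  FDI.addVariable("main", "x");
  FDI.addVariable("main", "y");
  FDI.endFunction();
  std::cout << "cleaned up without manual deletes\n";
}
```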
RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); @@ -1205,6 +1248,11 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) { if (Begin->getNumOperands() > 1 && Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) continue; + DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin); + if (End != nullptr) + DEBUG(dbgs() << "\t" << *End); + else + DEBUG(dbgs() << "\tNULL\n"); const MCSymbol *StartLabel = getLabelBeforeInsn(Begin); assert(StartLabel && "Forgot label before DBG_VALUE starting a range!"); @@ -1218,8 +1266,6 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) { EndLabel = getLabelBeforeInsn(std::next(I)->first); assert(EndLabel && "Forgot label after instruction ending a range!"); - DEBUG(dbgs() << "DotDebugLoc Pair:\n" - << "\t" << *Begin << "\t" << *End << "\n"); DebugLocEntry Loc(StartLabel, EndLabel, getDebugLocValue(Begin), TheCU); if (DebugLoc.empty() || !DebugLoc.back().Merge(Loc)) DebugLoc.push_back(std::move(Loc)); @@ -1233,11 +1279,11 @@ DwarfDebug::collectVariableInfo(SmallPtrSet<const MDNode *, 16> &Processed) { assert(DV.isVariable()); if (!Processed.insert(DV)) continue; - if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) - addScopeVariable( - Scope, - new DbgVariable(DV, findAbstractVariable(DV, Scope->getScopeNode()), - this)); + if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) { + ensureAbstractVariableIsCreatedIfScoped(DV, Scope->getScopeNode()); + ConcreteVariables.push_back(make_unique<DbgVariable>(DV, this)); + addScopeVariable(Scope, ConcreteVariables.back().get()); + } } } @@ -1371,6 +1417,10 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; + auto DI = FunctionDIs.find(MF->getFunction()); + if (DI == FunctionDIs.end()) + return; + // Grab the lexical scopes for the function, if we don't have any of those // then we're not going to be able to do anything. LScopes.initialize(*MF); @@ -1386,6 +1436,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // belongs to so that we add to the correct per-cu line table in the // non-asm case. LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + // FnScope->getScopeNode() and DI->second should represent the same function, + // though they may not be the same MDNode due to inline functions merged in + // LTO where the debug info metadata still differs (either due to distinct + // written differences - two versions of a linkonce_odr function + // written/copied into two separate files, or some sub-optimal metadata that + // isn't structurally identical (see: file path/name info from clang, which + // includes the directory of the cpp file being built, even when the file name + // is absolute (such as an <> lookup header))) DwarfCompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); if (Asm->OutStreamer.hasRawTextSupport()) @@ -1440,6 +1498,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { } void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { + if (addCurrentFnArgument(Var, LS)) + return; SmallVectorImpl<DbgVariable *> &Vars = ScopeVariables[LS]; DIVariable DV = Var->getVariable(); // Variables with positive arg numbers are parameters. 
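addScopeVariable() now tries addCurrentFnArgument() itself and returns early for parameters, so callers no longer duplicate that check. A standalone sketch of the routing — the slot-by-argument-number behavior of addCurrentFnArgument is simplified here:

```cpp
// Parameters are diverted to a per-function argument list, ordered by
// argument number; everything else lands in the scope's variable list.
#include <iostream>
#include <string>
#include <vector>

struct Var { std::string Name; unsigned ArgNum; }; // ArgNum 0 = local

std::vector<Var> CurrentFnArguments;
std::vector<Var> ScopeVariables;

bool addCurrentFnArgument(const Var &V) {
  if (V.ArgNum == 0)
    return false; // not a parameter of the current function
  if (CurrentFnArguments.size() < V.ArgNum)
    CurrentFnArguments.resize(V.ArgNum, Var{"", 0});
  CurrentFnArguments[V.ArgNum - 1] = V; // keep parameters in ArgNum order
  return true;
}

void addScopeVariable(const Var &V) {
  if (addCurrentFnArgument(V))
    return; // parameters never reach the per-scope list
  ScopeVariables.push_back(V);
}

int main() {
  addScopeVariable({"argc", 1});
  addScopeVariable({"tmp", 0});
  std::cout << CurrentFnArguments.size() << " argument(s), "
            << ScopeVariables.size() << " local(s)\n";
}
```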
@@ -1481,7 +1541,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { assert(CurFn == MF); assert(CurFn != nullptr); - if (!MMI->hasDebugInfo() || LScopes.empty()) { + if (!MMI->hasDebugInfo() || LScopes.empty() || + !FunctionDIs.count(MF->getFunction())) { // If we don't have a lexical scope for this function then there will // be a hole in the range information. Keep note of this by setting the // previously used section to nullptr. @@ -1517,7 +1578,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { assert(DV && DV.isVariable()); if (!ProcessedVars.insert(DV)) continue; - findAbstractVariable(DV, DV.getContext()); + ensureAbstractVariableIsCreated(DV, DV.getContext()); } constructAbstractSubprogramScopeDIE(TheCU, AScope); } @@ -1536,12 +1597,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Ownership of DbgVariables is a bit subtle - ScopeVariables owns all the // DbgVariables except those that are also in AbstractVariables (since they // can be used cross-function) - for (const auto &I : ScopeVariables) - for (const auto *Var : I.second) - if (!AbstractVariables.count(Var->getVariable()) || Var->getAbstractVariable()) - delete Var; ScopeVariables.clear(); - DeleteContainerPointers(CurrentFnArguments); + CurrentFnArguments.clear(); DbgValues.clear(); LabelsBeforeInsn.clear(); LabelsAfterInsn.clear(); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 2f5abc8..ffe4843 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -27,6 +27,7 @@ #include "llvm/ADT/StringMap.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugLoc.h" #include "llvm/MC/MachineLocation.h" @@ -71,16 +72,21 @@ class DbgVariable { DIVariable Var; // Variable Descriptor. DIE *TheDIE; // Variable DIE. unsigned DotDebugLocOffset; // Offset in DotDebugLocEntries. - DbgVariable *AbsVar; // Corresponding Abstract variable, if any. const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. int FrameIndex; DwarfDebug *DD; public: - // AbsVar may be NULL. - DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD) - : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), AbsVar(AV), - MInsn(nullptr), FrameIndex(~0), DD(DD) {} + /// Construct a DbgVariable from a DIVariable. + DbgVariable(DIVariable V, DwarfDebug *DD) + : Var(V), TheDIE(nullptr), DotDebugLocOffset(~0U), MInsn(nullptr), + FrameIndex(~0), DD(DD) {} + + /// Construct a DbgVariable from a DEBUG_VALUE. + /// AbstractVar may be NULL. + DbgVariable(const MachineInstr *DbgValue, DwarfDebug *DD) + : Var(DbgValue->getDebugVariable()), TheDIE(nullptr), + DotDebugLocOffset(~0U), MInsn(DbgValue), FrameIndex(~0), DD(DD) {} // Accessors. DIVariable getVariable() const { return Var; } @@ -89,9 +95,7 @@ public: void setDotDebugLocOffset(unsigned O) { DotDebugLocOffset = O; } unsigned getDotDebugLocOffset() const { return DotDebugLocOffset; } StringRef getName() const { return Var.getName(); } - DbgVariable *getAbstractVariable() const { return AbsVar; } const MachineInstr *getMInsn() const { return MInsn; } - void setMInsn(const MachineInstr *M) { MInsn = M; } int getFrameIndex() const { return FrameIndex; } void setFrameIndex(int FI) { FrameIndex = FI; } // Translate tag to proper Dwarf tag. @@ -200,6 +204,7 @@ class DwarfDebug : public AsmPrinterHandler { // Collection of abstract variables. 
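The DbgVariable changes in this header replace the single constructor taking an abstract-variable pointer with two entry points: one from a DIVariable alone and one from a DBG_VALUE instruction, whose variable is read off the instruction itself; the abstract-origin link is resolved later by finishVariableDefinitions(). A sketch of that two-constructor shape, with stand-in types:

```cpp
// Two ways to build a variable record; provenance is recorded by whether
// an instruction pointer is present.
#include <cassert>
#include <iostream>
#include <string>

struct DebugValueInst { std::string Variable; };

class DbgVariableSketch {
  std::string Var;
  const DebugValueInst *MInsn; // null when built from metadata only
public:
  explicit DbgVariableSketch(std::string V)
      : Var(std::move(V)), MInsn(nullptr) {}
  explicit DbgVariableSketch(const DebugValueInst *DbgValue)
      : Var(DbgValue->Variable), MInsn(DbgValue) {}
  bool hasLocation() const { return MInsn != nullptr; }
  const std::string &name() const { return Var; }
};

int main() {
  DebugValueInst DV{"x"};
  DbgVariableSketch FromMD("x"), FromInst(&DV);
  assert(FromMD.name() == FromInst.name());
  std::cout << "metadata-only: " << FromMD.hasLocation()
            << ", from DBG_VALUE: " << FromInst.hasLocation() << "\n";
}
```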
DenseMap<const MDNode *, std::unique_ptr<DbgVariable>> AbstractVariables; + SmallVector<std::unique_ptr<DbgVariable>, 64> ConcreteVariables; // Collection of DebugLocEntry. Stored in a linked list so that DIELocLists // can refer to them in spite of insertions into this list. @@ -325,6 +330,8 @@ class DwarfDebug : public AsmPrinterHandler { DwarfAccelTable AccelNamespace; DwarfAccelTable AccelTypes; + DenseMap<const Function *, DISubprogram> FunctionDIs; + MCDwarfDwoLineTable *getDwoLineTable(const DwarfCompileUnit &); void addScopeVariable(LexicalScope *LS, DbgVariable *Var); @@ -334,8 +341,14 @@ class DwarfDebug : public AsmPrinterHandler { } /// \brief Find abstract variable associated with Var. - DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc); - DbgVariable *findAbstractVariable(DIVariable &Var, const MDNode *Scope); + DbgVariable *getExistingAbstractVariable(const DIVariable &DV, + DIVariable &Cleansed); + DbgVariable *getExistingAbstractVariable(const DIVariable &DV); + void createAbstractVariable(const DIVariable &DV, LexicalScope *Scope); + void ensureAbstractVariableIsCreated(const DIVariable &Var, + const MDNode *Scope); + void ensureAbstractVariableIsCreatedIfScoped(const DIVariable &Var, + const MDNode *Scope); /// \brief Find DIE for the given subprogram and attach appropriate /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global @@ -389,6 +402,8 @@ class DwarfDebug : public AsmPrinterHandler { /// \brief Collect info for variables that were optimized out. void collectDeadVariables(); + void finishVariableDefinitions(); + void finishSubprogramDefinitions(); /// \brief Finish off debug information after all functions have been diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index f792482..0440fce 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -14,138 +14,14 @@ #ifndef LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H #define LLVM_CODEGEN_ASMPRINTER_DWARFEXCEPTION_H -#include "AsmPrinterHandler.h" -#include "llvm/ADT/DenseMap.h" +#include "EHStreamer.h" #include "llvm/CodeGen/AsmPrinter.h" -#include <vector> namespace llvm { - -template <typename T> class SmallVectorImpl; -struct LandingPadInfo; -class MachineModuleInfo; -class MachineInstr; class MachineFunction; -class MCAsmInfo; -class MCExpr; -class MCSymbol; -class Function; class ARMTargetStreamer; -class AsmPrinter; - -//===----------------------------------------------------------------------===// -/// DwarfException - Emits Dwarf exception handling directives. -/// -class DwarfException : public AsmPrinterHandler { -protected: - /// Asm - Target of Dwarf emission. - AsmPrinter *Asm; - - /// MMI - Collected machine module information. - MachineModuleInfo *MMI; - - /// SharedTypeIds - How many leading type ids two landing pads have in common. - static unsigned SharedTypeIds(const LandingPadInfo *L, - const LandingPadInfo *R); - - /// PadRange - Structure holding a try-range and the associated landing pad. - struct PadRange { - // The index of the landing pad. - unsigned PadIndex; - // The index of the begin and end labels in the landing pad's label lists. - unsigned RangeIndex; - }; - - typedef DenseMap<MCSymbol *, PadRange> RangeMapType; - - /// ActionEntry - Structure describing an entry in the actions table. - struct ActionEntry { - int ValueForTypeID; // The value to write - may not be equal to the type id. 
- int NextAction; - unsigned Previous; - }; - - /// CallSiteEntry - Structure describing an entry in the call-site table. - struct CallSiteEntry { - // The 'try-range' is BeginLabel .. EndLabel. - MCSymbol *BeginLabel; // zero indicates the start of the function. - MCSymbol *EndLabel; // zero indicates the end of the function. - - // The landing pad starts at PadLabel. - MCSymbol *PadLabel; // zero indicates that there is no landing pad. - unsigned Action; - }; - - /// ComputeActionsTable - Compute the actions table and gather the first - /// action index for each landing pad site. - unsigned ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs, - SmallVectorImpl<ActionEntry> &Actions, - SmallVectorImpl<unsigned> &FirstActions); - - /// CallToNoUnwindFunction - Return `true' if this is a call to a function - /// marked `nounwind'. Return `false' otherwise. - bool CallToNoUnwindFunction(const MachineInstr *MI); - - /// ComputeCallSiteTable - Compute the call-site table. The entry for an - /// invoke has a try-range containing the call, a non-zero landing pad and an - /// appropriate action. The entry for an ordinary call has a try-range - /// containing the call and zero for the landing pad and the action. Calls - /// marked 'nounwind' have no entry and must not be contained in the try-range - /// of any entry - they form gaps in the table. Entries must be ordered by - /// try-range address. - void ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, - const RangeMapType &PadMap, - const SmallVectorImpl<const LandingPadInfo *> &LPs, - const SmallVectorImpl<unsigned> &FirstActions); - - /// EmitExceptionTable - Emit landing pads and actions. - /// - /// The general organization of the table is complex, but the basic concepts - /// are easy. First there is a header which describes the location and - /// organization of the three components that follow. - /// 1. The landing pad site information describes the range of code covered - /// by the try. In our case it's an accumulation of the ranges covered - /// by the invokes in the try. There is also a reference to the landing - /// pad that handles the exception once processed. Finally an index into - /// the actions table. - /// 2. The action table, in our case, is composed of pairs of type ids - /// and next action offset. Starting with the action index from the - /// landing pad site, each type Id is checked for a match to the current - /// exception. If it matches then the exception and type id are passed - /// on to the landing pad. Otherwise the next action is looked up. This - /// chain is terminated with a next action of zero. If no type id is - /// found the frame is unwound and handling continues. - /// 3. Type id table contains references to all the C++ typeinfo for all - /// catches in the function. This tables is reversed indexed base 1. - void EmitExceptionTable(); - - virtual void EmitTypeInfos(unsigned TTypeEncoding); - -public: - //===--------------------------------------------------------------------===// - // Main entry points. - // - DwarfException(AsmPrinter *A); - virtual ~DwarfException(); - - /// endModule - Emit all exception information that should come after the - /// content. - void endModule() override; - - /// beginFunction - Gather pre-function exception information. Assumes being - /// emitted immediately after the function entry point. - void beginFunction(const MachineFunction *MF) override; - - /// endFunction - Gather and emit post-function exception information. 
- void endFunction(const MachineFunction *) override; - - // We don't need these. - void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} - void beginInstruction(const MachineInstr *MI) override {} - void endInstruction() override {} -}; -class DwarfCFIException : public DwarfException { +class DwarfCFIException : public EHStreamer { /// shouldEmitPersonality - Per-function flag to indicate if .cfi_personality /// should be emitted. bool shouldEmitPersonality; @@ -179,8 +55,8 @@ public: void endFunction(const MachineFunction *) override; }; -class ARMException : public DwarfException { - void EmitTypeInfos(unsigned TTypeEncoding) override; +class ARMException : public EHStreamer { + void emitTypeInfos(unsigned TTypeEncoding) override; ARMTargetStreamer &getTargetStreamer(); /// shouldEmitCFI - Per-function flag to indicate if frame CFI info @@ -206,7 +82,7 @@ public: void endFunction(const MachineFunction *) override; }; -class Win64Exception : public DwarfException { +class Win64Exception : public EHStreamer { /// shouldEmitPersonality - Per-function flag to indicate if personality /// info should be emitted. bool shouldEmitPersonality; diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp index a70c0f7..9538bee 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp @@ -1071,6 +1071,8 @@ std::string DwarfUnit::getParentContextString(DIScope Context) const { I != E; ++I) { DIScope Ctx = *I; StringRef Name = Ctx.getName(); + if (Name.empty() && Ctx.isNameSpace()) + Name = "(anonymous namespace)"; if (!Name.empty()) { CS += Name; CS += "::"; @@ -1359,12 +1361,13 @@ DIE *DwarfUnit::getOrCreateNameSpace(DINameSpace NS) { return NDie; DIE &NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); - if (!NS.getName().empty()) { + StringRef Name = NS.getName(); + if (!Name.empty()) addString(NDie, dwarf::DW_AT_name, NS.getName()); - DD->addAccelNamespace(NS.getName(), NDie); - addGlobalName(NS.getName(), NDie, NS.getContext()); - } else - DD->addAccelNamespace("(anonymous namespace)", NDie); + else + Name = "(anonymous namespace)"; + DD->addAccelNamespace(Name, NDie); + addGlobalName(Name, NDie, NS.getContext()); addSourceLine(NDie, NS); return &NDie; } @@ -1382,14 +1385,14 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { // Add subprogram definitions to the CU die directly. ContextDIE = &getUnitDie(); - // Build the decl now to ensure it preceeds the definition. + // Build the decl now to ensure it precedes the definition. getOrCreateSubprogramDIE(SPDecl); } // DW_TAG_inlined_subroutine may refer to this DIE. DIE &SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); - // Abort here and fill this in later, depending on whether or not this + // Stop here and fill this in later, depending on whether or not this // subprogram turns out to have inlined instances or not. if (SP.isDefinition()) return &SPDie; @@ -1398,12 +1401,21 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(DISubprogram SP) { return &SPDie; } +void DwarfUnit::applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie) { + DISubprogram SPDecl = SP.getFunctionDeclaration(); + DIScope Context = resolve(SPDecl ? 
SPDecl.getContext() : SP.getContext()); + applySubprogramAttributes(SP, SPDie); + addGlobalName(SP.getName(), SPDie, Context); +} + void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { DIE *DeclDie = nullptr; StringRef DeclLinkageName; if (DISubprogram SPDecl = SP.getFunctionDeclaration()) { DeclDie = getDIE(SPDecl); - assert(DeclDie); + assert(DeclDie && "This DIE should've already been constructed when the " + "definition DIE was created in " + "getOrCreateSubprogramDIE"); DeclLinkageName = SPDecl.getLinkageName(); } @@ -1502,6 +1514,17 @@ void DwarfUnit::applySubprogramAttributes(DISubprogram SP, DIE &SPDie) { addFlag(SPDie, dwarf::DW_AT_explicit); } +void DwarfUnit::applyVariableAttributes(const DbgVariable &Var, + DIE &VariableDie) { + StringRef Name = Var.getName(); + if (!Name.empty()) + addString(VariableDie, dwarf::DW_AT_name, Name); + addSourceLine(VariableDie, Var.getVariable()); + addType(VariableDie, Var.getType()); + if (Var.isArtificial()) + addFlag(VariableDie, dwarf::DW_AT_artificial); +} + // Return const expression if value is a GEP to access merged global // constant. e.g. // i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0) @@ -1665,10 +1688,8 @@ void DwarfCompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { DD->addAccelName(GV.getLinkageName(), AddrDIE); } - if (!GV.isLocalToUnit()) - addGlobalName(GV.getName(), - VariableSpecDIE ? *VariableSpecDIE : *VariableDIE, - GV.getContext()); + addGlobalName(GV.getName(), VariableSpecDIE ? *VariableSpecDIE : *VariableDIE, + GV.getContext()); } /// constructSubrangeDIE - Construct subrange DIE from DISubrange. @@ -1777,24 +1798,13 @@ std::unique_ptr<DIE> DwarfUnit::constructVariableDIE(DbgVariable &DV, std::unique_ptr<DIE> DwarfUnit::constructVariableDIEImpl(const DbgVariable &DV, bool Abstract) { - StringRef Name = DV.getName(); - // Define variable debug information entry. auto VariableDie = make_unique<DIE>(DV.getTag()); - DbgVariable *AbsVar = DV.getAbstractVariable(); - if (AbsVar && AbsVar->getDIE()) - addDIEEntry(*VariableDie, dwarf::DW_AT_abstract_origin, *AbsVar->getDIE()); - else { - if (!Name.empty()) - addString(*VariableDie, dwarf::DW_AT_name, Name); - addSourceLine(*VariableDie, DV.getVariable()); - addType(*VariableDie, DV.getType()); - if (DV.isArtificial()) - addFlag(*VariableDie, dwarf::DW_AT_artificial); - } - if (Abstract) + if (Abstract) { + applyVariableAttributes(DV, *VariableDie); return VariableDie; + } // Add variable address. diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h index acb7528..b7b83b2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h @@ -400,6 +400,8 @@ public: DIE *getOrCreateSubprogramDIE(DISubprogram SP); void applySubprogramAttributes(DISubprogram SP, DIE &SPDie); + void applySubprogramAttributesToDefinition(DISubprogram SP, DIE &SPDie); + void applyVariableAttributes(const DbgVariable &Var, DIE &VariableDie); /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. 
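Aside on the DwarfUnit hunks above: both changes teach the DWARF emitter to substitute the placeholder "(anonymous namespace)" for an unnamed C++ namespace when building scope-qualified names, instead of silently skipping the empty name. The following standalone sketch mirrors that accumulation loop; it is not part of the commit, and the Scope type and parentContextString name are invented stand-ins for LLVM's DIScope chain.

#include <iostream>
#include <string>
#include <vector>

// Hypothetical stand-in for one link of the DIScope chain that
// DwarfUnit::getParentContextString walks: a name plus a namespace flag.
struct Scope {
  std::string Name;
  bool IsNamespace;
};

// Accumulates "A::B::" the way the patched loop does: an empty name is
// normally skipped, but an unnamed namespace now contributes the
// "(anonymous namespace)" placeholder instead of disappearing.
static std::string parentContextString(const std::vector<Scope> &Chain) {
  std::string CS;
  for (const Scope &Ctx : Chain) {
    std::string Name = Ctx.Name;
    if (Name.empty() && Ctx.IsNamespace)
      Name = "(anonymous namespace)";
    if (!Name.empty()) {
      CS += Name;
      CS += "::";
    }
  }
  return CS;
}

int main() {
  // A named namespace containing an unnamed one.
  std::vector<Scope> Chain = {{"outer", true}, {"", true}};
  // Prints "outer::(anonymous namespace)::".
  std::cout << parentContextString(Chain) << '\n';
}

The same placeholder is reused in getOrCreateNameSpace, so the accelerator table and the global-name table now agree on how anonymous namespaces are spelled.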
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp index 3a12c73..73f62bf 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp @@ -1,4 +1,4 @@ -//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===// +//===-- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer --===// // // The LLVM Compiler Infrastructure // @@ -7,45 +7,31 @@ // //===----------------------------------------------------------------------===// // -// This file contains support for writing DWARF exception info into asm files. +// This file contains support for writing exception info into assembly files. // //===----------------------------------------------------------------------===// -#include "DwarfException.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Twine.h" +#include "EHStreamer.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Mangler.h" -#include "llvm/IR/Module.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Support/LEB128.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" + using namespace llvm; -DwarfException::DwarfException(AsmPrinter *A) - : Asm(A), MMI(Asm->MMI) {} +EHStreamer::EHStreamer(AsmPrinter *A) : Asm(A), MMI(Asm->MMI) {} -DwarfException::~DwarfException() {} +EHStreamer::~EHStreamer() {} -/// SharedTypeIds - How many leading type ids two landing pads have in common. -unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L, - const LandingPadInfo *R) { +/// How many leading type ids two landing pads have in common. +unsigned EHStreamer::sharedTypeIDs(const LandingPadInfo *L, + const LandingPadInfo *R) { const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; unsigned LSize = LIds.size(), RSize = RIds.size(); unsigned MinSize = LSize < RSize ? LSize : RSize; @@ -58,10 +44,10 @@ unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L, return Count; } -/// ComputeActionsTable - Compute the actions table and gather the first action -/// index for each landing pad site. -unsigned DwarfException:: -ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, +/// Compute the actions table and gather the first action index for each landing +/// pad site. +unsigned EHStreamer:: +computeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, SmallVectorImpl<ActionEntry> &Actions, SmallVectorImpl<unsigned> &FirstActions) { @@ -109,7 +95,7 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, I = LandingPads.begin(), E = LandingPads.end(); I != E; ++I) { const LandingPadInfo *LPI = *I; const std::vector<int> &TypeIds = LPI->TypeIds; - unsigned NumShared = PrevLPI ? SharedTypeIds(LPI, PrevLPI) : 0; + unsigned NumShared = PrevLPI ? 
sharedTypeIDs(LPI, PrevLPI) : 0; unsigned SizeSiteActions = 0; if (NumShared < TypeIds.size()) { @@ -167,9 +153,9 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads, return SizeActions; } -/// CallToNoUnwindFunction - Return `true' if this is a call to a function -/// marked `nounwind'. Return `false' otherwise. -bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { +/// Return `true' if this is a call to a function marked `nounwind'. Return +/// `false' otherwise. +bool EHStreamer::callToNoUnwindFunction(const MachineInstr *MI) { assert(MI->isCall() && "This should be a call instruction!"); bool MarkedNoUnwind = false; @@ -201,15 +187,14 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) { return MarkedNoUnwind; } -/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke -/// has a try-range containing the call, a non-zero landing pad, and an -/// appropriate action. The entry for an ordinary call has a try-range -/// containing the call and zero for the landing pad and the action. Calls -/// marked 'nounwind' have no entry and must not be contained in the try-range -/// of any entry - they form gaps in the table. Entries must be ordered by -/// try-range address. -void DwarfException:: -ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, +/// Compute the call-site table. The entry for an invoke has a try-range +/// containing the call, a non-zero landing pad, and an appropriate action. The +/// entry for an ordinary call has a try-range containing the call and zero for +/// the landing pad and the action. Calls marked 'nounwind' have no entry and +/// must not be contained in the try-range of any entry - they form gaps in the +/// table. Entries must be ordered by try-range address. +void EHStreamer:: +computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, const RangeMapType &PadMap, const SmallVectorImpl<const LandingPadInfo *> &LandingPads, const SmallVectorImpl<unsigned> &FirstActions) { @@ -228,7 +213,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, for (const auto &MI : MBB) { if (!MI.isEHLabel()) { if (MI.isCall()) - SawPotentiallyThrowing |= !CallToNoUnwindFunction(&MI); + SawPotentiallyThrowing |= !callToNoUnwindFunction(&MI); continue; } @@ -308,7 +293,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, } } -/// EmitExceptionTable - Emit landing pads and actions. +/// Emit landing pads and actions. /// /// The general organization of the table is complex, but the basic concepts are /// easy. First there is a header which describes the location and organization @@ -328,7 +313,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, /// unwound and handling continues. /// 3. Type ID table contains references to all the C++ typeinfo for all /// catches in the function. This table is reverse indexed, base 1. -void DwarfException::EmitExceptionTable() { +void EHStreamer::emitExceptionTable() { const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); @@ -350,7 +335,8 @@ void DwarfException::EmitExceptionTable() { // landing pad site.
SmallVector<ActionEntry, 32> Actions; SmallVector<unsigned, 64> FirstActions; - unsigned SizeActions=ComputeActionsTable(LandingPads, Actions, FirstActions); + unsigned SizeActions = + computeActionsTable(LandingPads, Actions, FirstActions); // Invokes and nounwind calls have entries in PadMap (due to being bracketed // by try-range labels when lowered). Ordinary calls do not, so appropriate @@ -368,7 +354,7 @@ void DwarfException::EmitExceptionTable() { // Compute the call-site table. SmallVector<CallSiteEntry, 64> CallSites; - ComputeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions); + computeCallSiteTable(CallSites, PadMap, LandingPads, FirstActions); // Final tallies. @@ -657,12 +643,12 @@ void DwarfException::EmitExceptionTable() { Asm->EmitSLEB128(Action.NextAction); } - EmitTypeInfos(TTypeEncoding); + emitTypeInfos(TTypeEncoding); Asm->EmitAlignment(2); } -void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) { +void EHStreamer::emitTypeInfos(unsigned TTypeEncoding) { const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); @@ -703,19 +689,18 @@ void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) { } } -/// endModule - Emit all exception information that should come after the -/// content. -void DwarfException::endModule() { +/// Emit all exception information that should come after the content. +void EHStreamer::endModule() { llvm_unreachable("Should be implemented"); } -/// beginFunction - Gather pre-function exception information. Assumes it's -/// being emitted immediately after the function entry point. -void DwarfException::beginFunction(const MachineFunction *MF) { +/// Gather pre-function exception information. Assumes it's being emitted +/// immediately after the function entry point. +void EHStreamer::beginFunction(const MachineFunction *MF) { llvm_unreachable("Should be implemented"); } -/// endFunction - Gather and emit post-function exception information. -void DwarfException::endFunction(const MachineFunction *) { +/// Gather and emit post-function exception information. +void EHStreamer::endFunction(const MachineFunction *) { llvm_unreachable("Should be implemented"); } diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h new file mode 100644 index 0000000..2b6ba78 --- /dev/null +++ b/lib/CodeGen/AsmPrinter/EHStreamer.h @@ -0,0 +1,138 @@ +//===-- EHStreamer.h - Exception Handling Directive Streamer ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing exception info into assembly files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H +#define LLVM_CODEGEN_ASMPRINTER_EHSTREAMER_H + +#include "AsmPrinterHandler.h" +#include "llvm/ADT/DenseMap.h" + +namespace llvm { +struct LandingPadInfo; +class MachineModuleInfo; +class MachineInstr; +class MachineFunction; +class AsmPrinter; + +template <typename T> +class SmallVectorImpl; + +/// Emits exception handling directives. +class EHStreamer : public AsmPrinterHandler { +protected: + /// Target of directive emission. + AsmPrinter *Asm; + + /// Collected machine module information. 
+ MachineModuleInfo *MMI; + + /// How many leading type ids two landing pads have in common. + static unsigned sharedTypeIDs(const LandingPadInfo *L, + const LandingPadInfo *R); + + /// Structure holding a try-range and the associated landing pad. + struct PadRange { + // The index of the landing pad. + unsigned PadIndex; + // The index of the begin and end labels in the landing pad's label lists. + unsigned RangeIndex; + }; + + typedef DenseMap<MCSymbol *, PadRange> RangeMapType; + + /// Structure describing an entry in the actions table. + struct ActionEntry { + int ValueForTypeID; // The value to write - may not be equal to the type id. + int NextAction; + unsigned Previous; + }; + + /// Structure describing an entry in the call-site table. + struct CallSiteEntry { + // The 'try-range' is BeginLabel .. EndLabel. + MCSymbol *BeginLabel; // zero indicates the start of the function. + MCSymbol *EndLabel; // zero indicates the end of the function. + + // The landing pad starts at PadLabel. + MCSymbol *PadLabel; // zero indicates that there is no landing pad. + unsigned Action; + }; + + /// Compute the actions table and gather the first action index for each + /// landing pad site. + unsigned computeActionsTable(const SmallVectorImpl<const LandingPadInfo*>&LPs, + SmallVectorImpl<ActionEntry> &Actions, + SmallVectorImpl<unsigned> &FirstActions); + + /// Return `true' if this is a call to a function marked `nounwind'. Return + /// `false' otherwise. + bool callToNoUnwindFunction(const MachineInstr *MI); + + /// Compute the call-site table. The entry for an invoke has a try-range + /// containing the call, a non-zero landing pad and an appropriate action. + /// The entry for an ordinary call has a try-range containing the call and + /// zero for the landing pad and the action. Calls marked 'nounwind' have + /// no entry and must not be contained in the try-range of any entry - they + /// form gaps in the table. Entries must be ordered by try-range address. + + void computeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites, + const RangeMapType &PadMap, + const SmallVectorImpl<const LandingPadInfo *> &LPs, + const SmallVectorImpl<unsigned> &FirstActions); + + /// Emit landing pads and actions. + /// + /// The general organization of the table is complex, but the basic concepts + /// are easy. First there is a header which describes the location and + /// organization of the three components that follow. + /// 1. The landing pad site information describes the range of code covered + /// by the try. In our case it's an accumulation of the ranges covered + /// by the invokes in the try. There is also a reference to the landing + /// pad that handles the exception once processed. Finally, there is an + /// index into the actions table. + /// 2. The action table, in our case, is composed of pairs of type ids + /// and next action offset. Starting with the action index from the + /// landing pad site, each type id is checked for a match to the current + /// exception. If it matches, then the exception and type id are passed + /// on to the landing pad. Otherwise the next action is looked up. This + /// chain is terminated with a next action of zero. If no type id is + /// found the frame is unwound and handling continues. + /// 3. The type id table contains references to all the C++ typeinfo for all + /// catches in the function. This table is reverse indexed, base 1.
+ void emitExceptionTable(); + + virtual void emitTypeInfos(unsigned TTypeEncoding); + +public: + EHStreamer(AsmPrinter *A); + virtual ~EHStreamer(); + + /// Emit all exception information that should come after the content. + void endModule() override; + + /// Gather pre-function exception information. Assumes being emitted + /// immediately after the function entry point. + void beginFunction(const MachineFunction *MF) override; + + /// Gather and emit post-function exception information. + void endFunction(const MachineFunction *) override; + + // Unused. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) override {} + void beginInstruction(const MachineInstr *MI) override {} + void endInstruction() override {} +}; +} + +#endif + diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 17d8bff..81285d5 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -38,9 +38,8 @@ using namespace llvm; Win64Exception::Win64Exception(AsmPrinter *A) - : DwarfException(A), - shouldEmitPersonality(false), shouldEmitLSDA(false), shouldEmitMoves(false) - {} + : EHStreamer(A), shouldEmitPersonality(false), shouldEmitLSDA(false), + shouldEmitMoves(false) {} Win64Exception::~Win64Exception() {} @@ -73,14 +72,14 @@ void Win64Exception::beginFunction(const MachineFunction *MF) { if (!shouldEmitPersonality && !shouldEmitMoves) return; - Asm->OutStreamer.EmitWin64EHStartProc(Asm->CurrentFnSym); + Asm->OutStreamer.EmitWinCFIStartProc(Asm->CurrentFnSym); if (!shouldEmitPersonality) return; - MCSymbol *GCCHandlerSym = - Asm->GetExternalSymbolSymbol("_GCC_specific_handler"); - Asm->OutStreamer.EmitWin64EHHandler(GCCHandlerSym, true, true); + const MCSymbol *PersHandlerSym = + TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); + Asm->OutStreamer.EmitWinEHHandler(PersHandlerSym, true, true); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); @@ -99,17 +98,10 @@ void Win64Exception::endFunction(const MachineFunction *) { MMI->TidyLandingPads(); if (shouldEmitPersonality) { - const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); - const Function *Per = MMI->getPersonalities()[MMI->getPersonalityIndex()]; - const MCSymbol *Sym = - TLOF.getCFIPersonalitySymbol(Per, *Asm->Mang, Asm->TM, MMI); - Asm->OutStreamer.PushSection(); - Asm->OutStreamer.EmitWin64EHHandlerData(); - Asm->OutStreamer.EmitValue(MCSymbolRefExpr::Create(Sym, Asm->OutContext), - 4); - EmitExceptionTable(); + Asm->OutStreamer.EmitWinEHHandlerData(); + emitExceptionTable(); Asm->OutStreamer.PopSection(); } - Asm->OutStreamer.EmitWin64EHEndProc(); + Asm->OutStreamer.EmitWinCFIEndProc(); } diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp index 2212941..6a5c431 100644 --- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp +++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp @@ -308,7 +308,7 @@ void WinCodeViewLineTables::endFunction(const MachineFunction *MF) { return; const Function *GV = MF->getFunction(); - assert(FnDebugInfo.count(GV) == true); + assert(FnDebugInfo.count(GV)); assert(CurFn == &FnDebugInfo[GV]); if (CurFn->Instrs.empty()) { diff --git a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp index d995333..421946d 100644 --- a/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp +++ b/lib/CodeGen/AtomicExpandLoadLinkedPass.cpp @@ -21,17 +21,19 @@ #include "llvm/Support/Debug.h" #include 
"llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; #define DEBUG_TYPE "arm-atomic-expand" namespace { class AtomicExpandLoadLinked : public FunctionPass { - const TargetLowering *TLI; + const TargetMachine *TM; public: static char ID; // Pass identification, replacement for typeid explicit AtomicExpandLoadLinked(const TargetMachine *TM = nullptr) - : FunctionPass(ID), TLI(TM ? TM->getTargetLowering() : nullptr) { + : FunctionPass(ID), TM(TM) { initializeAtomicExpandLoadLinkedPass(*PassRegistry::getPassRegistry()); } @@ -50,29 +52,16 @@ namespace { char AtomicExpandLoadLinked::ID = 0; char &llvm::AtomicExpandLoadLinkedID = AtomicExpandLoadLinked::ID; - -static void *initializeAtomicExpandLoadLinkedPassOnce(PassRegistry &Registry) { - PassInfo *PI = new PassInfo( - "Expand Atomic calls in terms of load-linked & store-conditional", - "atomic-ll-sc", &AtomicExpandLoadLinked::ID, - PassInfo::NormalCtor_t(callDefaultCtor<AtomicExpandLoadLinked>), false, - false, PassInfo::TargetMachineCtor_t( - callTargetMachineCtor<AtomicExpandLoadLinked>)); - Registry.registerPass(*PI, true); - return PI; -} - -void llvm::initializeAtomicExpandLoadLinkedPass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializeAtomicExpandLoadLinkedPassOnce) -} - +INITIALIZE_TM_PASS(AtomicExpandLoadLinked, "atomic-ll-sc", + "Expand Atomic calls in terms of load-linked & store-conditional", + false, false) FunctionPass *llvm::createAtomicExpandLoadLinkedPass(const TargetMachine *TM) { return new AtomicExpandLoadLinked(TM); } bool AtomicExpandLoadLinked::runOnFunction(Function &F) { - if (!TLI) + if (!TM || !TM->getSubtargetImpl()->enableAtomicExpandLoadLinked()) return false; SmallVector<Instruction *, 1> AtomicInsts; @@ -89,7 +78,7 @@ bool AtomicExpandLoadLinked::runOnFunction(Function &F) { bool MadeChange = false; for (Instruction *Inst : AtomicInsts) { - if (!TLI->shouldExpandAtomicInIR(Inst)) + if (!TM->getTargetLowering()->shouldExpandAtomicInIR(Inst)) continue; if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) @@ -111,13 +100,14 @@ bool AtomicExpandLoadLinked::expandAtomicLoad(LoadInst *LI) { // Load instructions don't actually need a leading fence, even in the // SequentiallyConsistent case. AtomicOrdering MemOpOrder = - TLI->getInsertFencesForAtomic() ? Monotonic : LI->getOrdering(); + TM->getTargetLowering()->getInsertFencesForAtomic() ? Monotonic + : LI->getOrdering(); // The only 64-bit load guaranteed to be single-copy atomic by the ARM ARM is // an ldrexd (A3.5.3). IRBuilder<> Builder(LI); - Value *Val = - TLI->emitLoadLinked(Builder, LI->getPointerOperand(), MemOpOrder); + Value *Val = TM->getTargetLowering()->emitLoadLinked( + Builder, LI->getPointerOperand(), MemOpOrder); insertTrailingFence(Builder, LI->getOrdering()); @@ -178,7 +168,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { // Start the main loop block now that we've taken care of the preliminaries. 
Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); Value *NewVal; switch (AI->getOperation()) { @@ -195,7 +186,7 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { NewVal = Builder.CreateAnd(Loaded, AI->getValOperand(), "new"); break; case AtomicRMWInst::Nand: - NewVal = Builder.CreateAnd(Loaded, Builder.CreateNot(AI->getValOperand()), + NewVal = Builder.CreateNot(Builder.CreateAnd(Loaded, AI->getValOperand()), "new"); break; case AtomicRMWInst::Or: @@ -224,8 +215,8 @@ bool AtomicExpandLoadLinked::expandAtomicRMW(AtomicRMWInst *AI) { llvm_unreachable("Unknown atomic op"); } - Value *StoreSuccess = - TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder); + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( + Builder, NewVal, Addr, MemOpOrder); Value *TryAgain = Builder.CreateICmpNE( StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain"); Builder.CreateCondBr(TryAgain, LoopBB, ExitBB); @@ -256,19 +247,26 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // %loaded = @load.linked(%addr) // %should_store = icmp eq %loaded, %desired // br i1 %should_store, label %cmpxchg.trystore, - // label %cmpxchg.end/%cmpxchg.barrier + // label %cmpxchg.failure // cmpxchg.trystore: // %stored = @store_conditional(%new, %addr) - // %try_again = icmp i32 ne %stored, 0 - // br i1 %try_again, label %loop, label %cmpxchg.end - // cmpxchg.barrier: + // %success = icmp eq i32 %stored, 0 + // br i1 %success, label %cmpxchg.success, label %loop/%cmpxchg.failure + // cmpxchg.success: + // fence? + // br label %cmpxchg.end + // cmpxchg.failure: // fence? // br label %cmpxchg.end // cmpxchg.end: + // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure] + // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0 + // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1 // [...] BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end"); - auto BarrierBB = BasicBlock::Create(Ctx, "cmpxchg.barrier", F, ExitBB); - auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, BarrierBB); + auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB); + auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB); + auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB); auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB); // This grabs the DebugLoc from CI @@ -284,37 +282,82 @@ bool AtomicExpandLoadLinked::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) { // Start the main loop block now that we've taken care of the preliminaries. Builder.SetInsertPoint(LoopBB); - Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder); + Value *Loaded = + TM->getTargetLowering()->emitLoadLinked(Builder, Addr, MemOpOrder); Value *ShouldStore = Builder.CreateICmpEQ(Loaded, CI->getCompareOperand(), "should_store"); // If the cmpxchg doesn't actually need any ordering when it fails, we can // jump straight past that fence instruction (if it exists). - BasicBlock *FailureBB = FailureOrder == Monotonic ?
ExitBB : BarrierBB; Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB); Builder.SetInsertPoint(TryStoreBB); - Value *StoreSuccess = TLI->emitStoreConditional( + Value *StoreSuccess = TM->getTargetLowering()->emitStoreConditional( Builder, CI->getNewValOperand(), Addr, MemOpOrder); - Value *TryAgain = Builder.CreateICmpNE( + StoreSuccess = Builder.CreateICmpEQ( StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success"); - Builder.CreateCondBr(TryAgain, LoopBB, BarrierBB); + Builder.CreateCondBr(StoreSuccess, SuccessBB, + CI->isWeak() ? FailureBB : LoopBB); - // Finally, make sure later instructions don't get reordered with a fence if - // necessary. - Builder.SetInsertPoint(BarrierBB); + // Make sure later instructions don't get reordered with a fence if necessary. + Builder.SetInsertPoint(SuccessBB); insertTrailingFence(Builder, SuccessOrder); Builder.CreateBr(ExitBB); - CI->replaceAllUsesWith(Loaded); - CI->eraseFromParent(); + Builder.SetInsertPoint(FailureBB); + insertTrailingFence(Builder, FailureOrder); + Builder.CreateBr(ExitBB); + + // Finally, we have control-flow based knowledge of whether the cmpxchg + // succeeded or not. We expose this to later passes by converting any + // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate PHI. + // Setup the builder so we can create any PHIs we need. + Builder.SetInsertPoint(ExitBB, ExitBB->begin()); + PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2); + Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB); + Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB); + + // Look for any users of the cmpxchg that are just comparing the loaded value + // against the desired one, and replace them with the CFG-derived version. + SmallVector<ExtractValueInst *, 2> PrunedInsts; + for (auto User : CI->users()) { + ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User); + if (!EV) + continue; + + assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 && + "weird extraction from { iN, i1 }"); + + if (EV->getIndices()[0] == 0) + EV->replaceAllUsesWith(Loaded); + else + EV->replaceAllUsesWith(Success); + + PrunedInsts.push_back(EV); + } + + // We can remove the instructions now we're no longer iterating through them. + for (auto EV : PrunedInsts) + EV->eraseFromParent(); + + if (!CI->use_empty()) { + // Some use of the full struct return that we don't understand has happened, + // so we've got to reconstruct it properly. 
+ Value *Res; + Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0); + Res = Builder.CreateInsertValue(Res, Success, 1); + + CI->replaceAllUsesWith(Res); + } + + CI->eraseFromParent(); return true; } AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) return Ord; if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent) @@ -327,7 +370,7 @@ AtomicOrdering AtomicExpandLoadLinked::insertLeadingFence(IRBuilder<> &Builder, void AtomicExpandLoadLinked::insertTrailingFence(IRBuilder<> &Builder, AtomicOrdering Ord) { - if (!TLI->getInsertFencesForAtomic()) + if (!TM->getTargetLowering()->getInsertFencesForAtomic()) return; if (Ord == Acquire || Ord == AcquireRelease) diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp index 7f31b1a..b2737bf 100644 --- a/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -39,6 +39,9 @@ class BasicTTI final : public ImmutablePass, public TargetTransformInfo { /// are set if the result needs to be inserted and/or extracted from vectors. unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + /// Estimate the cost overhead of SK_Alternate shuffle. + unsigned getAltShuffleOverhead(Type *Ty) const; + const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } public: @@ -327,8 +330,28 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, return OpCost; } +unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const { + assert(Ty->isVectorTy() && "Can only shuffle vectors"); + unsigned Cost = 0; + // Shuffle cost is equal to the cost of extracting elements from the source + // vectors plus the cost of inserting them into the result vector. + + // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from + // index 0 of the first vector, index 1 of the second vector, index 2 of the + // first vector, and finally index 3 of the second vector, and insert them at + // indices <0,1,2,3> of the result vector. + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + return Cost; +} + unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { + if (Kind == SK_Alternate) { + return getAltShuffleOverhead(Tp); + } return 1; } diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index f623a48..7503e57 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -1505,10 +1505,17 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB, if (MO.isUse()) { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) Uses.insert(*AI); - } else if (!MO.isDead()) - // Don't try to hoist code in the rare case the terminator defines a - // register that is later used. - return MBB->end(); + } else { + if (!MO.isDead()) + // Don't try to hoist code in the rare case the terminator defines a + // register that is later used. + return MBB->end(); + + // If the terminator defines a register, make sure we don't hoist + // the instruction whose def might be clobbered by the terminator.
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + Defs.insert(*AI); + } } if (Uses.empty()) diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 0b492a9..57c24e8 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -22,11 +22,13 @@ add_llvm_library(LLVMCodeGen GCMetadata.cpp GCMetadataPrinter.cpp GCStrategy.cpp + GlobalMerge.cpp IfConversion.cpp InlineSpiller.cpp InterferenceCache.cpp IntrinsicLowering.cpp JITCodeEmitter.cpp + JumpInstrTables.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp LexicalScopes.cpp diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index 6aa60c6..ccac40c 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -151,19 +151,8 @@ typedef DenseMap<Instruction *, Type *> InstrToOrigTy; } char CodeGenPrepare::ID = 0; -static void *initializeCodeGenPreparePassOnce(PassRegistry &Registry) { - initializeTargetLibraryInfoPass(Registry); - PassInfo *PI = new PassInfo( - "Optimize for code generation", "codegenprepare", &CodeGenPrepare::ID, - PassInfo::NormalCtor_t(callDefaultCtor<CodeGenPrepare>), false, false, - PassInfo::TargetMachineCtor_t(callTargetMachineCtor<CodeGenPrepare>)); - Registry.registerPass(*PI, true); - return PI; -} - -void llvm::initializeCodeGenPreparePass(PassRegistry &Registry) { - CALL_ONCE_INITIALIZATION(initializeCodeGenPreparePassOnce) -} +INITIALIZE_TM_PASS(CodeGenPrepare, "codegenprepare", + "Optimize for code generation", false, false) FunctionPass *llvm::createCodeGenPreparePass(const TargetMachine *TM) { return new CodeGenPrepare(TM); @@ -1078,8 +1067,11 @@ void ExtAddrMode::print(raw_ostream &OS) const { NeedPlus = true; } - if (BaseOffs) - OS << (NeedPlus ? " + " : "") << BaseOffs, NeedPlus = true; + if (BaseOffs) { + OS << (NeedPlus ? " + " : "") + << BaseOffs; + NeedPlus = true; + } if (BaseReg) { OS << (NeedPlus ? " + " : "") diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 822636f..d3ffcc7 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -146,8 +146,8 @@ static const SDep *CriticalPathStep(const SUnit *SU) { void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { // It's not safe to change register allocation for source operands of - // that have special allocation requirements. Also assume all registers - // used in a call must not be changed (ABI). + // instructions that have special allocation requirements. Also assume all + // registers used in a call must not be changed (ABI). // FIXME: The issue with predicated instruction is more complex. We are being // conservative here because the kill markers cannot be trusted after // if-conversion: @@ -200,6 +200,28 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (Classes[Reg] != reinterpret_cast<TargetRegisterClass *>(-1)) RegRefs.insert(std::make_pair(Reg, &MO)); + // If this reg is tied and live (Classes[Reg] is set to -1), we can't change + // it or any of its sub or super regs. We need to use KeepRegs to mark the + // reg because not all uses of the same reg within an instruction are + // necessarily tagged as tied. + // Example: an x86 "xor %eax, %eax" will have one source operand tied to the + // def register but not the second (see PR20020 for details). + // FIXME: can this check be relaxed to account for undef uses + // of a register? 
In the above 'xor' example, the uses of %eax are undef, so + // earlier instructions could still replace %eax even though the 'xor' + // itself can't be changed. + if (MI->isRegTiedToUseOperand(i) && + Classes[Reg] == reinterpret_cast<TargetRegisterClass *>(-1)) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) { + KeepRegs.set(*SubRegs); + } + for (MCSuperRegIterator SuperRegs(Reg, TRI); + SuperRegs.isValid(); ++SuperRegs) { + KeepRegs.set(*SuperRegs); + } + } + if (MO.isUse() && Special) { if (!KeepRegs.test(Reg)) { for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); @@ -236,9 +258,15 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, unsigned Reg = MO.getReg(); if (Reg == 0) continue; if (!MO.isDef()) continue; + + // If we've already marked this reg as unchangeable, carry on. + if (KeepRegs.test(Reg)) continue; + // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; + // FIXME: we should use a SubRegIterator that includes self (as above), so + // we don't have to repeat all this code for the reg itself. DefIndices[Reg] = Count; KillIndices[Reg] = ~0u; assert(((KillIndices[Reg] == ~0u) != @@ -281,6 +309,9 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, RegRefs.insert(std::make_pair(Reg, &MO)); + // FIXME: we should use an MCRegAliasIterator that includes self so we don't + // have to repeat all this code for the reg itself. + // It wasn't previously live but now it is, this is a kill. if (KillIndices[Reg] == ~0u) { KillIndices[Reg] = Count; @@ -309,7 +340,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // the two-address instruction also defines NewReg, as may happen with // pre/postincrement loads. In this case, both the use and def operands are in // RegRefs because the def is inserted by PrescanInstruction and not erased -// during ScanInstruction. So checking for an instructions with definitions of +// during ScanInstruction. So checking for an instruction with definitions of // both NewReg and AntiDepReg covers it. bool CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, @@ -325,7 +356,7 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, if (RefOper->isDef() && RefOper->isEarlyClobber()) return true; - // Handle cases in which this instructions defines NewReg. + // Handle cases in which this instruction defines NewReg. MachineInstr *MI = RefOper->getParent(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &CheckOper = MI->getOperand(i); @@ -343,11 +374,11 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, return true; // Don't allow an instruction using AntiDepReg to be earlyclobbered by - // NewReg + // NewReg. if (CheckOper.isEarlyClobber()) return true; - // Don't allow inline asm to define NewReg at all. Who know what it's + // Don't allow inline asm to define NewReg at all. Who knows what it's // doing with it. if (MI->isInlineAsm()) return true; @@ -494,8 +525,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // as we go to help determine which registers are available. 
unsigned Broken = 0; unsigned Count = InsertPosIndex - 1; - for (MachineBasicBlock::iterator I = End, E = Begin; - I != E; --Count) { + for (MachineBasicBlock::iterator I = End, E = Begin; I != E; --Count) { MachineInstr *MI = --I; if (MI->isDebugValue()) continue; @@ -526,7 +556,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // Don't break anti-dependencies on non-allocatable registers. AntiDepReg = 0; else if (KeepRegs.test(AntiDepReg)) - // Don't break anti-dependencies if an use down below requires + // Don't break anti-dependencies if a use down below requires // this exact register. AntiDepReg = 0; else { @@ -564,8 +594,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). - if (MI->isCall() || MI->hasExtraDefRegAllocReq() || - TII->isPredicated(MI)) + if (MI->isCall() || MI->hasExtraDefRegAllocReq() || TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. AntiDepReg = 0; diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index 1949a48..45e4ff5 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -55,12 +55,12 @@ class TargetRegisterInfo; typedef std::multimap<unsigned, MachineOperand *>::const_iterator RegRefIter; - /// KillIndices - The index of the most recent kill (proceding bottom-up), + /// KillIndices - The index of the most recent kill (proceeding bottom-up), /// or ~0u if the register is not live. std::vector<unsigned> KillIndices; - /// DefIndices - The index of the most recent complete def (proceding bottom - /// up), or ~0u if the register is live. + /// DefIndices - The index of the most recent complete def (proceeding + /// bottom up), or ~0u if the register is live. std::vector<unsigned> DefIndices; /// KeepRegs - A set of registers which are live and cannot be changed to diff --git a/lib/Transforms/Scalar/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp index 990d067..027ee38 100644 --- a/lib/Transforms/Scalar/GlobalMerge.cpp +++ b/lib/CodeGen/GlobalMerge.cpp @@ -64,6 +64,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/Pass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -72,7 +73,7 @@ using namespace llvm; #define DEBUG_TYPE "global-merge" cl::opt<bool> -EnableGlobalMerge("global-merge", cl::Hidden, +EnableGlobalMerge("enable-global-merge", cl::Hidden, cl::desc("Enable global merge pass"), cl::init(true)); @@ -81,6 +82,13 @@ EnableGlobalMergeOnConst("global-merge-on-const", cl::Hidden, cl::desc("Enable global merge pass on constants"), cl::init(false)); +// FIXME: this could be a transitional option, and we should probably remove +// it once we are sure this optimization always benefits all targets.
+static cl::opt<bool> +EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden, + cl::desc("Enable global merge pass on external linkage"), + cl::init(false)); + STATISTIC(NumMerged , "Number of globals merged"); namespace { class GlobalMerge : public FunctionPass { @@ -129,9 +137,8 @@ namespace { } // end anonymous namespace char GlobalMerge::ID = 0; -INITIALIZE_PASS(GlobalMerge, "global-merge", - "Global Merge", false, false) - +INITIALIZE_TM_PASS(GlobalMerge, "global-merge", "Merge global variables", + false, false) bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Module &M, bool isConst, unsigned AddrSpace) const { @@ -154,11 +161,23 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Type *Int32Ty = Type::getInt32Ty(M.getContext()); + assert(Globals.size() > 1); + + // FIXME: This simple solution merges as many globals together as + // possible. However, with this solution it would be hard to remove dead + // global symbols at link time. An alternative would be to check global + // symbol references function by function, merge only the symbols that + // are referenced in the same function, and introduce a heuristic to + // resolve the merge conflicts that would then arise between + // different functions. for (size_t i = 0, e = Globals.size(); i != e; ) { size_t j = 0; uint64_t MergedSize = 0; std::vector<Type*> Tys; std::vector<Constant*> Inits; + + bool HasExternal = false; + GlobalVariable *TheFirstExternal = 0; for (j = i; j != e; ++j) { Type *Ty = Globals[j]->getType()->getElementType(); MergedSize += DL->getTypeAllocSize(Ty); @@ -167,17 +186,35 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, } Tys.push_back(Ty); Inits.push_back(Globals[j]->getInitializer()); + + if (Globals[j]->hasExternalLinkage() && !HasExternal) { + HasExternal = true; + TheFirstExternal = Globals[j]; + } } + // If the merged variables don't have external linkage, we don't need to + // expose the symbol after merging. + GlobalValue::LinkageTypes Linkage = HasExternal + ? GlobalValue::ExternalLinkage + : GlobalValue::InternalLinkage; + StructType *MergedTy = StructType::get(M.getContext(), Tys); Constant *MergedInit = ConstantStruct::get(MergedTy, Inits); - GlobalVariable *MergedGV = new GlobalVariable(M, MergedTy, isConst, - GlobalValue::InternalLinkage, - MergedInit, "_MergedGlobals", - nullptr, - GlobalVariable::NotThreadLocal, - AddrSpace); + + // If the merged variables have external linkage, we use the symbol name of + // the first merged variable as the suffix of the global symbol name. This + // helps avoid link-time naming conflicts for global symbols. + GlobalVariable *MergedGV = new GlobalVariable( + M, MergedTy, isConst, Linkage, MergedInit, + HasExternal ? "_MergedGlobals_" + TheFirstExternal->getName() + : "_MergedGlobals", + nullptr, GlobalVariable::NotThreadLocal, AddrSpace); + for (size_t k = i; k < j; ++k) { + GlobalValue::LinkageTypes Linkage = Globals[k]->getLinkage(); + std::string Name = Globals[k]->getName(); + Constant *Idx[2] = { ConstantInt::get(Int32Ty, 0), ConstantInt::get(Int32Ty, k-i) @@ -185,6 +222,14 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals, Constant *GEP = ConstantExpr::getInBoundsGetElementPtr(MergedGV, Idx); Globals[k]->replaceAllUsesWith(GEP); Globals[k]->eraseFromParent(); + + if (Linkage != GlobalValue::InternalLinkage) { + // Generate a new alias...
+ auto *PTy = cast<PointerType>(GEP->getType()); + GlobalAlias::create(PTy->getElementType(), PTy->getAddressSpace(), + Linkage, Name, GEP, &M); + } + NumMerged++; } i = j; @@ -245,8 +290,12 @@ bool GlobalMerge::doInitialization(Module &M) { // Grab all non-const globals. for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { - // Merge is safe for "normal" internal globals only - if (!I->hasLocalLinkage() || I->isThreadLocal() || I->hasSection()) + // Merge is safe for "normal" internal or external globals only + if (I->isDeclaration() || I->isThreadLocal() || I->hasSection()) + continue; + + if (!(EnableGlobalMergeOnExternal && I->hasExternalLinkage()) && + !I->hasInternalLinkage()) continue; PointerType *PT = dyn_cast<PointerType>(I->getType()); @@ -270,8 +319,7 @@ bool GlobalMerge::doInitialization(Module &M) { continue; if (DL->getTypeAllocSize(Ty) < MaxOffset) { - if (TargetLoweringObjectFile::getKindForGlobal(I, TLI->getTargetMachine()) - .isBSSLocal()) + if (TargetLoweringObjectFile::getKindForGlobal(I, *TM).isBSSLocal()) BSSGlobals[AddressSpace].push_back(I); else if (I->isConstant()) ConstGlobals[AddressSpace].push_back(I); diff --git a/lib/CodeGen/JumpInstrTables.cpp b/lib/CodeGen/JumpInstrTables.cpp new file mode 100644 index 0000000..61ef722 --- /dev/null +++ b/lib/CodeGen/JumpInstrTables.cpp @@ -0,0 +1,301 @@ +//===-- JumpInstrTables.cpp: Jump-Instruction Tables ----------------------===// +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief An implementation of jump-instruction tables. +/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "jt" + +#include "llvm/CodeGen/JumpInstrTables.h" + +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/JumpInstrTableInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/CallSite.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include <vector> + +using namespace llvm; + +char JumpInstrTables::ID = 0; + +INITIALIZE_PASS_BEGIN(JumpInstrTables, "jump-instr-tables", + "Jump-Instruction Tables", true, true) +INITIALIZE_PASS_DEPENDENCY(JumpInstrTableInfo); +INITIALIZE_PASS_END(JumpInstrTables, "jump-instr-tables", + "Jump-Instruction Tables", true, true) + +STATISTIC(NumJumpTables, "Number of indirect call tables generated"); +STATISTIC(NumFuncsInJumpTables, "Number of functions in the jump tables"); + +ModulePass *llvm::createJumpInstrTablesPass() { + // The default implementation uses a single table for all functions. + return new JumpInstrTables(JumpTable::Single); +} + +ModulePass *llvm::createJumpInstrTablesPass(JumpTable::JumpTableType JTT) { + return new JumpInstrTables(JTT); +} + +namespace { +static const char jump_func_prefix[] = "__llvm_jump_instr_table_"; +static const char jump_section_prefix[] = ".jump.instr.table.text."; + +// Checks to see if a given CallSite is making an indirect call, including +// cases where the indirect call is made through a bitcast. 
+bool isIndirectCall(CallSite &CS) { + if (CS.getCalledFunction()) + return false; + + // Check the value to see if it is merely a bitcast of a function. In + // this case, it will translate to a direct function call in the resulting + // assembly, so we won't treat it as an indirect call here. + const Value *V = CS.getCalledValue(); + if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) { + return !(CE->isCast() && isa<Function>(CE->getOperand(0))); + } + + // Otherwise, since we know it's a call, it must be an indirect call + return true; +} + +// Replaces Functions and GlobalAliases with a different Value. +bool replaceGlobalValueIndirectUse(GlobalValue *GV, Value *V, Use *U) { + User *Us = U->getUser(); + if (!Us) + return false; + if (Instruction *I = dyn_cast<Instruction>(Us)) { + CallSite CS(I); + + // Don't do the replacement if this use is a direct call to this function. + // If the use is not the called value, then replace it. + if (CS && (isIndirectCall(CS) || CS.isCallee(U))) { + return false; + } + + U->set(V); + } else if (Constant *C = dyn_cast<Constant>(Us)) { + // Don't replace calls to bitcasts of function symbols, since they get + // translated to direct calls. + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Us)) { + if (CE->getOpcode() == Instruction::BitCast) { + // This bitcast must have exactly one user. + if (CE->user_begin() != CE->user_end()) { + User *ParentUs = *CE->user_begin(); + if (CallInst *CI = dyn_cast<CallInst>(ParentUs)) { + CallSite CS(CI); + Use &CEU = *CE->use_begin(); + if (CS.isCallee(&CEU)) { + return false; + } + } + } + } + } + + // GlobalAlias doesn't support replaceUsesOfWithOnConstant. And the verifier + // requires alias to point to a defined function. So, GlobalAlias is handled + // as a separate case in runOnModule. + if (!isa<GlobalAlias>(C)) + C->replaceUsesOfWithOnConstant(GV, V, U); + } else { + assert(false && "The Use of a Function symbol is neither an instruction nor" + " a constant"); + } + + return true; +} + +// Replaces all replaceable address-taken uses of GV with a pointer to a +// jump-instruction table entry. +void replaceValueWithFunction(GlobalValue *GV, Function *F) { + // Go through all uses of this function and replace the uses of GV with the + // jump-table version of the function. Get the uses as a vector before + // replacing them, since replacing them changes the use list and invalidates + // the iterator otherwise. + for (Value::use_iterator I = GV->use_begin(), E = GV->use_end(); I != E;) { + Use &U = *I++; + + // Replacement of constants replaces all instances in the constant. So, some + // uses might have already been handled by the time we reach them here. 
+ if (U.get() == GV) + replaceGlobalValueIndirectUse(GV, F, &U); + } + + return; +} +} // end anonymous namespace + +JumpInstrTables::JumpInstrTables() + : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), + JTType(JumpTable::Single) { + initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry()); +} + +JumpInstrTables::JumpInstrTables(JumpTable::JumpTableType JTT) + : ModulePass(ID), Metadata(), JITI(nullptr), TableCount(0), JTType(JTT) { + initializeJumpInstrTablesPass(*PassRegistry::getPassRegistry()); +} + +JumpInstrTables::~JumpInstrTables() {} + +void JumpInstrTables::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<JumpInstrTableInfo>(); +} + +Function *JumpInstrTables::insertEntry(Module &M, Function *Target) { + FunctionType *OrigFunTy = Target->getFunctionType(); + FunctionType *FunTy = transformType(OrigFunTy); + + JumpMap::iterator it = Metadata.find(FunTy); + if (Metadata.end() == it) { + struct TableMeta Meta; + Meta.TableNum = TableCount; + Meta.Count = 0; + Metadata[FunTy] = Meta; + it = Metadata.find(FunTy); + ++NumJumpTables; + ++TableCount; + } + + it->second.Count++; + + std::string NewName(jump_func_prefix); + NewName += (Twine(it->second.TableNum) + "_" + Twine(it->second.Count)).str(); + Function *JumpFun = + Function::Create(OrigFunTy, GlobalValue::ExternalLinkage, NewName, &M); + // The section for this table + JumpFun->setSection((jump_section_prefix + Twine(it->second.TableNum)).str()); + JITI->insertEntry(FunTy, Target, JumpFun); + + ++NumFuncsInJumpTables; + return JumpFun; +} + +bool JumpInstrTables::hasTable(FunctionType *FunTy) { + FunctionType *TransTy = transformType(FunTy); + return Metadata.end() != Metadata.find(TransTy); +} + +FunctionType *JumpInstrTables::transformType(FunctionType *FunTy) { + // Returning nullptr forces all types into the same table, since all types map + // to the same type + Type *VoidPtrTy = Type::getInt8PtrTy(FunTy->getContext()); + + // Ignore the return type. + Type *RetTy = VoidPtrTy; + bool IsVarArg = FunTy->isVarArg(); + std::vector<Type *> ParamTys(FunTy->getNumParams()); + FunctionType::param_iterator PI, PE; + int i = 0; + + std::vector<Type *> EmptyParams; + Type *Int32Ty = Type::getInt32Ty(FunTy->getContext()); + FunctionType *VoidFnTy = FunctionType::get( + Type::getVoidTy(FunTy->getContext()), EmptyParams, false); + switch (JTType) { + case JumpTable::Single: + + return FunctionType::get(RetTy, EmptyParams, false); + case JumpTable::Arity: + // Transform all types to void* so that all functions with the same arity + // end up in the same table. + for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; + PI++, i++) { + ParamTys[i] = VoidPtrTy; + } + + return FunctionType::get(RetTy, ParamTys, IsVarArg); + case JumpTable::Simplified: + // Project all parameters types to one of 3 types: composite, integer, and + // function, matching the three subclasses of Type. + for (PI = FunTy->param_begin(), PE = FunTy->param_end(); PI != PE; + ++PI, ++i) { + assert((isa<IntegerType>(*PI) || isa<FunctionType>(*PI) || + isa<CompositeType>(*PI)) && + "This type is not an Integer or a Composite or a Function"); + if (isa<CompositeType>(*PI)) { + ParamTys[i] = VoidPtrTy; + } else if (isa<FunctionType>(*PI)) { + ParamTys[i] = VoidFnTy; + } else if (isa<IntegerType>(*PI)) { + ParamTys[i] = Int32Ty; + } + } + + return FunctionType::get(RetTy, ParamTys, IsVarArg); + case JumpTable::Full: + // Don't transform this type at all. 
+ return FunTy; + } + + return nullptr; +} + +bool JumpInstrTables::runOnModule(Module &M) { + // Make sure the module is well-formed, especially with respect to jumptable. + if (verifyModule(M)) + return false; + + JITI = &getAnalysis<JumpInstrTableInfo>(); + + // Get the set of jumptable-annotated functions. + DenseMap<Function *, Function *> Functions; + for (Function &F : M) { + if (F.hasFnAttribute(Attribute::JumpTable)) { + assert(F.hasUnnamedAddr() && + "Attribute 'jumptable' requires 'unnamed_addr'"); + Functions[&F] = nullptr; + } + } + + // Create the jump-table functions. + for (auto &KV : Functions) { + Function *F = KV.first; + KV.second = insertEntry(M, F); + } + + // GlobalAlias is a special case, because the target of an alias statement + // must be a defined function. So, instead of replacing a given function in + // the alias, we replace all uses of aliases that target jumptable functions. + // Note that there's no need to create these functions, since only aliases + // that target known jumptable functions are replaced, and there's no way to + // put the jumptable annotation on a global alias. + DenseMap<GlobalAlias *, Function *> Aliases; + for (GlobalAlias &GA : M.aliases()) { + Constant *Aliasee = GA.getAliasee(); + if (Function *F = dyn_cast<Function>(Aliasee)) { + auto it = Functions.find(F); + if (it != Functions.end()) { + Aliases[&GA] = it->second; + } + } + } + + // Replace each address taken function with its jump-instruction table entry. + for (auto &KV : Functions) + replaceValueWithFunction(KV.first, KV.second); + + for (auto &KV : Aliases) + replaceValueWithFunction(KV.first, KV.second); + + return !Functions.empty(); +} diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index a5ac057..df96b94 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -12,11 +12,15 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetMachine.h" + +#include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/JumpInstrTables.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/Verifier.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" @@ -82,6 +86,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { + // Add internal analysis passes from the target machine. TM->addAnalysisPasses(PM); @@ -136,6 +141,11 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, bool DisableVerify, AnalysisID StartAfter, AnalysisID StopAfter) { + // Passes to handle jumptable function annotations. These can't be handled at + // JIT time, so we don't add them directly to addPassesToGenerateCode. + PM.add(createJumpInstrTableInfoPass()); + PM.add(createJumpInstrTablesPass(Options.JTType)); + // Add common CodeGen passes. MCContext *Context = addPassesToGenerateCode(this, PM, DisableVerify, StartAfter, StopAfter); @@ -199,7 +209,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_Null: // The Null output is intended for use for performance analysis and testing, // not real users. 
- AsmStreamer.reset(createNullStreamer(*Context)); + AsmStreamer.reset(getTarget().createNullStreamer(*Context)); break; } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 388f58f..7d5646b 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -329,12 +329,13 @@ class LDVImpl { void computeIntervals(); public: - LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false), - ModifiedMF(false) {} + LDVImpl(LiveDebugVariables *ps) + : pass(*ps), MF(nullptr), EmitDone(false), ModifiedMF(false) {} bool runOnMachineFunction(MachineFunction &mf); /// clear - Release all memory. void clear() { + MF = nullptr; userValues.clear(); virtRegToEqClass.clear(); userVarMap.clear(); @@ -693,11 +694,11 @@ void LDVImpl::computeIntervals() { } bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { + clear(); MF = &mf; LIS = &pass.getAnalysis<LiveIntervals>(); MDT = &pass.getAnalysis<MachineDominatorTree>(); TRI = mf.getTarget().getRegisterInfo(); - clear(); LS.initialize(mf); DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: " << mf.getName() << " **********\n"); @@ -712,6 +713,8 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) { if (!EnableLDV) return false; + if (!FunctionDIs.count(mf.getFunction())) + return false; if (!pImpl) pImpl = new LDVImpl(this); return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf); @@ -974,6 +977,8 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, void LDVImpl::emitDebugValues(VirtRegMap *VRM) { DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n"); + if (!MF) + return; const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); for (unsigned i = 0, e = userValues.size(); i != e; ++i) { DEBUG(userValues[i]->print(dbgs(), &MF->getTarget())); @@ -988,6 +993,10 @@ void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM); } +bool LiveDebugVariables::doInitialization(Module &M) { + FunctionDIs = makeSubprogramMap(M); + return Pass::doInitialization(M); +} #ifndef NDEBUG void LiveDebugVariables::dump() { diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h index bb67435..7ec0d17 100644 --- a/lib/CodeGen/LiveDebugVariables.h +++ b/lib/CodeGen/LiveDebugVariables.h @@ -22,6 +22,7 @@ #define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { @@ -32,6 +33,7 @@ class VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { void *pImpl; + DenseMap<const Function*, DISubprogram> FunctionDIs; public: static char ID; // Pass identification, replacement for typeid @@ -64,6 +66,7 @@ private: bool runOnMachineFunction(MachineFunction &) override; void releaseMemory() override; void getAnalysisUsage(AnalysisUsage &) const override; + bool doInitialization(Module &) override; }; diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 3563f8e..1559560 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -186,6 +186,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); LRCalc->createDeadDefs(LI); LRCalc->extendToUses(LI); + computeDeadValues(&LI, LI, nullptr, nullptr); } void LiveIntervals::computeVirtRegs() { @@ -412,21 
+413,34 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // Handle dead values. bool CanSeparate = false; + computeDeadValues(li, NewLR, &CanSeparate, dead); + + // Move the trimmed segments back. + li->segments.swap(NewLR.segments); + DEBUG(dbgs() << "Shrunk: " << *li << '\n'); + return CanSeparate; +} + +void LiveIntervals::computeDeadValues(LiveInterval *li, + LiveRange &LR, + bool *CanSeparate, + SmallVectorImpl<MachineInstr*> *dead) { for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def); - assert(LRI != NewLR.end() && "Missing segment for PHI"); + LiveRange::iterator LRI = LR.FindSegmentContaining(VNI->def); + assert(LRI != LR.end() && "Missing segment for PHI"); if (LRI->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. VNI->markUnused(); - NewLR.removeSegment(LRI->start, LRI->end); + LR.removeSegment(LRI->start, LRI->end); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); - CanSeparate = true; + if (CanSeparate) + *CanSeparate = true; } else { // This is a dead def. Make sure the instruction knows. MachineInstr *MI = getInstructionFromIndex(VNI->def); @@ -438,11 +452,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, } } } - - // Move the trimmed segments back. - li->segments.swap(NewLR.segments); - DEBUG(dbgs() << "Shrunk: " << *li << '\n'); - return CanSeparate; } void LiveIntervals::extendToIndices(LiveRange &LR, diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 0ec5c33..08fef5f 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -332,7 +332,7 @@ void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const { } } -void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) { +void MachineBasicBlock::printAsOperand(raw_ostream &OS, bool /*PrintType*/) const { OS << "BB#" << getNumber(); } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index eb3d71f..6138aef 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -457,7 +457,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg, /// getJTISymbol - Return the MCSymbol for the specified non-empty jump table. /// If isLinkerPrivate is specified, an 'l' label is returned, otherwise a /// normal 'L' label is returned. 
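/// For example (illustrative, assuming Darwin-style private global prefixes): jump table 2 of function number 5 would be named "LJTI5_2", or "lJTI5_2" when isLinkerPrivate is set.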
-MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, +MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, bool isLinkerPrivate) const { const DataLayout *DL = getTarget().getDataLayout(); assert(JumpTableInfo && "No jump tables"); @@ -530,10 +530,9 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); + Alignment = clampStackAlignment( + !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, + getFrameLowering()->getStackAlignment()); CreateStackObject(Size, Alignment, true); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -548,10 +547,9 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment, const AllocaInst *Alloca) { HasVarSizedObjects = true; - Alignment = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Alignment, getFrameLowering()->getStackAlignment()); + Alignment = clampStackAlignment( + !getFrameLowering()->isStackRealignable() || !RealignOption, Alignment, + getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(0, Alignment, 0, false, false, Alloca)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -571,16 +569,30 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // object is 16-byte aligned. unsigned StackAlign = getFrameLowering()->getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); - Align = - clampStackAlignment(!getFrameLowering()->isStackRealignable() || - !RealignOption, - Align, getFrameLowering()->getStackAlignment()); + Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*Alloca*/ nullptr)); return -++NumFixedObjects; } +/// CreateFixedSpillStackObject - Create a spill slot at a fixed location +/// on the stack. Returns an index with a negative value. +int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size, + int64_t SPOffset) { + unsigned StackAlign = getFrameLowering()->getStackAlignment(); + unsigned Align = MinAlign(SPOffset, StackAlign); + Align = clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, + /*Immutable*/ true, + /*isSS*/ true, + /*Alloca*/ nullptr)); + return -++NumFixedObjects; +} BitVector MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { @@ -849,11 +861,10 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, if (isa<StructType>(A->getType()) || isa<ArrayType>(A->getType()) || isa<StructType>(B->getType()) || isa<ArrayType>(B->getType())) return false; - + // For now, only support constants with the same size. 
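+ // Illustrative case (editorial note): an i32 0x3F800000 and a float 1.0f have identical 4-byte store images, so once both are converted through the common integer type below they can share a single constant-pool slot.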
uint64_t StoreSize = TD->getTypeStoreSize(A->getType()); - if (StoreSize != TD->getTypeStoreSize(B->getType()) || - StoreSize > 128) + if (StoreSize != TD->getTypeStoreSize(B->getType()) || StoreSize > 128) return false; Type *IntTy = IntegerType::get(A->getContext(), StoreSize*8); @@ -882,7 +893,7 @@ static bool CanShareConstantPoolEntry(const Constant *A, const Constant *B, /// an existing one. User must specify the log2 of the minimum required /// alignment for the object. /// -unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, +unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, unsigned Alignment) { assert(Alignment && "Alignment must be specified!"); if (Alignment > PoolAlignment) PoolAlignment = Alignment; diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index 23847d6..44191f7 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -333,6 +333,12 @@ bool PostMachineScheduler::runOnMachineFunction(MachineFunction &mf) { if (skipOptnoneFunction(*mf.getFunction())) return false; + const TargetSubtargetInfo &ST = + mf.getTarget().getSubtarget<TargetSubtargetInfo>(); + if (!ST.enablePostMachineScheduler()) { + DEBUG(dbgs() << "Subtarget disables post-MI-sched.\n"); + return false; + } DEBUG(dbgs() << "Before post-MI-sched:\n"; mf.print(dbgs())); // Initialize the context of the pass. @@ -472,14 +478,13 @@ void MachineSchedulerBase::print(raw_ostream &O, const Module* m) const { // unimplemented } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void ReadyQueue::dump() { dbgs() << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; } -#endif //===----------------------------------------------------------------------===// // ScheduleDAGMI - Basic machine instruction scheduling. This is @@ -529,6 +534,11 @@ void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { llvm_unreachable(nullptr); } #endif + // SU->TopReadyCycle was set to CurrCycle when it was scheduled. However, + // CurrCycle may have advanced since then. + if (SuccSU->TopReadyCycle < SU->TopReadyCycle + SuccEdge->getLatency()) + SuccSU->TopReadyCycle = SU->TopReadyCycle + SuccEdge->getLatency(); + --SuccSU->NumPredsLeft; if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) SchedImpl->releaseTopNode(SuccSU); @@ -563,6 +573,11 @@ void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { llvm_unreachable(nullptr); } #endif + // SU->BotReadyCycle was set to CurrCycle when it was scheduled. However, + // CurrCycle may have advanced since then. + if (PredSU->BotReadyCycle < SU->BotReadyCycle + PredEdge->getLatency()) + PredSU->BotReadyCycle = SU->BotReadyCycle + PredEdge->getLatency(); + --PredSU->NumSuccsLeft; if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) SchedImpl->releaseBottomNode(PredSU); @@ -674,10 +689,13 @@ void ScheduleDAGMI::schedule() { CurrentBottom = MI; } } - updateQueues(SU, IsTopNode); - - // Notify the scheduling strategy after updating the DAG. + // Notify the scheduling strategy before updating the DAG. + // This sets the scheduled node's ReadyCycle to CurrCycle. When updateQueues + // runs, it can then use the accurate ReadyCycle time to determine whether + // newly released nodes can move to the readyQ. 
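+ // Concrete effect (illustrative): if schedNode advances CurrCycle to, say, cycle 7 and a released successor becomes ready at cycle 7, updateQueues can place it directly in the available queue instead of parking it in the pending queue for a later releasePending scan.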
SchedImpl->schedNode(SU, IsTopNode); + + updateQueues(SU, IsTopNode); } assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone."); @@ -1568,7 +1586,7 @@ void SchedBoundary::reset() { // Track the maximum number of stall cycles that could arise either from the // latency of a DAG edge or the number of cycles that a processor resource is // reserved (SchedBoundary::ReservedCycles). - MaxObservedLatency = 0; + MaxObservedStall = 0; #endif // Reserve a zero-count for invalid CritResIdx. ExecutedResCounts.resize(1); @@ -1668,8 +1686,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) { for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - if (getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles) > CurrCycle) + unsigned NRCycle = getNextResourceCycle(PI->ProcResourceIdx, PI->Cycles); + if (NRCycle > CurrCycle) { +#ifndef NDEBUG + MaxObservedStall = std::max(PI->Cycles, MaxObservedStall); +#endif + DEBUG(dbgs() << " SU(" << SU->NodeNum << ") " + << SchedModel->getResourceName(PI->ProcResourceIdx) + << "=" << NRCycle << "c\n"); return true; + } } } return false; @@ -1725,6 +1751,16 @@ getOtherResourceCount(unsigned &OtherCritIdx) { } void SchedBoundary::releaseNode(SUnit *SU, unsigned ReadyCycle) { + assert(SU->getInstr() && "Scheduled SUnit must have instr"); + +#ifndef NDEBUG + // ReadyCycle was bumped up to CurrCycle when this node was scheduled, but + // CurrCycle may have been eagerly advanced immediately after scheduling, so + // it may now be greater than ReadyCycle. + if (ReadyCycle > CurrCycle) + MaxObservedStall = std::max(ReadyCycle - CurrCycle, MaxObservedStall); +#endif + if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; @@ -1744,18 +1780,6 @@ void SchedBoundary::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - MaxObservedLatency = std::max(Latency, MaxObservedLatency); -#endif - if (SU->TopReadyCycle < PredReadyCycle + Latency) - SU->TopReadyCycle = PredReadyCycle + Latency; - } releaseNode(SU, SU->TopReadyCycle); } @@ -1763,20 +1787,6 @@ void SchedBoundary::releaseBottomNode(SUnit *SU) { if (SU->isScheduled) return; - assert(SU->getInstr() && "Scheduled SUnit must have instr"); - - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - if (I->isWeak()) - continue; - unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned Latency = I->getLatency(); -#ifndef NDEBUG - MaxObservedLatency = std::max(Latency, MaxObservedLatency); -#endif - if (SU->BotReadyCycle < SuccReadyCycle + Latency) - SU->BotReadyCycle = SuccReadyCycle + Latency; - } releaseNode(SU, SU->BotReadyCycle); } @@ -1943,10 +1953,12 @@ void SchedBoundary::bumpNode(SUnit *SU) { PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned PIdx = PI->ProcResourceIdx; if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { - ReservedCycles[PIdx] = isTop() ?
NextCycle + PI->Cycles : NextCycle; -#ifndef NDEBUG - MaxObservedLatency = std::max(PI->Cycles, MaxObservedLatency); -#endif + if (isTop()) { + ReservedCycles[PIdx] = + std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles); + } + else + ReservedCycles[PIdx] = NextCycle; } } } @@ -2049,8 +2061,10 @@ SUnit *SchedBoundary::pickOnlyChoice() { } } for (unsigned i = 0; Available.empty(); ++i) { - assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) && - "permanent hazard"); (void)i; +// FIXME: Re-enable assert once PR20057 is resolved. +// assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedStall) && +// "permanent hazard"); + (void)i; bumpCycle(CurrCycle + 1); releasePending(); } @@ -2090,111 +2104,6 @@ void SchedBoundary::dumpScheduledState() { // GenericScheduler - Generic implementation of MachineSchedStrategy. //===----------------------------------------------------------------------===// -namespace { -/// Base class for GenericScheduler. This class maintains information about -/// scheduling candidates based on TargetSchedModel making it easy to implement -/// heuristics for either preRA or postRA scheduling. -class GenericSchedulerBase : public MachineSchedStrategy { -public: - /// Represent the type of SchedCandidate found within a single queue. - /// pickNodeBidirectional depends on these listed by decreasing priority. - enum CandReason { - NoCand, PhysRegCopy, RegExcess, RegCritical, Stall, Cluster, Weak, RegMax, - ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, - TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; - -#ifndef NDEBUG - static const char *getReasonStr(GenericSchedulerBase::CandReason Reason); -#endif - - /// Policy for scheduling the next instruction in the candidate's zone. - struct CandPolicy { - bool ReduceLatency; - unsigned ReduceResIdx; - unsigned DemandResIdx; - - CandPolicy(): ReduceLatency(false), ReduceResIdx(0), DemandResIdx(0) {} - }; - - /// Status of an instruction's critical resource consumption. - struct SchedResourceDelta { - // Count critical resources in the scheduled region required by SU. - unsigned CritResources; - - // Count critical resources from another region consumed by SU. - unsigned DemandedResources; - - SchedResourceDelta(): CritResources(0), DemandedResources(0) {} - - bool operator==(const SchedResourceDelta &RHS) const { - return CritResources == RHS.CritResources - && DemandedResources == RHS.DemandedResources; - } - bool operator!=(const SchedResourceDelta &RHS) const { - return !operator==(RHS); - } - }; - - /// Store the state used by GenericScheduler heuristics, required for the - /// lifetime of one invocation of pickNode(). - struct SchedCandidate { - CandPolicy Policy; - - // The best SUnit candidate. - SUnit *SU; - - // The reason for this candidate. - CandReason Reason; - - // Set of reasons that apply to multiple candidates. - uint32_t RepeatReasonSet; - - // Register pressure values for the best candidate. - RegPressureDelta RPDelta; - - // Critical resource consumption of the best candidate. - SchedResourceDelta ResDelta; - - SchedCandidate(const CandPolicy &policy) - : Policy(policy), SU(nullptr), Reason(NoCand), RepeatReasonSet(0) {} - - bool isValid() const { return SU; } - - // Copy the status of another candidate without changing policy. 
- void setBest(SchedCandidate &Best) { - assert(Best.Reason != NoCand && "uninitialized Sched candidate"); - SU = Best.SU; - Reason = Best.Reason; - RPDelta = Best.RPDelta; - ResDelta = Best.ResDelta; - } - - bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } - void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } - - void initResourceDelta(const ScheduleDAGMI *DAG, - const TargetSchedModel *SchedModel); - }; - -protected: - const MachineSchedContext *Context; - const TargetSchedModel *SchedModel; - const TargetRegisterInfo *TRI; - - SchedRemainder Rem; -protected: - GenericSchedulerBase(const MachineSchedContext *C): - Context(C), SchedModel(nullptr), TRI(nullptr) {} - - void setPolicy(CandPolicy &Policy, bool IsPostRA, SchedBoundary &CurrZone, - SchedBoundary *OtherZone); - -#ifndef NDEBUG - void traceCandidate(const SchedCandidate &Cand); -#endif -}; -} // namespace - void GenericSchedulerBase::SchedCandidate:: initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { @@ -2430,65 +2339,6 @@ static void tracePick(const GenericSchedulerBase::SchedCandidate &Cand, << GenericSchedulerBase::getReasonStr(Cand.Reason) << '\n'); } -namespace { -/// GenericScheduler shrinks the unscheduled zone using heuristics to balance -/// the schedule. -class GenericScheduler : public GenericSchedulerBase { - ScheduleDAGMILive *DAG; - - // State of the top and bottom scheduled instruction boundaries. - SchedBoundary Top; - SchedBoundary Bot; - - MachineSchedPolicy RegionPolicy; -public: - GenericScheduler(const MachineSchedContext *C): - GenericSchedulerBase(C), DAG(nullptr), Top(SchedBoundary::TopQID, "TopQ"), - Bot(SchedBoundary::BotQID, "BotQ") {} - - void initPolicy(MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned NumRegionInstrs) override; - - bool shouldTrackPressure() const override { - return RegionPolicy.ShouldTrackPressure; - } - - void initialize(ScheduleDAGMI *dag) override; - - SUnit *pickNode(bool &IsTopNode) override; - - void schedNode(SUnit *SU, bool IsTopNode) override; - - void releaseTopNode(SUnit *SU) override { - Top.releaseTopNode(SU); - } - - void releaseBottomNode(SUnit *SU) override { - Bot.releaseBottomNode(SU); - } - - void registerRoots() override; - -protected: - void checkAcyclicLatency(); - - void tryCandidate(SchedCandidate &Cand, - SchedCandidate &TryCand, - SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - RegPressureTracker &TempTracker); - - SUnit *pickNodeBidirectional(bool &IsTopNode); - - void pickNodeFromQueue(SchedBoundary &Zone, - const RegPressureTracker &RPTracker, - SchedCandidate &Candidate); - - void reschedulePhysRegCopies(SUnit *SU, bool isTop); -}; -} // namespace - void GenericScheduler::initialize(ScheduleDAGMI *dag) { assert(dag->hasVRegLiveness() && "(PreRA)GenericScheduler needs vreg liveness"); @@ -3023,75 +2873,25 @@ GenericSchedRegistry("converge", "Standard converging scheduler.", // PostGenericScheduler - Generic PostRA implementation of MachineSchedStrategy. //===----------------------------------------------------------------------===// -namespace { -/// PostGenericScheduler - Interface to the scheduling algorithm used by -/// ScheduleDAGMI. -/// -/// Callbacks from ScheduleDAGMI: -/// initPolicy -> initialize(DAG) -> registerRoots -> pickNode ... 
-class PostGenericScheduler : public GenericSchedulerBase { - ScheduleDAGMI *DAG; - SchedBoundary Top; - SmallVector<SUnit*, 8> BotRoots; -public: - PostGenericScheduler(const MachineSchedContext *C): - GenericSchedulerBase(C), Top(SchedBoundary::TopQID, "TopQ") {} - - virtual ~PostGenericScheduler() {} - - void initPolicy(MachineBasicBlock::iterator Begin, - MachineBasicBlock::iterator End, - unsigned NumRegionInstrs) override { - /* no configurable policy */ - }; - - /// PostRA scheduling does not track pressure. - bool shouldTrackPressure() const override { return false; } - - void initialize(ScheduleDAGMI *Dag) override { - DAG = Dag; - SchedModel = DAG->getSchedModel(); - TRI = DAG->TRI; - - Rem.init(DAG, SchedModel); - Top.init(DAG, SchedModel, &Rem); - BotRoots.clear(); - - // Initialize the HazardRecognizers. If itineraries don't exist, are empty, - // or are disabled, then these HazardRecs will be disabled. - const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); - const TargetMachine &TM = DAG->MF.getTarget(); - if (!Top.HazardRec) { - Top.HazardRec = - TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - } - } - - void registerRoots() override; - - SUnit *pickNode(bool &IsTopNode) override; - - void scheduleTree(unsigned SubtreeID) override { - llvm_unreachable("PostRA scheduler does not support subtree analysis."); - } - - void schedNode(SUnit *SU, bool IsTopNode) override; +void PostGenericScheduler::initialize(ScheduleDAGMI *Dag) { + DAG = Dag; + SchedModel = DAG->getSchedModel(); + TRI = DAG->TRI; - void releaseTopNode(SUnit *SU) override { - Top.releaseTopNode(SU); - } + Rem.init(DAG, SchedModel); + Top.init(DAG, SchedModel, &Rem); + BotRoots.clear(); - // Only called for roots. - void releaseBottomNode(SUnit *SU) override { - BotRoots.push_back(SU); + // Initialize the HazardRecognizers. If itineraries don't exist, are empty, + // or are disabled, then these HazardRecs will be disabled. + const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); + const TargetMachine &TM = DAG->MF.getTarget(); + if (!Top.HazardRec) { + Top.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); } +} -protected: - void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand); - - void pickNodeFromQueue(SchedCandidate &Cand); -}; -} // namespace void PostGenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index b3f7198..249b2d0 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -30,11 +30,6 @@ using namespace llvm; -namespace llvm { -extern cl::opt<bool> EnableStackMapLiveness; -extern cl::opt<bool> EnablePatchPointLiveness; -} - static cl::opt<bool> DisablePostRA("disable-post-ra", cl::Hidden, cl::desc("Disable Post Regalloc")); static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden, @@ -92,9 +87,9 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with -// substitutePass(&PostRASchedulerID, &MachineSchedulerID); Ideally it wouldn't -// be part of the standard pass pipeline, and the target would just add a PostRA -// scheduling pass wherever it wants. +// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it +// wouldn't be part of the standard pass pipeline, and the target would just add +// a PostRA scheduling pass wherever it wants. 
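+// For illustration (hypothetical FooTarget, not from this patch): the "proper" opt-in would be a TargetPassConfig subclass whose constructor calls +// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); +// before the pass pipeline is built, so the post-RA MachineScheduler replaces the old post-RA scheduler only for that target.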
static cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden, cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); @@ -421,7 +416,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: - case ExceptionHandling::Win64: + case ExceptionHandling::WinEH: addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: @@ -566,8 +561,7 @@ void TargetPassConfig::addMachinePasses() { if (addPreEmitPass()) printAndVerify("After PreEmit passes"); - if (EnableStackMapLiveness || EnablePatchPointLiveness) - addPass(&StackMapLivenessID); + addPass(&StackMapLivenessID); } /// Add passes that optimize machine instructions in SSA form. diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index eeee93a..716cb1f 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -91,6 +91,10 @@ static cl::opt<bool> DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); +static cl::opt<bool> +DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(true), + cl::desc("Disable advanced copy optimization")); + STATISTIC(NumReuse, "Number of extension results reused"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); @@ -137,6 +141,105 @@ namespace { bool isLoadFoldable(MachineInstr *MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); }; + + /// \brief Helper class to track the possible sources of a value defined by + /// a (chain of) copy-related instructions. + /// Given a definition (instruction and definition index), this class + /// follows the use-def chain to find successive suitable sources. + /// The given source can be used to rewrite the definition into + /// def = COPY src. + /// + /// For instance, let us consider the following snippet: + /// v0 = + /// v2 = INSERT_SUBREG v1, v0, sub0 + /// def = COPY v2.sub0 + /// + /// Using a ValueTracker for def = COPY v2.sub0 will give the following + /// suitable sources: + /// v2.sub0 and v0. + /// Then, def can be rewritten into def = COPY v0. + class ValueTracker { + private: + /// The current point into the use-def chain. + const MachineInstr *Def; + /// The index of the definition in Def. + unsigned DefIdx; + /// The sub register index of the definition. + unsigned DefSubReg; + /// The register where the value can be found. + unsigned Reg; + /// Specify whether or not the value tracking looks through + /// complex instructions. When this is false, the value tracker + /// bails on everything that is not a copy or a bitcast. + /// + /// Note: This could have been implemented as a specialized version of + /// the ValueTracker class, but that would have complicated the code of + /// the users of this class. + bool UseAdvancedTracking; + /// Optional MachineRegisterInfo used to perform some complex + /// tracking. + const MachineRegisterInfo *MRI; + + /// \brief Dispatcher to the right underlying implementation of + /// getNextSource. + bool getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for Copy instructions. + bool getNextSourceFromCopy(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for Bitcast instructions. + bool getNextSourceFromBitcast(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for RegSequence + /// instructions.
+ bool getNextSourceFromRegSequence(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for InsertSubreg + /// instructions. + bool getNextSourceFromInsertSubreg(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for ExtractSubreg + /// instructions. + bool getNextSourceFromExtractSubreg(unsigned &SrcIdx, unsigned &SrcSubReg); + /// \brief Specialized version of getNextSource for SubregToReg + /// instructions. + bool getNextSourceFromSubregToReg(unsigned &SrcIdx, unsigned &SrcSubReg); + + public: + /// \brief Create a ValueTracker instance for the value defined by \p MI + /// at the operand index \p DefIdx. + /// \p DefSubReg represents the sub register index the value tracker will + /// track. It does not need to match the sub register index used in \p MI. + /// \p UseAdvancedTracking specifies whether or not the value tracker looks + /// through complex instructions. By default (false), it handles only copy + /// and bitcast instructions. + /// \p MRI, when provided, is used to perform some complex checks. + ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, + bool UseAdvancedTracking = false, + const MachineRegisterInfo *MRI = nullptr) + : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), + UseAdvancedTracking(UseAdvancedTracking), MRI(MRI) { + assert(Def->getOperand(DefIdx).isDef() && + Def->getOperand(DefIdx).isReg() && + "Definition does not match machine instruction"); + // Initially the value is in the defined register. + Reg = Def->getOperand(DefIdx).getReg(); + } + + /// \brief Following the use-def chain, get the next available source + /// for the tracked value. + /// When the returned value is not nullptr, getReg() gives the register + /// that contains the tracked value. + /// \note The sub register index returned in \p SrcSubReg must be used + /// on the register returned by getReg() to access the actual value. + /// \return Unless the returned value is nullptr (i.e., no source found), + /// \p SrcIdx gives the index of the next source in the returned + /// instruction and \p SrcSubReg the index to be used on that source to + /// get the tracked value. When nullptr is returned, no alternative source + /// has been found. + const MachineInstr *getNextSource(unsigned &SrcIdx, unsigned &SrcSubReg); + + /// \brief Get the last register where the initial value can be found. + /// Initially this is the register of the definition. + /// Then, after each successful call to getNextSource, this is the + /// register of the last source. + unsigned getReg() const { return Reg; } + }; } char PeepholeOptimizer::ID = 0; @@ -443,31 +546,32 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { unsigned Src; unsigned SrcSubReg; bool ShouldRewrite = false; - MachineInstr *Copy = MI; const TargetRegisterInfo &TRI = *TM->getRegisterInfo(); - // Follow the chain of copies until we reach the top or find a - // more suitable source. + // Follow the chain of copies until we reach the top of the use-def chain + // or find a more suitable source.
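+ // For instance (editorial sketch, mirroring the ValueTracker class doc): with v2 = INSERT_SUBREG v1, v0, sub0 and MI being def = COPY v2.sub0, successive getNextSource calls yield v2.sub0 and then v0, letting the loop below rewrite the copy to read v0 directly when the register files match.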
+ ValueTracker ValTracker(*MI, DefIdx, DefSubReg, !DisableAdvCopyOpt, MRI); do { - unsigned CopyDefIdx, CopySrcIdx; - if (!getCopyOrBitcastDefUseIdx(*Copy, CopyDefIdx, CopySrcIdx)) + unsigned CopySrcIdx, CopySrcSubReg; + if (!ValTracker.getNextSource(CopySrcIdx, CopySrcSubReg)) break; - const MachineOperand &MO = Copy->getOperand(CopySrcIdx); - assert(MO.isReg() && "Copies must be between registers."); - Src = MO.getReg(); - + Src = ValTracker.getReg(); + SrcSubReg = CopySrcSubReg; + + // Do not extend the live-ranges of physical registers as they add + // constraints to the register allocator. + // Moreover, if we wanted to extend the live-range of a physical register, + // then unlike an SSA virtual register, we would have to check that it is + // not redefined before the related use. if (TargetRegisterInfo::isPhysicalRegister(Src)) break; const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); - SrcSubReg = MO.getSubReg(); // If this source does not incur a cross register bank copy, use it. ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC, SrcSubReg); - // Follow the chain of copies: get the definition of Src. - Copy = MRI->getVRegDef(Src); - } while (!ShouldRewrite && Copy && (Copy->isCopy() || Copy->isBitcast())); + } while (!ShouldRewrite); // If we did not find a more suitable source, there is nothing to optimize. if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg()) @@ -483,6 +587,9 @@ bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { MRI->replaceRegWith(Def, NewVR); MRI->clearKillFlags(NewVR); + // We extended the lifetime of Src. + // Clear the kill flags to account for that. + MRI->clearKillFlags(Src); MI->eraseFromParent(); ++NumCopiesBitcasts; return true; @@ -673,3 +780,251 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { return Changed; } + +bool ValueTracker::getNextSourceFromCopy(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isCopy() && "Invalid definition"); + // Copy instructions are supposed to be: Def = Src. + // If someone breaks this assumption, bad things will happen everywhere. + assert(Def->getDesc().getNumOperands() == 2 && "Invalid number of operands"); + + if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) + // If we look for a different subreg, it means we want a subreg of src. + // Bail out, as we do not support composing subregs yet. + return false; + // Otherwise, we want the whole source. + SrcIdx = 1; + SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + return true; +} + +bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isBitcast() && "Invalid definition"); + + // Bail if there are effects that a plain copy will not expose. + if (Def->hasUnmodeledSideEffects()) + return false; + + // Bitcasts with more than one def are not supported. + if (Def->getDesc().getNumDefs() != 1) + return false; + if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) + // If we look for a different subreg, it means we want a subreg of the src. + // Bail out, as we do not support composing subregs yet. + return false; + + SrcIdx = Def->getDesc().getNumOperands(); + for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; + ++OpIdx) { + const MachineOperand &MO = Def->getOperand(OpIdx); + if (!MO.isReg() || !MO.getReg()) + continue; + assert(!MO.isDef() && "We should have skipped all the definitions by now"); + if (SrcIdx != EndOpIdx) + // Multiple sources?
+ return false; + SrcIdx = OpIdx; + } + SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + return true; +} + +bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isRegSequence() && "Invalid definition"); + + if (Def->getOperand(DefIdx).getSubReg()) + // If we are composing subregs, bail out. + // The case we are checking is Def.<subreg> = REG_SEQUENCE. + // This should almost never happen as the SSA property is tracked at + // the register level (as opposed to the subreg level). + // I.e., + // Def.sub0 = + // Def.sub1 = + // is a valid SSA representation for Def.sub0 and Def.sub1, but not for + // Def. Thus, it must not be generated. + // However, some code could theoretically generate a single + // Def.sub0 (i.e., not defining the other subregs) and we would + // have this case. + // If we can ascertain (or force) that this never happens, we could + // turn that into an assertion. + return false; + + // We are looking at: + // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... + // Check if one of the operands defines the subreg we are interested in. + for (unsigned OpIdx = DefIdx + 1, EndOpIdx = Def->getNumOperands(); + OpIdx != EndOpIdx; OpIdx += 2) { + const MachineOperand &MOSubIdx = Def->getOperand(OpIdx + 1); + assert(MOSubIdx.isImm() && + "One of the subindex of the reg_sequence is not an immediate"); + if (MOSubIdx.getImm() == DefSubReg) { + assert(Def->getOperand(OpIdx).isReg() && + "One of the source of the reg_sequence is not a register"); + SrcIdx = OpIdx; + SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + return true; + } + } + + // If the subreg we are tracking is super-defined by another subreg, + // we could follow this value. However, this would require composing + // the subregs, and we do not do that for now. + return false; +} + +bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isInsertSubreg() && "Invalid definition"); + if (Def->getOperand(DefIdx).getSubReg()) + // If we are composing subregs, bail out. + // Same remark as getNextSourceFromRegSequence. + // I.e., this may be turned into an assert. + return false; + + // We are looking at: + // Def = INSERT_SUBREG v0, v1, sub1 + // There are two cases: + // 1. DefSubReg == sub1, get v1. + // 2. DefSubReg != sub1, the value may be available through v0. + + // #1 Check if the inserted register matches the required sub index. + unsigned InsertedSubReg = Def->getOperand(3).getImm(); + if (InsertedSubReg == DefSubReg) { + SrcIdx = 2; + SrcSubReg = Def->getOperand(SrcIdx).getSubReg(); + return true; + } + // #2 Otherwise, if the sub register we are looking for is not partially + // defined by the inserted element, we can look through the main + // register (v0). + // To check the overlap we need an MRI and a TRI. + if (!MRI) + return false; + + const MachineOperand &MODef = Def->getOperand(DefIdx); + const MachineOperand &MOBase = Def->getOperand(1); + // If the result register (Def) and the base register (v0) do not + // have the same register class or if we have to compose + // subregisters, bail out. + if (MRI->getRegClass(MODef.getReg()) != MRI->getRegClass(MOBase.getReg()) || + MOBase.getSubReg()) + return false; + + // Get the TRI and check if the inserted sub register overlaps with the + // sub register we are tracking.
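+ // Illustrative case (assumed lane masks): if DefSubReg covers the low half (mask 0x0F) and InsertedSubReg the high half (mask 0xF0), the AND below is 0, so the tracked half of v0 is untouched and the tracker can look through the insert.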
+ const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); + if (!TRI || + (TRI->getSubRegIndexLaneMask(DefSubReg) & + TRI->getSubRegIndexLaneMask(InsertedSubReg)) != 0) + return false; + // At this point, the value is available in v0 via the same subreg + // we used for Def. + SrcIdx = 1; + SrcSubReg = DefSubReg; + return true; +} + +bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isExtractSubreg() && "Invalid definition"); + // We are looking at: + // Def = EXTRACT_SUBREG v0, sub0 + + // Bail out if we have to compose sub registers. + // Indeed, if DefSubReg != 0, we would have to compose it with sub0. + if (DefSubReg) + return false; + + // Bail out if we have to compose sub registers. + // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. + if (Def->getOperand(1).getSubReg()) + return false; + // Otherwise, the value is available in v0.sub0. + SrcIdx = 1; + SrcSubReg = Def->getOperand(2).getImm(); + return true; +} + +bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcIdx, + unsigned &SrcSubReg) { + assert(Def->isSubregToReg() && "Invalid definition"); + // We are looking at: + // Def = SUBREG_TO_REG Imm, v0, sub0 + + // Bail out if we have to compose sub registers. + // If DefSubReg != sub0, we would have to check that all the bits + // we track are included in sub0, and if yes, we would have to + // determine the right subreg in v0. + if (DefSubReg != Def->getOperand(3).getImm()) + return false; + // Bail out if we have to compose sub registers. + // Likewise, if v0.subreg != 0, we would have to compose it with sub0. + if (Def->getOperand(2).getSubReg()) + return false; + + SrcIdx = 2; + SrcSubReg = Def->getOperand(3).getImm(); + return true; +} + +bool ValueTracker::getNextSourceImpl(unsigned &SrcIdx, unsigned &SrcSubReg) { + assert(Def && "This method needs a valid definition"); + + assert( + (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && + Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); + if (Def->isCopy()) + return getNextSourceFromCopy(SrcIdx, SrcSubReg); + if (Def->isBitcast()) + return getNextSourceFromBitcast(SrcIdx, SrcSubReg); + // All the remaining cases involve "complex" instructions. + // Bail out if we did not ask for advanced tracking. + if (!UseAdvancedTracking) + return false; + if (Def->isRegSequence()) + return getNextSourceFromRegSequence(SrcIdx, SrcSubReg); + if (Def->isInsertSubreg()) + return getNextSourceFromInsertSubreg(SrcIdx, SrcSubReg); + if (Def->isExtractSubreg()) + return getNextSourceFromExtractSubreg(SrcIdx, SrcSubReg); + if (Def->isSubregToReg()) + return getNextSourceFromSubregToReg(SrcIdx, SrcSubReg); + return false; +} + +const MachineInstr *ValueTracker::getNextSource(unsigned &SrcIdx, + unsigned &SrcSubReg) { + // If we reach a point where we cannot move up in the use-def chain, + // there is nothing we can get. + if (!Def) + return nullptr; + + const MachineInstr *PrevDef = nullptr; + // Try to find the next source. + if (getNextSourceImpl(SrcIdx, SrcSubReg)) { + // Update definition, definition index, and subregister for the + // next call of getNextSource. + const MachineOperand &MO = Def->getOperand(SrcIdx); + assert(MO.isReg() && !MO.isDef() && "Source is invalid"); + // Update the current register. + Reg = MO.getReg(); + // Update the return value before moving up in the use-def chain. + PrevDef = Def; + // If we can still move up in the use-def chain, move to the next + // definition.
+ if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { + Def = MRI->getVRegDef(Reg); + DefIdx = MRI->def_begin(Reg).getOperandNo(); + DefSubReg = SrcSubReg; + return PrevDef; + } + } + // If we end up here, this means we will not be able to find another source + // for the next iteration. + // Make sure any new call to getNextSource bails out early by cutting the + // use-def chain. + Def = nullptr; + return PrevDef; +} diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index c74a42f..b98d210 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -160,7 +160,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { replaceFrameIndices(Fn); // If register scavenging is needed, as we've enabled doing it as a - // post-pass, scavenge the virtual registers that frame index elimiation + // post-pass, scavenge the virtual registers that frame index elimination // inserted. if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) scavengeFrameVirtualRegs(Fn); @@ -268,51 +268,56 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { } } - if (CSI.empty()) - return; // Early exit if no callee saved registers are modified! + if (!TFI->assignCalleeSavedSpillSlots(F, RegInfo, CSI)) { + // If target doesn't implement this, use generic code. - unsigned NumFixedSpillSlots; - const TargetFrameLowering::SpillSlot *FixedSpillSlots = - TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); + if (CSI.empty()) + return; // Early exit if no callee saved registers are modified! - // Now that we know which registers need to be saved and restored, allocate - // stack slots for them. - for (std::vector<CalleeSavedInfo>::iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) { - unsigned Reg = I->getReg(); - const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + unsigned NumFixedSpillSlots; + const TargetFrameLowering::SpillSlot *FixedSpillSlots = + TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots); - int FrameIdx; - if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { - I->setFrameIdx(FrameIdx); - continue; - } + // Now that we know which registers need to be saved and restored, allocate + // stack slots for them. + for (std::vector<CalleeSavedInfo>::iterator I = CSI.begin(), E = CSI.end(); + I != E; ++I) { + unsigned Reg = I->getReg(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); - // Check to see if this physreg must be spilled to a particular stack slot - // on this target. - const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; - while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots && - FixedSlot->Reg != Reg) - ++FixedSlot; - - if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { - // Nope, just spill it anywhere convenient. - unsigned Align = RC->getAlignment(); - unsigned StackAlign = TFI->getStackAlignment(); - - // We may not be able to satisfy the desired alignment specification of - // the TargetRegisterClass if the stack alignment is smaller. Use the - // min. - Align = std::min(Align, StackAlign); - FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); - if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; - if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; - } else { - // Spill it to the stack where we must. 
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); - } + int FrameIdx; + if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { + I->setFrameIdx(FrameIdx); + continue; + } + + // Check to see if this physreg must be spilled to a particular stack slot + // on this target. + const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots; + while (FixedSlot != FixedSpillSlots + NumFixedSpillSlots && + FixedSlot->Reg != Reg) + ++FixedSlot; + + if (FixedSlot == FixedSpillSlots + NumFixedSpillSlots) { + // Nope, just spill it anywhere convenient. + unsigned Align = RC->getAlignment(); + unsigned StackAlign = TFI->getStackAlignment(); + + // We may not be able to satisfy the desired alignment specification of + // the TargetRegisterClass if the stack alignment is smaller. Use the + // min. + Align = std::min(Align, StackAlign); + FrameIdx = MFI->CreateStackObject(RC->getSize(), Align, true); + if ((unsigned)FrameIdx < MinCSFrameIndex) MinCSFrameIndex = FrameIdx; + if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; + } else { + // Spill it to the stack where we must. + FrameIdx = + MFI->CreateFixedSpillStackObject(RC->getSize(), FixedSlot->Offset); + } - I->setFrameIdx(FrameIdx); + I->setFrameIdx(FrameIdx); + } } MFI->setCalleeSavedInfo(CSI); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index aa7c178..901b993 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -44,6 +44,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <queue> using namespace llvm; @@ -79,6 +80,12 @@ ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, cl::desc("Exhaustive Search for registers bypassing the depth " "and interference cutoffs of last chance recoloring")); +static cl::opt<bool> EnableLocalReassignment( + "enable-local-reassign", cl::Hidden, + cl::desc("Local reassignment can yield better allocation decisions, but " + "may be compile time intensive"), + cl::init(false)); + // FIXME: Find a good default for this flag and remove the flag. static cl::opt<unsigned> CSRFirstTimeCost("regalloc-csr-first-time-cost", @@ -285,6 +292,10 @@ class RAGreedy : public MachineFunctionPass, /// Callee-save register cost, calculated once per machine function. BlockFrequency CSRCost; + /// Whether to run the local reassignment heuristic. This information is + /// obtained from the TargetSubtargetInfo. + bool EnableLocalReassign; + public: RAGreedy(); @@ -731,7 +742,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Evicting another local live range in this case could lead to suboptimal // coloring.
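 // Net effect of the new guard (illustrative): with EnableLocalReassign false the eviction below is rejected outright; when enabled, canReassign first gets a chance to move the interfering local interval to another free physical register, trading compile time for better coloring.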
if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && - !canReassign(*Intf, PhysReg)) { + (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) { return false; } } @@ -2308,9 +2319,14 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { << "********** Function: " << mf.getName() << '\n'); MF = &mf; - TRI = MF->getTarget().getRegisterInfo(); - TII = MF->getTarget().getInstrInfo(); + const TargetMachine &TM = MF->getTarget(); + TRI = TM.getRegisterInfo(); + TII = TM.getInstrInfo(); RCI.runOnMachineFunction(mf); + + EnableLocalReassign = EnableLocalReassignment || + TM.getSubtargetImpl()->enableRALocalReassignment(TM.getOptLevel()); + if (VerifyEnabled) MF->verify(this, "Before greedy register allocator"); diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index b2909e0..617e459 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -41,7 +41,7 @@ static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure, } } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure, const TargetRegisterInfo *TRI) { bool Empty = true; @@ -55,6 +55,7 @@ void llvm::dumpRegSetPressure(ArrayRef<unsigned> SetPressure, dbgs() << "\n"; } +LLVM_DUMP_METHOD void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Max Pressure: "; dumpRegSetPressure(MaxSetPressure, TRI); @@ -68,6 +69,7 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << '\n'; } +LLVM_DUMP_METHOD void RegPressureTracker::dump() const { if (!isTopClosed() || !isBottomClosed()) { dbgs() << "Curr Pressure: "; @@ -75,7 +77,6 @@ void RegPressureTracker::dump() const { } P.dump(TRI); } -#endif /// Increase the current pressure as impacted by these registers and bump /// the high water mark if needed. diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 92a9a30..0f8b21c 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -1508,7 +1508,7 @@ void SchedDFSResult::scheduleTree(unsigned SubtreeID) { } } -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const { OS << InstrCount << " / " << Length << " = "; if (!Length) @@ -1517,16 +1517,17 @@ void ILPValue::print(raw_ostream &OS) const { OS << format("%g", ((double)InstrCount / Length)); } +LLVM_DUMP_METHOD void ILPValue::dump() const { dbgs() << *this << '\n'; } namespace llvm { +LLVM_DUMP_METHOD raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) { Val.print(OS); return OS; } } // namespace llvm -#endif // !NDEBUG || LLVM_ENABLE_DUMP diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2d2fd53..7c42e4d 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -167,9 +167,18 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); - SDValue SplitIndexingFromLoad(LoadSDNode *LD); bool SliceUpLoad(SDNode *N); + /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed + /// load. + /// + /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. + /// \param InVecVT type of the input vector to EVE with bitcasts resolved. + /// \param EltNo index of the vector element to load. + /// \param OriginalLoad the load that EVE came from, to be replaced. + /// \returns EVE on success, SDValue() on failure.
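+ /// For example (illustrative): extracting constant element 2 from a load of <4 x i32> can become a scalar i32 load at offset 8 from the same base pointer, subject to the usual legality and alignment checks.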
+ SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); SDValue SExtPromoteOperand(SDValue Op, EVT PVT); @@ -646,10 +655,14 @@ static ConstantSDNode *isConstOrConstSplat(SDValue N) { return CN; if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { - ConstantSDNode *CN = BV->getConstantSplatValue(); + BitVector UndefElements; + ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); // BuildVectors can truncate their operands. Ignore that case here. - if (CN && CN->getValueType(0) == N.getValueType().getScalarType()) + // FIXME: We blindly ignore splats which include undef which is overly + // pessimistic. + if (CN && UndefElements.none() && + CN->getValueType(0) == N.getValueType().getScalarType()) return CN; } @@ -762,14 +775,10 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // If the operands of this node are only used by the node, they will now // be dead. Make sure to visit them first to delete dead nodes early. - for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) { - SDNode *Op = TLO.Old.getNode()->getOperand(i).getNode(); - // For an operand generating multiple values, one of the values may - // become dead allowing further simplification (e.g. split index - // arithmetic from an indexed load). - if (Op->hasOneUse() || Op->getNumValues() > 1) - AddToWorkList(Op); - } + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) + if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) + AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); + DAG.DeleteNode(TLO.Old.getNode()); } } @@ -1320,9 +1329,16 @@ SDValue DAGCombiner::combine(SDNode *N) { // Constant operands are canonicalized to RHS. 
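 // E.g. a query for (add 4, x) is re-issued as (add x, 4); for binary nodes carrying NUW/NSW/exact flags, the flag-aware getNodeIfExists overload below passes those flags along so CSE does not conflate nodes that differ only in their flags.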
if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { - SDValue Ops[] = { N1, N0 }; - SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), - Ops); + SDValue Ops[] = {N1, N0}; + SDNode *CSENode; + if (const BinaryWithFlagsSDNode *BinNode = + dyn_cast<BinaryWithFlagsSDNode>(N)) { + CSENode = DAG.getNodeIfExists( + N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(), + BinNode->hasNoSignedWrap(), BinNode->isExact()); + } else { + CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); + } if (CSENode) return SDValue(CSENode, 0); } @@ -3942,14 +3958,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { // If setcc produces all-one true value then: // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) if (N1CV && N1CV->isConstant()) { - if (N0.getOpcode() == ISD::AND && - TLI.getBooleanContents(true) == - TargetLowering::ZeroOrNegativeOneBooleanContent) { + if (N0.getOpcode() == ISD::AND) { SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); - if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC) { + if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && + TLI.getBooleanContents(N00.getOperand(0).getValueType()) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV); if (C.getNode()) return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); @@ -4508,11 +4524,20 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) + // We can't do this reliably if integer based booleans have different contents + // to floating point based booleans. This is because we can't tell whether we + // have an integer-based boolean or a floating-point-based boolean unless we + // can find the SETCC that produced it and inspect its operands. This is + // fairly easy if C is the SETCC node, but it can potentially be + // undiscoverable (or not reasonably discoverable). For example, it could be + // in another basic block or it could require searching a complicated + // expression. 
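+ // Concrete hazard (illustrative): under ZeroOrNegativeOneBooleanContent a true C is all ones, so (xor C, 1) would produce -2 rather than the expected 0; hence the fold below additionally requires the integer and floating-point boolean contents to agree on ZeroOrOneBooleanContent.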
if (VT.isInteger() && - (VT0 == MVT::i1 || - (VT0.isInteger() && - TLI.getBooleanContents(false) == - TargetLowering::ZeroOrOneBooleanContent)) && + (VT0 == MVT::i1 || (VT0.isInteger() && + TLI.getBooleanContents(false, false) == + TLI.getBooleanContents(false, true) && + TLI.getBooleanContents(false, false) == + TargetLowering::ZeroOrOneBooleanContent)) && N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) @@ -4555,12 +4580,9 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // fold selects based on a setcc into other things, such as min/max/abs if (N0.getOpcode() == ISD::SETCC) { - // FIXME: - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && - TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) + if ((!LegalOperations && + TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || + TLI.isOperationLegal(ISD::SELECT_CC, VT)) return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); @@ -4587,6 +4609,56 @@ std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { return std::make_pair(Lo, Hi); } +// This function assumes all the vselect's arguments are CONCAT_VECTOR +// nodes and that the condition is a BV of ConstantSDNodes (or undefs). +static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { + SDLoc dl(N); + SDValue Cond = N->getOperand(0); + SDValue LHS = N->getOperand(1); + SDValue RHS = N->getOperand(2); + MVT VT = N->getSimpleValueType(0); + int NumElems = VT.getVectorNumElements(); + assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && + RHS.getOpcode() == ISD::CONCAT_VECTORS && + Cond.getOpcode() == ISD::BUILD_VECTOR); + + // We're sure we have an even number of elements due to the + // concat_vectors we have as arguments to vselect. + // Skip BV elements until we find one that's not an UNDEF + // After we find an UNDEF element, keep looping until we get to half the + // length of the BV and see if all the non-undef nodes are the same. + ConstantSDNode *BottomHalf = nullptr; + for (int i = 0; i < NumElems / 2; ++i) { + if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + continue; + + if (BottomHalf == nullptr) + BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != BottomHalf) + return SDValue(); + } + + // Do the same for the second half of the BuildVector + ConstantSDNode *TopHalf = nullptr; + for (int i = NumElems / 2; i < NumElems; ++i) { + if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) + continue; + + if (TopHalf == nullptr) + TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); + else if (Cond->getOperand(i).getNode() != TopHalf) + return SDValue(); + } + + assert(TopHalf && BottomHalf && + "One half of the selector was all UNDEFs and the other was all the " + "same value. This should have been addressed before this function."); + return DAG.getNode( + ISD::CONCAT_VECTORS, dl, VT, + BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), + TopHalf->isNullValue() ? 
RHS->getOperand(1) : LHS->getOperand(1)); +} + SDValue DAGCombiner::visitVSELECT(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -4659,6 +4731,17 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) { if (ISD::isBuildVectorAllZeros(N0.getNode())) return N2; + // The ConvertSelectToConcatVector function assumes both the above + // checks for (vselect (build_vector all{ones,zeros}) ...) have been made + // and addressed. + if (N1.getOpcode() == ISD::CONCAT_VECTORS && + N2.getOpcode() == ISD::CONCAT_VECTORS && + ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { + SDValue CV = ConvertSelectToConcatVector(N, DAG); + if (CV.getNode()) + return CV; + } + return SDValue(); } @@ -5003,12 +5086,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } if (N0.getOpcode() == ISD::SETCC) { + EVT N0VT = N0.getOperand(0).getValueType(); // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(true) == - TargetLowering::ZeroOrNegativeOneBooleanContent) { - EVT N0VT = N0.getOperand(0).getValueType(); + TLI.getBooleanContents(N0VT) == + TargetLowering::ZeroOrNegativeOneBooleanContent) { // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) // if this is the case. @@ -6140,6 +6223,9 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. !cast<LoadSDNode>(N0)->isVolatile() && + // Do not remove the cast if the types differ in endian layout. + TLI.hasBigEndianPartOrdering(N0.getValueType()) == + TLI.hasBigEndianPartOrdering(VT) && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); @@ -6955,11 +7041,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { } // The next optimizations are desirable only if SELECT_CC can be lowered. - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - // (See also visitSELECT) - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && !VT.isVector() && @@ -7012,11 +7094,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { } // The next optimizations are desirable only if SELECT_CC can be lowered. - // Check against MVT::Other for SELECT_CC, which is a workaround for targets - // having to say they don't support SELECT_CC on every type the DAG knows - // about, since there is no way to mark an opcode illegal at all value types - // (See also visitSELECT) - if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other)) { + if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && @@ -7849,17 +7927,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { return false; } -/// \brief Return the base-pointer arithmetic from an indexed \p LD.
-SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) { - ISD::MemIndexedMode AM = LD->getAddressingMode(); - assert(AM != ISD::UNINDEXED); - SDValue BP = LD->getOperand(1); - SDValue Inc = LD->getOperand(2); - unsigned Opc = - (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB); - return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc); -} - SDValue DAGCombiner::visitLOAD(SDNode *N) { LoadSDNode *LD = cast<LoadSDNode>(N); SDValue Chain = LD->getChain(); @@ -7896,16 +7963,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { } else { // Indexed loads. assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?"); - if (!N->hasAnyUseOfValue(0)) { + if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) { SDValue Undef = DAG.getUNDEF(N->getValueType(0)); - SDValue Index; - if (N->hasAnyUseOfValue(1)) { - Index = SplitIndexingFromLoad(LD); - // Try to fold the base pointer arithmetic into subsequent loads and - // stores. - AddUsersToWorkList(N); - } else - Index = DAG.getUNDEF(N->getValueType(1)); DEBUG(dbgs() << "\nReplacing.7 "; N->dump(&DAG); dbgs() << "\nWith: "; @@ -7913,7 +7972,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { dbgs() << " and 2 other values\n"); WorkListRemover DeadNodes(*this); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef); - DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index); + DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), + DAG.getUNDEF(N->getValueType(1))); DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain); removeFromWorkList(N); DAG.DeleteNode(N); @@ -9666,6 +9726,27 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return SDValue(); unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); + // Canonicalize insert_vector_elt dag nodes. + // Example: + // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1) + // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0) + // + // Do this only if the child insert_vector node has one use; also + // do this only if indices are both constants and Idx1 < Idx0. + if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse() + && isa<ConstantSDNode>(InVec.getOperand(2))) { + unsigned OtherElt = + cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue(); + if (Elt < OtherElt) { + // Swap nodes. + SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT, + InVec.getOperand(0), InVal, EltNo); + AddToWorkList(NewOp.getNode()); + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()), + VT, NewOp, InVec.getOperand(1), InVec.getOperand(2)); + } + } + // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially // be converted to a BUILD_VECTOR). Fill in the Ops vector with the // vector elements. 
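The insert_vector_elt canonicalization above relies on the fact that two inserts at distinct constant indices commute, so chains can be reordered into a canonical (descending-from-the-outside) form that CSE and the BUILD_VECTOR conversion below can recognize. A standalone check of that property (plain C++, not the LLVM code):

    #include <array>
    #include <cassert>

    using Vec4 = std::array<int, 4>;

    // insert_vector_elt as a pure function: returns V with V[Idx] = Val.
    static Vec4 insertElt(Vec4 V, int Val, unsigned Idx) {
      V[Idx] = Val;
      return V;
    }

    int main() {
      Vec4 A = {10, 11, 12, 13};
      // Inserts at *distinct* constant indices commute, so the combiner may
      // reorder a chain into canonical form and then CSE identical chains.
      assert(insertElt(insertElt(A, 7, 2), 9, 0) ==
             insertElt(insertElt(A, 9, 0), 7, 2));
      // At the *same* index they do not commute: the outer insert must win,
      // which is why the combine requires Idx1 < Idx0 strictly.
      assert(insertElt(insertElt(A, 7, 2), 9, 2)[2] == 9);
      return 0;
    }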
@@ -9698,6 +9779,86 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); } +SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad( + SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) { + EVT ResultVT = EVE->getValueType(0); + EVT VecEltVT = InVecVT.getVectorElementType(); + unsigned Align = OriginalLoad->getAlignment(); + unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment( + VecEltVT.getTypeForEVT(*DAG.getContext())); + + if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT)) + return SDValue(); + + Align = NewAlign; + + SDValue NewPtr = OriginalLoad->getBasePtr(); + SDValue Offset; + EVT PtrType = NewPtr.getValueType(); + MachinePointerInfo MPI; + if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) { + int Elt = ConstEltNo->getZExtValue(); + unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; + if (TLI.isBigEndian()) + PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; + Offset = DAG.getConstant(PtrOff, PtrType); + MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); + } else { + Offset = DAG.getNode( + ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, + DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); + if (TLI.isBigEndian()) + Offset = DAG.getNode( + ISD::SUB, SDLoc(EVE), EltNo.getValueType(), + DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); + MPI = OriginalLoad->getPointerInfo(); + } + NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); + + // The replacement we need to do here is a little tricky: we need to + // replace an extractelement of a load with a load. + // Use ReplaceAllUsesOfValuesWith to do the replacement. + // Note that this replacement assumes that the extractvalue is the only + // use of the load; that's okay because we don't want to perform this + // transformation in other cases anyway. + SDValue Load; + SDValue Chain; + if (ResultVT.bitsGT(VecEltVT)) { + // If the result type of vextract is wider than the load, then issue an + // extending load instead. + ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT) + ? ISD::ZEXTLOAD + : ISD::EXTLOAD; + Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), + NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(), + OriginalLoad->isNonTemporal(), Align, + OriginalLoad->getTBAAInfo()); + Chain = Load.getValue(1); + } else { + Load = DAG.getLoad( + VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI, + OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(), + OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo()); + Chain = Load.getValue(1); + if (ResultVT.bitsLT(VecEltVT)) + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load); + else + Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load); + } + WorkListRemover DeadNodes(*this); + SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) }; + SDValue To[] = { Load, Chain }; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + // Since we're explicitly calling ReplaceAllUses, add the new node to the + // worklist explicitly as well. + AddToWorkList(Load.getNode()); + AddUsersToWorkList(Load.getNode()); // Add users too + // Make sure to revisit this node to clean it up; it will usually be dead. 
+ AddToWorkList(EVE); + ++OpsNarrowed; + return SDValue(EVE, 0); +} + SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // (vextract (scalar_to_vector val, 0) -> val SDValue InVec = N->getOperand(0); @@ -9766,6 +9927,38 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { } } + bool BCNumEltsChanged = false; + EVT ExtVT = VT.getVectorElementType(); + EVT LVT = ExtVT; + + // If the result of load has to be truncated, then it's not necessarily + // profitable. + if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) + return SDValue(); + + if (InVec.getOpcode() == ISD::BITCAST) { + // Don't duplicate a load with other uses. + if (!InVec.hasOneUse()) + return SDValue(); + + EVT BCVT = InVec.getOperand(0).getValueType(); + if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) + return SDValue(); + if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) + BCNumEltsChanged = true; + InVec = InVec.getOperand(0); + ExtVT = BCVT.getVectorElementType(); + } + + // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) + if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && + ISD::isNormalLoad(InVec.getNode())) { + SDValue Index = N->getOperand(1); + if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, + OrigLoad); + } + // Perform only after legalization to ensure build_vector / vector_shuffle // optimizations have already been done. if (!LegalOperations) return SDValue(); @@ -9776,30 +9969,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ConstEltNo) { int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); - bool NewLoad = false; - bool BCNumEltsChanged = false; - EVT ExtVT = VT.getVectorElementType(); - EVT LVT = ExtVT; - - // If the result of load has to be truncated, then it's not necessarily - // profitable. - if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) - return SDValue(); - - if (InVec.getOpcode() == ISD::BITCAST) { - // Don't duplicate a load with other uses. - if (!InVec.hasOneUse()) - return SDValue(); - - EVT BCVT = InVec.getOperand(0).getValueType(); - if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) - return SDValue(); - if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) - BCNumEltsChanged = true; - InVec = InVec.getOperand(0); - ExtVT = BCVT.getVectorElementType(); - NewLoad = true; - } LoadSDNode *LN0 = nullptr; const ShuffleVectorSDNode *SVN = nullptr; @@ -9842,6 +10011,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (ISD::isNormalLoad(InVec.getNode())) { LN0 = cast<LoadSDNode>(InVec); Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; + EltNo = DAG.getConstant(Elt, EltNo.getValueType()); } } @@ -9854,72 +10024,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { if (Elt == -1) return DAG.getUNDEF(LVT); - unsigned Align = LN0->getAlignment(); - if (NewLoad) { - // Check the resultant load doesn't need a higher alignment than the - // original load. 
- unsigned NewAlign = - TLI.getDataLayout() - ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext())); - - if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT)) - return SDValue(); - - Align = NewAlign; - } - - SDValue NewPtr = LN0->getBasePtr(); - unsigned PtrOff = 0; - - if (Elt) { - PtrOff = LVT.getSizeInBits() * Elt / 8; - EVT PtrType = NewPtr.getValueType(); - if (TLI.isBigEndian()) - PtrOff = VT.getSizeInBits() / 8 - PtrOff; - NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr, - DAG.getConstant(PtrOff, PtrType)); - } - - // The replacement we need to do here is a little tricky: we need to - // replace an extractelement of a load with a load. - // Use ReplaceAllUsesOfValuesWith to do the replacement. - // Note that this replacement assumes that the extractvalue is the only - // use of the load; that's okay because we don't want to perform this - // transformation in other cases anyway. - SDValue Load; - SDValue Chain; - if (NVT.bitsGT(LVT)) { - // If the result type of vextract is wider than the load, then issue an - // extending load instead. - ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) - ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), - NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LVT, LN0->isVolatile(), LN0->isNonTemporal(), - Align, LN0->getTBAAInfo()); - Chain = Load.getValue(1); - } else { - Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, - LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align, LN0->getTBAAInfo()); - Chain = Load.getValue(1); - if (NVT.bitsLT(LVT)) - Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); - else - Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); - } - WorkListRemover DeadNodes(*this); - SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; - SDValue To[] = { Load, Chain }; - DAG.ReplaceAllUsesOfValuesWith(From, To, 2); - // Since we're explcitly calling ReplaceAllUses, add the new node to the - // worklist explicitly as well. - AddToWorkList(Load.getNode()); - AddUsersToWorkList(Load.getNode()); // Add users too - // Make sure to revisit this node to clean it up; it will usually be dead. - AddToWorkList(N); - return SDValue(N, 0); + return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); } return SDValue(); @@ -10280,10 +10385,24 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { SmallVector<SDValue, 8> Opnds; unsigned BuildVecNumElts = N0.getNumOperands(); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N0.getOperand(i)); - for (unsigned i = 0; i != BuildVecNumElts; ++i) - Opnds.push_back(N1.getOperand(i)); + EVT SclTy0 = N0.getOperand(0)->getValueType(0); + EVT SclTy1 = N1.getOperand(0)->getValueType(0); + if (SclTy0.isFloatingPoint()) { + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N0.getOperand(i)); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(N1.getOperand(i)); + } else { + // If the BUILD_VECTORs are built from integers, they may have different + // operand types. Get the smaller type and truncate all operands to it. + EVT MinTy = SclTy0.bitsLE(SclTy1) ?
SclTy0 : SclTy1; + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N0.getOperand(i))); + for (unsigned i = 0; i != BuildVecNumElts; ++i) + Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy, + N1.getOperand(i))); + } return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); } @@ -10558,22 +10677,19 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { } // If this shuffle node is simply a swizzle of another shuffle node, - // and it reverses the swizzle of the previous shuffle then we can - // optimize shuffle(shuffle(x, undef), undef) -> x. + // then try to simplify it. if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG && N1.getOpcode() == ISD::UNDEF) { ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0); - // Shuffle nodes can only reverse shuffles with a single non-undef value. - if (N0.getOperand(1).getOpcode() != ISD::UNDEF) - return SDValue(); - // The incoming shuffle must be of the same type as the result of the // current shuffle. assert(OtherSV->getOperand(0).getValueType() == VT && "Shuffle types don't match"); + SmallVector<int, 4> Mask; + // Compute the combined shuffle mask. for (unsigned i = 0; i != NumElts; ++i) { int Idx = SVN->getMaskElt(i); assert(Idx < (int)NumElts && "Index references undef operand"); @@ -10581,13 +10697,71 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { // shuffle. Adopt the incoming index. if (Idx >= 0) Idx = OtherSV->getMaskElt(Idx); + Mask.push_back(Idx); + } + + bool CommuteOperands = false; + if (N0.getOperand(1).getOpcode() != ISD::UNDEF) { + // To be valid, the combined shuffle mask should only reference elements + // from one of the two input vectors of the inner shufflevector. + bool IsValidMask = true; + for (unsigned i = 0; i != NumElts && IsValidMask; ++i) + // See if the combined mask references only undefs or elements coming + // from the first shufflevector operand. + IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts; + + if (!IsValidMask) { + IsValidMask = true; + for (unsigned i = 0; i != NumElts && IsValidMask; ++i) + // Check that all the elements come from the second shuffle operand. + IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts; + CommuteOperands = IsValidMask; + } - // The combined shuffle must map each index to itself. - if (Idx >= 0 && (unsigned)Idx != i) + // Early exit if the combined shuffle mask is not valid. + if (!IsValidMask) return SDValue(); } - return OtherSV->getOperand(0); + // See if this pair of shuffles can be safely folded according to either + // of the following rules: + // shuffle(shuffle(x, y), undef) -> x + // shuffle(shuffle(x, undef), undef) -> x + // shuffle(shuffle(x, y), undef) -> y + bool IsIdentityMask = true; + unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0; + for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) { + // Skip Undefs. + if (Mask[i] < 0) + continue; + + // The combined shuffle must map each index to itself. + IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex; + } + + if (IsIdentityMask) { + if (CommuteOperands) + // optimize shuffle(shuffle(x, y), undef) -> y. + return OtherSV->getOperand(1); + + // optimize shuffle(shuffle(x, undef), undef) -> x + // optimize shuffle(shuffle(x, y), undef) -> x + return OtherSV->getOperand(0); + } + + // It may still be beneficial to combine the two shuffles if the + // resulting shuffle is legal.
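The mask computation above composes the two shuffles: each lane of the outer mask is looked up in the inner mask, with undef lanes propagating. When the composed mask is an identity the pair folds to one of the inner operands outright; otherwise, as the code below shows, it may still be emitted as a single shuffle if the target reports the composed mask as legal. A standalone sketch of the composition for single-input masks (plain C++, not the LLVM code):

    #include <cassert>
    #include <vector>

    // Compose two shuffle masks (inner applied first, -1 = undef lane):
    // Result[i] = Inner[Outer[i]], with undef propagating.
    static std::vector<int> composeMasks(const std::vector<int> &Inner,
                                         const std::vector<int> &Outer) {
      std::vector<int> Mask;
      for (int Idx : Outer)
        Mask.push_back(Idx < 0 ? -1 : Inner[Idx]);
      return Mask;
    }

    int main() {
      // shuffle(shuffle(x, undef, {1,0,3,2}), undef, {1,0,3,2}) composes to
      // the mask {0,1,2,3}: an identity, so the whole pair folds to x.
      std::vector<int> M1 = {1, 0, 3, 2};
      assert(composeMasks(M1, M1) == (std::vector<int>{0, 1, 2, 3}));
      // A non-identity composition may still become one shuffle when the
      // target accepts the new mask as legal.
      std::vector<int> M2 = {2, 3, -1, -1};
      assert(composeMasks(M1, M2) == (std::vector<int>{3, 2, -1, -1}));
      return 0;
    }

The LLVM loop additionally has to classify indices greater than or equal to NumElts, which refer to the inner shuffle's second operand; that is what the CommuteOperands bookkeeping above handles.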
+ if (TLI.isShuffleMaskLegal(Mask, VT)) { + if (!CommuteOperands) + // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). + // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, + &Mask[0]); + + // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3) + return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1), + &Mask[0]); + } } return SDValue(); @@ -10729,6 +10903,27 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops); } + // Type legalization might introduce new shuffles in the DAG. + // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask))) + // -> (shuffle (VBinOp (A, B)), Undef, Mask). + if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) && + isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() && + LHS.getOperand(1).getOpcode() == ISD::UNDEF && + RHS.getOperand(1).getOpcode() == ISD::UNDEF) { + ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS); + ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS); + + if (SVN0->getMask().equals(SVN1->getMask())) { + EVT VT = N->getValueType(0); + SDValue UndefVector = LHS.getOperand(1); + SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT, + LHS.getOperand(0), RHS.getOperand(0)); + AddUsersToWorkList(N); + return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector, + &SVN0->getMask()[0]); + } + } + return SDValue(); } @@ -11080,8 +11275,8 @@ SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, // fold select C, 16, 0 -> shl C, 4 if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && - TLI.getBooleanContents(N0.getValueType().isVector()) == - TargetLowering::ZeroOrOneBooleanContent) { + TLI.getBooleanContents(N0.getValueType()) == + TargetLowering::ZeroOrOneBooleanContent) { // If the caller doesn't want us to simplify this into a zext of a compare, // don't do it. diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 99931c1..445572a 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -42,12 +42,15 @@ #include "llvm/CodeGen/FastISel.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/Loads.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/Function.h" @@ -558,6 +561,107 @@ bool FastISel::SelectGetElementPtr(const User *I) { return true; } +/// \brief Add a stackmap or patchpoint intrinsic call's live variable operands +/// to a stackmap or patchpoint machine instruction. +bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops, + const CallInst *CI, unsigned StartIdx) { + for (unsigned i = StartIdx, e = CI->getNumArgOperands(); i != e; ++i) { + Value *Val = CI->getArgOperand(i); + // Check for constants and encode them with a StackMaps::ConstantOp prefix. 
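To make the encoding that follows concrete: constants among the live variables are emitted behind a marker immediate so the stack map reader can tell them apart from register ids. A rough standalone sketch of the idea, where kConstantOp is a hypothetical stand-in for the real StackMaps::ConstantOp value (plain C++, not the LLVM code):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // The point is only the shape of the operand stream: an immediate is
    // prefixed with the marker, anything else is passed as a register id
    // (or, for allocas, a frame index filled in later by the target).
    constexpr int64_t kConstantOp = -1;  // hypothetical marker value

    struct LiveValue { bool IsConstant; int64_t Payload; };

    static std::vector<int64_t> encodeLiveVars(const std::vector<LiveValue> &Vals) {
      std::vector<int64_t> Ops;
      for (const LiveValue &V : Vals) {
        if (V.IsConstant)
          Ops.push_back(kConstantOp);  // tag the next operand as an immediate
        Ops.push_back(V.Payload);      // constant value, or register number
      }
      return Ops;
    }

    int main() {
      // One constant (42) and one value assumed to live in "register 5".
      for (int64_t Op : encodeLiveVars({{true, 42}, {false, 5}}))
        std::cout << Op << ' ';        // prints: -1 42 5
      std::cout << '\n';
      return 0;
    }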
+ if (auto *C = dyn_cast<ConstantInt>(Val)) { + Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); + Ops.push_back(MachineOperand::CreateImm(C->getSExtValue())); + } else if (isa<ConstantPointerNull>(Val)) { + Ops.push_back(MachineOperand::CreateImm(StackMaps::ConstantOp)); + Ops.push_back(MachineOperand::CreateImm(0)); + } else if (auto *AI = dyn_cast<AllocaInst>(Val)) { + // Values coming from a stack location also require a special encoding, + // but that is added later on by the target-specific frame index + // elimination implementation. + auto SI = FuncInfo.StaticAllocaMap.find(AI); + if (SI != FuncInfo.StaticAllocaMap.end()) + Ops.push_back(MachineOperand::CreateFI(SI->second)); + else + return false; + } else { + unsigned Reg = getRegForValue(Val); + if (Reg == 0) + return false; + Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false)); + } + } + + return true; +} + +bool FastISel::SelectStackmap(const CallInst *I) { + // void @llvm.experimental.stackmap(i64 <id>, i32 <numShadowBytes>, + // [live variables...]) + assert(I->getCalledFunction()->getReturnType()->isVoidTy() && + "Stackmap cannot return a value."); + + // The stackmap intrinsic only records the live variables (the arguments + // passed to it) and emits NOPs (if requested). Unlike the patchpoint + // intrinsic, this won't be lowered to a function call. This means we don't + // have to worry about calling conventions and target-specific lowering code. + // Instead we perform the call lowering right here. + // + // CALLSEQ_START(0) + // STACKMAP(id, nbytes, ...) + // CALLSEQ_END(0, 0) + // + SmallVector<MachineOperand, 32> Ops; + + // Add the <id> and <numBytes> constants. + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)) && + "Expected a constant integer."); + const auto *ID = cast<ConstantInt>(I->getOperand(PatchPointOpers::IDPos)); + Ops.push_back(MachineOperand::CreateImm(ID->getZExtValue())); + + assert(isa<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)) && + "Expected a constant integer."); + const auto *NumBytes = + cast<ConstantInt>(I->getOperand(PatchPointOpers::NBytesPos)); + Ops.push_back(MachineOperand::CreateImm(NumBytes->getZExtValue())); + + // Push live variables for the stack map (skipping the first two arguments + // <id> and <numBytes>). + if (!addStackMapLiveVars(Ops, I, 2)) + return false; + + // We are not adding any register mask info here, because the stackmap doesn't + // clobber anything. + + // Add scratch registers as implicit def and early clobber. + CallingConv::ID CC = I->getCallingConv(); + const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC); + for (unsigned i = 0; ScratchRegs[i]; ++i) + Ops.push_back(MachineOperand::CreateReg( + ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false, + /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true)); + + // Issue CALLSEQ_START + unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackDown)) + .addImm(0); + + // Issue STACKMAP. + MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::STACKMAP)); + for (auto const &MO : Ops) + MIB.addOperand(MO); + + // Issue CALLSEQ_END + unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AdjStackUp)) + .addImm(0).addImm(0); + + // Inform the Frame Information that we have a stackmap in this function.
+ FuncInfo.MF->getFrameInfo()->setHasStackMap(); + + return true; +} + bool FastISel::SelectCall(const User *I) { const CallInst *Call = cast<CallInst>(I); @@ -713,6 +817,8 @@ bool FastISel::SelectCall(const User *I) { UpdateValueMap(Call, ResultReg); return true; } + case Intrinsic::experimental_stackmap: + return SelectStackmap(Call); } // Usually, it does not make sense to initialize a value, @@ -879,7 +985,6 @@ FastISel::SelectInstruction(const Instruction *I) { /// the CFG. void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { - if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction @@ -890,7 +995,11 @@ FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DbgLoc) { TII.InsertBranch(*FuncInfo.MBB, MSucc, nullptr, SmallVector<MachineOperand, 0>(), DbgLoc); } - FuncInfo.MBB->addSuccessor(MSucc); + uint32_t BranchWeight = 0; + if (FuncInfo.BPI) + BranchWeight = FuncInfo.BPI->getEdgeWeight(FuncInfo.MBB->getBasicBlock(), + MSucc->getBasicBlock()); + FuncInfo.MBB->addSuccessor(MSucc, BranchWeight); } /// SelectFNeg - Emit an FNeg operation. @@ -1101,6 +1210,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { FastISel::FastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) : FuncInfo(funcInfo), + MF(funcInfo.MF), MRI(FuncInfo.MF->getRegInfo()), MFI(*FuncInfo.MF->getFrameInfo()), MCP(*FuncInfo.MF->getConstantPool()), @@ -1635,3 +1745,47 @@ bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) { return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1)); } +MachineMemOperand * +FastISel::createMachineMemOperandFor(const Instruction *I) const { + const Value *Ptr; + Type *ValTy; + unsigned Alignment; + unsigned Flags; + bool IsVolatile; + + if (const auto *LI = dyn_cast<LoadInst>(I)) { + Alignment = LI->getAlignment(); + IsVolatile = LI->isVolatile(); + Flags = MachineMemOperand::MOLoad; + Ptr = LI->getPointerOperand(); + ValTy = LI->getType(); + } else if (const auto *SI = dyn_cast<StoreInst>(I)) { + Alignment = SI->getAlignment(); + IsVolatile = SI->isVolatile(); + Flags = MachineMemOperand::MOStore; + Ptr = SI->getPointerOperand(); + ValTy = SI->getValueOperand()->getType(); + } else { + return nullptr; + } + + bool IsNonTemporal = I->getMetadata("nontemporal") != nullptr; + bool IsInvariant = I->getMetadata("invariant.load") != nullptr; + const MDNode *TBAAInfo = I->getMetadata(LLVMContext::MD_tbaa); + const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range); + + if (Alignment == 0) // Ensure that codegen never sees alignment 0. 
+ Alignment = DL.getABITypeAlignment(ValTy); + + unsigned Size = TM.getDataLayout()->getTypeStoreSize(ValTy); + + if (IsVolatile) + Flags |= MachineMemOperand::MOVolatile; + if (IsNonTemporal) + Flags |= MachineMemOperand::MONonTemporal; + if (IsInvariant) + Flags |= MachineMemOperand::MOInvariant; + + return FuncInfo.MF->getMachineMemOperand(MachinePointerInfo(Ptr), Flags, Size, + Alignment, TBAAInfo, Ranges); +} diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a59e895..c0e8c8c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2060,7 +2060,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setTailCall(isTailCall).setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2095,7 +2095,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2129,7 +2129,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2266,7 +2266,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDLoc dl(Node); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2381,7 +2381,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) .setCallee(TLI.getLibcallCallingConv(LC), - Type::getVoidTy(*DAG.getContext()), Callee, &Args, 0); + Type::getVoidTy(*DAG.getContext()), Callee, std::move(Args), 0); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2650,12 +2650,15 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp, NewOutTy = (MVT::SimpleValueType)(NewOutTy.getSimpleVT().SimpleTy+1); assert(NewOutTy.isInteger() && "Ran out of possibilities!"); + // A larger signed type can hold all unsigned values of the requested type, + // so using FP_TO_SINT is valid if (TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NewOutTy)) { OpToUse = ISD::FP_TO_SINT; break; } - if (TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { + // However, if the value may be < 0.0, we *must* use some FP_TO_SINT. 
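A worked example of the asymmetry described in the two comments above (plain C++, not the LLVM code): converting f32 to u16 through a wider signed conversion covers the whole u16 range, but an unsigned conversion can never stand in for a signed one because it cannot represent negative inputs.

    #include <cassert>
    #include <cstdint>

    int main() {
      // A wider signed conversion covers the whole range of a narrower
      // unsigned type: i32 holds all of u16's values [0, 65535], so
      // FP_TO_UINT to u16 may be implemented as FP_TO_SINT to i32 plus a
      // truncate.
      float F = 50000.0f;                  // too big for i16, fine for i32
      uint16_t U = (uint16_t)(int32_t)F;
      assert(U == 50000);

      // The substitution only works in that direction: a signed result
      // must always be produced by some (possibly wider) FP_TO_SINT.
      float G = -7.0f;
      assert((int32_t)G == -7);
      return 0;
    }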
+ if (!isSigned && TLI.isOperationLegalOrCustom(ISD::FP_TO_UINT, NewOutTy)) { OpToUse = ISD::FP_TO_UINT; break; } @@ -2996,8 +2999,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__sync_synchronize", TLI.getPointerTy()), - &Args, 0); + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy()), std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); @@ -3007,14 +3010,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::ATOMIC_LOAD: { // There is no libcall for atomic load; fake it with ATOMIC_CMP_SWAP. SDValue Zero = DAG.getConstant(0, Node->getValueType(0)); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - cast<AtomicSDNode>(Node)->getMemoryVT(), - Node->getOperand(0), - Node->getOperand(1), Zero, Zero, - cast<AtomicSDNode>(Node)->getMemOperand(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getSynchScope()); + SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); Results.push_back(Swap.getValue(0)); Results.push_back(Swap.getValue(1)); break; @@ -3051,6 +3054,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp.second); break; } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + // Expanding an ATOMIC_CMP_SWAP_WITH_SUCCESS produces an ATOMIC_CMP_SWAP and + // splits out the success value as a comparison. Expanding the resulting + // ATOMIC_CMP_SWAP will produce a libcall. 
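The success value of a strong compare-and-swap is always recoverable from its result: the swap happened exactly when the loaded value equals the expected value, which is what the SETEQ in this expansion computes. A standalone sketch using std::atomic (plain C++, not the LLVM code):

    #include <atomic>
    #include <cassert>

    // A strong compare-and-swap that only returns the previous value still
    // determines success, since success holds iff loaded == expected.
    static int cmpSwap(std::atomic<int> &A, int Expected, int Desired) {
      A.compare_exchange_strong(Expected, Desired);
      return Expected;  // compare_exchange writes back the observed value
    }

    int main() {
      std::atomic<int> A{1};
      int Loaded = cmpSwap(A, /*Expected=*/1, /*Desired=*/2);
      bool Success = (Loaded == 1);        // recovered success bit (SETEQ)
      assert(Success && A.load() == 2);

      Loaded = cmpSwap(A, /*Expected=*/5, /*Desired=*/9);  // A holds 2
      assert(Loaded == 2 && !(Loaded == 5));               // failed swap
      return 0;
    }

Note this only holds for the strong form; a weak compare-and-swap may fail spuriously even when the values compare equal, which is why the expansion produces the strong node.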
+ SDVTList VTs = DAG.getVTList(Node->getValueType(0), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, dl, cast<AtomicSDNode>(Node)->getMemoryVT(), VTs, + Node->getOperand(0), Node->getOperand(1), Node->getOperand(2), + Node->getOperand(3), cast<MemSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getSuccessOrdering(), + cast<AtomicSDNode>(Node)->getFailureOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); + + SDValue Success = DAG.getSetCC(SDLoc(Node), Node->getValueType(1), + Res, Node->getOperand(2), ISD::SETEQ); + + Results.push_back(Res.getValue(0)); + Results.push_back(Success); + Results.push_back(Res.getValue(1)); + break; + } case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; @@ -3074,7 +3098,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Node->getOperand(0)) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("abort", TLI.getPointerTy()), &Args, 0); + DAG.getExternalSymbol("abort", TLI.getPointerTy()), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); Results.push_back(CallResult.second); @@ -3128,6 +3153,65 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Node->getOperand(0), Node->getValueType(0), dl); Results.push_back(Tmp1); break; + case ISD::FP_TO_SINT: { + EVT VT = Node->getOperand(0).getValueType(); + EVT NVT = Node->getValueType(0); + + // FIXME: Only f32 to i64 conversions are supported. + if (VT != MVT::f32 || NVT != MVT::i64) + break; + + // Expand f32 -> i64 conversion + // This algorithm comes from compiler-rt's implementation of fixsfdi: + // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c + EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits()); + SDValue ExponentMask = DAG.getConstant(0x7F800000, IntVT); + SDValue ExponentLoBit = DAG.getConstant(23, IntVT); + SDValue Bias = DAG.getConstant(127, IntVT); + SDValue SignMask = DAG.getConstant(APInt::getSignBit(VT.getSizeInBits()), + IntVT); + SDValue SignLowBit = DAG.getConstant(VT.getSizeInBits() - 1, IntVT); + SDValue MantissaMask = DAG.getConstant(0x007FFFFF, IntVT); + + SDValue Bits = DAG.getNode(ISD::BITCAST, dl, IntVT, Node->getOperand(0)); + + SDValue ExponentBits = DAG.getNode(ISD::SRL, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, ExponentMask), + DAG.getZExtOrTrunc(ExponentLoBit, dl, TLI.getShiftAmountTy(IntVT))); + SDValue Exponent = DAG.getNode(ISD::SUB, dl, IntVT, ExponentBits, Bias); + + SDValue Sign = DAG.getNode(ISD::SRA, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, SignMask), + DAG.getZExtOrTrunc(SignLowBit, dl, TLI.getShiftAmountTy(IntVT))); + Sign = DAG.getSExtOrTrunc(Sign, dl, NVT); + + SDValue R = DAG.getNode(ISD::OR, dl, IntVT, + DAG.getNode(ISD::AND, dl, IntVT, Bits, MantissaMask), + DAG.getConstant(0x00800000, IntVT)); + + R = DAG.getZExtOrTrunc(R, dl, NVT); + + + R = DAG.getSelectCC(dl, Exponent, ExponentLoBit, + DAG.getNode(ISD::SHL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, Exponent, ExponentLoBit), + dl, TLI.getShiftAmountTy(IntVT))), + DAG.getNode(ISD::SRL, dl, NVT, R, + DAG.getZExtOrTrunc( + DAG.getNode(ISD::SUB, dl, IntVT, ExponentLoBit, Exponent), + dl, TLI.getShiftAmountTy(IntVT))), + ISD::SETGT); + + SDValue Ret = DAG.getNode(ISD::SUB, dl, NVT, + DAG.getNode(ISD::XOR, dl, NVT, R, Sign), + Sign); + + Results.push_back(DAG.getSelectCC(dl, Exponent, DAG.getConstant(0, 
IntVT), + DAG.getConstant(0, NVT), Ret, ISD::SETLT)); + break; + } case ISD::FP_TO_UINT: { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); @@ -3653,7 +3737,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - EVT OType = Node->getValueType(1); + EVT ResultType = Node->getValueType(1); + EVT OType = getSetCCResultType(Node->getValueType(0)); SDValue Zero = DAG.getConstant(0, LHS.getValueType()); @@ -3676,7 +3761,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); - Results.push_back(Cmp); + Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType)); break; } case ISD::UADDO: @@ -3687,9 +3772,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ISD::ADD : ISD::SUB, dl, LHS.getValueType(), LHS, RHS); Results.push_back(Sum); - Results.push_back(DAG.getSetCC(dl, Node->getValueType(1), Sum, LHS, - Node->getOpcode () == ISD::UADDO ? - ISD::SETULT : ISD::SETUGT)); + + EVT ResultType = Node->getValueType(1); + EVT SetCCType = getSetCCResultType(Node->getValueType(0)); + ISD::CondCode CC + = Node->getOpcode() == ISD::UADDO ? ISD::SETULT : ISD::SETUGT; + SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC); + + Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType)); break; } case ISD::UMULO: @@ -3879,7 +3969,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // illegal; expand it into a SELECT_CC. EVT VT = Node->getValueType(0); int TrueValue; - switch (TLI.getBooleanContents(VT.isVector())) { + switch (TLI.getBooleanContents(Tmp1->getValueType(0))) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = 1; @@ -3899,13 +3989,29 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Tmp2 = Node->getOperand(1); // RHS Tmp3 = Node->getOperand(2); // True Tmp4 = Node->getOperand(3); // False + EVT VT = Node->getValueType(0); SDValue CC = Node->getOperand(4); + ISD::CondCode CCOp = cast<CondCodeSDNode>(CC)->get(); + + if (TLI.isCondCodeLegal(CCOp, Tmp1.getSimpleValueType())) { + // If the condition code is legal, then we need to expand this + // node using SETCC and SELECT. + EVT CmpVT = Tmp1.getValueType(); + assert(!TLI.isOperationExpand(ISD::SELECT, VT) && + "Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be " + "expanded."); + EVT CCVT = TLI.getSetCCResultType(*DAG.getContext(), CmpVT); + SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC); + Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4)); + break; + } + // SELECT_CC is legal, so the condition code must not be. bool Legalized = false; // Try to legalize by inverting the condition. This is for targets that // might support an ordered version of a condition, but not the unordered // version (or vice versa). 
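Inverting the predicate while swapping the true and false arms is an identity, including for floating-point predicates with unordered semantics, which is what makes this fallback safe. A standalone sketch (plain C++, not the LLVM code) using SETOLT and its inverse SETUGE:

    #include <cassert>
    #include <limits>

    // select_cc(a, b, t, f, CC) == select_cc(a, b, f, t, !CC). The ordered
    // SETOLT ("a < b, neither NaN") inverts to the unordered SETUGE
    // ("a >= b, or unordered"), so swapping the arms preserves the result
    // even when NaNs are involved.
    static int selectOLT(double A, double B, int T, int F) { return A < B ? T : F; }
    static int selectUGE(double A, double B, int T, int F) { return !(A < B) ? T : F; }

    int main() {
      double NaN = std::numeric_limits<double>::quiet_NaN();
      for (double A : {1.0, 2.0, NaN})
        for (double B : {1.5, NaN})
          assert(selectOLT(A, B, 10, 20) == selectUGE(A, B, 20, 10));
      return 0;
    }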
- ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), + ISD::CondCode InvCC = ISD::getSetCCInverse(CCOp, Tmp1.getValueType().isInteger()); if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) { // Use the new condition code and swap true and false diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 2483184..6feac0d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -138,7 +138,9 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) { Res = PromoteIntRes_Atomic1(cast<AtomicSDNode>(N)); break; case ISD::ATOMIC_CMP_SWAP: - Res = PromoteIntRes_Atomic2(cast<AtomicSDNode>(N)); break; + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo); + break; } // If the result is null then the sub-method took care of registering it. @@ -192,16 +194,41 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { return Res; } -SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { +SDValue DAGTypeLegalizer::PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, + unsigned ResNo) { + if (ResNo == 1) { + assert(N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + EVT SVT = getSetCCResultType(N->getOperand(2).getValueType()); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); + + // Only use the result of getSetCCResultType if it is legal, + // otherwise just use the promoted result type (NVT). + if (!TLI.isTypeLegal(SVT)) + SVT = NVT; + + SDVTList VTs = DAG.getVTList(N->getValueType(0), SVT, MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, SDLoc(N), N->getMemoryVT(), VTs, + N->getChain(), N->getBasePtr(), N->getOperand(2), N->getOperand(3), + N->getMemOperand(), N->getSuccessOrdering(), N->getFailureOrdering(), + N->getSynchScope()); + ReplaceValueWith(SDValue(N, 0), Res.getValue(0)); + ReplaceValueWith(SDValue(N, 2), Res.getValue(2)); + return Res.getValue(1); + } + SDValue Op2 = GetPromotedInteger(N->getOperand(2)); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); - SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), - N->getChain(), N->getBasePtr(), Op2, Op3, - N->getMemOperand(), N->getSuccessOrdering(), - N->getFailureOrdering(), N->getSynchScope()); + SDVTList VTs = + DAG.getVTList(Op2.getValueType(), N->getValueType(1), MVT::Other); + SDValue Res = DAG.getAtomicCmpSwap( + N->getOpcode(), SDLoc(N), N->getMemoryVT(), VTs, N->getChain(), + N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getSuccessOrdering(), + N->getFailureOrdering(), N->getSynchScope()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + unsigned ChainOp = N->getNumValues() - 1; + ReplaceValueWith(SDValue(N, ChainOp), Res.getValue(ChainOp)); return Res; } @@ -492,7 +519,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. 
- Mask = PromoteTargetBoolean(Mask, getSetCCResultType(OpTy)); + Mask = PromoteTargetBoolean(Mask, OpTy); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); return DAG.getNode(ISD::VSELECT, SDLoc(N), @@ -892,8 +919,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "only know how to promote condition"); // Promote all the way up to the canonical SetCC type. - EVT SVT = getSetCCResultType(MVT::Other); - SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); + SDValue Cond = PromoteTargetBoolean(N->getOperand(1), MVT::Other); // The chain (Op#0) and basic block destination (Op#2) are always legal types. return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, @@ -986,9 +1012,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - EVT SVT = getSetCCResultType(N->getOpcode() == ISD::SELECT ? - OpTy.getScalarType() : OpTy); - Cond = PromoteTargetBoolean(Cond, SVT); + EVT OpVT = N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy; + Cond = PromoteTargetBoolean(Cond, OpVT); return SDValue(DAG.UpdateNodeOperands(N, Cond, N->getOperand(1), N->getOperand(2)), 0); @@ -1143,6 +1168,26 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { ReplaceValueWith(SDValue(N, 1), Tmp.second); break; } + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { + AtomicSDNode *AN = cast<AtomicSDNode>(N); + SDVTList VTs = DAG.getVTList(N->getValueType(0), MVT::Other); + SDValue Tmp = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP, SDLoc(N), AN->getMemoryVT(), VTs, + N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), + AN->getMemOperand(), AN->getSuccessOrdering(), AN->getFailureOrdering(), + AN->getSynchScope()); + + // Expanding to the strong ATOMIC_CMP_SWAP node means we can determine + // success simply by comparing the loaded value against the ingoing + // comparison. 
+ SDValue Success = DAG.getSetCC(SDLoc(N), N->getValueType(1), Tmp, + N->getOperand(2), ISD::SETEQ); + + SplitInteger(Tmp, Lo, Hi); + ReplaceValueWith(SDValue(N, 1), Success); + ReplaceValueWith(SDValue(N, 2), Tmp.getValue(1)); + break; + } case ISD::AND: case ISD::OR: @@ -2301,7 +2346,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Func, std::move(Args), 0) .setSExtResult(); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -2388,16 +2433,18 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); EVT VT = cast<AtomicSDNode>(N)->getMemoryVT(); + SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); SDValue Zero = DAG.getConstant(0, VT); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, - N->getOperand(0), - N->getOperand(1), Zero, Zero, - cast<AtomicSDNode>(N)->getMemOperand(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getOrdering(), - cast<AtomicSDNode>(N)->getSynchScope()); + SDValue Swap = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, + cast<AtomicSDNode>(N)->getMemoryVT(), VTs, N->getOperand(0), + N->getOperand(1), Zero, Zero, cast<AtomicSDNode>(N)->getMemOperand(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getOrdering(), + cast<AtomicSDNode>(N)->getSynchScope()); + ReplaceValueWith(SDValue(N, 0), Swap.getValue(0)); - ReplaceValueWith(SDValue(N, 1), Swap.getValue(1)); + ReplaceValueWith(SDValue(N, 1), Swap.getValue(2)); } //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 3971fc3..bd7dacf 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1054,7 +1054,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Node)).setChain(InChain) - .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI); @@ -1065,11 +1065,14 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, /// PromoteTargetBoolean - Promote the given target boolean to a target boolean /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. -SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { +/// +/// ValVT is the type of values that produced the boolean. 
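The extension used for the promotion has to preserve the boolean encoding that getBooleanContents reports for ValVT: 0/-1 booleans must be sign-extended, 0/1 booleans zero-extended. A standalone sketch of that mapping (plain C++, not the LLVM code; assumes two's-complement narrowing in the int8_t cast):

    #include <cassert>
    #include <cstdint>

    enum BooleanContent { ZeroOrOne, ZeroOrNegativeOne };

    // Mirrors the getExtendForContent idea: choose the extend kind that
    // keeps the narrow boolean's encoding valid in the wider type.
    static int32_t promoteBool(int32_t B, BooleanContent C) {
      if (C == ZeroOrNegativeOne)
        return (int32_t)(int8_t)B;  // sign extension preserves 0 and -1
      return (int32_t)(uint8_t)B;   // zero extension preserves 0 and 1
    }

    int main() {
      assert(promoteBool(0x01, ZeroOrOne) == 1);           // true stays 1
      assert(promoteBool(0xFF, ZeroOrNegativeOne) == -1);  // true stays -1
      assert(promoteBool(0x00, ZeroOrOne) == 0 &&
             promoteBool(0x00, ZeroOrNegativeOne) == 0);
      return 0;
    }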
+SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT ValVT) { SDLoc dl(Bool); + EVT BoolVT = getSetCCResultType(ValVT); ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector())); - return DAG.getNode(ExtendCode, dl, VT, Bool); + TargetLowering::getExtendForContent(TLI.getBooleanContents(ValVT)); + return DAG.getNode(ExtendCode, dl, BoolVT, Bool); } /// SplitInteger - Return the lower LoVT bits of Op in Lo and the upper HiVT diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index e4bbc78..d0ca6f8 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -167,7 +167,7 @@ private: SDNode *Node, bool isSigned); std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node); - SDValue PromoteTargetBoolean(SDValue Bool, EVT VT); + SDValue PromoteTargetBoolean(SDValue Bool, EVT ValVT); void ReplaceValueWith(SDValue From, SDValue To); void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi); void SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, @@ -220,7 +220,7 @@ private: SDValue PromoteIntRes_AssertZext(SDNode *N); SDValue PromoteIntRes_Atomic0(AtomicSDNode *N); SDValue PromoteIntRes_Atomic1(AtomicSDNode *N); - SDValue PromoteIntRes_Atomic2(AtomicSDNode *N); + SDValue PromoteIntRes_AtomicCmpSwap(AtomicSDNode *N, unsigned ResNo); SDValue PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N); SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N); SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N); @@ -570,6 +570,7 @@ private: void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi); @@ -644,6 +645,7 @@ private: bool WidenVectorOperand(SDNode *N, unsigned OpNo); SDValue WidenVecOp_BITCAST(SDNode *N); SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N); + SDValue WidenVecOp_EXTEND(SDNode *N); SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index f40ed76..7e2f7b6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -60,12 +60,15 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeExpandFloat: // Convert the expanded pieces of the input. 
GetExpandedOp(InOp, Lo, Hi); + if (TLI.hasBigEndianPartOrdering(InVT) != + TLI.hasBigEndianPartOrdering(OutVT)) + std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; case TargetLowering::TypeSplitVector: GetSplitVector(InOp, Lo, Hi); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -82,7 +85,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); std::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); @@ -176,7 +179,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OutVT)) std::swap(Lo, Hi); } @@ -245,7 +248,8 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDLoc dl(N); LoadSDNode *LD = cast<LoadSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); + EVT ValueVT = LD->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); unsigned Alignment = LD->getAlignment(); @@ -275,7 +279,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, Hi.getValue(1)); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -295,7 +299,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); // Handle endianness of the load. - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(OVT)) std::swap(Lo, Hi); // Modified the chain - switch anything that used the old chain to use @@ -459,8 +463,8 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDLoc dl(N); StoreSDNode *St = cast<StoreSDNode>(N); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), - St->getValue().getValueType()); + EVT ValueVT = St->getValue().getValueType(); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), ValueVT); SDValue Chain = St->getChain(); SDValue Ptr = St->getBasePtr(); unsigned Alignment = St->getAlignment(); @@ -474,7 +478,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { SDValue Lo, Hi; GetExpandedOp(St->getValue(), Lo, Hi); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 898cd29..507e7ff 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -37,12 +37,12 @@ class VectorLegalizer { const TargetLowering &TLI; bool Changed; // Keep track of whether anything changed - /// LegalizedNodes - For nodes that are of legal width, and that have more - /// than one use, this map indicates what regularized operand to use. 
This - /// allows us to avoid legalizing the same thing more than once. + /// For nodes that are of legal width, and that have more than one use, this + /// map indicates what regularized operand to use. This allows us to avoid + /// legalizing the same thing more than once. SmallDenseMap<SDValue, SDValue, 64> LegalizedNodes; - // Adds a node to the translation cache + /// \brief Adds a node to the translation cache. void AddLegalizedOperand(SDValue From, SDValue To) { LegalizedNodes.insert(std::make_pair(From, To)); // If someone requests legalization of the new node, return itself. @@ -50,41 +50,81 @@ class VectorLegalizer { LegalizedNodes.insert(std::make_pair(To, To)); } - // Legalizes the given node + /// \brief Legalizes the given node. SDValue LegalizeOp(SDValue Op); - // Assuming the node is legal, "legalize" the results + + /// \brief Assuming the node is legal, "legalize" the results. SDValue TranslateLegalizeResults(SDValue Op, SDValue Result); - // Implements unrolling a VSETCC. + + /// \brief Implements unrolling a VSETCC. SDValue UnrollVSETCC(SDValue Op); - // Implements expansion for FNEG; falls back to UnrollVectorOp if FSUB - // isn't legal. - // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if - // SINT_TO_FLOAT and SHR on vectors isn't legal. + + /// \brief Implement expand-based legalization of vector operations. + /// + /// This is just a high-level routine to dispatch to specific code paths for + /// operations to legalize them. + SDValue Expand(SDValue Op); + + /// \brief Implements expansion for FNEG; falls back to UnrollVectorOp if + /// FSUB isn't legal. + /// + /// Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if + /// SINT_TO_FLOAT and SHR on vectors isn't legal. SDValue ExpandUINT_TO_FLOAT(SDValue Op); - // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. + + /// \brief Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. SDValue ExpandSEXTINREG(SDValue Op); - // Expand bswap of vectors into a shuffle if legal. + + /// \brief Implement expansion for ANY_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and bitcasts to the proper + /// type. The contents of the bits in the extended part of each element are + /// undef. + SDValue ExpandANY_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Implement expansion for SIGN_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place, bitcasts to the proper + /// type, then shifts left and arithmetic shifts right to introduce a sign + /// extension. + SDValue ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Implement expansion for ZERO_EXTEND_VECTOR_INREG. + /// + /// Shuffles the low lanes of the operand into place and blends zeros into + /// the remaining lanes, finally bitcasting to the proper type. + SDValue ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op); + + /// \brief Expand bswap of vectors into a shuffle if legal. SDValue ExpandBSWAP(SDValue Op); - // Implement vselect in terms of XOR, AND, OR when blend is not supported - // by the target. + + /// \brief Implement vselect in terms of XOR, AND, OR when blend is not + /// supported by the target. SDValue ExpandVSELECT(SDValue Op); SDValue ExpandSELECT(SDValue Op); SDValue ExpandLoad(SDValue Op); SDValue ExpandStore(SDValue Op); SDValue ExpandFNEG(SDValue Op); - // Implements vector promotion; this is essentially just bitcasting the - // operands to a different type and bitcasting the result back to the - // original type. 
- SDValue PromoteVectorOp(SDValue Op); - // Implements [SU]INT_TO_FP vector promotion; this is a [zs]ext of the input - // operand to the next size up. - SDValue PromoteVectorOpINT_TO_FP(SDValue Op); - // Implements FP_TO_[SU]INT vector promotion of the result type; it is - // promoted to the next size up integer type. The result is then truncated - // back to the original type. - SDValue PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned); - - public: + + /// \brief Implements vector promotion. + /// + /// This is essentially just bitcasting the operands to a different type and + /// bitcasting the result back to the original type. + SDValue Promote(SDValue Op); + + /// \brief Implements [SU]INT_TO_FP vector promotion. + /// + /// This is a [zs]ext of the input operand to the next size up. + SDValue PromoteINT_TO_FP(SDValue Op); + + /// \brief Implements FP_TO_[SU]INT vector promotion of the result type. + /// + /// It is promoted to the next size up integer type. The result is then + /// truncated back to the original type. + SDValue PromoteFP_TO_INT(SDValue Op, bool isSigned); + +public: + /// \brief Begin legalizing the vector operations in the DAG. bool Run(); VectorLegalizer(SelectionDAG& dag) : DAG(dag), TLI(dag.getTargetLoweringInfo()), Changed(false) {} @@ -254,6 +294,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_EXTEND: case ISD::FMA: case ISD::SIGN_EXTEND_INREG: + case ISD::ANY_EXTEND_VECTOR_INREG: + case ISD::SIGN_EXTEND_VECTOR_INREG: + case ISD::ZERO_EXTEND_VECTOR_INREG: QueryType = Node->getValueType(0); break; case ISD::FP_ROUND_INREG: @@ -267,27 +310,11 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { case TargetLowering::Promote: - switch (Op.getOpcode()) { - default: - // "Promote" the operation by bitcasting - Result = PromoteVectorOp(Op); - Changed = true; - break; - case ISD::SINT_TO_FP: - case ISD::UINT_TO_FP: - // "Promote" the operation by extending the operand. - Result = PromoteVectorOpINT_TO_FP(Op); - Changed = true; - break; - case ISD::FP_TO_UINT: - case ISD::FP_TO_SINT: - // Promote the operation by extending the operand. - Result = PromoteVectorOpFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); - Changed = true; - break; - } + Result = Promote(Op); + Changed = true; + break; + case TargetLowering::Legal: break; - case TargetLowering::Legal: break; case TargetLowering::Custom: { SDValue Tmp1 = TLI.LowerOperation(Op, DAG); if (Tmp1.getNode()) { @@ -297,23 +324,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) - Result = ExpandSEXTINREG(Op); - else if (Node->getOpcode() == ISD::BSWAP) - Result = ExpandBSWAP(Op); - else if (Node->getOpcode() == ISD::VSELECT) - Result = ExpandVSELECT(Op); - else if (Node->getOpcode() == ISD::SELECT) - Result = ExpandSELECT(Op); - else if (Node->getOpcode() == ISD::UINT_TO_FP) - Result = ExpandUINT_TO_FLOAT(Op); - else if (Node->getOpcode() == ISD::FNEG) - Result = ExpandFNEG(Op); - else if (Node->getOpcode() == ISD::SETCC) - Result = UnrollVSETCC(Op); - else - Result = DAG.UnrollVectorOp(Op.getNode()); - break; + Result = Expand(Op); } // Make sure that the generated code is itself legal. @@ -328,10 +339,23 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return Result; } -SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { - // Vector "promotion" is basically just bitcasting and doing the operation - // in a different type.
For example, x86 promotes ISD::AND on v2i32 to - // v1i64. +SDValue VectorLegalizer::Promote(SDValue Op) { + // For a few operations there is a specific concept for promotion based on + // the operand's type. + switch (Op.getOpcode()) { + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // "Promote" the operation by extending the operand. + return PromoteINT_TO_FP(Op); + case ISD::FP_TO_UINT: + case ISD::FP_TO_SINT: + // Promote the operation by extending the operand. + return PromoteFP_TO_INT(Op, Op->getOpcode() == ISD::FP_TO_SINT); + } + + // The rest of the time, vector "promotion" is basically just bitcasting and + // doing the operation in a different type. For example, x86 promotes + // ISD::AND on v2i32 to v1i64. MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); @@ -351,7 +375,7 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { return DAG.getNode(ISD::BITCAST, dl, VT, Op); } -SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { +SDValue VectorLegalizer::PromoteINT_TO_FP(SDValue Op) { // INT_TO_FP operations may require the input operand be promoted even // when the type is otherwise legal. EVT VT = Op.getOperand(0).getValueType(); @@ -387,7 +411,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { // elements and then truncate the result. This is different from the default // PromoteVector which uses bitcast to promote thus assumning that the // promoted vector type has the same overall size. -SDValue VectorLegalizer::PromoteVectorOpFP_TO_INT(SDValue Op, bool isSigned) { +SDValue VectorLegalizer::PromoteFP_TO_INT(SDValue Op, bool isSigned) { assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); EVT VT = Op.getValueType(); @@ -609,6 +633,33 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { return TF; } +SDValue VectorLegalizer::Expand(SDValue Op) { + switch (Op->getOpcode()) { + case ISD::SIGN_EXTEND_INREG: + return ExpandSEXTINREG(Op); + case ISD::ANY_EXTEND_VECTOR_INREG: + return ExpandANY_EXTEND_VECTOR_INREG(Op); + case ISD::SIGN_EXTEND_VECTOR_INREG: + return ExpandSIGN_EXTEND_VECTOR_INREG(Op); + case ISD::ZERO_EXTEND_VECTOR_INREG: + return ExpandZERO_EXTEND_VECTOR_INREG(Op); + case ISD::BSWAP: + return ExpandBSWAP(Op); + case ISD::VSELECT: + return ExpandVSELECT(Op); + case ISD::SELECT: + return ExpandSELECT(Op); + case ISD::UINT_TO_FP: + return ExpandUINT_TO_FLOAT(Op); + case ISD::FNEG: + return ExpandFNEG(Op); + case ISD::SETCC: + return UnrollVSETCC(Op); + default: + return DAG.UnrollVectorOp(Op.getNode()); + } +} + SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it @@ -686,6 +737,85 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); } +// Generically expand a vector anyext in register to a shuffle of the relevant +// lanes into the appropriate locations, with other lanes left undef. +SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build a base mask of undef shuffles. 
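// A worked instance (illustrative, not from the patch): an
// any_extend_vector_inreg from v8i16 to v4i32 has ExtLaneScale = 8 / 4 = 2,
// so on a little-endian target the loop below builds the v8i16 mask
// <0,-1,1,-1,2,-1,3,-1>, while on a big-endian target (EndianOffset = 1) it
// builds <-1,0,-1,1,-1,2,-1,3>; either way each of the four source lanes
// lands in the half of its widened element that carries the low bits, and
// the other half is left undef.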
+ SmallVector<int, 16> ShuffleMask; + ShuffleMask.resize(NumSrcElements, -1); + + // Place the extended lanes into the correct locations. + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = i; + + return DAG.getNode( + ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Src, DAG.getUNDEF(SrcVT), ShuffleMask)); +} + +SDValue VectorLegalizer::ExpandSIGN_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + + // First build an any-extend node which can be legalized above when we + // recurse through it. + Op = DAG.getAnyExtendVectorInReg(Src, DL, VT); + + // Now we need sign extend. Do this by shifting the elements. Even if these + // aren't legal operations, they have a better chance of being legalized + // without full scalarization than the sign extension does. + unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); + unsigned SrcEltWidth = SrcVT.getVectorElementType().getSizeInBits(); + SDValue ShiftAmount = DAG.getConstant(EltWidth - SrcEltWidth, VT); + return DAG.getNode(ISD::SRA, DL, VT, + DAG.getNode(ISD::SHL, DL, VT, Op, ShiftAmount), + ShiftAmount); +} + +// Generically expand a vector zext in register to a shuffle of the relevant +// lanes into the appropriate locations, a blend of zero into the high bits, +// and a bitcast to the wider element type. +SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + int NumElements = VT.getVectorNumElements(); + SDValue Src = Op.getOperand(0); + EVT SrcVT = Src.getValueType(); + int NumSrcElements = SrcVT.getVectorNumElements(); + + // Build up a zero vector to blend into this one. + EVT SrcScalarVT = SrcVT.getScalarType(); + SDValue ScalarZero = DAG.getTargetConstant(0, SrcScalarVT); + SmallVector<SDValue, 4> BuildVectorOperands(NumSrcElements, ScalarZero); + SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, DL, SrcVT, BuildVectorOperands); + + // Shuffle the incoming lanes into the correct position, and pull all other + // lanes from the zero vector. + SmallVector<int, 16> ShuffleMask; + ShuffleMask.reserve(NumSrcElements); + for (int i = 0; i < NumSrcElements; ++i) + ShuffleMask.push_back(i); + + int ExtLaneScale = NumSrcElements / NumElements; + int EndianOffset = TLI.isBigEndian() ? ExtLaneScale - 1 : 0; + for (int i = 0; i < NumElements; ++i) + ShuffleMask[i * ExtLaneScale + EndianOffset] = NumSrcElements + i; + + return DAG.getNode(ISD::BITCAST, DL, VT, + DAG.getVectorShuffle(SrcVT, DL, Zero, Src, ShuffleMask)); +} + SDValue VectorLegalizer::ExpandBSWAP(SDValue Op) { EVT VT = Op.getValueType(); @@ -729,9 +859,9 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // FIXME: Sign extend 1 to all ones if thats legal on the target. if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || - TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || - TLI.getBooleanContents(true) != - TargetLowering::ZeroOrNegativeOneBooleanContent) + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand || + TLI.getBooleanContents(Op1.getValueType()) != + TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); // If the mask and the type are different sizes, unroll the vector op. 
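// The blend identity that ExpandVSELECT builds above, shown as a standalone
// sketch (plain integers standing in for vector lanes; illustrative, not
// part of the patch). It only selects whole lanes when a "true" boolean is
// all ones, which is why ZeroOrNegativeOneBooleanContent is required:
#include <cassert>
#include <cstdint>

static uint32_t vselect_lane(uint32_t Mask, uint32_t A, uint32_t B) {
  // Mask is expected to be 0 (false) or ~0u (true).
  return (Mask & A) | (~Mask & B);
}

int main() {
  assert(vselect_lane(~0u, 0xAAAAAAAAu, 0x55555555u) == 0xAAAAAAAAu);
  assert(vselect_lane(0u, 0xAAAAAAAAu, 0x55555555u) == 0x55555555u);
  // A 0/1-style boolean mixes bits from both operands instead of selecting:
  assert(vselect_lane(1u, 0xAAAAAAAAu, 0x55555555u) == 0x55555554u);
  return 0;
}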
This diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 368eba3..f77c592 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -257,8 +257,26 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { SDValue Cond = GetScalarizedVector(N->getOperand(0)); SDValue LHS = GetScalarizedVector(N->getOperand(1)); - TargetLowering::BooleanContent ScalarBool = TLI.getBooleanContents(false); - TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true); + TargetLowering::BooleanContent ScalarBool = + TLI.getBooleanContents(false, false); + TargetLowering::BooleanContent VecBool = TLI.getBooleanContents(true, false); + + // If integer and float booleans have different contents then we can't + // reliably optimize in all cases. There is a full explanation for this in + // DAGCombiner::visitSELECT() where the same issue affects folding + // (select C, 0, 1) to (xor C, 1). + if (TLI.getBooleanContents(false, false) != + TLI.getBooleanContents(false, true)) { + // At least try the common case where the boolean is generated by a + // comparison. + if (Cond->getOpcode() == ISD::SETCC) { + EVT OpVT = Cond->getOperand(0)->getValueType(0); + ScalarBool = TLI.getBooleanContents(OpVT.getScalarType()); + VecBool = TLI.getBooleanContents(OpVT); + } else + ScalarBool = TargetLowering::UndefinedBooleanContent; + } + if (ScalarBool != VecBool) { EVT CondVT = Cond.getValueType(); switch (ScalarBool) { @@ -357,7 +375,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { // Vectors may have a different boolean contents to scalars. Promote the // value appropriately. ISD::NodeType ExtendCode = - TargetLowering::getExtendForContent(TLI.getBooleanContents(true)); + TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT)); return DAG.getNode(ExtendCode, DL, NVT, Res); } @@ -545,6 +563,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break; case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break; case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break; + case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break; case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break; case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break; case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break; @@ -765,6 +784,43 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, TLI.getVectorIdxTy())); } +void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDValue Vec = N->getOperand(0); + SDValue SubVec = N->getOperand(1); + SDValue Idx = N->getOperand(2); + SDLoc dl(N); + GetSplitVector(Vec, Lo, Hi); + + // Spill the vector to the stack. + EVT VecVT = Vec.getValueType(); + EVT SubVecVT = VecVT.getVectorElementType(); + SDValue StackPtr = DAG.CreateStackTemporary(VecVT); + SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, + MachinePointerInfo(), false, false, 0); + + // Store the new subvector into the specified index. 
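// A concrete case (illustrative): splitting insert_subvector(v8i32 Vec,
// v2i32 Sub, Idx) spills Vec to an 8 x i32 stack temporary, stores Sub over
// elements [Idx, Idx + 2) of that slot, and then reloads the two v4i32
// halves from byte offsets 0 and 16 below. Going through memory sidesteps
// the awkward case where Sub straddles the boundary between Lo and Hi.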
+ SDValue SubVecPtr = GetVectorElementPointer(StackPtr, SubVecVT, Idx); + Type *VecType = VecVT.getTypeForEVT(*DAG.getContext()); + unsigned Alignment = TLI.getDataLayout()->getPrefTypeAlignment(VecType); + Store = DAG.getStore(Store, dl, SubVec, SubVecPtr, MachinePointerInfo(), + false, false, 0); + + // Load the Lo part from the stack slot. + Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, 0); + + // Increment the pointer to the other part. + unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; + StackPtr = + DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, + DAG.getConstant(IncrementSize, StackPtr.getValueType())); + + // Load the Hi part from the stack slot. + Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), + false, false, false, MinAlign(Alignment, IncrementSize)); +} + void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { SDLoc dl(N); @@ -1511,7 +1567,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::AND: - case ISD::BSWAP: case ISD::MUL: case ISD::MULHS: case ISD::MULHU: @@ -1558,6 +1613,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Convert(N); break; + case ISD::BSWAP: case ISD::CTLZ: case ISD::CTPOP: case ISD::CTTZ: @@ -2343,15 +2399,18 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = WidenVecOp_STORE(N); break; case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + Res = WidenVecOp_EXTEND(N); + break; + case ISD::FP_EXTEND: case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: case ISD::TRUNCATE: - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: Res = WidenVecOp_Convert(N); break; } @@ -2372,6 +2431,68 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) { return false; } +SDValue DAGTypeLegalizer::WidenVecOp_EXTEND(SDNode *N) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + + SDValue InOp = N->getOperand(0); + // If some legalization strategy other than widening is used on the operand, + // we can't safely assume that just extending the low lanes is the correct + // transformation. + if (getTypeAction(InOp.getValueType()) != TargetLowering::TypeWidenVector) + return WidenVecOp_Convert(N); + InOp = GetWidenedVector(InOp); + assert(VT.getVectorNumElements() < + InOp.getValueType().getVectorNumElements() && + "Input wasn't widened!"); + + // We may need to further widen the operand until it has the same total + // vector size as the result. 
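// Concrete case (illustrative): zero-extending v2i16 to v2i64 on a target
// that widens v2i16 to v4i16. The widened operand is only 64 bits while the
// result is 128, so the loop below hunts for a legal 128-bit type with i16
// elements (v8i16) and inserts the v4i16 operand into its low half, after
// which the *_EXTEND_VECTOR_INREG nodes can consume it directly.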
+ EVT InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) { + EVT InEltVT = InVT.getVectorElementType(); + for (int i = MVT::FIRST_VECTOR_VALUETYPE, e = MVT::LAST_VECTOR_VALUETYPE; i < e; ++i) { + EVT FixedVT = (MVT::SimpleValueType)i; + EVT FixedEltVT = FixedVT.getVectorElementType(); + if (TLI.isTypeLegal(FixedVT) && + FixedVT.getSizeInBits() == VT.getSizeInBits() && + FixedEltVT == InEltVT) { + assert(FixedVT.getVectorNumElements() >= VT.getVectorNumElements() && + "Not enough elements in the fixed type for the operand!"); + assert(FixedVT.getVectorNumElements() != InVT.getVectorNumElements() && + "We can't have the same type as we started with!"); + if (FixedVT.getVectorNumElements() > InVT.getVectorNumElements()) + InOp = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, FixedVT, + DAG.getUNDEF(FixedVT), InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + else + InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, FixedVT, InOp, + DAG.getConstant(0, TLI.getVectorIdxTy())); + break; + } + } + InVT = InOp.getValueType(); + if (InVT.getSizeInBits() != VT.getSizeInBits()) + // We couldn't find a legal vector type that was a widening of the input + // and could be extended in-register to the result type, so we have to + // scalarize. + return WidenVecOp_Convert(N); + } + + // Use special DAG nodes to represent the operation of extending the + // low lanes. + switch (N->getOpcode()) { + default: + llvm_unreachable("Extend legalization on non-extend operation!"); + case ISD::ANY_EXTEND: + return DAG.getAnyExtendVectorInReg(InOp, DL, VT); + case ISD::SIGN_EXTEND: + return DAG.getSignExtendVectorInReg(InOp, DL, VT); + case ISD::ZERO_EXTEND: + return DAG.getZeroExtendVectorInReg(InOp, DL, VT); + } +} + SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // Since the result is legal and the input is illegal, it is unlikely // that we can fix the input to a legal type so unroll the convert diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index f92230c..624003f 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -442,7 +442,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { ResCount -= (regPressureDelta(SU) * ScaleTwo); } - // These are platform specific things. + // These are platform-specific things. // Will need to go into the back end // and be accessed from here via a hook.
for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 78ec4df..13cfae7 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -170,7 +170,8 @@ public: if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); else - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( + tm.getSubtargetImpl(), this); } ~ScheduleDAGRRList() { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 51c51d6..4589b0c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -73,7 +73,8 @@ public: : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) { const TargetMachine &tm = mf.getTarget(); - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer( + tm.getSubtargetImpl(), this); } ~ScheduleDAGVLIW() { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b1b8035..daff1f2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -48,6 +48,7 @@ #include "llvm/Target/TargetSelectionDAGInfo.h" #include <algorithm> #include <cmath> + using namespace llvm; /// makeVTList - Return an instance of the SDVTList struct initialized with the @@ -147,33 +148,34 @@ bool ISD::isBuildVectorAllZeros(const SDNode *N) { if (N->getOpcode() != ISD::BUILD_VECTOR) return false; - unsigned i = 0, e = N->getNumOperands(); - - // Skip over all of the undef values. - while (i != e && N->getOperand(i).getOpcode() == ISD::UNDEF) - ++i; + bool IsAllUndef = true; + for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) { + if (N->getOperand(i).getOpcode() == ISD::UNDEF) + continue; + IsAllUndef = false; + // Do not accept build_vectors that aren't all constants or which have non-0 + // elements. We have to be a bit careful here, as the type of the constant + // may not be the same as the type of the vector elements due to type + // legalization (the elements are promoted to a legal type for the target + // and a vector of a type may be legal when the base element type is not). + // We only want to check enough bits to cover the vector elements, because + // we care if the resultant vector is all zeros, not whether the individual + // constants are. + SDValue Zero = N->getOperand(i); + unsigned EltSize = N->getValueType(0).getVectorElementType().getSizeInBits(); + if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { + if (CN->getAPIntValue().countTrailingZeros() < EltSize) + return false; + } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { + if (CFPN->getValueAPF().bitcastToAPInt().countTrailingZeros() < EltSize) + return false; + } else + return false; + } // Do not accept an all-undef vector. - if (i == e) return false; - - // Do not accept build_vectors that aren't all constants or which have non-0 - // elements. 
- SDValue Zero = N->getOperand(i); - if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Zero)) { - if (!CN->isNullValue()) - return false; - } else if (ConstantFPSDNode *CFPN = dyn_cast<ConstantFPSDNode>(Zero)) { - if (!CFPN->getValueAPF().isPosZero()) - return false; - } else + if (IsAllUndef) return false; - - // Okay, we have at least one 0 value, check to see if the rest match or are - // undefs. - for (++i; i != e; ++i) - if (N->getOperand(i) != Zero && - N->getOperand(i).getOpcode() != ISD::UNDEF) - return false; return true; } @@ -381,6 +383,20 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID, } } +static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, bool nuw, bool nsw, + bool exact) { + ID.AddBoolean(nuw); + ID.AddBoolean(nsw); + ID.AddBoolean(exact); +} + +/// AddBinaryNodeIDCustom - Add BinarySDNodes special infos +static void AddBinaryNodeIDCustom(FoldingSetNodeID &ID, unsigned Opcode, + bool nuw, bool nsw, bool exact) { + if (isBinOpWithFlags(Opcode)) + AddBinaryNodeIDCustom(ID, nuw, nsw, exact); +} + static void AddNodeIDNode(FoldingSetNodeID &ID, unsigned short OpC, SDVTList VTList, ArrayRef<SDValue> OpList) { AddNodeIDOpcode(ID, OpC); @@ -473,7 +489,21 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ST->getPointerInfo().getAddrSpace()); break; } + case ISD::SDIV: + case ISD::UDIV: + case ISD::SRA: + case ISD::SRL: + case ISD::MUL: + case ISD::ADD: + case ISD::SUB: + case ISD::SHL: { + const BinaryWithFlagsSDNode *BinNode = cast<BinaryWithFlagsSDNode>(N); + AddBinaryNodeIDCustom(ID, N->getOpcode(), BinNode->hasNoUnsignedWrap(), + BinNode->hasNoSignedWrap(), BinNode->isExact()); + break; + } case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_SUB: @@ -527,7 +557,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { // Add the return value info. AddNodeIDValueTypes(ID, N->getVTList()); // Add the operand info. - AddNodeIDOperands(ID, makeArrayRef(N->op_begin(), N->op_end())); + AddNodeIDOperands(ID, N->ops()); // Handle SDNode leafs with special info. 
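// (With nuw/nsw/exact folded into the FoldingSet ID above, an add that is
// known not to wrap and a plain add of the same operands no longer produce
// the same ID, so CSE keeps them as distinct nodes rather than merging them
// and silently dropping the flags.)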
AddNodeIDCustom(ID, N); @@ -926,6 +956,25 @@ void SelectionDAG::allnodes_clear() { DeallocateNode(AllNodes.begin()); } +BinarySDNode *SelectionDAG::GetBinarySDNode(unsigned Opcode, SDLoc DL, + SDVTList VTs, SDValue N1, + SDValue N2, bool nuw, bool nsw, + bool exact) { + if (isBinOpWithFlags(Opcode)) { + BinaryWithFlagsSDNode *FN = new (NodeAllocator) BinaryWithFlagsSDNode( + Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + FN->setHasNoUnsignedWrap(nuw); + FN->setHasNoSignedWrap(nsw); + FN->setIsExact(exact); + + return FN; + } + + BinarySDNode *N = new (NodeAllocator) + BinarySDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), VTs, N1, N2); + return N; +} + void SelectionDAG::clear() { allnodes_clear(); OperandAllocator.Reset(); @@ -963,11 +1012,12 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT) { +SDValue SelectionDAG::getBoolExtOrTrunc(SDValue Op, SDLoc SL, EVT VT, + EVT OpVT) { if (VT.bitsLE(Op.getValueType())) return getNode(ISD::TRUNCATE, SL, VT, Op); - TargetLowering::BooleanContent BType = TLI->getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent BType = TLI->getBooleanContents(OpVT); return getNode(TLI->getExtendForContent(BType), SL, VT, Op); } @@ -983,6 +1033,36 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { getConstant(Imm, Op.getValueType())); } +SDValue SelectionDAG::getAnyExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ANY_EXTEND_VECTOR_INREG, DL, VT, Op); +} + +SDValue SelectionDAG::getSignExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, VT, Op); +} + +SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, SDLoc DL, EVT VT) { + assert(VT.isVector() && "This DAG node is restricted to vector types."); + assert(VT.getSizeInBits() == Op.getValueType().getSizeInBits() && + "The sizes of the input and result must match in order to perform the " + "extend in-register."); + assert(VT.getVectorNumElements() < Op.getValueType().getVectorNumElements() && + "The destination vector type must have fewer lanes than the input."); + return getNode(ISD::ZERO_EXTEND_VECTOR_INREG, DL, VT, Op); +} + /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). 
/// SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { @@ -995,7 +1075,7 @@ SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { SDValue SelectionDAG::getLogicalNOT(SDLoc DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue TrueValue; - switch (TLI->getBooleanContents(VT.isVector())) { + switch (TLI->getBooleanContents(VT)) { case TargetLowering::ZeroOrOneBooleanContent: case TargetLowering::UndefinedBooleanContent: TrueValue = getConstant(1, VT); @@ -1190,15 +1270,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); - if (!GVar) { - // If GV is an alias then use the aliasee for determining thread-localness. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasee()); - } - unsigned Opc; - if (GVar && GVar->isThreadLocal()) + if (GV->isThreadLocal()) Opc = isTargetGA ? ISD::TargetGlobalTLSAddress : ISD::GlobalTLSAddress; else Opc = isTargetGA ? ISD::TargetGlobalAddress : ISD::GlobalAddress; @@ -1454,6 +1527,11 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, N1 = getUNDEF(VT); commuteShuffle(N1, N2, MaskVec); } + // Reset our undef status after accounting for the mask. + N2Undef = N2.getOpcode() == ISD::UNDEF; + // Re-check whether both sides ended up undef. + if (N1.getOpcode() == ISD::UNDEF && N2Undef) + return getUNDEF(VT); // If Identity shuffle return that node. bool Identity = true; @@ -1464,9 +1542,36 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, return N1; // Shuffling a constant splat doesn't change the result. - if (N2Undef && N1.getOpcode() == ISD::BUILD_VECTOR) - if (cast<BuildVectorSDNode>(N1)->getConstantSplatValue()) - return N1; + if (N2Undef) { + SDValue V = N1; + + // Look through any bitcasts. We check that these don't change the number + // (and size) of elements and just changes their types. + while (V.getOpcode() == ISD::BITCAST) + V = V->getOperand(0); + + // A splat should always show up as a build vector node. + if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) { + BitVector UndefElements; + SDValue Splat = BV->getSplatValue(&UndefElements); + // If this is a splat of an undef, shuffling it is also undef. + if (Splat && Splat.getOpcode() == ISD::UNDEF) + return getUNDEF(VT); + + // We only have a splat which can skip shuffles if there is a splatted + // value and no undef lanes rearranged by the shuffle. + if (Splat && UndefElements.none()) { + // Splat of <x, x, ..., x>, return <x, x, ..., x>, provided that the + // number of elements match or the value splatted is a zero constant. + if (V.getValueType().getVectorNumElements() == + VT.getVectorNumElements()) + return N1; + if (auto *C = dyn_cast<ConstantSDNode>(Splat)) + if (C->isNullValue()) + return N1; + } + } + } FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; @@ -1692,7 +1797,8 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, case ISD::SETTRUE: case ISD::SETTRUE2: { const TargetLowering *TLI = TM.getTargetLowering(); - TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent Cnt = + TLI->getBooleanContents(N1->getValueType(0)); return getConstant( Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? 
-1ULL : 1, VT); } @@ -1923,11 +2029,20 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, case ISD::UMULO: if (Op.getResNo() != 1) break; - // The boolean result conforms to getBooleanContents. Fall through. + // The boolean result conforms to getBooleanContents. + // If we know the result of a setcc has the top bits zero, use this info. + // We know that we have an integer-based boolean since these operations + // are only available for integer. + if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + break; case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. - if (TLI->getBooleanContents(Op.getValueType().isVector()) == - TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == + TargetLowering::ZeroOrOneBooleanContent && + BitWidth > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); break; case ISD::SHL: @@ -2043,7 +2158,7 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); } else if (const MDNode *Ranges = LD->getRanges()) { - computeKnownBitsLoad(*Ranges, KnownZero); + computeKnownBitsFromRangeMetadata(*Ranges, KnownZero); } break; } @@ -2192,8 +2307,11 @@ void SelectionDAG::computeKnownBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue(); if (RA.isPowerOf2()) { APInt LowBits = (RA - 1); - KnownZero |= ~LowBits; - computeKnownBits(Op.getOperand(0), KnownZero, KnownOne,Depth+1); + computeKnownBits(Op.getOperand(0), KnownZero2, KnownOne2, Depth + 1); + + // The upper bits are all zero, the lower ones are unchanged. + KnownZero = KnownZero2 | ~LowBits; + KnownOne = KnownOne2 & LowBits; break; } } @@ -2323,9 +2441,16 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ if (Op.getResNo() != 1) break; // The boolean result conforms to getBooleanContents. Fall through. + // If setcc returns 0/-1, all bits are sign bits. + // We know that we have an integer-based boolean since these operations + // are only available for integer. + if (TLI->getBooleanContents(Op.getValueType().isVector(), false) == + TargetLowering::ZeroOrNegativeOneBooleanContent) + return VTBits; + break; case ISD::SETCC: // If setcc returns 0/-1, all bits are sign bits. - if (TLI->getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getOperand(0).getValueType()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; @@ -2940,7 +3065,7 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, - SDValue N2) { + SDValue N2, bool nuw, bool nsw, bool exact) { ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode()); switch (Opcode) { @@ -3380,22 +3505,25 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, } // Memoize this node if possible. 
- SDNode *N; + BinarySDNode *N; SDVTList VTs = getVTList(VT); + const bool BinOpHasFlags = isBinOpWithFlags(Opcode); if (VT != MVT::Glue) { - SDValue Ops[] = { N1, N2 }; + SDValue Ops[] = {N1, N2}; FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTs, Ops); + if (BinOpHasFlags) + AddBinaryNodeIDCustom(ID, Opcode, nuw, nsw, exact); void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, N1, N2); + N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); + CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), - DL.getDebugLoc(), VTs, N1, N2); + + N = GetBinarySDNode(Opcode, DL, VTs, N1, N2, nuw, nsw, exact); } AllNodes.push_back(N); @@ -3583,7 +3711,7 @@ static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, if (Str.empty()) { if (VT.isInteger()) return DAG.getConstant(0, VT); - else if (VT == MVT::f32 || VT == MVT::f64) + else if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) return DAG.getConstantFP(0.0, VT); else if (VT.isVector()) { unsigned NumElts = VT.getVectorNumElements(); @@ -4110,7 +4238,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMCPY), Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), - TLI->getPointerTy()), &Args, 0) + TLI->getPointerTy()), std::move(Args), 0) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); @@ -4166,7 +4294,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMMOVE), Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), - TLI->getPointerTy()), &Args, 0) + TLI->getPointerTy()), std::move(Args), 0) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); @@ -4230,7 +4358,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, .setCallee(TLI->getLibcallCallingConv(RTLIB::MEMSET), Type::getVoidTy(*getContext()), getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), - TLI->getPointerTy()), &Args, 0) + TLI->getPointerTy()), std::move(Args), 0) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI->LowerCallTo(CLI); @@ -4281,51 +4409,47 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDValue Chain, SDValue Ptr, SDValue Cmp, - SDValue Swp, MachinePointerInfo PtrInfo, - unsigned Alignment, - AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { +SDValue SelectionDAG::getAtomicCmpSwap( + unsigned Opcode, SDLoc dl, EVT MemVT, SDVTList VTs, SDValue Chain, + SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, + unsigned Alignment, AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP || + Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); + assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 Alignment = getEVTAlignment(MemVT); MachineFunction &MF = getMachineFunction(); - // All atomics are load and store, except for ATMOIC_LOAD and ATOMIC_STORE. - // For now, atomics are considered to be volatile always. 
// FIXME: Volatile isn't really correct; we should keep track of atomic // orderings in the memoperand. unsigned Flags = MachineMemOperand::MOVolatile; - if (Opcode != ISD::ATOMIC_STORE) - Flags |= MachineMemOperand::MOLoad; - if (Opcode != ISD::ATOMIC_LOAD) - Flags |= MachineMemOperand::MOStore; + Flags |= MachineMemOperand::MOLoad; + Flags |= MachineMemOperand::MOStore; MachineMemOperand *MMO = MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment); - return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO, - SuccessOrdering, FailureOrdering, SynchScope); + return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO, + SuccessOrdering, FailureOrdering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, - SDValue Chain, - SDValue Ptr, SDValue Cmp, - SDValue Swp, MachineMemOperand *MMO, - AtomicOrdering SuccessOrdering, - AtomicOrdering FailureOrdering, - SynchronizationScope SynchScope) { - assert(Opcode == ISD::ATOMIC_CMP_SWAP && "Invalid Atomic Op"); +SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTs, SDValue Chain, SDValue Ptr, + SDValue Cmp, SDValue Swp, + MachineMemOperand *MMO, + AtomicOrdering SuccessOrdering, + AtomicOrdering FailureOrdering, + SynchronizationScope SynchScope) { + assert(Opcode == ISD::ATOMIC_CMP_SWAP || + Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS); assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types"); - EVT VT = Cmp.getValueType(); - - SDVTList VTs = getVTList(VT, MVT::Other); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, SuccessOrdering, - FailureOrdering, SynchScope); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, MMO, + SuccessOrdering, FailureOrdering, SynchScope); } SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, @@ -5610,10 +5734,13 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, /// getNodeIfExists - Get the specified node if it's already available, or /// else return NULL. SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList, - ArrayRef<SDValue> Ops) { - if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) { + ArrayRef<SDValue> Ops, bool nuw, bool nsw, + bool exact) { + if (VTList.VTs[VTList.NumVTs - 1] != MVT::Glue) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, VTList, Ops); + if (isBinOpWithFlags(Opcode)) + AddBinaryNodeIDCustom(ID, nuw, nsw, exact); void *IP = nullptr; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return E; @@ -5960,7 +6087,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // count of outstanding operands. for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ) { SDNode *N = I++; - checkForCycles(N); + checkForCycles(N, this); unsigned Degree = N->getNumOperands(); if (Degree == 0) { // A node with no uses, add it to the result array immediately. @@ -5980,7 +6107,7 @@ unsigned SelectionDAG::AssignTopologicalOrder() { // such that by the time the end is reached all nodes will be sorted. for (allnodes_iterator I = allnodes_begin(),E = allnodes_end(); I != E; ++I) { SDNode *N = I; - checkForCycles(N); + checkForCycles(N, this); // N is in sorted position, so all its uses have one less operand // that needs to be sorted. 
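// (This is Kahn's topological sort: Degree starts as the operand count,
// operand-less nodes are emitted immediately, and the loop below decrements
// each user's outstanding-operand count as its operands reach sorted
// position, releasing the user once the count hits zero.)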
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); @@ -6005,7 +6132,9 @@ unsigned SelectionDAG::AssignTopologicalOrder() { #ifndef NDEBUG SDNode *S = ++I; dbgs() << "Overran sorted position:\n"; - S->dumprFull(); + S->dumprFull(this); dbgs() << "\n"; + dbgs() << "Checking if this is due to cycles\n"; + checkForCycles(this, true); #endif llvm_unreachable(nullptr); } @@ -6554,16 +6683,43 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, return true; } -ConstantSDNode *BuildVectorSDNode::getConstantSplatValue() const { - SDValue Op0 = getOperand(0); - if (Op0.getOpcode() != ISD::Constant) - return nullptr; +SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const { + if (UndefElements) { + UndefElements->clear(); + UndefElements->resize(getNumOperands()); + } + SDValue Splatted; + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + SDValue Op = getOperand(i); + if (Op.getOpcode() == ISD::UNDEF) { + if (UndefElements) + (*UndefElements)[i] = true; + } else if (!Splatted) { + Splatted = Op; + } else if (Splatted != Op) { + return SDValue(); + } + } + + if (!Splatted) { + assert(getOperand(0).getOpcode() == ISD::UNDEF && + "Can only have a splat without a constant for all undefs."); + return getOperand(0); + } - for (unsigned i = 1, e = getNumOperands(); i != e; ++i) - if (getOperand(i) != Op0) - return nullptr; + return Splatted; +} - return cast<ConstantSDNode>(Op0); +ConstantSDNode * +BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantSDNode>( + getSplatValue(UndefElements).getNode()); +} + +ConstantFPSDNode * +BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const { + return dyn_cast_or_null<ConstantFPSDNode>( + getSplatValue(UndefElements).getNode()); } bool BuildVectorSDNode::isConstant() const { @@ -6591,10 +6747,11 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) { return true; } -#ifdef XDEBUG +#ifndef NDEBUG static void checkForCyclesHelper(const SDNode *N, SmallPtrSet<const SDNode*, 32> &Visited, - SmallPtrSet<const SDNode*, 32> &Checked) { + SmallPtrSet<const SDNode*, 32> &Checked, + const llvm::SelectionDAG *DAG) { // If this node has already been checked, don't check it again. if (Checked.count(N)) return; @@ -6602,29 +6759,37 @@ static void checkForCyclesHelper(const SDNode *N, // If a node has already been visited on this depth-first walk, reject it as // a cycle. 
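// (Visited is the "grey" set of a textbook DFS cycle check, holding the
// nodes on the current walk's stack, and Checked is the "black" set of
// nodes already proven cycle-free; reaching a grey node a second time
// closes a cycle, and nodes move from grey to black on the way back up.)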
if (!Visited.insert(N)) { - dbgs() << "Offending node:\n"; - N->dumprFull(); errs() << "Detected cycle in SelectionDAG\n"; + dbgs() << "Offending node:\n"; + N->dumprFull(DAG); dbgs() << "\n"; abort(); } for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked); + checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked, DAG); Checked.insert(N); Visited.erase(N); } #endif -void llvm::checkForCycles(const llvm::SDNode *N) { +void llvm::checkForCycles(const llvm::SDNode *N, + const llvm::SelectionDAG *DAG, + bool force) { +#ifndef NDEBUG + bool check = force; #ifdef XDEBUG - assert(N && "Checking nonexistent SDNode"); - SmallPtrSet<const SDNode*, 32> visited; - SmallPtrSet<const SDNode*, 32> checked; - checkForCyclesHelper(N, visited, checked); -#endif + check = true; +#endif // XDEBUG + if (check) { + assert(N && "Checking nonexistent SDNode"); + SmallPtrSet<const SDNode*, 32> visited; + SmallPtrSet<const SDNode*, 32> checked; + checkForCyclesHelper(N, visited, checked, DAG); + } +#endif // !NDEBUG } -void llvm::checkForCycles(const llvm::SelectionDAG *DAG) { - checkForCycles(DAG->getRoot().getNode()); +void llvm::checkForCycles(const llvm::SelectionDAG *DAG, bool force) { + checkForCycles(DAG->getRoot().getNode(), DAG, force); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 070e929..28d8e98 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -169,7 +169,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]); - if (TLI.isBigEndian()) + if (TLI.hasBigEndianPartOrdering(ValueVT)) std::swap(Lo, Hi); Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi); } else { @@ -2784,8 +2784,22 @@ void SelectionDAGBuilder::visitFSub(const User &I) { void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(OpCode, getCurSDLoc(), - Op1.getValueType(), Op1, Op2)); + + bool nuw = false; + bool nsw = false; + bool exact = false; + if (const OverflowingBinaryOperator *OFBinOp = + dyn_cast<const OverflowingBinaryOperator>(&I)) { + nuw = OFBinOp->hasNoUnsignedWrap(); + nsw = OFBinOp->hasNoSignedWrap(); + } + if (const PossiblyExactOperator *ExactOp = + dyn_cast<const PossiblyExactOperator>(&I)) + exact = ExactOp->isExact(); + + SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), + Op1, Op2, nuw, nsw, exact); + setValue(&I, BinNodeValue); } void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { @@ -2816,8 +2830,25 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); } - setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), - Op1.getValueType(), Op1, Op2)); + bool nuw = false; + bool nsw = false; + bool exact = false; + + if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) { + + if (const OverflowingBinaryOperator *OFBinOp = + dyn_cast<const OverflowingBinaryOperator>(&I)) { + nuw = OFBinOp->hasNoUnsignedWrap(); + nsw = OFBinOp->hasNoSignedWrap(); + } + if (const PossiblyExactOperator *ExactOp = + dyn_cast<const PossiblyExactOperator>(&I)) + exact = ExactOp->isExact(); + } + + SDValue Res = DAG.getNode(Opcode, 
getCurSDLoc(), Op1.getValueType(), Op1, Op2, + nuw, nsw, exact); + setValue(&I, Res); } void SelectionDAGBuilder::visitSDiv(const User &I) { @@ -3570,12 +3601,12 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, if (Before) { if (Order == AcquireRelease || Order == SequentiallyConsistent) Order = Release; - else if (Order == Acquire || Order == Monotonic) + else if (Order == Acquire || Order == Monotonic || Order == Unordered) return Chain; } else { if (Order == AcquireRelease) Order = Acquire; - else if (Order == Release || Order == Monotonic) + else if (Order == Release || Order == Monotonic || Order == Unordered) return Chain; } SDValue Ops[3]; @@ -3598,19 +3629,17 @@ void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { InChain = InsertFenceForAtomic(InChain, SuccessOrder, Scope, true, dl, DAG, *TLI); - SDValue L = - DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - getValue(I.getCompareOperand()).getSimpleValueType(), - InChain, - getValue(I.getPointerOperand()), - getValue(I.getCompareOperand()), - getValue(I.getNewValOperand()), - MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, - TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, - Scope); + MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType(); + SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other); + SDValue L = DAG.getAtomicCmpSwap( + ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain, + getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), + getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), + 0 /* Alignment */, + TLI->getInsertFencesForAtomic() ? Monotonic : SuccessOrder, + TLI->getInsertFencesForAtomic() ? Monotonic : FailureOrder, Scope); - SDValue OutChain = L.getValue(1); + SDValue OutChain = L.getValue(2); if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, SuccessOrder, Scope, false, dl, @@ -5293,7 +5322,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { CLI.setDebugLoc(sdl).setChain(getRoot()) .setCallee(CallingConv::C, I.getType(), DAG.getExternalSymbol(TrapFuncName.data(), TLI->getPointerTy()), - &Args, 0); + std::move(Args), 0); std::pair<SDValue, SDValue> Result = TLI->LowerCallTo(CLI); DAG.setRoot(Result.second); @@ -5410,6 +5439,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool isTailCall, MachineBasicBlock *LandingPad) { + const TargetLowering *TLI = TM.getTargetLowering(); PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType()); FunctionType *FTy = cast<FunctionType>(PT->getElementType()); Type *RetTy = FTy->getReturnType(); @@ -5420,45 +5450,6 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, TargetLowering::ArgListEntry Entry; Args.reserve(CS.arg_size()); - // Check whether the function can return without sret-demotion. 
- SmallVector<ISD::OutputArg, 4> Outs; - const TargetLowering *TLI = TM.getTargetLowering(); - GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); - - bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); - - SDValue DemoteStackSlot; - int DemoteStackIdx = -100; - - if (!CanLowerReturn) { - assert(!CS.hasInAllocaArgument() && - "sret demotion is incompatible with inalloca"); - uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( - FTy->getReturnType()); - unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( - FTy->getReturnType()); - MachineFunction &MF = DAG.getMachineFunction(); - DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); - Entry.Node = DemoteStackSlot; - Entry.Ty = StackSlotPtrType; - Entry.isSExt = false; - Entry.isZExt = false; - Entry.isInReg = false; - Entry.isSRet = true; - Entry.isNest = false; - Entry.isByVal = false; - Entry.isReturned = false; - Entry.Alignment = Align; - Args.push_back(Entry); - RetTy = Type::getVoidTy(FTy->getContext()); - } - for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { const Value *V = *i; @@ -5499,58 +5490,20 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI->LowerCallTo. - if (isTailCall && !isInTailCallPosition(CS, *TLI)) + if (isTailCall && !isInTailCallPosition(CS, DAG)) isTailCall = false; TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(RetTy, FTy, Callee, &Args, CS).setTailCall(isTailCall); + .setCallee(RetTy, FTy, Callee, std::move(Args), CS).setTailCall(isTailCall); std::pair<SDValue,SDValue> Result = TLI->LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && "Null value expected with tail call!"); - if (Result.first.getNode()) { + if (Result.first.getNode()) setValue(CS.getInstruction(), Result.first); - } else if (!CanLowerReturn && Result.second.getNode()) { - // The instruction result is the result of loading from the - // hidden sret parameter. 
- SmallVector<EVT, 1> PVTs; - Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - - ComputeValueVTs(*TLI, PtrRetTy, PVTs); - assert(PVTs.size() == 1 && "Pointers should fit in one register"); - EVT PtrVT = PVTs[0]; - - SmallVector<EVT, 4> RetTys; - SmallVector<uint64_t, 4> Offsets; - RetTy = FTy->getReturnType(); - ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); - - unsigned NumValues = RetTys.size(); - SmallVector<SDValue, 4> Values(NumValues); - SmallVector<SDValue, 4> Chains(NumValues); - - for (unsigned i = 0; i < NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, - DemoteStackSlot, - DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add, - MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, false, 1); - Values[i] = L; - Chains[i] = L.getValue(1); - } - - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), - MVT::Other, Chains); - PendingLoads.push_back(Chain); - - setValue(CS.getInstruction(), - DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), - DAG.getVTList(RetTys), Values)); - } if (!Result.second.getNode()) { // As a special case, a null chain means that a tail call has been emitted @@ -6845,7 +6798,7 @@ SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(getCurSDLoc()).setChain(getRoot()) - .setCallee(CI.getCallingConv(), retTy, Callee, &Args, NumArgs) + .setCallee(CI.getCallingConv(), retTy, Callee, std::move(Args), NumArgs) .setDiscardResult(!CI.use_empty()); const TargetLowering *TLI = TM.getTargetLowering(); @@ -7092,6 +7045,21 @@ void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { FuncInfo.MF->getFrameInfo()->setHasPatchPoint(); } +/// Returns an AttributeSet representing the attributes applied to the return +/// value of the given call. +static AttributeSet getReturnAttrs(TargetLowering::CallLoweringInfo &CLI) { + SmallVector<Attribute::AttrKind, 2> Attrs; + if (CLI.RetSExt) + Attrs.push_back(Attribute::SExt); + if (CLI.RetZExt) + Attrs.push_back(Attribute::ZExt); + if (CLI.IsInReg) + Attrs.push_back(Attribute::InReg); + + return AttributeSet::get(CLI.RetTy->getContext(), AttributeSet::ReturnIndex, + Attrs); +} + /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. /// FIXME: When all targets are @@ -7100,24 +7068,62 @@ std::pair<SDValue, SDValue> TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { // Handle the incoming return values from the call. 
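// (One ISD::InputArg is pushed below for each legal register carrying a
// piece of the return value; an i128 return on a 64-bit target becomes two
// i64 entries, for instance. When the target reports that it cannot lower
// the return in registers, the list instead stays empty and a hidden sret
// stack slot is prepended to the argument list.)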
CLI.Ins.clear(); + Type *OrigRetTy = CLI.RetTy; SmallVector<EVT, 4> RetTys; - ComputeValueVTs(*this, CLI.RetTy, RetTys); - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT; - MyFlags.ArgVT = VT; - MyFlags.Used = CLI.IsReturnValueUsed; - if (CLI.RetSExt) - MyFlags.Flags.setSExt(); - if (CLI.RetZExt) - MyFlags.Flags.setZExt(); - if (CLI.IsInReg) - MyFlags.Flags.setInReg(); - CLI.Ins.push_back(MyFlags); + SmallVector<uint64_t, 4> Offsets; + ComputeValueVTs(*this, CLI.RetTy, RetTys, &Offsets); + + SmallVector<ISD::OutputArg, 4> Outs; + GetReturnInfo(CLI.RetTy, getReturnAttrs(CLI), Outs, *this); + + bool CanLowerReturn = + this->CanLowerReturn(CLI.CallConv, CLI.DAG.getMachineFunction(), + CLI.IsVarArg, Outs, CLI.RetTy->getContext()); + + SDValue DemoteStackSlot; + int DemoteStackIdx = -100; + if (!CanLowerReturn) { + // FIXME: equivalent assert? + // assert(!CS.hasInAllocaArgument() && + // "sret demotion is incompatible with inalloca"); + uint64_t TySize = getDataLayout()->getTypeAllocSize(CLI.RetTy); + unsigned Align = getDataLayout()->getPrefTypeAlignment(CLI.RetTy); + MachineFunction &MF = CLI.DAG.getMachineFunction(); + DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); + Type *StackSlotPtrType = PointerType::getUnqual(CLI.RetTy); + + DemoteStackSlot = CLI.DAG.getFrameIndex(DemoteStackIdx, getPointerTy()); + ArgListEntry Entry; + Entry.Node = DemoteStackSlot; + Entry.Ty = StackSlotPtrType; + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isInReg = false; + Entry.isSRet = true; + Entry.isNest = false; + Entry.isByVal = false; + Entry.isReturned = false; + Entry.Alignment = Align; + CLI.getArgs().insert(CLI.getArgs().begin(), Entry); + CLI.RetTy = Type::getVoidTy(CLI.RetTy->getContext()); + } else { + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + for (unsigned i = 0; i != NumRegs; ++i) { + ISD::InputArg MyFlags; + MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; + MyFlags.Used = CLI.IsReturnValueUsed; + if (CLI.RetSExt) + MyFlags.Flags.setSExt(); + if (CLI.RetZExt) + MyFlags.Flags.setZExt(); + if (CLI.IsInReg) + MyFlags.Flags.setInReg(); + CLI.Ins.push_back(MyFlags); + } } } @@ -7260,31 +7266,59 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { "LowerCall emitted a value with the wrong type!"); }); - // Collect the legal value parts into potentially illegal values - // that correspond to the original function's return values. - ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (CLI.RetSExt) - AssertOp = ISD::AssertSext; - else if (CLI.RetZExt) - AssertOp = ISD::AssertZext; SmallVector<SDValue, 4> ReturnValues; - unsigned CurReg = 0; - for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { - EVT VT = RetTys[I]; - MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); - unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); - - ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], - NumRegs, RegisterVT, VT, nullptr, - AssertOp)); - CurReg += NumRegs; - } - - // For a function returning void, there is no return value. 
We can't create - // such a node, so we just return a null return value in that case. In - // that case, nothing will actually look at the value. - if (ReturnValues.empty()) - return std::make_pair(SDValue(), CLI.Chain); + if (!CanLowerReturn) { + // The instruction result is the result of loading from the + // hidden sret parameter. + SmallVector<EVT, 1> PVTs; + Type *PtrRetTy = PointerType::getUnqual(OrigRetTy); + + ComputeValueVTs(*this, PtrRetTy, PVTs); + assert(PVTs.size() == 1 && "Pointers should fit in one register"); + EVT PtrVT = PVTs[0]; + + unsigned NumValues = RetTys.size(); + ReturnValues.resize(NumValues); + SmallVector<SDValue, 4> Chains(NumValues); + + for (unsigned i = 0; i < NumValues; ++i) { + SDValue Add = CLI.DAG.getNode(ISD::ADD, CLI.DL, PtrVT, DemoteStackSlot, + CLI.DAG.getConstant(Offsets[i], PtrVT)); + SDValue L = CLI.DAG.getLoad( + RetTys[i], CLI.DL, CLI.Chain, Add, + MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, + false, false, 1); + ReturnValues[i] = L; + Chains[i] = L.getValue(1); + } + + CLI.Chain = CLI.DAG.getNode(ISD::TokenFactor, CLI.DL, MVT::Other, Chains); + } else { + // Collect the legal value parts into potentially illegal values + // that correspond to the original function's return values. + ISD::NodeType AssertOp = ISD::DELETED_NODE; + if (CLI.RetSExt) + AssertOp = ISD::AssertSext; + else if (CLI.RetZExt) + AssertOp = ISD::AssertZext; + unsigned CurReg = 0; + for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { + EVT VT = RetTys[I]; + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); + + ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], + NumRegs, RegisterVT, VT, nullptr, + AssertOp)); + CurReg += NumRegs; + } + + // For a function returning void, there is no return value. We can't create + // such a node, so we just return a null return value in that case. In + // that case, nothing will actually look at the value. + if (ReturnValues.empty()) + return std::make_pair(SDValue(), CLI.Chain); + } SDValue Res = CLI.DAG.getNode(ISD::MERGE_VALUES, CLI.DL, CLI.DAG.getVTList(RetTys), ReturnValues); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index fb29691..84679f9 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -320,7 +320,7 @@ private: /// 1. Preserve the architecture independence of stack protector generation. /// /// 2. Preserve the normal IR level stack protector check for platforms like - /// OpenBSD for which we support platform specific stack protector + /// OpenBSD for which we support platform-specific stack protector /// generation. /// /// The main problem that guided the present solution is that one can not @@ -338,7 +338,7 @@ private: /// basic block (where the return inst is placed) and then move it back /// later at SelectionDAG/MI time before the stack protector check if the /// tail call optimization failed. The MI level option was nixed - /// immediately since it would require platform specific pattern + /// immediately since it would require platform-specific pattern /// matching. The SelectionDAG level option was nixed because /// SelectionDAG only processes one IR level basic block at a time /// implying one could not create a DAG Combine to move the callinst. 
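The LowerCallTo hunks above move sret demotion out of SelectionDAGBuilder and into TargetLowering::LowerCallTo: when CanLowerReturn reports that the return type cannot travel in registers, the lowering creates a stack slot (DemoteStackSlot), prepends its address as a hidden sret argument, and reloads the return value from that slot afterwards. A minimal standalone C++ sketch of the same transformation, using plain functions rather than LLVM APIs (all names here are illustrative, not part of the diff):

#include <cstdio>

struct Big { long v[8]; };             // assume: too large to return in registers

// Original form: return by value.
static Big makeBig() {
  Big b{};
  for (int i = 0; i < 8; ++i) b.v[i] = i;
  return b;
}

// "Demoted" form: the caller allocates the slot and passes its address as a
// hidden first argument; the callee writes through it and returns void.
static void makeBigDemoted(Big *sret) {
  for (int i = 0; i < 8; ++i) sret->v[i] = i;
}

int main() {
  Big a = makeBig();                   // direct return
  Big b;                               // caller-provided stack slot
  makeBigDemoted(&b);                  // hidden sret argument
  std::printf("%ld %ld\n", a.v[7], b.v[7]);
  return 0;
}

The diff performs exactly this split inside LowerCallTo, which is why the !CanLowerReturn path loads each value part from DemoteStackSlot at the offsets ComputeValueVTs recorded, then token-factors the load chains together.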
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index d6b5255..b3a452f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -55,6 +55,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::PREFETCH: return "Prefetch"; case ISD::ATOMIC_FENCE: return "AtomicFence"; case ISD::ATOMIC_CMP_SWAP: return "AtomicCmpSwap"; + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: return "AtomicCmpSwapWithSuccess"; case ISD::ATOMIC_SWAP: return "AtomicSwap"; case ISD::ATOMIC_LOAD_ADD: return "AtomicLoadAdd"; case ISD::ATOMIC_LOAD_SUB: return "AtomicLoadSub"; @@ -220,6 +221,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::ZERO_EXTEND: return "zero_extend"; case ISD::ANY_EXTEND: return "any_extend"; case ISD::SIGN_EXTEND_INREG: return "sign_extend_inreg"; + case ISD::ANY_EXTEND_VECTOR_INREG: return "any_extend_vector_inreg"; + case ISD::SIGN_EXTEND_VECTOR_INREG: return "sign_extend_vector_inreg"; + case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg"; case ISD::TRUNCATE: return "truncate"; case ISD::FP_ROUND: return "fp_round"; case ISD::FLT_ROUNDS_: return "flt_rounds"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 472fc9c..57e22e2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -141,6 +141,25 @@ STATISTIC(NumFastIselFailShuffleVector,"Fast isel fails on ShuffleVector"); STATISTIC(NumFastIselFailExtractValue,"Fast isel fails on ExtractValue"); STATISTIC(NumFastIselFailInsertValue,"Fast isel fails on InsertValue"); STATISTIC(NumFastIselFailLandingPad,"Fast isel fails on LandingPad"); + +// Intrinsic instructions... 
+STATISTIC(NumFastIselFailIntrinsicCall, "Fast isel fails on Intrinsic call"); +STATISTIC(NumFastIselFailSAddWithOverflow, + "Fast isel fails on sadd.with.overflow"); +STATISTIC(NumFastIselFailUAddWithOverflow, + "Fast isel fails on uadd.with.overflow"); +STATISTIC(NumFastIselFailSSubWithOverflow, + "Fast isel fails on ssub.with.overflow"); +STATISTIC(NumFastIselFailUSubWithOverflow, + "Fast isel fails on usub.with.overflow"); +STATISTIC(NumFastIselFailSMulWithOverflow, + "Fast isel fails on smul.with.overflow"); +STATISTIC(NumFastIselFailUMulWithOverflow, + "Fast isel fails on umul.with.overflow"); +STATISTIC(NumFastIselFailFrameaddress, "Fast isel fails on Frameaddress"); +STATISTIC(NumFastIselFailSqrt, "Fast isel fails on sqrt call"); +STATISTIC(NumFastIselFailStackMap, "Fast isel fails on StackMap call"); +STATISTIC(NumFastIselFailPatchPoint, "Fast isel fails on PatchPoint call"); #endif static cl::opt<bool> @@ -974,7 +993,37 @@ static void collectFailStats(const Instruction *I) { case Instruction::FCmp: NumFastIselFailFCmp++; return; case Instruction::PHI: NumFastIselFailPHI++; return; case Instruction::Select: NumFastIselFailSelect++; return; - case Instruction::Call: NumFastIselFailCall++; return; + case Instruction::Call: { + if (auto const *Intrinsic = dyn_cast<IntrinsicInst>(I)) { + switch (Intrinsic->getIntrinsicID()) { + default: + NumFastIselFailIntrinsicCall++; return; + case Intrinsic::sadd_with_overflow: + NumFastIselFailSAddWithOverflow++; return; + case Intrinsic::uadd_with_overflow: + NumFastIselFailUAddWithOverflow++; return; + case Intrinsic::ssub_with_overflow: + NumFastIselFailSSubWithOverflow++; return; + case Intrinsic::usub_with_overflow: + NumFastIselFailUSubWithOverflow++; return; + case Intrinsic::smul_with_overflow: + NumFastIselFailSMulWithOverflow++; return; + case Intrinsic::umul_with_overflow: + NumFastIselFailUMulWithOverflow++; return; + case Intrinsic::frameaddress: + NumFastIselFailFrameaddress++; return; + case Intrinsic::sqrt: + NumFastIselFailSqrt++; return; + case Intrinsic::experimental_stackmap: + NumFastIselFailStackMap++; return; + case Intrinsic::experimental_patchpoint_void: // fall-through + case Intrinsic::experimental_patchpoint_i64: + NumFastIselFailPatchPoint++; return; + } + } + NumFastIselFailCall++; + return; + } case Instruction::Shl: NumFastIselFailShl++; return; case Instruction::LShr: NumFastIselFailLShr++; return; case Instruction::AShr: NumFastIselFailAShr++; return; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index b75d805..42372a2 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -105,7 +105,7 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setNoReturn(doesNotReturn).setDiscardResult(!isReturnValueUsed) .setSExtResult(isSigned).setZExtResult(!isSigned); return LowerCallTo(CLI); @@ -327,6 +327,10 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, assert(Op.getNode()->getNumValues() == 1 && "ShrinkDemandedOp only supports nodes with one result!"); + // Early return, as this function cannot handle vector types. 
+ if (Op.getValueType().isVector()) + return false; + // Don't do this if the node has another user, which may require the // full value. if (!Op.getNode()->hasOneUse()) @@ -1146,18 +1150,21 @@ bool TargetLowering::isConstTrueVal(const SDNode *N) const { if (!N) return false; - bool IsVec = false; const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); if (!CN) { const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); if (!BV) return false; - IsVec = true; - CN = BV->getConstantSplatValue(); + BitVector UndefElements; + CN = BV->getConstantSplatNode(&UndefElements); + // Only interested in constant splats, and we don't try to handle undef + // elements in identifying boolean constants. + if (!CN || UndefElements.none()) + return false; } - switch (getBooleanContents(IsVec)) { + switch (getBooleanContents(N->getValueType(0))) { case UndefinedBooleanContent: return CN->getAPIntValue()[0]; case ZeroOrOneBooleanContent: @@ -1173,18 +1180,21 @@ bool TargetLowering::isConstFalseVal(const SDNode *N) const { if (!N) return false; - bool IsVec = false; const ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N); if (!CN) { const BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); if (!BV) return false; - IsVec = true; - CN = BV->getConstantSplatValue(); + BitVector UndefElements; + CN = BV->getConstantSplatNode(&UndefElements); + // Only interested in constant splats, and we don't try to handle undef + // elements in identifying boolean constants. + if (!CN || UndefElements.none()) + return false; } - if (getBooleanContents(IsVec) == UndefinedBooleanContent) + if (getBooleanContents(N->getValueType(0)) == UndefinedBooleanContent) return !CN->getAPIntValue()[0]; return CN->isNullValue(); @@ -1205,7 +1215,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, case ISD::SETFALSE2: return DAG.getConstant(0, VT); case ISD::SETTRUE: case ISD::SETTRUE2: { - TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector()); + TargetLowering::BooleanContent Cnt = + getBooleanContents(N0->getValueType(0)); return DAG.getConstant( Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); } @@ -1412,7 +1423,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, SDValue NewSetCC = DAG.getSetCC(dl, NewSetCCVT, N0.getOperand(0), NewConst, Cond); - return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT); + return DAG.getBoolExtOrTrunc(NewSetCC, dl, VT, N0.getValueType()); } break; } @@ -1496,7 +1507,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, } } else if (N1C->getAPIntValue() == 1 && (VT == MVT::i1 || - getBooleanContents(false) == ZeroOrOneBooleanContent)) { + getBooleanContents(N0->getValueType(0)) == + ZeroOrOneBooleanContent)) { SDValue Op0 = N0; if (Op0.getOpcode() == ISD::TRUNCATE) Op0 = Op0.getOperand(0); @@ -1767,7 +1779,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // The sext(setcc()) => setcc() optimization relies on the appropriate // constant being emitted. uint64_t EqVal = 0; - switch (getBooleanContents(N0.getValueType().isVector())) { + switch (getBooleanContents(N0.getValueType())) { case UndefinedBooleanContent: case ZeroOrOneBooleanContent: EqVal = ISD::isTrueWhenEqual(Cond); @@ -2613,7 +2625,8 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl, if (ShAmt) { // TODO: For UDIV use SRL instead of SRA. 
SDValue Amt = DAG.getConstant(ShAmt, getShiftAmountTy(Op1.getValueType())); - Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt); + Op1 = DAG.getNode(ISD::SRA, dl, Op1.getValueType(), Op1, Amt, false, false, + true); d = d.ashr(ShAmt); } diff --git a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp index 1120be8..0e89bad 100644 --- a/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp +++ b/lib/CodeGen/SelectionDAG/TargetSelectionDAGInfo.cpp @@ -15,8 +15,8 @@ #include "llvm/Target/TargetMachine.h" using namespace llvm; -TargetSelectionDAGInfo::TargetSelectionDAGInfo(const TargetMachine &TM) - : DL(TM.getDataLayout()) { +TargetSelectionDAGInfo::TargetSelectionDAGInfo(const DataLayout *DL) + : DL(DL) { } TargetSelectionDAGInfo::~TargetSelectionDAGInfo() { diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp index 4dd87dd..3ba502f 100644 --- a/lib/CodeGen/StackMapLivenessAnalysis.cpp +++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp @@ -28,10 +28,9 @@ using namespace llvm; #define DEBUG_TYPE "stackmaps" namespace llvm { -cl::opt<bool> EnableStackMapLiveness("enable-stackmap-liveness", - cl::Hidden, cl::desc("Enable StackMap Liveness Analysis Pass")); cl::opt<bool> EnablePatchPointLiveness("enable-patchpoint-liveness", - cl::Hidden, cl::desc("Enable PatchPoint Liveness Analysis Pass")); + cl::Hidden, cl::init(true), + cl::desc("Enable PatchPoint Liveness Analysis Pass")); } STATISTIC(NumStackMapFuncVisited, "Number of functions visited"); @@ -62,15 +61,17 @@ void StackMapLiveness::getAnalysisUsage(AnalysisUsage &AU) const { /// Calculate the liveness information for the given machine function. bool StackMapLiveness::runOnMachineFunction(MachineFunction &_MF) { + if (!EnablePatchPointLiveness) + return false; + DEBUG(dbgs() << "********** COMPUTING STACKMAP LIVENESS: " << _MF.getName() << " **********\n"); MF = &_MF; TRI = MF->getTarget().getRegisterInfo(); ++NumStackMapFuncVisited; - // Skip this function if there are no stackmaps or patchpoints to process. - if (!((MF->getFrameInfo()->hasStackMap() && EnableStackMapLiveness) || - (MF->getFrameInfo()->hasPatchPoint() && EnablePatchPointLiveness))) { + // Skip this function if there are no patchpoints to process. + if (!MF->getFrameInfo()->hasPatchPoint()) { ++NumStackMapFuncSkipped; return false; } @@ -88,13 +89,10 @@ bool StackMapLiveness::calculateLiveness() { LiveRegs.addLiveOuts(MBBI); bool HasStackMap = false; // Reverse iterate over all instructions and add the current live register - // set to an instruction if we encounter a stackmap or patchpoint - // instruction. + // set to an instruction if we encounter a patchpoint instruction. for (MachineBasicBlock::reverse_iterator I = MBBI->rbegin(), E = MBBI->rend(); I != E; ++I) { - int Opc = I->getOpcode(); - if ((EnableStackMapLiveness && (Opc == TargetOpcode::STACKMAP)) || - (EnablePatchPointLiveness && (Opc == TargetOpcode::PATCHPOINT))) { + if (I->getOpcode() == TargetOpcode::PATCHPOINT) { addLiveOutSetToMI(*I); HasChanged = true; HasStackMap = true; diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp index c3f84c6..83966bd0 100644 --- a/lib/CodeGen/TargetInstrInfo.cpp +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -671,7 +671,7 @@ bool TargetInstrInfo::usePreRAHazardRecognizer() const { // Default implementation of CreateTargetRAHazardRecognizer. 
ScheduleHazardRecognizer *TargetInstrInfo:: -CreateTargetHazardRecognizer(const TargetMachine *TM, +CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const { // Dummy hazard recognizer allows all instructions to issue. return new ScheduleHazardRecognizer(); diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 2634d71..c574fd4 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -39,7 +39,7 @@ using namespace llvm; /// InitLibcallNames - Set default libcall names. /// -static void InitLibcallNames(const char **Names, const TargetMachine &TM) { +static void InitLibcallNames(const char **Names, const Triple &TT) { Names[RTLIB::SHL_I16] = "__ashlhi3"; Names[RTLIB::SHL_I32] = "__ashlsi3"; Names[RTLIB::SHL_I64] = "__ashldi3"; @@ -384,7 +384,7 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8"; Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16"; - if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) { + if (TT.getEnvironment() == Triple::GNU) { Names[RTLIB::SINCOS_F32] = "sincosf"; Names[RTLIB::SINCOS_F64] = "sincos"; Names[RTLIB::SINCOS_F80] = "sincosl"; @@ -399,7 +399,7 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) { Names[RTLIB::SINCOS_PPCF128] = nullptr; } - if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) { + if (TT.getOS() != Triple::OpenBSD) { Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail"; } else { // These are generally not available. @@ -690,6 +690,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; + BooleanFloatContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; SchedPreferenceInfo = Sched::ILP; JumpBufSize = 0; @@ -702,7 +703,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, SupportJumpTables = true; MinimumJumpTableEntries = 4; - InitLibcallNames(LibcallRoutineNames, TM); + InitLibcallNames(LibcallRoutineNames, Triple(TM.getTargetTriple())); InitCmpLibcallCCs(CmpLibcallCCs); InitLibcallCallingConvs(LibcallCallingConvs); } @@ -730,6 +731,10 @@ void TargetLoweringBase::initActions() { setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); } + // Most backends expect to see the node which just returns the value loaded. + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, + (MVT::SimpleValueType)VT, Expand); + // These operations default to expand. setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); @@ -739,8 +744,15 @@ void TargetLoweringBase::initActions() { // These operations default to expand for vector types. if (VT >= MVT::FIRST_VECTOR_VALUETYPE && - VT <= MVT::LAST_VECTOR_VALUETYPE) + VT <= MVT::LAST_VECTOR_VALUETYPE) { setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, + (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, + (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, + (MVT::SimpleValueType)VT, Expand); + } } // Most targets ignore the @llvm.prefetch intrinsic. 
@@ -1080,24 +1092,25 @@ void TargetLoweringBase::computeRegisterProperties() { // Loop over all of the vector value types to see which need transformations. for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; - if (isTypeLegal(VT)) continue; + MVT VT = (MVT::SimpleValueType) i; + if (isTypeLegal(VT)) + continue; - // Determine if there is a legal wider type. If so, we should promote to - // that wider vector type. MVT EltVT = VT.getVectorElementType(); unsigned NElts = VT.getVectorNumElements(); - if (NElts != 1 && !shouldSplitVectorType(VT)) { - bool IsLegalWiderType = false; - // First try to promote the elements of integer vectors. If no legal - // promotion was found, fallback to the widen-vector method. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; + bool IsLegalWiderType = false; + LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT); + switch (PreferredAction) { + case TypePromoteInteger: { + // Try to promote the elements of integer vectors. If no legal + // promotion was found, fall through to the widen-vector method. + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; // Promote vectors of integers to vectors with the same number // of elements, with a wider element type. if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() - && SVT.getVectorNumElements() == NElts && - isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { + && SVT.getVectorNumElements() == NElts && isTypeLegal(SVT) + && SVT.getScalarType().isInteger()) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1106,15 +1119,15 @@ void TargetLoweringBase::computeRegisterProperties() { break; } } - - if (IsLegalWiderType) continue; - + if (IsLegalWiderType) + break; + } + case TypeWidenVector: { // Try to widen the vector. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - MVT SVT = (MVT::SimpleValueType)nVT; - if (SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && - isTypeLegal(SVT)) { + for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType) nVT; + if (SVT.getVectorElementType() == EltVT + && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) { TransformToType[i] = SVT; RegisterTypeForVT[i] = SVT; NumRegistersForVT[i] = 1; @@ -1123,27 +1136,34 @@ void TargetLoweringBase::computeRegisterProperties() { break; } } - if (IsLegalWiderType) continue; + if (IsLegalWiderType) + break; } - - MVT IntermediateVT; - MVT RegisterVT; - unsigned NumIntermediates; - NumRegistersForVT[i] = - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, - RegisterVT, this); - RegisterTypeForVT[i] = RegisterVT; - - MVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. The default action is to split. - TransformToType[i] = MVT::Other; - unsigned NumElts = VT.getVectorNumElements(); - ValueTypeActions.setTypeAction(VT, - NumElts > 1 ? 
TypeSplitVector : TypeScalarizeVector); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, TypeWidenVector); + case TypeSplitVector: + case TypeScalarizeVector: { + MVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = getVectorTypeBreakdownMVT(VT, IntermediateVT, + NumIntermediates, RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + MVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + if (PreferredAction == TypeScalarizeVector) + ValueTypeActions.setTypeAction(VT, TypeScalarizeVector); + else + ValueTypeActions.setTypeAction(VT, TypeSplitVector); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + } + break; + } + default: + llvm_unreachable("Unknown vector legalization action!"); } } diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index dda2259..03f4a51 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -48,16 +48,12 @@ MCSymbol *TargetLoweringObjectFileELF::getCFIPersonalitySymbol( const GlobalValue *GV, Mangler &Mang, const TargetMachine &TM, MachineModuleInfo *MMI) const { unsigned Encoding = getPersonalityEncoding(); - switch (Encoding & 0x70) { - default: - report_fatal_error("We do not support this DWARF encoding yet!"); - case dwarf::DW_EH_PE_absptr: - return TM.getSymbol(GV, Mang); - case dwarf::DW_EH_PE_pcrel: { + if ((Encoding & 0x80) == dwarf::DW_EH_PE_indirect) return getContext().GetOrCreateSymbol(StringRef("DW.ref.") + TM.getSymbol(GV, Mang)->getName()); - } - } + if ((Encoding & 0x70) == dwarf::DW_EH_PE_absptr) + return TM.getSymbol(GV, Mang); + report_fatal_error("We do not support this DWARF encoding yet!"); } void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, @@ -196,6 +192,18 @@ getELFSectionFlags(SectionKind K) { return Flags; } +static const Comdat *getELFComdat(const GlobalValue *GV) { + const Comdat *C = GV->getComdat(); + if (!C) + return nullptr; + + if (C->getSelectionKind() != Comdat::Any) + report_fatal_error("ELF COMDATs only support SelectionKind::Any, '" + + C->getName() + "' cannot be lowered."); + + return C; +} + const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { @@ -204,14 +212,20 @@ const MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( // Infer section flags from the section name if we can. Kind = getELFKindForNamedSection(SectionName, Kind); + StringRef Group = ""; + unsigned Flags = getELFSectionFlags(Kind); + if (const Comdat *C = getELFComdat(GV)) { + Group = C->getName(); + Flags |= ELF::SHF_GROUP; + } return getContext().getELFSection(SectionName, - getELFSectionType(SectionName, Kind), - getELFSectionFlags(Kind), Kind); + getELFSectionType(SectionName, Kind), Flags, + Kind, /*EntrySize=*/0, Group); } /// getSectionPrefixForGlobal - Return the section prefix name used by options /// FunctionsSections and DataSections. 
-static const char *getSectionPrefixForGlobal(SectionKind Kind) { +static StringRef getSectionPrefixForGlobal(SectionKind Kind) { if (Kind.isText()) return ".text."; if (Kind.isReadOnly()) return ".rodata."; if (Kind.isBSS()) return ".bss."; @@ -228,7 +242,6 @@ static const char *getSectionPrefixForGlobal(SectionKind Kind) { return ".data.rel.ro."; } - const MCSection *TargetLoweringObjectFileELF:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { @@ -242,18 +255,20 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // If this global is linkonce/weak and the target handles this by emitting it // into a 'uniqued' section name, create and return the section now. - if ((GV->isWeakForLinker() || EmitUniquedSection) && + if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) && !Kind.isCommon()) { - const char *Prefix; - Prefix = getSectionPrefixForGlobal(Kind); + StringRef Prefix = getSectionPrefixForGlobal(Kind); - SmallString<128> Name(Prefix, Prefix+strlen(Prefix)); + SmallString<128> Name(Prefix); TM.getNameWithPrefix(Name, GV, Mang, true); StringRef Group = ""; unsigned Flags = getELFSectionFlags(Kind); - if (GV->isWeakForLinker()) { - Group = Name.substr(strlen(Prefix)); + if (GV->isWeakForLinker() || GV->hasComdat()) { + if (const Comdat *C = getELFComdat(GV)) + Group = C->getName(); + else + Group = Name.substr(Prefix.size()); Flags |= ELF::SHF_GROUP; } @@ -340,7 +355,7 @@ getSectionForConstant(SectionKind Kind) const { } const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( - unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { + unsigned Priority, const MCSymbol *KeySym) const { // The default scheme is .ctor / .dtor, so we have to invert the priority // numbering. if (Priority == 65535) @@ -360,7 +375,7 @@ const MCSection *TargetLoweringObjectFileELF::getStaticCtorSection( } const MCSection *TargetLoweringObjectFileELF::getStaticDtorSection( - unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { + unsigned Priority, const MCSymbol *KeySym) const { // The default scheme is .ctor / .dtor, so we have to invert the priority // numbering. if (Priority == 65535) @@ -487,6 +502,15 @@ emitModuleFlags(MCStreamer &Streamer, Streamer.AddBlankLine(); } +static void checkMachOComdat(const GlobalValue *GV) { + const Comdat *C = GV->getComdat(); + if (!C) + return; + + report_fatal_error("MachO doesn't support COMDATs, '" + C->getName() + + "' cannot be lowered."); +} + const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { @@ -494,6 +518,9 @@ const MCSection *TargetLoweringObjectFileMachO::getExplicitSectionGlobal( StringRef Segment, Section; unsigned TAA = 0, StubSize = 0; bool TAAParsed; + + checkMachOComdat(GV); + std::string ErrorCode = MCSectionMachO::ParseSectionSpecifier(GV->getSection(), Segment, Section, TAA, TAAParsed, StubSize); @@ -564,6 +591,7 @@ bool TargetLoweringObjectFileMachO::isSectionAtomizableBySymbols( const MCSection *TargetLoweringObjectFileMachO:: SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { + checkMachOComdat(GV); // Handle thread local data. 
if (Kind.isThreadBSS()) return TLSBSSSection; @@ -732,6 +760,50 @@ getCOFFSectionFlags(SectionKind K) { return Flags; } +static const GlobalValue *getComdatGVForCOFF(const GlobalValue *GV) { + const Comdat *C = GV->getComdat(); + assert(C && "expected GV to have a Comdat!"); + + StringRef ComdatGVName = C->getName(); + const GlobalValue *ComdatGV = GV->getParent()->getNamedValue(ComdatGVName); + if (!ComdatGV) + report_fatal_error("Associative COMDAT symbol '" + ComdatGVName + + "' does not exist."); + + if (ComdatGV->getComdat() != C) + report_fatal_error("Associative COMDAT symbol '" + ComdatGVName + + "' is not a key for it's COMDAT."); + + return ComdatGV; +} + +static int getSelectionForCOFF(const GlobalValue *GV) { + if (const Comdat *C = GV->getComdat()) { + const GlobalValue *ComdatKey = getComdatGVForCOFF(GV); + if (const auto *GA = dyn_cast<GlobalAlias>(ComdatKey)) + ComdatKey = GA->getBaseObject(); + if (ComdatKey == GV) { + switch (C->getSelectionKind()) { + case Comdat::Any: + return COFF::IMAGE_COMDAT_SELECT_ANY; + case Comdat::ExactMatch: + return COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH; + case Comdat::Largest: + return COFF::IMAGE_COMDAT_SELECT_LARGEST; + case Comdat::NoDuplicates: + return COFF::IMAGE_COMDAT_SELECT_NODUPLICATES; + case Comdat::SameSize: + return COFF::IMAGE_COMDAT_SELECT_SAME_SIZE; + } + } else { + return COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE; + } + } else if (GV->isWeakForLinker()) { + return COFF::IMAGE_COMDAT_SELECT_ANY; + } + return 0; +} + const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( const GlobalValue *GV, SectionKind Kind, Mangler &Mang, const TargetMachine &TM) const { @@ -739,11 +811,21 @@ const MCSection *TargetLoweringObjectFileCOFF::getExplicitSectionGlobal( unsigned Characteristics = getCOFFSectionFlags(Kind); StringRef Name = GV->getSection(); StringRef COMDATSymName = ""; - if (GV->isWeakForLinker()) { - Selection = COFF::IMAGE_COMDAT_SELECT_ANY; - Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - MCSymbol *Sym = TM.getSymbol(GV, Mang); - COMDATSymName = Sym->getName(); + if ((GV->isWeakForLinker() || GV->hasComdat()) && !Kind.isCommon()) { + Selection = getSelectionForCOFF(GV); + const GlobalValue *ComdatGV; + if (Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) + ComdatGV = getComdatGVForCOFF(GV); + else + ComdatGV = GV; + + if (!ComdatGV->hasPrivateLinkage()) { + MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang); + COMDATSymName = Sym->getName(); + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; + } else { + Selection = 0; + } } return getContext().getCOFFSection(Name, Characteristics, @@ -780,17 +862,27 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, // into a 'uniqued' section name, create and return the section now. // Section names depend on the name of the symbol which is not feasible if the // symbol has private linkage. - if ((GV->isWeakForLinker() || EmitUniquedSection) && - !GV->hasPrivateLinkage() && !Kind.isCommon()) { + if ((GV->isWeakForLinker() || EmitUniquedSection || GV->hasComdat()) && + !Kind.isCommon()) { const char *Name = getCOFFSectionNameForUniqueGlobal(Kind); unsigned Characteristics = getCOFFSectionFlags(Kind); Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; - MCSymbol *Sym = TM.getSymbol(GV, Mang); - return getContext().getCOFFSection( - Name, Characteristics, Kind, Sym->getName(), - GV->isWeakForLinker() ? 
COFF::IMAGE_COMDAT_SELECT_ANY - : COFF::IMAGE_COMDAT_SELECT_NODUPLICATES); + int Selection = getSelectionForCOFF(GV); + if (!Selection) + Selection = COFF::IMAGE_COMDAT_SELECT_NODUPLICATES; + const GlobalValue *ComdatGV; + if (GV->hasComdat()) + ComdatGV = getComdatGVForCOFF(GV); + else + ComdatGV = GV; + + if (!ComdatGV->hasPrivateLinkage()) { + MCSymbol *Sym = TM.getSymbol(ComdatGV, Mang); + StringRef COMDATSymName = Sym->getName(); + return getContext().getCOFFSection(Name, Characteristics, Kind, + COMDATSymName, Selection); + } } if (Kind.isText()) @@ -868,8 +960,7 @@ emitModuleFlags(MCStreamer &Streamer, static const MCSection *getAssociativeCOFFSection(MCContext &Ctx, const MCSection *Sec, - const MCSymbol *KeySym, - const MCSection *KeySec) { + const MCSymbol *KeySym) { // Return the normal section if we don't have to be associative. if (!KeySym) return Sec; @@ -877,20 +968,19 @@ static const MCSection *getAssociativeCOFFSection(MCContext &Ctx, // Make an associative section with the same name and kind as the normal // section. const MCSectionCOFF *SecCOFF = cast<MCSectionCOFF>(Sec); - const MCSectionCOFF *KeySecCOFF = cast<MCSectionCOFF>(KeySec); unsigned Characteristics = SecCOFF->getCharacteristics() | COFF::IMAGE_SCN_LNK_COMDAT; return Ctx.getCOFFSection(SecCOFF->getSectionName(), Characteristics, SecCOFF->getKind(), KeySym->getName(), - COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE, KeySecCOFF); + COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE); } const MCSection *TargetLoweringObjectFileCOFF::getStaticCtorSection( - unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { - return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym, KeySec); + unsigned Priority, const MCSymbol *KeySym) const { + return getAssociativeCOFFSection(getContext(), StaticCtorSection, KeySym); } const MCSection *TargetLoweringObjectFileCOFF::getStaticDtorSection( - unsigned Priority, const MCSymbol *KeySym, const MCSection *KeySec) const { - return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym, KeySec); + unsigned Priority, const MCSymbol *KeySym) const { + return getAssociativeCOFFSection(getContext(), StaticDtorSection, KeySym); } diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index e52e8af..3961905 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -734,7 +734,7 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) object::RelocToApply R(V.visit(Type, Reloc, 0, SymAddr)); if (V.error()) { SmallString<32> Name; - error_code ec(Reloc.getTypeName(Name)); + std::error_code ec(Reloc.getTypeName(Name)); if (ec) { errs() << "Aaaaaa! Nameless relocation! 
Aaaaaa!\n"; } diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp index 2524adc..fe7e46d 100644 --- a/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARFDebugAranges.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/raw_ostream.h" #include <algorithm> #include <cassert> +#include <set> using namespace llvm; void DWARFDebugAranges::extract(DataExtractor DebugArangesData) { @@ -30,6 +31,7 @@ void DWARFDebugAranges::extract(DataExtractor DebugArangesData) { uint64_t HighPC = Desc.getEndAddress(); appendRange(CUOffset, LowPC, HighPC); } + ParsedCUOffsets.insert(CUOffset); } } @@ -56,69 +58,55 @@ void DWARFDebugAranges::generate(DWARFContext *CTX) { } } - sortAndMinimize(); + construct(); } void DWARFDebugAranges::clear() { + Endpoints.clear(); Aranges.clear(); ParsedCUOffsets.clear(); } void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC) { - if (!Aranges.empty()) { - if (Aranges.back().CUOffset == CUOffset && - Aranges.back().HighPC() == LowPC) { - Aranges.back().setHighPC(HighPC); - return; - } - } - Aranges.push_back(Range(LowPC, HighPC, CUOffset)); -} - -void DWARFDebugAranges::sortAndMinimize() { - const size_t orig_arange_size = Aranges.size(); - // Size of one? If so, no sorting is needed - if (orig_arange_size <= 1) + if (LowPC >= HighPC) return; - // Sort our address range entries - std::stable_sort(Aranges.begin(), Aranges.end()); - - // Most address ranges are contiguous from function to function - // so our new ranges will likely be smaller. We calculate the size - // of the new ranges since although std::vector objects can be resized, - // the will never reduce their allocated block size and free any excesss - // memory, so we might as well start a brand new collection so it is as - // small as possible. - - // First calculate the size of the new minimal arange vector - // so we don't have to do a bunch of re-allocations as we - // copy the new minimal stuff over to the new collection. - size_t minimal_size = 1; - for (size_t i = 1; i < orig_arange_size; ++i) { - if (!Range::SortedOverlapCheck(Aranges[i-1], Aranges[i])) - ++minimal_size; - } + Endpoints.emplace_back(LowPC, CUOffset, true); + Endpoints.emplace_back(HighPC, CUOffset, false); +} - // Else, make a new RangeColl that _only_ contains what we need. - RangeColl minimal_aranges; - minimal_aranges.resize(minimal_size); - uint32_t j = 0; - minimal_aranges[j] = Aranges[0]; - for (size_t i = 1; i < orig_arange_size; ++i) { - if (Range::SortedOverlapCheck(minimal_aranges[j], Aranges[i])) { - minimal_aranges[j].setHighPC(Aranges[i].HighPC()); +void DWARFDebugAranges::construct() { + std::multiset<uint32_t> ValidCUs; // Maintain the set of CUs describing + // a current address range. + std::sort(Endpoints.begin(), Endpoints.end()); + uint64_t PrevAddress = -1ULL; + for (const auto &E : Endpoints) { + if (PrevAddress < E.Address && ValidCUs.size() > 0) { + // If the address range between two endpoints is described by some + // CU, first try to extend the last range in Aranges. If we can't + // do it, start a new range. + if (!Aranges.empty() && Aranges.back().HighPC() == PrevAddress && + ValidCUs.find(Aranges.back().CUOffset) != ValidCUs.end()) { + Aranges.back().setHighPC(E.Address); + } else { + Aranges.emplace_back(PrevAddress, E.Address, *ValidCUs.begin()); + } + } + // Update the set of valid CUs. 
+ if (E.IsRangeStart) { + ValidCUs.insert(E.CUOffset); } else { - // Only increment j if we aren't merging - minimal_aranges[++j] = Aranges[i]; + auto CUPos = ValidCUs.find(E.CUOffset); + assert(CUPos != ValidCUs.end()); + ValidCUs.erase(CUPos); } + PrevAddress = E.Address; } - assert(j+1 == minimal_size); + assert(ValidCUs.empty()); - // Now swap our new minimal aranges into place. The local - // minimal_aranges will then contian the old big collection - // which will get freed. - minimal_aranges.swap(Aranges); + // Endpoints are not needed now. + std::vector<RangeEndpoint> EmptyEndpoints; + EmptyEndpoints.swap(Endpoints); } uint32_t DWARFDebugAranges::findAddress(uint64_t Address) const { diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h index de96d7f..a9f37fe 100644 --- a/lib/DebugInfo/DWARFDebugAranges.h +++ b/lib/DebugInfo/DWARFDebugAranges.h @@ -27,9 +27,9 @@ private: void clear(); void extract(DataExtractor DebugArangesData); - // Use appendRange multiple times and then call sortAndMinimize. + // Call appendRange multiple times and then call construct. void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC); - void sortAndMinimize(); + void construct(); struct Range { explicit Range(uint64_t LowPC = -1ULL, uint64_t HighPC = -1ULL, @@ -47,31 +47,39 @@ private: return LowPC + Length; return -1ULL; } + bool containsAddress(uint64_t Address) const { return LowPC <= Address && Address < HighPC(); } - - bool operator <(const Range &other) const { + bool operator<(const Range &other) const { return LowPC < other.LowPC; } - static bool SortedOverlapCheck(const Range &Left, const Range &Right) { - if (Left.CUOffset != Right.CUOffset) - return false; - return Left.HighPC() >= Right.LowPC; - } - uint64_t LowPC; // Start of address range. uint32_t Length; // End of address range (not including this address). uint32_t CUOffset; // Offset of the compile unit or die. }; + struct RangeEndpoint { + uint64_t Address; + uint32_t CUOffset; + bool IsRangeStart; + + RangeEndpoint(uint64_t Address, uint32_t CUOffset, bool IsRangeStart) + : Address(Address), CUOffset(CUOffset), IsRangeStart(IsRangeStart) {} + + bool operator<(const RangeEndpoint &Other) const { + return Address < Other.Address; + } + }; + + typedef std::vector<Range> RangeColl; typedef RangeColl::const_iterator RangeCollIterator; - typedef DenseSet<uint32_t> ParsedCUOffsetColl; + std::vector<RangeEndpoint> Endpoints; RangeColl Aranges; - ParsedCUOffsetColl ParsedCUOffsets; + DenseSet<uint32_t> ParsedCUOffsets; }; } diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp index b811ed7..2e7a54a 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -210,6 +210,16 @@ uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsSectionOffset( return Result.hasValue() ? 
Result.getValue() : FailValue; } +uint64_t +DWARFDebugInfoEntryMinimal::getRangesBaseAttribute(const DWARFUnit *U, + uint64_t FailValue) const { + uint64_t Result = + getAttributeValueAsSectionOffset(U, DW_AT_ranges_base, -1ULL); + if (Result != -1ULL) + return Result; + return getAttributeValueAsSectionOffset(U, DW_AT_GNU_ranges_base, FailValue); +} + bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFUnit *U, uint64_t &LowPC, uint64_t &HighPC) const { diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h index 916e1ed..cc58eb6 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -106,6 +106,8 @@ public: const uint16_t Attr, uint64_t FailValue) const; + uint64_t getRangesBaseAttribute(const DWARFUnit *U, uint64_t FailValue) const; + /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU. /// Returns true if both attributes are present. bool getLowAndHighPC(const DWARFUnit *U, uint64_t &LowPC, diff --git a/lib/DebugInfo/DWARFUnit.cpp b/lib/DebugInfo/DWARFUnit.cpp index f5f5072..39d0a0f 100644 --- a/lib/DebugInfo/DWARFUnit.cpp +++ b/lib/DebugInfo/DWARFUnit.cpp @@ -226,7 +226,9 @@ size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { AddrOffsetSectionBase = DieArray[0].getAttributeValueAsSectionOffset( this, DW_AT_GNU_addr_base, 0); RangeSectionBase = DieArray[0].getAttributeValueAsSectionOffset( - this, DW_AT_GNU_ranges_base, 0); + this, DW_AT_ranges_base, 0); + // Don't fall back to DW_AT_GNU_ranges_base: it should be ignored for + // skeleton CU DIE, so that DWARF users not aware of it are not broken. } setDIERelations(); @@ -272,7 +274,8 @@ bool DWARFUnit::parseDWO() { } // Share .debug_addr and .debug_ranges section with compile unit in .dwo DWOCU->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase); - DWOCU->setRangesSection(RangeSection, RangeSectionBase); + uint32_t DWORangesBase = DieArray[0].getRangesBaseAttribute(this, 0); + DWOCU->setRangesSection(RangeSection, DWORangesBase); return true; } diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 6766ef1..b0e985d 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -148,8 +148,7 @@ Function *ExecutionEngine::FindFunctionNamed(const char *FnName) { } -void *ExecutionEngineState::RemoveMapping(const MutexGuard &, - const GlobalValue *ToUnmap) { +void *ExecutionEngineState::RemoveMapping(const GlobalValue *ToUnmap) { GlobalAddressMapTy::iterator I = GlobalAddressMap.find(ToUnmap); void *OldVal; @@ -171,14 +170,14 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { DEBUG(dbgs() << "JIT: Map \'" << GV->getName() << "\' to [" << Addr << "]\n";); - void *&CurVal = EEState.getGlobalAddressMap(locked)[GV]; + void *&CurVal = EEState.getGlobalAddressMap()[GV]; assert((!CurVal || !Addr) && "GlobalMapping already established!"); CurVal = Addr; // If we are using the reverse mapping, add it too. 
- if (!EEState.getGlobalAddressReverseMap(locked).empty()) { + if (!EEState.getGlobalAddressReverseMap().empty()) { AssertingVH<const GlobalValue> &V = - EEState.getGlobalAddressReverseMap(locked)[Addr]; + EEState.getGlobalAddressReverseMap()[Addr]; assert((!V || !GV) && "GlobalMapping already established!"); V = GV; } @@ -187,41 +186,41 @@ void ExecutionEngine::addGlobalMapping(const GlobalValue *GV, void *Addr) { void ExecutionEngine::clearAllGlobalMappings() { MutexGuard locked(lock); - EEState.getGlobalAddressMap(locked).clear(); - EEState.getGlobalAddressReverseMap(locked).clear(); + EEState.getGlobalAddressMap().clear(); + EEState.getGlobalAddressReverseMap().clear(); } void ExecutionEngine::clearGlobalMappingsFromModule(Module *M) { MutexGuard locked(lock); for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) - EEState.RemoveMapping(locked, FI); + EEState.RemoveMapping(FI); for (Module::global_iterator GI = M->global_begin(), GE = M->global_end(); GI != GE; ++GI) - EEState.RemoveMapping(locked, GI); + EEState.RemoveMapping(GI); } void *ExecutionEngine::updateGlobalMapping(const GlobalValue *GV, void *Addr) { MutexGuard locked(lock); ExecutionEngineState::GlobalAddressMapTy &Map = - EEState.getGlobalAddressMap(locked); + EEState.getGlobalAddressMap(); // Deleting from the mapping? if (!Addr) - return EEState.RemoveMapping(locked, GV); + return EEState.RemoveMapping(GV); void *&CurVal = Map[GV]; void *OldVal = CurVal; - if (CurVal && !EEState.getGlobalAddressReverseMap(locked).empty()) - EEState.getGlobalAddressReverseMap(locked).erase(CurVal); + if (CurVal && !EEState.getGlobalAddressReverseMap().empty()) + EEState.getGlobalAddressReverseMap().erase(CurVal); CurVal = Addr; // If we are using the reverse mapping, add it too. - if (!EEState.getGlobalAddressReverseMap(locked).empty()) { + if (!EEState.getGlobalAddressReverseMap().empty()) { AssertingVH<const GlobalValue> &V = - EEState.getGlobalAddressReverseMap(locked)[Addr]; + EEState.getGlobalAddressReverseMap()[Addr]; assert((!V || !GV) && "GlobalMapping already established!"); V = GV; } @@ -232,25 +231,25 @@ void *ExecutionEngine::getPointerToGlobalIfAvailable(const GlobalValue *GV) { MutexGuard locked(lock); ExecutionEngineState::GlobalAddressMapTy::iterator I = - EEState.getGlobalAddressMap(locked).find(GV); - return I != EEState.getGlobalAddressMap(locked).end() ? I->second : nullptr; + EEState.getGlobalAddressMap().find(GV); + return I != EEState.getGlobalAddressMap().end() ? I->second : nullptr; } const GlobalValue *ExecutionEngine::getGlobalValueAtAddress(void *Addr) { MutexGuard locked(lock); // If we haven't computed the reverse mapping yet, do so first. - if (EEState.getGlobalAddressReverseMap(locked).empty()) { + if (EEState.getGlobalAddressReverseMap().empty()) { for (ExecutionEngineState::GlobalAddressMapTy::iterator - I = EEState.getGlobalAddressMap(locked).begin(), - E = EEState.getGlobalAddressMap(locked).end(); I != E; ++I) - EEState.getGlobalAddressReverseMap(locked).insert(std::make_pair( + I = EEState.getGlobalAddressMap().begin(), + E = EEState.getGlobalAddressMap().end(); I != E; ++I) + EEState.getGlobalAddressReverseMap().insert(std::make_pair( I->second, I->first)); } std::map<void *, AssertingVH<const GlobalValue> >::iterator I = - EEState.getGlobalAddressReverseMap(locked).find(Addr); - return I != EEState.getGlobalAddressReverseMap(locked).end() ? I->second : nullptr; + EEState.getGlobalAddressReverseMap().find(Addr); + return I != EEState.getGlobalAddressReverseMap().end() ? 
I->second : nullptr; } namespace { @@ -412,13 +411,14 @@ ExecutionEngine *ExecutionEngine::create(Module *M, std::string *ErrorStr, CodeGenOpt::Level OptLevel, bool GVsWithCode) { - EngineBuilder EB = EngineBuilder(M) - .setEngineKind(ForceInterpreter - ? EngineKind::Interpreter - : EngineKind::JIT) - .setErrorStr(ErrorStr) - .setOptLevel(OptLevel) - .setAllocateGVsWithCode(GVsWithCode); + + EngineBuilder EB = + EngineBuilder(M) + .setEngineKind(ForceInterpreter ? EngineKind::Interpreter + : EngineKind::Either) + .setErrorStr(ErrorStr) + .setOptLevel(OptLevel) + .setAllocateGVsWithCode(GVsWithCode); return EB.create(); } @@ -457,6 +457,27 @@ ExecutionEngine *ExecutionEngine::createJIT(Module *M, return ExecutionEngine::JITCtor(M, ErrorStr, JMM, GVsWithCode, TM); } +void EngineBuilder::InitEngine() { + WhichEngine = EngineKind::Either; + ErrorStr = nullptr; + OptLevel = CodeGenOpt::Default; + MCJMM = nullptr; + JMM = nullptr; + Options = TargetOptions(); + AllocateGVsWithCode = false; + RelocModel = Reloc::Default; + CMModel = CodeModel::JITDefault; + UseMCJIT = false; + +// IR module verification is enabled by default in debug builds, and disabled +// by default in release builds. +#ifndef NDEBUG + VerifyModules = true; +#else + VerifyModules = false; +#endif +} + ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { std::unique_ptr<TargetMachine> TheTM(TM); // Take ownership. @@ -536,7 +557,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { return getPointerToFunction(F); MutexGuard locked(lock); - if (void *P = EEState.getGlobalAddressMap(locked)[GV]) + if (void *P = EEState.getGlobalAddressMap()[GV]) return P; // Global variable might have been added since interpreter started. @@ -546,7 +567,7 @@ void *ExecutionEngine::getPointerToGlobal(const GlobalValue *GV) { else llvm_unreachable("Global hasn't had an address allocated yet!"); - return EEState.getGlobalAddressMap(locked)[GV]; + return EEState.getGlobalAddressMap()[GV]; } /// \brief Converts a Constant* into a GenericValue, including handling of diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 9a65fa0..4e22a8b 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -86,7 +86,7 @@ static LineNumberInfo DILineInfoToIntelJITFormat(uintptr_t StartAddress, LineNumberInfo Result; Result.Offset = Address - StartAddress; - Result.LineNumber = Line.getLine(); + Result.LineNumber = Line.Line; return Result; } @@ -233,7 +233,7 @@ void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) { FunctionMessage.line_number_size = 0; FunctionMessage.line_number_table = 0; } else { - SourceFileName = Lines.front().second.getFileName(); + SourceFileName = Lines.front().second.FileName; FunctionMessage.source_file_name = const_cast<char *>(SourceFileName.c_str()); FunctionMessage.line_number_size = LineInfo.size(); FunctionMessage.line_number_table = &*LineInfo.begin(); diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp index c589457..814efcc 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp +++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -34,7 +34,7 @@ extern "C" void LLVMLinkInInterpreter() { } /// ExecutionEngine *Interpreter::create(Module *M, std::string* ErrStr) { // Tell this Module to materialize everything and release the GVMaterializer. 
- if (error_code EC = M->materializeAllPermanently()) { + if (std::error_code EC = M->materializeAllPermanently()) { if (ErrStr) *ErrStr = EC.message(); // We got an error, just return 0 diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index f8b2827..83ec978 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -151,7 +151,7 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, // Add target data MutexGuard locked(lock); - FunctionPassManager &PM = jitstate->getPM(locked); + FunctionPassManager &PM = jitstate->getPM(); M->setDataLayout(TM.getDataLayout()); PM.add(new DataLayoutPass(M)); @@ -184,7 +184,7 @@ void JIT::addModule(Module *M) { jitstate = new JITState(M); - FunctionPassManager &PM = jitstate->getPM(locked); + FunctionPassManager &PM = jitstate->getPM(); M->setDataLayout(TM.getDataLayout()); PM.add(new DataLayoutPass(M)); @@ -216,7 +216,7 @@ bool JIT::removeModule(Module *M) { if (!jitstate && !Modules.empty()) { jitstate = new JITState(Modules[0]); - FunctionPassManager &PM = jitstate->getPM(locked); + FunctionPassManager &PM = jitstate->getPM(); M->setDataLayout(TM.getDataLayout()); PM.add(new DataLayoutPass(M)); @@ -460,41 +460,41 @@ void JIT::runJITOnFunction(Function *F, MachineCodeInfo *MCI) { if (MCI) RegisterJITEventListener(&MCIL); - runJITOnFunctionUnlocked(F, locked); + runJITOnFunctionUnlocked(F); if (MCI) UnregisterJITEventListener(&MCIL); } -void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) { +void JIT::runJITOnFunctionUnlocked(Function *F) { assert(!isAlreadyCodeGenerating && "Error: Recursive compilation detected!"); - jitTheFunction(F, locked); + jitTheFunctionUnlocked(F); // If the function referred to another function that had not yet been // read from bitcode, and we are jitting non-lazily, emit it now. - while (!jitstate->getPendingFunctions(locked).empty()) { - Function *PF = jitstate->getPendingFunctions(locked).back(); - jitstate->getPendingFunctions(locked).pop_back(); + while (!jitstate->getPendingFunctions().empty()) { + Function *PF = jitstate->getPendingFunctions().back(); + jitstate->getPendingFunctions().pop_back(); assert(!PF->hasAvailableExternallyLinkage() && "Externally-defined function should not be in pending list."); - jitTheFunction(PF, locked); + jitTheFunctionUnlocked(PF); // Now that the function has been jitted, ask the JITEmitter to rewrite // the stub with real address of the function. 
- updateFunctionStub(PF); + updateFunctionStubUnlocked(PF); } } -void JIT::jitTheFunction(Function *F, const MutexGuard &locked) { +void JIT::jitTheFunctionUnlocked(Function *F) { isAlreadyCodeGenerating = true; - jitstate->getPM(locked).run(*F); + jitstate->getPM().run(*F); isAlreadyCodeGenerating = false; // clear basic block addresses after this function is done - getBasicBlockAddressMap(locked).clear(); + getBasicBlockAddressMap().clear(); } /// getPointerToFunction - This method is used to get the address of the @@ -526,7 +526,7 @@ void *JIT::getPointerToFunction(Function *F) { return Addr; } - runJITOnFunctionUnlocked(F, locked); + runJITOnFunctionUnlocked(F); void *Addr = getPointerToGlobalIfAvailable(F); assert(Addr && "Code generation didn't add function to GlobalAddress table!"); @@ -537,9 +537,9 @@ void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { MutexGuard locked(lock); BasicBlockAddressMapTy::iterator I = - getBasicBlockAddressMap(locked).find(BB); - if (I == getBasicBlockAddressMap(locked).end()) { - getBasicBlockAddressMap(locked)[BB] = Addr; + getBasicBlockAddressMap().find(BB); + if (I == getBasicBlockAddressMap().end()) { + getBasicBlockAddressMap()[BB] = Addr; } else { // ignore repeats: some BBs can be split into few MBBs? } @@ -547,7 +547,7 @@ void JIT::addPointerToBasicBlock(const BasicBlock *BB, void *Addr) { void JIT::clearPointerToBasicBlock(const BasicBlock *BB) { MutexGuard locked(lock); - getBasicBlockAddressMap(locked).erase(BB); + getBasicBlockAddressMap().erase(BB); } void *JIT::getPointerToBasicBlock(BasicBlock *BB) { @@ -558,8 +558,8 @@ void *JIT::getPointerToBasicBlock(BasicBlock *BB) { MutexGuard locked(lock); BasicBlockAddressMapTy::iterator I = - getBasicBlockAddressMap(locked).find(BB); - if (I != getBasicBlockAddressMap(locked).end()) { + getBasicBlockAddressMap().find(BB); + if (I != getBasicBlockAddressMap().end()) { return I->second; } else { llvm_unreachable("JIT does not have BB address for address-of-label, was" @@ -688,7 +688,7 @@ char* JIT::getMemoryForGV(const GlobalVariable* GV) { void JIT::addPendingFunction(Function *F) { MutexGuard locked(lock); - jitstate->getPendingFunctions(locked).push_back(F); + jitstate->getPendingFunctions().push_back(F); } diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h index d2bd508..69a7c36 100644 --- a/lib/ExecutionEngine/JIT/JIT.h +++ b/lib/ExecutionEngine/JIT/JIT.h @@ -39,12 +39,12 @@ private: public: explicit JITState(Module *M) : PM(M), M(M) {} - FunctionPassManager &getPM(const MutexGuard &L) { + FunctionPassManager &getPM() { return PM; } Module *getModule() const { return M; } - std::vector<AssertingVH<Function> > &getPendingFunctions(const MutexGuard &L){ + std::vector<AssertingVH<Function> > &getPendingFunctions() { return PendingFunctions; } }; @@ -205,7 +205,7 @@ public: void NotifyFreeingMachineCode(void *OldPtr); BasicBlockAddressMapTy & - getBasicBlockAddressMap(const MutexGuard &) { + getBasicBlockAddressMap() { return BasicBlockAddressMap; } @@ -213,9 +213,9 @@ public: private: static JITCodeEmitter *createEmitter(JIT &J, JITMemoryManager *JMM, TargetMachine &tm); - void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked); - void updateFunctionStub(Function *F); - void jitTheFunction(Function *F, const MutexGuard &locked); + void runJITOnFunctionUnlocked(Function *F); + void updateFunctionStubUnlocked(Function *F); + void jitTheFunctionUnlocked(Function *F); protected: diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp 
b/lib/ExecutionEngine/JIT/JITEmitter.cpp index cd7a500..50b8c10 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include "llvm/IR/ValueMap.h" #include "llvm/Support/Debug.h" @@ -120,21 +121,16 @@ namespace { #endif } - FunctionToLazyStubMapTy& getFunctionToLazyStubMap( - const MutexGuard& locked) { - assert(locked.holds(TheJIT->lock)); + FunctionToLazyStubMapTy& getFunctionToLazyStubMap() { return FunctionToLazyStubMap; } - GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap(const MutexGuard& lck) { - assert(lck.holds(TheJIT->lock)); + GlobalToIndirectSymMapTy& getGlobalToIndirectSymMap() { return GlobalToIndirectSymMap; } std::pair<void *, Function *> LookupFunctionFromCallSite( - const MutexGuard &locked, void *CallSite) const { - assert(locked.holds(TheJIT->lock)); - + void *CallSite) const { // The address given to us for the stub may not be exactly right, it // might be a little bit after the stub. As such, use upper_bound to // find it. @@ -146,9 +142,7 @@ namespace { return *I; } - void AddCallSite(const MutexGuard &locked, void *CallSite, Function *F) { - assert(locked.holds(TheJIT->lock)); - + void AddCallSite(void *CallSite, Function *F) { bool Inserted = CallSiteToFunctionMap.insert( std::make_pair(CallSite, F)).second; (void)Inserted; @@ -503,7 +497,7 @@ void *JITResolver::getLazyFunctionStubIfAvailable(Function *F) { MutexGuard locked(TheJIT->lock); // If we already have a stub for this function, recycle it. - return state.getFunctionToLazyStubMap(locked).lookup(F); + return state.getFunctionToLazyStubMap().lookup(F); } /// getFunctionStub - This returns a pointer to a function stub, creating @@ -512,7 +506,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { MutexGuard locked(TheJIT->lock); // If we already have a lazy stub for this function, recycle it. - void *&Stub = state.getFunctionToLazyStubMap(locked)[F]; + void *&Stub = state.getFunctionToLazyStubMap()[F]; if (Stub) return Stub; // Call the lazy resolver function if we are JIT'ing lazily. Otherwise we @@ -554,7 +548,7 @@ void *JITResolver::getLazyFunctionStub(Function *F) { // Finally, keep track of the stub-to-Function mapping so that the // JITCompilerFn knows which function to compile! - state.AddCallSite(locked, Stub, F); + state.AddCallSite(Stub, F); } else if (!Actual) { // If we are JIT'ing non-lazily but need to call a function that does not // exist yet, add it to the JIT's work list so that we can fill in the @@ -573,7 +567,7 @@ void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) { MutexGuard locked(TheJIT->lock); // If we already have a stub for this global variable, recycle it. - void *&IndirectSym = state.getGlobalToIndirectSymMap(locked)[GV]; + void *&IndirectSym = state.getGlobalToIndirectSymMap()[GV]; if (IndirectSym) return IndirectSym; // Otherwise, codegen a new indirect symbol. @@ -633,7 +627,7 @@ void *JITResolver::JITCompilerFn(void *Stub) { // The address given to us for the stub may not be exactly right, it might // be a little bit after the stub. As such, use upper_bound to find it. 
std::pair<void*, Function*> I = - JR->state.LookupFunctionFromCallSite(locked, Stub); + JR->state.LookupFunctionFromCallSite(Stub); F = I.second; ActualPtr = I.first; } @@ -684,13 +678,23 @@ void *JITResolver::JITCompilerFn(void *Stub) { //===----------------------------------------------------------------------===// // JITEmitter code. // + +static GlobalObject *getSimpleAliasee(Constant *C) { + C = C->stripPointerCasts(); + return dyn_cast<GlobalObject>(C); +} + void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference, bool MayNeedFarStub) { if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) return TheJIT->getOrEmitGlobalVariable(GV); - if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) - return TheJIT->getPointerToGlobal(GA->getAliasee()); + if (GlobalAlias *GA = dyn_cast<GlobalAlias>(V)) { + // We can only handle simple cases. + if (GlobalValue *GV = getSimpleAliasee(GA->getAliasee())) + return TheJIT->getPointerToGlobal(GV); + return nullptr; + } // If we have already compiled the function, return a pointer to its body. Function *F = cast<Function>(V); @@ -1225,7 +1229,7 @@ void *JIT::getPointerToFunctionOrStub(Function *F) { return JE->getJITResolver().getLazyFunctionStub(F); } -void JIT::updateFunctionStub(Function *F) { +void JIT::updateFunctionStubUnlocked(Function *F) { // Get the empty stub we generated earlier. JITEmitter *JE = static_cast<JITEmitter*>(getCodeEmitter()); void *Stub = JE->getJITResolver().getLazyFunctionStub(F); diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 42cb4ea..e9ba96a 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -305,9 +305,13 @@ uint64_t MCJIT::getSymbolAddress(const std::string &Name, // Look for our symbols in each Archive object::Archive::child_iterator ChildIt = A->findSym(Name); if (ChildIt != A->child_end()) { - std::unique_ptr<object::Binary> ChildBin; // FIXME: Support nested archives? - if (!ChildIt->getAsBinary(ChildBin) && ChildBin->isObject()) { + ErrorOr<std::unique_ptr<object::Binary>> ChildBinOrErr = + ChildIt->getAsBinary(); + if (ChildBinOrErr.getError()) + continue; + std::unique_ptr<object::Binary> ChildBin = std::move(ChildBinOrErr.get()); + if (ChildBin->isObject()) { std::unique_ptr<object::ObjectFile> OF( static_cast<object::ObjectFile *>(ChildBin.release())); // This causes the object file to be loaded. diff --git a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp index 9ceaa90..5986084 100644 --- a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp +++ b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp @@ -71,7 +71,7 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, // // FIXME: Initialize the Near member for each memory group to avoid // interleaving. - error_code ec; + std::error_code ec; sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(RequiredSize, &MemGroup.Near, sys::Memory::MF_READ | @@ -105,7 +105,7 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) { // FIXME: Should in-progress permissions be reverted if an error occurs? - error_code ec; + std::error_code ec; // Don't allow free memory blocks to be used after setting protection flags. 
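The error_code to std::error_code migration running through these SectionMemoryManager hunks keeps the familiar checking pattern; only the success value changes, because std::error_code has no success() factory. A small self-contained sketch (protectOne is a hypothetical helper, not part of the patch):

#include <system_error>

std::error_code protectOne(); // hypothetical step that may fail

std::error_code protectAll() {
  if (std::error_code ec = protectOne()) // contextual bool: true on failure
    return ec;                           // propagate the first error
  return std::error_code();              // default-constructed value means
                                         // success, replacing error_code::success()
}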
CodeMem.FreeMem.clear(); @@ -143,19 +143,20 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) return false; } -error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup, - unsigned Permissions) { +std::error_code +SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup, + unsigned Permissions) { for (int i = 0, e = MemGroup.AllocatedMem.size(); i != e; ++i) { - error_code ec; - ec = sys::Memory::protectMappedMemory(MemGroup.AllocatedMem[i], - Permissions); - if (ec) { - return ec; - } + std::error_code ec; + ec = + sys::Memory::protectMappedMemory(MemGroup.AllocatedMem[i], Permissions); + if (ec) { + return ec; + } } - return error_code::success(); + return std::error_code(); } void SectionMemoryManager::invalidateInstructionCache() { diff --git a/lib/ExecutionEngine/RuntimeDyld/Android.mk b/lib/ExecutionEngine/RuntimeDyld/Android.mk index e98e80a..eb2e438 100644 --- a/lib/ExecutionEngine/RuntimeDyld/Android.mk +++ b/lib/ExecutionEngine/RuntimeDyld/Android.mk @@ -7,6 +7,7 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := \ GDBRegistrar.cpp \ RuntimeDyld.cpp \ + RuntimeDyldChecker.cpp \ RuntimeDyldELF.cpp \ RuntimeDyldMachO.cpp diff --git a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt index cbf7cf1..eb1a60b 100644 --- a/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt +++ b/lib/ExecutionEngine/RuntimeDyld/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMRuntimeDyld GDBRegistrar.cpp RuntimeDyld.cpp + RuntimeDyldChecker.cpp RuntimeDyldELF.cpp RuntimeDyldMachO.cpp ) diff --git a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt index 97dc861..8bd5621 100644 --- a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt +++ b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = RuntimeDyld parent = ExecutionEngine -required_libraries = Object Support +required_libraries = MC Object Support diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h index 4917b93..c3a2182 100644 --- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h +++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h @@ -48,7 +48,8 @@ public: { // FIXME: error checking? createObjectFile returns an ErrorOr<ObjectFile*> // and should probably be checked for failure. 
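The FIXME above describes the same ErrorOr<T> idiom the MCJIT archive hunk adopts: check getError() first, then move the wrapped value out. A sketch under that assumption (Child is a placeholder for an archive child iterator):

ErrorOr<std::unique_ptr<object::Binary>> BinOrErr = Child->getAsBinary();
if (std::error_code EC = BinOrErr.getError())
  return EC; // or skip this child, as MCJIT::getSymbolAddress does
std::unique_ptr<object::Binary> Bin = std::move(BinOrErr.get());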
- ObjFile.reset(object::ObjectFile::createObjectFile(Buffer->getMemBuffer()).get()); + std::unique_ptr<MemoryBuffer> Buf(Buffer->getMemBuffer()); + ObjFile.reset(object::ObjectFile::createObjectFile(Buf).get()); } ObjectImageCommon(std::unique_ptr<object::ObjectFile> Input) : ObjectImage(nullptr), ObjFile(std::move(Input)) {} diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index c1eb0fd..9dfd167 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -73,9 +73,9 @@ void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress, llvm_unreachable("Attempting to remap address of unknown section!"); } -static error_code getOffset(const SymbolRef &Sym, uint64_t &Result) { +static std::error_code getOffset(const SymbolRef &Sym, uint64_t &Result) { uint64_t Address; - if (error_code EC = Sym.getAddress(Address)) + if (std::error_code EC = Sym.getAddress(Address)) return EC; if (Address == UnknownAddressOrSize) { @@ -85,7 +85,7 @@ static error_code getOffset(const SymbolRef &Sym, uint64_t &Result) { const ObjectFile *Obj = Sym.getObject(); section_iterator SecI(Obj->section_begin()); - if (error_code EC = Sym.getSection(SecI)) + if (std::error_code EC = Sym.getSection(SecI)) return EC; if (SecI == Obj->section_end()) { @@ -94,7 +94,7 @@ static error_code getOffset(const SymbolRef &Sym, uint64_t &Result) { } uint64_t SectionAddress; - if (error_code EC = SecI->getAddress(SectionAddress)) + if (std::error_code EC = SecI->getAddress(SectionAddress)) return EC; Result = Address - SectionAddress; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp new file mode 100644 index 0000000..190bbbf --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldChecker.cpp @@ -0,0 +1,641 @@ +//===--- RuntimeDyldChecker.cpp - RuntimeDyld tester framework --*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ExecutionEngine/RuntimeDyldChecker.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/StringRefMemoryObject.h" +#include "RuntimeDyldImpl.h" +#include <cctype> +#include <memory> + +#define DEBUG_TYPE "rtdyld" + +using namespace llvm; + +namespace llvm { + + // Helper class that implements the language evaluated by RuntimeDyldChecker. + class RuntimeDyldCheckerExprEval { + public: + + RuntimeDyldCheckerExprEval(const RuntimeDyldChecker &Checker, + llvm::raw_ostream &ErrStream) + : Checker(Checker), ErrStream(ErrStream) {} + + bool evaluate(StringRef Expr) const { + // Expect equality expression of the form 'LHS = RHS'. + Expr = Expr.trim(); + size_t EQIdx = Expr.find('='); + + // Evaluate LHS. + StringRef LHSExpr = Expr.substr(0, EQIdx).rtrim(); + StringRef RemainingExpr; + EvalResult LHSResult; + std::tie(LHSResult, RemainingExpr) = + evalComplexExpr(evalSimpleExpr(LHSExpr)); + if (LHSResult.hasError()) + return handleError(Expr, LHSResult); + if (RemainingExpr != "") + return handleError(Expr, unexpectedToken(RemainingExpr, LHSExpr, "")); + + // Evaluate RHS. 
+ StringRef RHSExpr = Expr.substr(EQIdx + 1).ltrim(); + EvalResult RHSResult; + std::tie(RHSResult, RemainingExpr) = + evalComplexExpr(evalSimpleExpr(RHSExpr)); + if (RHSResult.hasError()) + return handleError(Expr, RHSResult); + if (RemainingExpr != "") + return handleError(Expr, unexpectedToken(RemainingExpr, RHSExpr, "")); + + if (LHSResult.getValue() != RHSResult.getValue()) { + ErrStream << "Expression '" << Expr << "' is false: " + << format("0x%lx", LHSResult.getValue()) << " != " + << format("0x%lx", RHSResult.getValue()) << "\n"; + return false; + } + return true; + } + + private: + const RuntimeDyldChecker &Checker; + llvm::raw_ostream &ErrStream; + + enum class BinOpToken : unsigned { Invalid, Add, Sub, BitwiseAnd, + BitwiseOr, ShiftLeft, ShiftRight }; + + class EvalResult { + public: + EvalResult() + : Value(0), ErrorMsg("") {} + EvalResult(uint64_t Value) + : Value(Value), ErrorMsg("") {} + EvalResult(std::string ErrorMsg) + : Value(0), ErrorMsg(ErrorMsg) {} + uint64_t getValue() const { return Value; } + bool hasError() const { return ErrorMsg != ""; } + const std::string& getErrorMsg() const { return ErrorMsg; } + private: + uint64_t Value; + std::string ErrorMsg; + }; + + StringRef getTokenForError(StringRef Expr) const { + if (Expr.empty()) + return ""; + + StringRef Token, Remaining; + if (isalpha(Expr[0])) + std::tie(Token, Remaining) = parseSymbol(Expr); + else if (isdigit(Expr[0])) + std::tie(Token, Remaining) = parseNumberString(Expr); + else { + unsigned TokLen = 1; + if (Expr.startswith("<<") || Expr.startswith(">>")) + TokLen = 2; + Token = Expr.substr(0, TokLen); + } + return Token; + } + + EvalResult unexpectedToken(StringRef TokenStart, + StringRef SubExpr, + StringRef ErrText) const { + std::string ErrorMsg("Encountered unexpected token '"); + ErrorMsg += getTokenForError(TokenStart); + if (SubExpr != "") { + ErrorMsg += "' while parsing subexpression '"; + ErrorMsg += SubExpr; + } + ErrorMsg += "'"; + if (ErrText != "") { + ErrorMsg += " "; + ErrorMsg += ErrText; + } + return EvalResult(std::move(ErrorMsg)); + } + + bool handleError(StringRef Expr, const EvalResult &R) const { + assert(R.hasError() && "Not an error result."); + ErrStream << "Error evaluating expression '" << Expr << "': " + << R.getErrorMsg() << "\n"; + return false; + } + + std::pair<BinOpToken, StringRef> parseBinOpToken(StringRef Expr) const { + if (Expr.empty()) + return std::make_pair(BinOpToken::Invalid, ""); + + // Handle the two 2-character tokens. + if (Expr.startswith("<<")) + return std::make_pair(BinOpToken::ShiftLeft, + Expr.substr(2).ltrim()); + if (Expr.startswith(">>")) + return std::make_pair(BinOpToken::ShiftRight, + Expr.substr(2).ltrim()); + + // Handle one-character tokens. 
+ BinOpToken Op; + switch (Expr[0]) { + default: return std::make_pair(BinOpToken::Invalid, Expr); + case '+': Op = BinOpToken::Add; break; + case '-': Op = BinOpToken::Sub; break; + case '&': Op = BinOpToken::BitwiseAnd; break; + case '|': Op = BinOpToken::BitwiseOr; break; + } + + return std::make_pair(Op, Expr.substr(1).ltrim()); + } + + EvalResult computeBinOpResult(BinOpToken Op, const EvalResult &LHSResult, + const EvalResult &RHSResult) const { + switch (Op) { + default: llvm_unreachable("Tried to evaluate unrecognized operation."); + case BinOpToken::Add: + return EvalResult(LHSResult.getValue() + RHSResult.getValue()); + case BinOpToken::Sub: + return EvalResult(LHSResult.getValue() - RHSResult.getValue()); + case BinOpToken::BitwiseAnd: + return EvalResult(LHSResult.getValue() & RHSResult.getValue()); + case BinOpToken::BitwiseOr: + return EvalResult(LHSResult.getValue() | RHSResult.getValue()); + case BinOpToken::ShiftLeft: + return EvalResult(LHSResult.getValue() << RHSResult.getValue()); + case BinOpToken::ShiftRight: + return EvalResult(LHSResult.getValue() >> RHSResult.getValue()); + } + } + + // Parse a symbol and return a (string, string) pair representing the symbol + // name and expression remaining to be parsed. + std::pair<StringRef, StringRef> parseSymbol(StringRef Expr) const { + size_t FirstNonSymbol = + Expr.find_first_not_of("0123456789" + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + ":_"); + return std::make_pair(Expr.substr(0, FirstNonSymbol), + Expr.substr(FirstNonSymbol).ltrim()); + } + + // Evaluate a call to decode_operand. Decode the instruction operand at the + // given symbol and get the value of the requested operand. + // Returns an error if the instruction cannot be decoded, or the requested + // operand is not an immediate. + // On success, returns a pair containing the value of the operand, plus + // the expression remaining to be evaluated. + std::pair<EvalResult, StringRef> evalDecodeOperand(StringRef Expr) const { + if (!Expr.startswith("(")) + return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), ""); + StringRef RemainingExpr = Expr.substr(1).ltrim(); + StringRef Symbol; + std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr); + + if (!Checker.checkSymbolIsValidForLoad(Symbol)) + return std::make_pair(EvalResult(("Cannot decode unknown symbol '" + + Symbol + "'").str()), + ""); + + if (!RemainingExpr.startswith(",")) + return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr, + "expected ','"), + ""); + RemainingExpr = RemainingExpr.substr(1).ltrim(); + + EvalResult OpIdxExpr; + std::tie(OpIdxExpr, RemainingExpr) = evalNumberExpr(RemainingExpr); + if (OpIdxExpr.hasError()) + return std::make_pair(OpIdxExpr, ""); + + if (!RemainingExpr.startswith(")")) + return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr, + "expected ')'"), + ""); + RemainingExpr = RemainingExpr.substr(1).ltrim(); + + MCInst Inst; + uint64_t Size; + if (!decodeInst(Symbol, Inst, Size)) + return std::make_pair(EvalResult(("Couldn't decode instruction at '" + + Symbol + "'").str()), + ""); + + unsigned OpIdx = OpIdxExpr.getValue(); + if (OpIdx >= Inst.getNumOperands()) { + std::string ErrMsg; + raw_string_ostream ErrMsgStream(ErrMsg); + ErrMsgStream << "Invalid operand index '" << format("%i", OpIdx) + << "' for instruction '" << Symbol + << "'. 
Instruction has only " + << format("%i", Inst.getNumOperands()) << " operands."; + return std::make_pair(EvalResult(ErrMsgStream.str()), ""); + } + + const MCOperand &Op = Inst.getOperand(OpIdx); + if (!Op.isImm()) { + std::string ErrMsg; + raw_string_ostream ErrMsgStream(ErrMsg); + ErrMsgStream << "Operand '" << format("%i", OpIdx) + << "' of instruction '" << Symbol + << "' is not an immediate.\nInstruction is:\n "; + Inst.dump_pretty(ErrMsgStream, + Checker.Disassembler->getContext().getAsmInfo(), + Checker.InstPrinter); + + return std::make_pair(EvalResult(ErrMsgStream.str()), ""); + } + + return std::make_pair(EvalResult(Op.getImm()), RemainingExpr); + } + + // Evaluate a call to next_pc. Decode the instruction at the given + // symbol and return the following program counter. + // Returns an error if the instruction cannot be decoded. + // On success, returns a pair containing the next PC, plus the + // expression remaining to be evaluated. + std::pair<EvalResult, StringRef> evalNextPC(StringRef Expr) const { + if (!Expr.startswith("(")) + return std::make_pair(unexpectedToken(Expr, Expr, "expected '('"), ""); + StringRef RemainingExpr = Expr.substr(1).ltrim(); + StringRef Symbol; + std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr); + + if (!Checker.checkSymbolIsValidForLoad(Symbol)) + return std::make_pair(EvalResult(("Cannot decode unknown symbol '" + + Symbol + "'").str()), + ""); + + if (!RemainingExpr.startswith(")")) + return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr, + "expected ')'"), + ""); + RemainingExpr = RemainingExpr.substr(1).ltrim(); + + MCInst Inst; + uint64_t Size; + if (!decodeInst(Symbol, Inst, Size)) + return std::make_pair(EvalResult(("Couldn't decode instruction at '" + + Symbol + "'").str()), + ""); + uint64_t NextPC = Checker.getSymbolAddress(Symbol) + Size; + + return std::make_pair(EvalResult(NextPC), RemainingExpr); + } + + // Evaluate an identifier expr, which may be a symbol, or a call to + // one of the builtin functions: decode_operand or next_pc. + // Return the result, plus the expression remaining to be parsed. + std::pair<EvalResult, StringRef> evalIdentifierExpr(StringRef Expr) const { + StringRef Symbol; + StringRef RemainingExpr; + std::tie(Symbol, RemainingExpr) = parseSymbol(Expr); + + // Check for builtin function calls. + if (Symbol == "decode_operand") + return evalDecodeOperand(RemainingExpr); + else if (Symbol == "next_pc") + return evalNextPC(RemainingExpr); + + // Looks like a plain symbol reference. + return std::make_pair(EvalResult(Checker.getSymbolAddress(Symbol)), + RemainingExpr); + } + + // Parse a number (hexadecimal or decimal) and return a (string, string) + // pair representing the number and the expression remaining to be parsed. + std::pair<StringRef, StringRef> parseNumberString(StringRef Expr) const { + size_t FirstNonDigit = StringRef::npos; + if (Expr.startswith("0x")) { + FirstNonDigit = Expr.find_first_not_of("0123456789abcdefABCDEF", 2); + if (FirstNonDigit == StringRef::npos) + FirstNonDigit = Expr.size(); + } else { + FirstNonDigit = Expr.find_first_not_of("0123456789"); + if (FirstNonDigit == StringRef::npos) + FirstNonDigit = Expr.size(); + } + return std::make_pair(Expr.substr(0, FirstNonDigit), + Expr.substr(FirstNonDigit)); + } + + // Evaluate a constant numeric expression (hexadecimal or decimal) and + // return a pair containing the result, and the expression remaining to be + // evaluated. 
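Before the numeric parser below, it helps to see what these pieces accept end to end: a rule is an equality between two sub-expressions, each of which may be a symbol, a builtin call, a load, a parenthesized expression, a binary expression, or a bit-slice of any of these. Hypothetical rules of the kind a test file might carry (the 'rtdyld-check' prefix is only an example; the actual prefix is whatever the caller passes to checkAllRulesInBuffer):

# rtdyld-check: decode_operand(foo, 2) = 0x2a
# rtdyld-check: next_pc(foo) = foo + 4
# rtdyld-check: (*{8}(ptr_sym + 8))[31:0] = target_sym[31:0]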
+ std::pair<EvalResult, StringRef> evalNumberExpr(StringRef Expr) const { + StringRef ValueStr; + StringRef RemainingExpr; + std::tie(ValueStr, RemainingExpr) = parseNumberString(Expr); + + if (ValueStr.empty() || !isdigit(ValueStr[0])) + return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr, + "expected number"), + ""); + uint64_t Value; + ValueStr.getAsInteger(0, Value); + return std::make_pair(EvalResult(Value), RemainingExpr); + } + + // Evaluate an expression of the form "(<expr>)" and return a pair + // containing the result of evaluating <expr>, plus the expression + // remaining to be parsed. + std::pair<EvalResult, StringRef> evalParensExpr(StringRef Expr) const { + assert(Expr.startswith("(") && "Not a parenthesized expression"); + EvalResult SubExprResult; + StringRef RemainingExpr; + std::tie(SubExprResult, RemainingExpr) = + evalComplexExpr(evalSimpleExpr(Expr.substr(1).ltrim())); + if (SubExprResult.hasError()) + return std::make_pair(SubExprResult, ""); + if (!RemainingExpr.startswith(")")) + return std::make_pair(unexpectedToken(RemainingExpr, Expr, + "expected ')'"), + ""); + RemainingExpr = RemainingExpr.substr(1).ltrim(); + return std::make_pair(SubExprResult, RemainingExpr); + } + + // Evaluate an expression in one of the following forms: + // *{<number>}<symbol> + // *{<number>}(<symbol> + <number>) + // *{<number>}(<symbol> - <number>) + // Return a pair containing the result, plus the expression remaining to be + // parsed. + std::pair<EvalResult, StringRef> evalLoadExpr(StringRef Expr) const { + assert(Expr.startswith("*") && "Not a load expression"); + StringRef RemainingExpr = Expr.substr(1).ltrim(); + // Parse read size. + if (!RemainingExpr.startswith("{")) + return std::make_pair(EvalResult("Expected '{' following '*'."), ""); + RemainingExpr = RemainingExpr.substr(1).ltrim(); + EvalResult ReadSizeExpr; + std::tie(ReadSizeExpr, RemainingExpr) = evalNumberExpr(RemainingExpr); + if (ReadSizeExpr.hasError()) + return std::make_pair(ReadSizeExpr, RemainingExpr); + uint64_t ReadSize = ReadSizeExpr.getValue(); + if (ReadSize < 1 || ReadSize > 8) + return std::make_pair(EvalResult("Invalid size for dereference."), ""); + if (!RemainingExpr.startswith("}")) + return std::make_pair(EvalResult("Missing '}' for dereference."), ""); + RemainingExpr = RemainingExpr.substr(1).ltrim(); + + // Check for '(symbol +/- constant)' form. + bool SymbolPlusConstant = false; + if (RemainingExpr.startswith("(")) { + SymbolPlusConstant = true; + RemainingExpr = RemainingExpr.substr(1).ltrim(); + } + + // Read symbol. + StringRef Symbol; + std::tie(Symbol, RemainingExpr) = parseSymbol(RemainingExpr); + + if (!Checker.checkSymbolIsValidForLoad(Symbol)) + return std::make_pair(EvalResult(("Cannot dereference unknown symbol '" + + Symbol + "'").str()), + ""); + + // Set up default offset. + int64_t Offset = 0; + + // Handle "+/- constant)" portion if necessary. + if (SymbolPlusConstant) { + char OpChar = RemainingExpr[0]; + if (OpChar != '+' && OpChar != '-') + return std::make_pair(EvalResult("Invalid operator in load address."), + ""); + RemainingExpr = RemainingExpr.substr(1).ltrim(); + + EvalResult OffsetExpr; + std::tie(OffsetExpr, RemainingExpr) = evalNumberExpr(RemainingExpr); + + Offset = (OpChar == '+') ? 
+ OffsetExpr.getValue() : -1 * OffsetExpr.getValue(); + + if (!RemainingExpr.startswith(")")) + return std::make_pair(EvalResult("Missing ')' in load address."), + ""); + + RemainingExpr = RemainingExpr.substr(1).ltrim(); + } + + return std::make_pair( + EvalResult(Checker.readMemoryAtSymbol(Symbol, Offset, ReadSize)), + RemainingExpr); + } + + // Evaluate a "simple" expression. This is any expression that _isn't_ an + // un-parenthesized binary expression. + // + // "Simple" expressions can be optionally bit-sliced. See evalSlicedExpr. + // + // Returns a pair containing the result of the evaluation, plus the + // expression remaining to be parsed. + std::pair<EvalResult, StringRef> evalSimpleExpr(StringRef Expr) const { + EvalResult SubExprResult; + StringRef RemainingExpr; + + if (Expr.empty()) + return std::make_pair(EvalResult("Unexpected end of expression"), ""); + + if (Expr[0] == '(') + std::tie(SubExprResult, RemainingExpr) = evalParensExpr(Expr); + else if (Expr[0] == '*') + std::tie(SubExprResult, RemainingExpr) = evalLoadExpr(Expr); + else if (isalpha(Expr[0])) + std::tie(SubExprResult, RemainingExpr) = evalIdentifierExpr(Expr); + else if (isdigit(Expr[0])) + std::tie(SubExprResult, RemainingExpr) = evalNumberExpr(Expr); + + if (SubExprResult.hasError()) + return std::make_pair(SubExprResult, RemainingExpr); + + // Evaluate bit-slice if present. + if (RemainingExpr.startswith("[")) + std::tie(SubExprResult, RemainingExpr) = + evalSliceExpr(std::make_pair(SubExprResult, RemainingExpr)); + + return std::make_pair(SubExprResult, RemainingExpr); + } + + // Evaluate a bit-slice of an expression. + // A bit-slice has the form "<expr>[high:low]". The result of evaluating a + // slice is the bits between high and low (inclusive) in the original + // expression, right shifted so that the "low" bit is in position 0 in the + // result. + // Returns a pair containing the result of the slice operation, plus the + // expression remaining to be parsed. + std::pair<EvalResult, StringRef> evalSliceExpr( + std::pair<EvalResult, StringRef> Ctx) const{ + EvalResult SubExprResult; + StringRef RemainingExpr; + std::tie(SubExprResult, RemainingExpr) = Ctx; + + assert(RemainingExpr.startswith("[") && "Not a slice expr."); + RemainingExpr = RemainingExpr.substr(1).ltrim(); + + EvalResult HighBitExpr; + std::tie(HighBitExpr, RemainingExpr) = evalNumberExpr(RemainingExpr); + + if (HighBitExpr.hasError()) + return std::make_pair(HighBitExpr, RemainingExpr); + + if (!RemainingExpr.startswith(":")) + return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr, + "expected ':'"), + ""); + RemainingExpr = RemainingExpr.substr(1).ltrim(); + + EvalResult LowBitExpr; + std::tie(LowBitExpr, RemainingExpr) = evalNumberExpr(RemainingExpr); + + if (LowBitExpr.hasError()) + return std::make_pair(LowBitExpr, RemainingExpr); + + if (!RemainingExpr.startswith("]")) + return std::make_pair(unexpectedToken(RemainingExpr, RemainingExpr, + "expected ']'"), + ""); + RemainingExpr = RemainingExpr.substr(1).ltrim(); + + unsigned HighBit = HighBitExpr.getValue(); + unsigned LowBit = LowBitExpr.getValue(); + uint64_t Mask = ((uint64_t)1 << (HighBit - LowBit + 1)) - 1; + uint64_t SlicedValue = (SubExprResult.getValue() >> LowBit) & Mask; + return std::make_pair(EvalResult(SlicedValue), RemainingExpr); + } + + // Evaluate a "complex" expression. + // Takes an already evaluated subexpression and checks for the presence of a + // binary operator, computing the result of the binary operation if one is + // found. 
Used to make arithmetic expressions left-associative. + // Returns a pair containing the ultimate result of evaluating the + // expression, plus the expression remaining to be evaluated. + std::pair<EvalResult, StringRef> evalComplexExpr( + std::pair<EvalResult, StringRef> Ctx) const { + EvalResult LHSResult; + StringRef RemainingExpr; + std::tie(LHSResult, RemainingExpr) = Ctx; + + // If there was an error, or there's nothing left to evaluate, return the + // result. + if (LHSResult.hasError() || RemainingExpr == "") + return std::make_pair(LHSResult, RemainingExpr); + + // Otherwise check if this is a binary expression. + BinOpToken BinOp; + std::tie(BinOp, RemainingExpr) = parseBinOpToken(RemainingExpr); + + // If this isn't a recognized expression just return. + if (BinOp == BinOpToken::Invalid) + return std::make_pair(LHSResult, RemainingExpr); + + // This is a recognized bin-op. Evaluate the RHS, then evaluate the binop. + EvalResult RHSResult; + std::tie(RHSResult, RemainingExpr) = evalSimpleExpr(RemainingExpr); + + // If there was an error evaluating the RHS, return it. + if (RHSResult.hasError()) + return std::make_pair(RHSResult, RemainingExpr); + + // This is a binary expression - evaluate and try to continue as a + // complex expr. + EvalResult ThisResult(computeBinOpResult(BinOp, LHSResult, RHSResult)); + + return evalComplexExpr(std::make_pair(ThisResult, RemainingExpr)); + } + + bool decodeInst(StringRef Symbol, MCInst &Inst, uint64_t &Size) const { + MCDisassembler *Dis = Checker.Disassembler; + StringRef SectionMem = Checker.getSubsectionStartingAt(Symbol); + StringRefMemoryObject SectionBytes(SectionMem, 0); + + MCDisassembler::DecodeStatus S = + Dis->getInstruction(Inst, Size, SectionBytes, 0, nulls(), nulls()); + + return (S == MCDisassembler::Success); + } + + }; + +} + +bool RuntimeDyldChecker::check(StringRef CheckExpr) const { + CheckExpr = CheckExpr.trim(); + DEBUG(llvm::dbgs() << "RuntimeDyldChecker: Checking '" << CheckExpr + << "'...\n"); + RuntimeDyldCheckerExprEval P(*this, ErrStream); + bool Result = P.evaluate(CheckExpr); + (void)Result; + DEBUG(llvm::dbgs() << "RuntimeDyldChecker: '" << CheckExpr << "' " + << (Result ? "passed" : "FAILED") << ".\n"); + return Result; +} + +bool RuntimeDyldChecker::checkAllRulesInBuffer(StringRef RulePrefix, + MemoryBuffer* MemBuf) const { + bool DidAllTestsPass = true; + unsigned NumRules = 0; + + const char *LineStart = MemBuf->getBufferStart(); + + // Eat whitespace. + while (LineStart != MemBuf->getBufferEnd() && + std::isspace(*LineStart)) + ++LineStart; + + while (LineStart != MemBuf->getBufferEnd() && *LineStart != '\0') { + const char *LineEnd = LineStart; + while (LineEnd != MemBuf->getBufferEnd() && + *LineEnd != '\r' && *LineEnd != '\n') + ++LineEnd; + + StringRef Line(LineStart, LineEnd - LineStart); + if (Line.startswith(RulePrefix)) { + DidAllTestsPass &= check(Line.substr(RulePrefix.size())); + ++NumRules; + } + + // Eat whitespace. 
+ LineStart = LineEnd; + while (LineStart != MemBuf->getBufferEnd() && + std::isspace(*LineStart)) + ++LineStart; + } + return DidAllTestsPass && (NumRules != 0); +} + +bool RuntimeDyldChecker::checkSymbolIsValidForLoad(StringRef Symbol) const { + return RTDyld.getSymbolAddress(Symbol) != nullptr; +} + +uint64_t RuntimeDyldChecker::getSymbolAddress(StringRef Symbol) const { + return RTDyld.getAnySymbolRemoteAddress(Symbol); +} + +uint64_t RuntimeDyldChecker::readMemoryAtSymbol(StringRef Symbol, + int64_t Offset, + unsigned Size) const { + uint8_t *Src = RTDyld.getSymbolAddress(Symbol); + uint64_t Result = 0; + memcpy(&Result, Src + Offset, Size); + return Result; +} + +StringRef RuntimeDyldChecker::getSubsectionStartingAt(StringRef Name) const { + RuntimeDyldImpl::SymbolTableMap::const_iterator pos = + RTDyld.GlobalSymbolTable.find(Name); + if (pos == RTDyld.GlobalSymbolTable.end()) + return StringRef(); + RuntimeDyldImpl::SymbolLoc Loc = pos->second; + uint8_t *SectionAddr = RTDyld.getSectionAddress(Loc.first); + return StringRef(reinterpret_cast<const char*>(SectionAddr) + Loc.second, + RTDyld.Sections[Loc.first].Size - Loc.second); +} diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 6ba24b9..80e489c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -32,7 +32,7 @@ using namespace llvm::object; namespace { -static inline error_code check(error_code Err) { +static inline std::error_code check(std::error_code Err) { if (Err) { report_fatal_error(Err.message()); } @@ -55,9 +55,9 @@ template <class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> { public: DyldELFObject(std::unique_ptr<ObjectFile> UnderlyingFile, - MemoryBuffer *Wrapper, error_code &ec); + std::unique_ptr<MemoryBuffer> Wrapper, std::error_code &ec); - DyldELFObject(MemoryBuffer *Wrapper, error_code &ec); + DyldELFObject(std::unique_ptr<MemoryBuffer> Wrapper, std::error_code &ec); void updateSectionAddress(const SectionRef &Sec, uint64_t Addr); void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr); @@ -109,15 +109,17 @@ public: // actual memory. Ultimately, the Binary parent class will take ownership of // this MemoryBuffer object but not the underlying memory. 
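Throughout this hunk the wrapper buffer moves from a raw MemoryBuffer* to std::unique_ptr<MemoryBuffer>, so the ownership rule stated in the comment above is now visible in the types. A sketch of the call side, with Data standing in for the object's contents:

std::error_code EC;
std::unique_ptr<MemoryBuffer> Wrapper(
    MemoryBuffer::getMemBuffer(Data, "", /*RequiresNullTerminator=*/false));
auto Obj =
    llvm::make_unique<DyldELFObject<ELFType<support::little, 2, false>>>(
        std::move(Wrapper), EC); // the object owns the wrapper, but the
                                 // underlying memory stays with the caller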
template <class ELFT> -DyldELFObject<ELFT>::DyldELFObject(MemoryBuffer *Wrapper, error_code &ec) - : ELFObjectFile<ELFT>(Wrapper, ec) { +DyldELFObject<ELFT>::DyldELFObject(std::unique_ptr<MemoryBuffer> Wrapper, + std::error_code &EC) + : ELFObjectFile<ELFT>(std::move(Wrapper), EC) { this->isDyldELFObject = true; } template <class ELFT> DyldELFObject<ELFT>::DyldELFObject(std::unique_ptr<ObjectFile> UnderlyingFile, - MemoryBuffer *Wrapper, error_code &ec) - : ELFObjectFile<ELFT>(Wrapper, ec), + std::unique_ptr<MemoryBuffer> Wrapper, + std::error_code &EC) + : ELFObjectFile<ELFT>(std::move(Wrapper), EC), UnderlyingFile(std::move(UnderlyingFile)) { this->isDyldELFObject = true; } @@ -182,30 +184,30 @@ RuntimeDyldELF::createObjectImageFromFile(std::unique_ptr<object::ObjectFile> Ob if (!ObjFile) return nullptr; - error_code ec; - MemoryBuffer *Buffer = - MemoryBuffer::getMemBuffer(ObjFile->getData(), "", false); + std::error_code ec; + std::unique_ptr<MemoryBuffer> Buffer( + MemoryBuffer::getMemBuffer(ObjFile->getData(), "", false)); if (ObjFile->getBytesInAddress() == 4 && ObjFile->isLittleEndian()) { auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::little, 2, false>>>( - std::move(ObjFile), Buffer, ec); + std::move(ObjFile), std::move(Buffer), ec); return new ELFObjectImage<ELFType<support::little, 2, false>>( nullptr, std::move(Obj)); } else if (ObjFile->getBytesInAddress() == 4 && !ObjFile->isLittleEndian()) { auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::big, 2, false>>>( - std::move(ObjFile), Buffer, ec); + std::move(ObjFile), std::move(Buffer), ec); return new ELFObjectImage<ELFType<support::big, 2, false>>(nullptr, std::move(Obj)); } else if (ObjFile->getBytesInAddress() == 8 && !ObjFile->isLittleEndian()) { auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::big, 2, true>>>( - std::move(ObjFile), Buffer, ec); + std::move(ObjFile), std::move(Buffer), ec); return new ELFObjectImage<ELFType<support::big, 2, true>>(nullptr, std::move(Obj)); } else if (ObjFile->getBytesInAddress() == 8 && ObjFile->isLittleEndian()) { auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::little, 2, true>>>( - std::move(ObjFile), Buffer, ec); + std::move(ObjFile), std::move(Buffer), ec); return new ELFObjectImage<ELFType<support::little, 2, true>>( nullptr, std::move(Obj)); } else @@ -218,31 +220,33 @@ ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) { std::pair<unsigned char, unsigned char> Ident = std::make_pair((uint8_t)Buffer->getBufferStart()[ELF::EI_CLASS], (uint8_t)Buffer->getBufferStart()[ELF::EI_DATA]); - error_code ec; + std::error_code ec; + + std::unique_ptr<MemoryBuffer> Buf(Buffer->getMemBuffer()); if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) { auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::little, 4, false>>>( - Buffer->getMemBuffer(), ec); + std::move(Buf), ec); return new ELFObjectImage<ELFType<support::little, 4, false>>( Buffer, std::move(Obj)); } else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) { auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::big, 4, false>>>( - Buffer->getMemBuffer(), ec); + std::move(Buf), ec); return new ELFObjectImage<ELFType<support::big, 4, false>>(Buffer, std::move(Obj)); } else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) { auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::big, 8, true>>>( - Buffer->getMemBuffer(), ec); + std::move(Buf), ec); return new ELFObjectImage<ELFType<support::big, 
8, true>>(Buffer, std::move(Obj)); } else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) { auto Obj = llvm::make_unique<DyldELFObject<ELFType<support::little, 8, true>>>( - Buffer->getMemBuffer(), ec); + std::move(Buf), ec); return new ELFObjectImage<ELFType<support::little, 8, true>>(Buffer, std::move(Obj)); } else llvm_unreachable("Unexpected ELF format"); @@ -612,30 +616,38 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, } } -// Return the .TOC. section address to R_PPC64_TOC relocations. -uint64_t RuntimeDyldELF::findPPC64TOC() const { +// Return the .TOC. section and offset. +void RuntimeDyldELF::findPPC64TOCSection(ObjectImage &Obj, + ObjSectionToIDMap &LocalSections, + RelocationValueRef &Rel) { + // Set a default SectionID in case we do not find a TOC section below. + // This may happen for references to the TOC base (sym@toc, .odp + // relocation) without a .toc directive. In this case just use the + // first section (which is usually the .odp) since the code won't + // reference the .toc base directly. + Rel.SymbolName = NULL; + Rel.SectionID = 0; + // The TOC consists of sections .got, .toc, .tocbss, .plt in that // order. The TOC starts where the first of these sections starts. - SectionList::const_iterator it = Sections.begin(); - SectionList::const_iterator ite = Sections.end(); - for (; it != ite; ++it) { - if (it->Name == ".got" || it->Name == ".toc" || it->Name == ".tocbss" || - it->Name == ".plt") + for (section_iterator si = Obj.begin_sections(), se = Obj.end_sections(); + si != se; ++si) { + + StringRef SectionName; + check(si->getName(SectionName)); + + if (SectionName == ".got" + || SectionName == ".toc" + || SectionName == ".tocbss" + || SectionName == ".plt") { + Rel.SectionID = findOrEmitSection(Obj, *si, false, LocalSections); break; + } } - if (it == ite) { - // This may happen for - // * references to TOC base base (sym@toc, .odp relocation) without - // a .toc directive. - // In this case just use the first section (which is usually - // the .odp) since the code won't reference the .toc base - // directly. - it = Sections.begin(); - } - assert(it != ite); + // Per the ppc64-elf-linux ABI, the TOC base is the TOC value plus 0x8000 + // thus permitting a full 64 Kbytes segment. - return it->LoadAddress + 0x8000; + Rel.Addend = 0x8000; } // Returns the sections and offset associated with the ODP entry referenced @@ -702,24 +714,37 @@ void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj, llvm_unreachable("Attempting to get address of ODP entry!"); } -// Relocation masks following the #lo(value), #hi(value), #higher(value), -// and #highest(value) macros defined in section 4.5.1. Relocation Types -// in PPC-elf64abi document. -// +// Relocation masks following the #lo(value), #hi(value), #ha(value), +// #higher(value), #highera(value), #highest(value), and #highesta(value) +// macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi +// document. 
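The +0x8000 in the @ha-style helpers below compensates for the sign extension of the low 16 bits when an address is rebuilt from a high/low pair. A worked case for value = 0x12348000:

#ha(0x12348000) = ((0x12348000 + 0x8000) >> 16) & 0xffff = 0x1235
#lo(0x12348000) = 0x8000, which an addi/ld sign-extends to -0x8000
reconstruction: (0x1235 << 16) + (-0x8000) = 0x12348000 (correct)
with plain #hi = 0x1234 instead: (0x1234 << 16) + (-0x8000) = 0x12338000 (off by 0x10000)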
+ static inline uint16_t applyPPClo(uint64_t value) { return value & 0xffff; } static inline uint16_t applyPPChi(uint64_t value) { return (value >> 16) & 0xffff; } +static inline uint16_t applyPPCha (uint64_t value) { + return ((value + 0x8000) >> 16) & 0xffff; +} + static inline uint16_t applyPPChigher(uint64_t value) { return (value >> 32) & 0xffff; } +static inline uint16_t applyPPChighera (uint64_t value) { + return ((value + 0x8000) >> 32) & 0xffff; +} + static inline uint16_t applyPPChighest(uint64_t value) { return (value >> 48) & 0xffff; } +static inline uint16_t applyPPChighesta (uint64_t value) { + return ((value + 0x8000) >> 48) & 0xffff; +} + void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, int64_t Addend) { @@ -728,24 +753,57 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, default: llvm_unreachable("Relocation type not implemented yet!"); break; + case ELF::R_PPC64_ADDR16: + writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); + break; + case ELF::R_PPC64_ADDR16_DS: + writeInt16BE(LocalAddress, applyPPClo(Value + Addend) & ~3); + break; case ELF::R_PPC64_ADDR16_LO: writeInt16BE(LocalAddress, applyPPClo(Value + Addend)); break; + case ELF::R_PPC64_ADDR16_LO_DS: + writeInt16BE(LocalAddress, applyPPClo(Value + Addend) & ~3); + break; case ELF::R_PPC64_ADDR16_HI: writeInt16BE(LocalAddress, applyPPChi(Value + Addend)); break; + case ELF::R_PPC64_ADDR16_HA: + writeInt16BE(LocalAddress, applyPPCha(Value + Addend)); + break; case ELF::R_PPC64_ADDR16_HIGHER: writeInt16BE(LocalAddress, applyPPChigher(Value + Addend)); break; + case ELF::R_PPC64_ADDR16_HIGHERA: + writeInt16BE(LocalAddress, applyPPChighera(Value + Addend)); + break; case ELF::R_PPC64_ADDR16_HIGHEST: writeInt16BE(LocalAddress, applyPPChighest(Value + Addend)); break; + case ELF::R_PPC64_ADDR16_HIGHESTA: + writeInt16BE(LocalAddress, applyPPChighesta(Value + Addend)); + break; case ELF::R_PPC64_ADDR14: { assert(((Value + Addend) & 3) == 0); // Preserve the AA/LK bits in the branch instruction uint8_t aalk = *(LocalAddress + 3); writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc)); } break; + case ELF::R_PPC64_REL16_LO: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t Delta = Value - FinalAddress + Addend; + writeInt16BE(LocalAddress, applyPPClo(Delta)); + } break; + case ELF::R_PPC64_REL16_HI: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t Delta = Value - FinalAddress + Addend; + writeInt16BE(LocalAddress, applyPPChi(Delta)); + } break; + case ELF::R_PPC64_REL16_HA: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t Delta = Value - FinalAddress + Addend; + writeInt16BE(LocalAddress, applyPPCha(Delta)); + } break; case ELF::R_PPC64_ADDR32: { int32_t Result = static_cast<int32_t>(Value + Addend); if (SignExtend32<32>(Result) != Result) @@ -775,19 +833,6 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, case ELF::R_PPC64_ADDR64: writeInt64BE(LocalAddress, Value + Addend); break; - case ELF::R_PPC64_TOC: - writeInt64BE(LocalAddress, findPPC64TOC()); - break; - case ELF::R_PPC64_TOC16: { - uint64_t TOCStart = findPPC64TOC(); - Value = applyPPClo((Value + Addend) - TOCStart); - writeInt16BE(LocalAddress, applyPPClo(Value)); - } break; - case ELF::R_PPC64_TOC16_DS: { - uint64_t TOCStart = findPPC64TOC(); - Value = ((Value + Addend) - TOCStart); - writeInt16BE(LocalAddress, applyPPClo(Value)); - } break; } } @@ -1139,14 
+1184,20 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( ELF::R_PPC64_ADDR64, Value.Addend); // Generates the 64-bits address loads as exemplified in section - // 4.5.1 in PPC64 ELF ABI. - RelocationEntry REhst(SectionID, StubTargetAddr - Section.Address + 2, + // 4.5.1 in PPC64 ELF ABI. Note that the relocations need to + // apply to the low part of the instructions, so we have to update + // the offset according to the target endianness. + uint64_t StubRelocOffset = StubTargetAddr - Section.Address; + if (!IsTargetLittleEndian) + StubRelocOffset += 2; + + RelocationEntry REhst(SectionID, StubRelocOffset + 0, ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend); - RelocationEntry REhr(SectionID, StubTargetAddr - Section.Address + 6, + RelocationEntry REhr(SectionID, StubRelocOffset + 4, ELF::R_PPC64_ADDR16_HIGHER, Value.Addend); - RelocationEntry REh(SectionID, StubTargetAddr - Section.Address + 14, + RelocationEntry REh(SectionID, StubRelocOffset + 12, ELF::R_PPC64_ADDR16_HI, Value.Addend); - RelocationEntry REl(SectionID, StubTargetAddr - Section.Address + 18, + RelocationEntry REl(SectionID, StubRelocOffset + 16, ELF::R_PPC64_ADDR16_LO, Value.Addend); if (Value.SymbolName) { @@ -1170,12 +1221,52 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( // Restore the TOC for external calls writeInt32BE(Target + 4, 0xE8410028); // ld r2,40(r1) } + } else if (RelType == ELF::R_PPC64_TOC16 || + RelType == ELF::R_PPC64_TOC16_DS || + RelType == ELF::R_PPC64_TOC16_LO || + RelType == ELF::R_PPC64_TOC16_LO_DS || + RelType == ELF::R_PPC64_TOC16_HI || + RelType == ELF::R_PPC64_TOC16_HA) { + // These relocations are supposed to subtract the TOC address from + // the final value. This does not fit cleanly into the RuntimeDyld + // scheme, since there may be *two* sections involved in determining + // the relocation value (the section of the symbol referred to by the + // relocation, and the TOC section associated with the current module). + // + // Fortunately, these relocations are currently only ever generated + // referring to symbols that themselves reside in the TOC, which means + // that the two sections are actually the same. Thus they cancel out + // and we can immediately resolve the relocation right now. + switch (RelType) { + case ELF::R_PPC64_TOC16: RelType = ELF::R_PPC64_ADDR16; break; + case ELF::R_PPC64_TOC16_DS: RelType = ELF::R_PPC64_ADDR16_DS; break; + case ELF::R_PPC64_TOC16_LO: RelType = ELF::R_PPC64_ADDR16_LO; break; + case ELF::R_PPC64_TOC16_LO_DS: RelType = ELF::R_PPC64_ADDR16_LO_DS; break; + case ELF::R_PPC64_TOC16_HI: RelType = ELF::R_PPC64_ADDR16_HI; break; + case ELF::R_PPC64_TOC16_HA: RelType = ELF::R_PPC64_ADDR16_HA; break; + default: llvm_unreachable("Wrong relocation type."); + } + + RelocationValueRef TOCValue; + findPPC64TOCSection(Obj, ObjSectionToID, TOCValue); + if (Value.SymbolName || Value.SectionID != TOCValue.SectionID) + llvm_unreachable("Unsupported TOC relocation."); + Value.Addend -= TOCValue.Addend; + resolveRelocation(Sections[SectionID], Offset, Value.Addend, RelType, 0); } else { + // There are two ways to refer to the TOC address directly: either + // via an ELF::R_PPC64_TOC relocation (where both symbol and addend are + // ignored), or via any relocation that refers to the magic ".TOC." + // symbols (in which case the addend is respected). 
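For the TOC16 cases above, the cancellation works out as plain section-relative arithmetic. A sketch of one resolution, assuming a symbol at offset 0x20 into the TOC section and an addend of 0:

TOC base = SectionAddr + 0x8000          (Rel.Addend set by findPPC64TOCSection)
value    = (SectionAddr + 0x20 + 0) - (SectionAddr + 0x8000) = -0x7fe0
R_PPC64_TOC16_DS then stores -0x7fe0, the signed 16-bit offset from r2
that a TOC-relative ld expects.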
+ if (RelType == ELF::R_PPC64_TOC) { + RelType = ELF::R_PPC64_ADDR64; + findPPC64TOCSection(Obj, ObjSectionToID, Value); + } else if (TargetName == ".TOC.") { + findPPC64TOCSection(Obj, ObjSectionToID, Value); + Value.Addend += Addend; + } + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); - // Extra check to avoid relocation againt empty symbols (usually - // the R_PPC64_TOC). - if (SymType != SymbolRef::ST_Unknown && TargetName.empty()) - Value.SymbolName = nullptr; if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index a526073..59fdfbe 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -20,10 +20,9 @@ using namespace llvm; namespace llvm { - namespace { // Helper for extensive error checking in debug builds. -error_code Check(error_code Err) { +std::error_code Check(std::error_code Err) { if (Err) { report_fatal_error(Err.message()); } @@ -83,7 +82,8 @@ class RuntimeDyldELF : public RuntimeDyldImpl { return 1; } - uint64_t findPPC64TOC() const; + void findPPC64TOCSection(ObjectImage &Obj, ObjSectionToIDMap &LocalSections, + RelocationValueRef &Rel); void findOPDEntrySection(ObjectImage &Obj, ObjSectionToIDMap &LocalSections, RelocationValueRef &Rel); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 412cf20..0336cba 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -20,6 +20,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/ObjectImage.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/ExecutionEngine/RuntimeDyldChecker.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -28,8 +29,8 @@ #include "llvm/Support/Mutex.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" #include <map> +#include <system_error> using namespace llvm; using namespace llvm::object; @@ -158,6 +159,15 @@ public: }; class RuntimeDyldImpl { + friend class RuntimeDyldChecker; +private: + + uint64_t getAnySymbolRemoteAddress(StringRef Symbol) { + if (uint64_t InternalSymbolAddr = getSymbolLoadAddress(Symbol)) + return InternalSymbolAddr; + return MemMgr->getSymbolAddress(Symbol); + } + protected: // The MemoryManager to load objects into. 
RTDyldMemoryManager *MemMgr; @@ -245,14 +255,14 @@ protected: void writeInt16BE(uint8_t *Addr, uint16_t Value) { if (IsTargetLittleEndian) - Value = sys::SwapByteOrder(Value); + sys::swapByteOrder(Value); *Addr = (Value >> 8) & 0xFF; *(Addr + 1) = Value & 0xFF; } void writeInt32BE(uint8_t *Addr, uint32_t Value) { if (IsTargetLittleEndian) - Value = sys::SwapByteOrder(Value); + sys::swapByteOrder(Value); *Addr = (Value >> 24) & 0xFF; *(Addr + 1) = (Value >> 16) & 0xFF; *(Addr + 2) = (Value >> 8) & 0xFF; @@ -261,7 +271,7 @@ protected: void writeInt64BE(uint8_t *Addr, uint64_t Value) { if (IsTargetLittleEndian) - Value = sys::SwapByteOrder(Value); + sys::swapByteOrder(Value); *Addr = (Value >> 56) & 0xFF; *(Addr + 1) = (Value >> 48) & 0xFF; *(Addr + 2) = (Value >> 40) & 0xFF; @@ -339,7 +349,8 @@ protected: public: RuntimeDyldImpl(RTDyldMemoryManager *mm) - : MemMgr(mm), ProcessAllSections(false), HasError(false) {} + : MemMgr(mm), ProcessAllSections(false), HasError(false) { + } virtual ~RuntimeDyldImpl(); @@ -349,7 +360,7 @@ public: ObjectImage *loadObject(ObjectImage *InputObject); - void *getSymbolAddress(StringRef Name) { + uint8_t* getSymbolAddress(StringRef Name) { // FIXME: Just look up as a function for now. Overly simple of course. // Work in progress. SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 2b425fb..4eb516c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -14,6 +14,8 @@ #include "RuntimeDyldMachO.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" +#include "ObjectImageCommon.h" +#include "JITRegistrar.h" using namespace llvm; using namespace llvm::object; @@ -21,6 +23,126 @@ using namespace llvm::object; namespace llvm { +class MachOObjectImage : public ObjectImageCommon { +private: + typedef SmallVector<uint64_t, 1> SectionAddrList; + SectionAddrList OldSectionAddrList; + +protected: + bool is64; + bool Registered; + +private: + void initOldAddress() { + MachOObjectFile *objf = static_cast<MachOObjectFile *>(ObjFile.get()); + // Unfortunately we need to do this, since there's information encoded + // in the original addr of the section that we could not otherwise + // recover. The reason for this is that symbols do not actually store + // their file offset, but only their vmaddr. This means that in order + // to locate the symbol correctly in the object file, we need to know + // where the original start of the section was (including any padding, + // etc). 
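The writeInt*BE hunks above also track an API change: the old sys::SwapByteOrder returned the swapped value, while sys::swapByteOrder takes its argument by reference and swaps in place, so the result no longer needs to be assigned back. A one-line sketch:

uint32_t V = 0x11223344;
sys::swapByteOrder(V); // V is now 0x44332211; no assignment needed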
+ for (section_iterator i = objf->section_begin(), e = objf->section_end(); + i != e; ++i) { + uint64_t Addr; + i->getAddress(Addr); + OldSectionAddrList[i->getRawDataRefImpl().d.a] = Addr; + } + } + +public: + MachOObjectImage(ObjectBuffer *Input, bool is64) + : ObjectImageCommon(Input), + OldSectionAddrList(ObjFile->section_end()->getRawDataRefImpl().d.a, 0), + is64(is64), Registered(false) { + initOldAddress(); + } + + MachOObjectImage(std::unique_ptr<object::ObjectFile> Input, bool is64) + : ObjectImageCommon(std::move(Input)), + OldSectionAddrList(ObjFile->section_end()->getRawDataRefImpl().d.a, 0), + is64(is64), Registered(false) { + initOldAddress(); + } + + virtual ~MachOObjectImage() { + if (Registered) + deregisterWithDebugger(); + } + + // Subclasses can override these methods to update the image with loaded + // addresses for sections and common symbols + virtual void updateSectionAddress(const SectionRef &Sec, uint64_t Addr) { + MachOObjectFile *objf = static_cast<MachOObjectFile *>(ObjFile.get()); + char *data = + const_cast<char *>(objf->getSectionPointer(Sec.getRawDataRefImpl())); + + uint64_t oldAddr = OldSectionAddrList[Sec.getRawDataRefImpl().d.a]; + + if (is64) { + ((MachO::section_64 *)data)->addr = Addr; + } else { + ((MachO::section *)data)->addr = Addr; + } + + for (symbol_iterator i = objf->symbol_begin(), e = objf->symbol_end(); + i != e; ++i) { + section_iterator symSec(objf->section_end()); + (*i).getSection(symSec); + if (*symSec == Sec) { + uint64_t symAddr; + (*i).getAddress(symAddr); + updateSymbolAddress(*i, symAddr + Addr - oldAddr); + } + } + } + + uint64_t getOldSectionAddr(const SectionRef &Sec) const { + return OldSectionAddrList[Sec.getRawDataRefImpl().d.a]; + } + + virtual void updateSymbolAddress(const SymbolRef &Sym, uint64_t Addr) { + char *data = const_cast<char *>( + reinterpret_cast<const char *>(Sym.getRawDataRefImpl().p)); + if (is64) + ((MachO::nlist_64 *)data)->n_value = Addr; + else + ((MachO::nlist *)data)->n_value = Addr; + } + + virtual void registerWithDebugger() { + JITRegistrar::getGDBRegistrar().registerObject(*Buffer); + Registered = true; + } + + virtual void deregisterWithDebugger() { + JITRegistrar::getGDBRegistrar().deregisterObject(*Buffer); + } +}; + +ObjectImage *RuntimeDyldMachO::createObjectImage(ObjectBuffer *Buffer) { + uint32_t magic = *((const uint32_t *)Buffer->getBufferStart()); + bool is64 = (magic == MachO::MH_MAGIC_64); + assert((magic == MachO::MH_MAGIC_64 || magic == MachO::MH_MAGIC) && + "Unrecognized Macho Magic"); + return new MachOObjectImage(Buffer, is64); +} + +ObjectImage *RuntimeDyldMachO::createObjectImageFromFile( + std::unique_ptr<object::ObjectFile> ObjFile) { + if (!ObjFile) + return nullptr; + + MemoryBuffer *Buffer = + MemoryBuffer::getMemBuffer(ObjFile->getData(), "", false); + + uint32_t magic = *((const uint32_t *)Buffer->getBufferStart()); + bool is64 = (magic == MachO::MH_MAGIC_64); + assert((magic == MachO::MH_MAGIC_64 || magic == MachO::MH_MAGIC) && + "Unrecognized Macho Magic"); + return new MachOObjectImage(std::move(ObjFile), is64); +} + static unsigned char *processFDE(unsigned char *P, intptr_t DeltaForText, intptr_t DeltaForEH) { DEBUG(dbgs() << "Processing FDE: Delta for text: " << DeltaForText @@ -533,6 +655,7 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef( ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, StubMap &Stubs) { const ObjectFile *OF = Obj.getObjectFile(); + const MachOObjectImage &MachOObj = *static_cast<MachOObjectImage *>(&Obj); const 
MachOObjectFile *MachO = static_cast<const MachOObjectFile *>(OF); MachO::any_relocation_info RE = MachO->getRelocation(RelI->getRawDataRefImpl()); @@ -609,8 +732,8 @@ relocation_iterator RuntimeDyldMachO::processRelocationRef( bool IsCode = false; Sec.isText(IsCode); Value.SectionID = findOrEmitSection(Obj, Sec, IsCode, ObjSectionToID); - uint64_t Addr; - Sec.getAddress(Addr); + uint64_t Addr = MachOObj.getOldSectionAddr(Sec); + DEBUG(dbgs() << "\nAddr: " << Addr << "\nAddend: " << Addend); Value.Addend = Addend - Addr; if (IsPCRel) Value.Addend += Offset + NumBytes; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index 060eb8c..35f0720 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -105,14 +105,9 @@ public: void finalizeLoad(ObjectImage &ObjImg, ObjSectionToIDMap &SectionMap) override; - static ObjectImage *createObjectImage(ObjectBuffer *InputBuffer) { - return new ObjectImageCommon(InputBuffer); - } - + static ObjectImage *createObjectImage(ObjectBuffer *Buffer); static ObjectImage * - createObjectImageFromFile(std::unique_ptr<object::ObjectFile> InputObject) { - return new ObjectImageCommon(std::move(InputObject)); - } + createObjectImageFromFile(std::unique_ptr<object::ObjectFile> InputObject); }; } // end namespace llvm diff --git a/lib/IR/Android.mk b/lib/IR/Android.mk index 2ffc86c..c51b241 100644 --- a/lib/IR/Android.mk +++ b/lib/IR/Android.mk @@ -5,6 +5,7 @@ vmcore_SRC_FILES := \ Attributes.cpp \ AutoUpgrade.cpp \ BasicBlock.cpp \ + Comdat.cpp \ ConstantFold.cpp \ ConstantRange.cpp \ Constants.cpp \ diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 0fef0d0..a7499bc 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -106,6 +106,7 @@ static void PrintEscapedString(StringRef Name, raw_ostream &Out) { enum PrefixType { GlobalPrefix, + ComdatPrefix, LabelPrefix, LocalPrefix, NoPrefix @@ -119,6 +120,7 @@ static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) { switch (Prefix) { case NoPrefix: break; case GlobalPrefix: OS << '@'; break; + case ComdatPrefix: OS << '$'; break; case LabelPrefix: break; case LocalPrefix: OS << '%'; break; } @@ -1165,8 +1167,15 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, } void AssemblyWriter::init() { - if (TheModule) - TypePrinter.incorporateTypes(*TheModule); + if (!TheModule) + return; + TypePrinter.incorporateTypes(*TheModule); + for (const Function &F : *TheModule) + if (const Comdat *C = F.getComdat()) + Comdats.insert(C); + for (const GlobalVariable &GV : TheModule->globals()) + if (const Comdat *C = GV.getComdat()) + Comdats.insert(C); } @@ -1308,6 +1317,15 @@ void AssemblyWriter::printModule(const Module *M) { printTypeIdentities(); + // Output all comdats. + if (!Comdats.empty()) + Out << '\n'; + for (const Comdat *C : Comdats) { + printComdat(C); + if (C != Comdats.back()) + Out << '\n'; + } + // Output all globals. 
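With printComdat and the comdat suffixes added here, module output gains $-prefixed top-level entities. The writer now produces IR of roughly this shape (illustrative names):

$shared = comdat any

@g = global i32 0, comdat $shared

define void @f() comdat $shared {
  ret void
}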
if (!M->global_empty()) Out << '\n'; for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); @@ -1451,10 +1469,11 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { PrintVisibility(GV->getVisibility(), Out); PrintDLLStorageClass(GV->getDLLStorageClass(), Out); PrintThreadLocalModel(GV->getThreadLocalMode(), Out); + if (GV->hasUnnamedAddr()) + Out << "unnamed_addr "; if (unsigned AddressSpace = GV->getType()->getAddressSpace()) Out << "addrspace(" << AddressSpace << ") "; - if (GV->hasUnnamedAddr()) Out << "unnamed_addr "; if (GV->isExternallyInitialized()) Out << "externally_initialized "; Out << (GV->isConstant() ? "constant " : "global "); TypePrinter.print(GV->getType()->getElementType(), Out); @@ -1469,6 +1488,10 @@ void AssemblyWriter::printGlobal(const GlobalVariable *GV) { PrintEscapedString(GV->getSection(), Out); Out << '"'; } + if (GV->hasComdat()) { + Out << ", comdat "; + PrintLLVMName(Out, GV->getComdat()->getName(), ComdatPrefix); + } if (GV->getAlignment()) Out << ", align " << GV->getAlignment(); @@ -1488,21 +1511,18 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { } PrintVisibility(GA->getVisibility(), Out); PrintDLLStorageClass(GA->getDLLStorageClass(), Out); + PrintThreadLocalModel(GA->getThreadLocalMode(), Out); + if (GA->hasUnnamedAddr()) + Out << "unnamed_addr "; Out << "alias "; PrintLinkage(GA->getLinkage(), Out); - PointerType *Ty = GA->getType(); const Constant *Aliasee = GA->getAliasee(); - if (!Aliasee || Ty != Aliasee->getType()) { - if (unsigned AddressSpace = Ty->getAddressSpace()) - Out << "addrspace(" << AddressSpace << ") "; - TypePrinter.print(Ty->getElementType(), Out); - Out << ", "; - } if (!Aliasee) { + TypePrinter.print(GA->getType(), Out); Out << " <<NULL ALIASEE>>"; } else { writeOperand(Aliasee, !isa<ConstantExpr>(Aliasee)); @@ -1512,6 +1532,10 @@ void AssemblyWriter::printAlias(const GlobalAlias *GA) { Out << '\n'; } +void AssemblyWriter::printComdat(const Comdat *C) { + C->print(Out); +} + void AssemblyWriter::printTypeIdentities() { if (TypePrinter.NumberedTypes.empty() && TypePrinter.NamedTypes.empty()) @@ -1649,6 +1673,10 @@ void AssemblyWriter::printFunction(const Function *F) { PrintEscapedString(F->getSection(), Out); Out << '"'; } + if (F->hasComdat()) { + Out << " comdat "; + PrintLLVMName(Out, F->getComdat()->getName(), ComdatPrefix); + } if (F->getAlignment()) Out << " align " << F->getAlignment(); if (F->hasGC()) @@ -1788,6 +1816,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) { (isa<StoreInst>(I) && cast<StoreInst>(I).isAtomic())) Out << " atomic"; + if (isa<AtomicCmpXchgInst>(I) && cast<AtomicCmpXchgInst>(I).isWeak()) + Out << " weak"; + // If this is a volatile operation, print out the volatile marker. 
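// The new "weak" keyword is reached through the usual builder path; a minimal
// sketch (Ptr, Expected and Desired assumed in scope; note the instruction's
// result becoming a {T, i1} pair per the Instructions.cpp change further down):
//
//   AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
//       Ptr, Expected, Desired, SequentiallyConsistent, Monotonic);
//   CXI->setWeak(true);                          // prints as "cmpxchg weak ..."
//   Value *Loaded = Builder.CreateExtractValue(CXI, 0);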
if ((isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) || (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) || @@ -2157,11 +2188,32 @@ void NamedMDNode::print(raw_ostream &ROS) const { W.printNamedMDNode(this); } -void Type::print(raw_ostream &OS) const { - if (!this) { - OS << "<null Type>"; - return; +void Comdat::print(raw_ostream &ROS) const { + PrintLLVMName(ROS, getName(), ComdatPrefix); + ROS << " = comdat "; + + switch (getSelectionKind()) { + case Comdat::Any: + ROS << "any"; + break; + case Comdat::ExactMatch: + ROS << "exactmatch"; + break; + case Comdat::Largest: + ROS << "largest"; + break; + case Comdat::NoDuplicates: + ROS << "noduplicates"; + break; + case Comdat::SameSize: + ROS << "samesize"; + break; } + + ROS << '\n'; +} + +void Type::print(raw_ostream &OS) const { TypePrinting TP; TP.print(const_cast<Type*>(this), OS); @@ -2174,10 +2226,6 @@ void Type::print(raw_ostream &OS) const { } void Value::print(raw_ostream &ROS) const { - if (!this) { - ROS << "printing a <null> value\n"; - return; - } formatted_raw_ostream OS(ROS); if (const Instruction *I = dyn_cast<Instruction>(this)) { const Function *F = I->getParent() ? I->getParent()->getParent() : nullptr; @@ -2248,5 +2296,8 @@ void Type::dump() const { print(dbgs()); } // Module::dump() - Allow printing of Modules from the debugger. void Module::dump() const { print(dbgs(), nullptr); } +// \brief Allow printing of Comdats from the debugger. +void Comdat::dump() const { print(dbgs()); } + // NamedMDNode::dump() - Allow printing of NamedMDNodes from the debugger. void NamedMDNode::dump() const { print(dbgs()); } diff --git a/lib/IR/AsmWriter.h b/lib/IR/AsmWriter.h index b4ce6de..aef9c8a 100644 --- a/lib/IR/AsmWriter.h +++ b/lib/IR/AsmWriter.h @@ -16,6 +16,7 @@ #define LLVM_IR_ASSEMBLYWRITER_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/TypeFinder.h" @@ -26,6 +27,7 @@ namespace llvm { class BasicBlock; class Function; class GlobalValue; +class Comdat; class Module; class NamedMDNode; class Value; @@ -70,6 +72,7 @@ private: SlotTracker &Machine; TypePrinting TypePrinter; AssemblyAnnotationWriter *AnnotationWriter; + SetVector<const Comdat *> Comdats; public: /// Construct an AssemblyWriter with an external SlotTracker @@ -101,6 +104,7 @@ public: void printTypeIdentities(); void printGlobal(const GlobalVariable *GV); void printAlias(const GlobalAlias *GV); + void printComdat(const Comdat *C); void printFunction(const Function *F); void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx); void printBasicBlock(const BasicBlock *BB); diff --git a/lib/IR/Attributes.cpp b/lib/IR/Attributes.cpp index a9074bb..48a2ce8 100644 --- a/lib/IR/Attributes.cpp +++ b/lib/IR/Attributes.cpp @@ -173,6 +173,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "inlinehint"; if (hasAttribute(Attribute::InReg)) return "inreg"; + if (hasAttribute(Attribute::JumpTable)) + return "jumptable"; if (hasAttribute(Attribute::MinSize)) return "minsize"; if (hasAttribute(Attribute::Naked)) @@ -291,7 +293,7 @@ bool Attribute::operator<(Attribute A) const { // AttributeImpl Definition //===----------------------------------------------------------------------===// -// Pin the vtabels to this file. +// Pin the vtables to this file. 
AttributeImpl::~AttributeImpl() {} void EnumAttributeImpl::anchor() {} void AlignAttributeImpl::anchor() {} @@ -395,6 +397,7 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::OptimizeNone: return 1ULL << 42; case Attribute::InAlloca: return 1ULL << 43; case Attribute::NonNull: return 1ULL << 44; + case Attribute::JumpTable: return 1ULL << 45; } llvm_unreachable("Unsupported attribute type"); } diff --git a/lib/IR/AutoUpgrade.cpp b/lib/IR/AutoUpgrade.cpp index e255113..6554b3c 100644 --- a/lib/IR/AutoUpgrade.cpp +++ b/lib/IR/AutoUpgrade.cpp @@ -114,6 +114,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "x86.avx.movnt.pd.256" || Name == "x86.avx.movnt.ps.256" || Name == "x86.sse42.crc32.64.8" || + Name == "x86.avx.vbroadcast.ss" || + Name == "x86.avx.vbroadcast.ss.256" || + Name == "x86.avx.vbroadcast.sd.256" || (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { NewFn = nullptr; return true; @@ -335,6 +338,19 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1)); Rep = Builder.CreateZExt(Rep, CI->getType(), ""); + } else if (Name.startswith("llvm.x86.avx.vbroadcast")) { + // Replace broadcasts with a series of insertelements. + Type *VecTy = CI->getType(); + Type *EltTy = VecTy->getVectorElementType(); + unsigned EltNum = VecTy->getVectorNumElements(); + Value *Cast = Builder.CreateBitCast(CI->getArgOperand(0), + EltTy->getPointerTo()); + Value *Load = Builder.CreateLoad(Cast); + Type *I32Ty = Type::getInt32Ty(C); + Rep = UndefValue::get(VecTy); + for (unsigned I = 0; I < EltNum; ++I) + Rep = Builder.CreateInsertElement(Rep, Load, + ConstantInt::get(I32Ty, I)); } else { bool PD128 = false, PD256 = false, PS128 = false, PS256 = false; if (Name == "llvm.x86.avx.vpermil.pd.256") @@ -561,3 +577,10 @@ bool llvm::UpgradeDebugInfo(Module &M) { } return RetCode; } + +void llvm::UpgradeMDStringConstant(std::string &String) { + const std::string OldPrefix = "llvm.vectorizer."; + if (String.find(OldPrefix) == 0) { + String.replace(0, OldPrefix.size(), "llvm.loop.vectorize."); + } +} diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index b027ae5..38a80b1 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMCore Attributes.cpp AutoUpgrade.cpp BasicBlock.cpp + Comdat.cpp ConstantFold.cpp ConstantRange.cpp Constants.cpp diff --git a/lib/IR/Comdat.cpp b/lib/IR/Comdat.cpp new file mode 100644 index 0000000..80715ff --- /dev/null +++ b/lib/IR/Comdat.cpp @@ -0,0 +1,25 @@ +//===-- Comdat.cpp - Implement Metadata classes --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Comdat class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Comdat.h" +#include "llvm/ADT/StringMap.h" +using namespace llvm; + +Comdat::Comdat(SelectionKind SK, StringMapEntry<Comdat> *Name) + : Name(Name), SK(SK) {} + +Comdat::Comdat(Comdat &&C) : Name(C.Name), SK(C.SK) {} + +Comdat::Comdat() : Name(nullptr), SK(Comdat::Any) {} + +StringRef Comdat::getName() const { return Name->first(); } diff --git a/lib/IR/ConstantFold.cpp b/lib/IR/ConstantFold.cpp index 706e66f..395ac39 100644 --- a/lib/IR/ConstantFold.cpp +++ b/lib/IR/ConstantFold.cpp @@ -529,7 +529,10 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, // Try hard to fold cast of cast because they are often eliminable. if (unsigned newOpc = foldConstantCastPair(opc, CE, DestTy)) return ConstantExpr::getCast(newOpc, CE->getOperand(0), DestTy); - } else if (CE->getOpcode() == Instruction::GetElementPtr) { + } else if (CE->getOpcode() == Instruction::GetElementPtr && + // Do not fold addrspacecast (gep 0, .., 0). It might make the + // addrspacecast uncanonicalized. + opc != Instruction::AddrSpaceCast) { // If all of the indexes in the GEP are null values, there is no pointer // adjustment going on. We might as well cast the source pointer. bool isAllNull = true; @@ -1331,6 +1334,15 @@ static FCmpInst::Predicate evaluateFCmpRelation(Constant *V1, Constant *V2) { return FCmpInst::BAD_FCMP_PREDICATE; } +static ICmpInst::Predicate areGlobalsPotentiallyEqual(const GlobalValue *GV1, + const GlobalValue *GV2) { + // Don't try to decide equality of aliases. + if (!isa<GlobalAlias>(GV1) && !isa<GlobalAlias>(GV2)) + if (!GV1->hasExternalWeakLinkage() || !GV2->hasExternalWeakLinkage()) + return ICmpInst::ICMP_NE; + return ICmpInst::BAD_ICMP_PREDICATE; +} + /// evaluateICmpRelation - This function determines if there is anything we can /// decide about the two constants provided. This doesn't need to handle simple /// things like integer comparisons, but should instead handle ConstantExprs @@ -1392,10 +1404,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, // constant (which, since the types must match, means that it's a // ConstantPointerNull). if (const GlobalValue *GV2 = dyn_cast<GlobalValue>(V2)) { - // Don't try to decide equality of aliases. - if (!isa<GlobalAlias>(GV) && !isa<GlobalAlias>(GV2)) - if (!GV->hasExternalWeakLinkage() || !GV2->hasExternalWeakLinkage()) - return ICmpInst::ICMP_NE; + return areGlobalsPotentiallyEqual(GV, GV2); } else if (isa<BlockAddress>(V2)) { return ICmpInst::ICMP_NE; // Globals never equal labels. } else { @@ -1460,7 +1469,8 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, } break; - case Instruction::GetElementPtr: + case Instruction::GetElementPtr: { + GEPOperator *CE1GEP = cast<GEPOperator>(CE1); // Ok, since this is a getelementptr, we know that the constant has a // pointer type. Check the various cases. if (isa<ConstantPointerNull>(V2)) { @@ -1507,7 +1517,8 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, "Surprising getelementptr!"); return isSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; } else { - // If they are different globals, we don't know what the value is. 
+ if (CE1GEP->hasAllZeroIndices()) + return areGlobalsPotentiallyEqual(GV, GV2); return ICmpInst::BAD_ICMP_PREDICATE; } } @@ -1523,8 +1534,14 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, // By far the most common case to handle is when the base pointers are // obviously to the same global. if (isa<GlobalValue>(CE1Op0) && isa<GlobalValue>(CE2Op0)) { - if (CE1Op0 != CE2Op0) // Don't know relative ordering. + // Don't know relative ordering, but check for inequality. + if (CE1Op0 != CE2Op0) { + GEPOperator *CE2GEP = cast<GEPOperator>(CE2); + if (CE1GEP->hasAllZeroIndices() && CE2GEP->hasAllZeroIndices()) + return areGlobalsPotentiallyEqual(cast<GlobalValue>(CE1Op0), + cast<GlobalValue>(CE2Op0)); return ICmpInst::BAD_ICMP_PREDICATE; + } // Ok, we know that both getelementptr instructions are based on the // same global. From this, we can precisely determine the relative // ordering of the resultant pointers. @@ -1570,6 +1587,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2, } } } + } default: break; } diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index bb8d60b..b815936 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -107,6 +107,28 @@ bool Constant::isAllOnesValue() const { return false; } +bool Constant::isMinSignedValue() const { + // Check for INT_MIN integers + if (const ConstantInt *CI = dyn_cast<ConstantInt>(this)) + return CI->isMinValue(/*isSigned=*/true); + + // Check for FP which are bitcasted from INT_MIN integers + if (const ConstantFP *CFP = dyn_cast<ConstantFP>(this)) + return CFP->getValueAPF().bitcastToAPInt().isMinSignedValue(); + + // Check for constant vectors which are splats of INT_MIN values. + if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) + if (Constant *Splat = CV->getSplatValue()) + return Splat->isMinSignedValue(); + + // Check for constant vectors which are splats of INT_MIN values. + if (const ConstantDataVector *CV = dyn_cast<ConstantDataVector>(this)) + if (Constant *Splat = CV->getSplatValue()) + return Splat->isMinSignedValue(); + + return false; +} + // Constructor to create a '0' constant of arbitrary type... Constant *Constant::getNullValue(Type *Ty) { switch (Ty->getTypeID()) { @@ -278,35 +300,48 @@ bool Constant::canTrap() const { return canTrapImpl(this, NonTrappingOps); } -/// isThreadDependent - Return true if the value can vary between threads. -bool Constant::isThreadDependent() const { - SmallPtrSet<const Constant*, 64> Visited; - SmallVector<const Constant*, 64> WorkList; - WorkList.push_back(this); - Visited.insert(this); +/// Check if C contains a GlobalValue for which Predicate is true. 
+static bool +ConstHasGlobalValuePredicate(const Constant *C, + bool (*Predicate)(const GlobalValue *)) { + SmallPtrSet<const Constant *, 8> Visited; + SmallVector<const Constant *, 8> WorkList; + WorkList.push_back(C); + Visited.insert(C); while (!WorkList.empty()) { - const Constant *C = WorkList.pop_back_val(); - - if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { - if (GV->isThreadLocal()) + const Constant *WorkItem = WorkList.pop_back_val(); + if (const auto *GV = dyn_cast<GlobalValue>(WorkItem)) + if (Predicate(GV)) return true; - } - - for (unsigned I = 0, E = C->getNumOperands(); I != E; ++I) { - const Constant *D = dyn_cast<Constant>(C->getOperand(I)); - if (!D) + for (const Value *Op : WorkItem->operands()) { + const Constant *ConstOp = dyn_cast<Constant>(Op); + if (!ConstOp) continue; - if (Visited.insert(D)) - WorkList.push_back(D); + if (Visited.insert(ConstOp)) + WorkList.push_back(ConstOp); } } - return false; } -/// isConstantUsed - Return true if the constant has users other than constant -/// exprs and other dangling things. +/// Return true if the value can vary between threads. +bool Constant::isThreadDependent() const { + auto DLLImportPredicate = [](const GlobalValue *GV) { + return GV->isThreadLocal(); + }; + return ConstHasGlobalValuePredicate(this, DLLImportPredicate); +} + +bool Constant::isDLLImportDependent() const { + auto DLLImportPredicate = [](const GlobalValue *GV) { + return GV->hasDLLImportStorageClass(); + }; + return ConstHasGlobalValuePredicate(this, DLLImportPredicate); +} + +/// Return true if the constant has users other than constant exprs and other +/// dangling things. bool Constant::isConstantUsed() const { for (const User *U : users()) { const Constant *UC = dyn_cast<Constant>(U); @@ -1698,6 +1733,19 @@ Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy) { assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) && "Invalid constantexpr addrspacecast!"); + // Canonicalize addrspacecasts between different pointer types by first + // bitcasting the pointer type and then converting the address space. + PointerType *SrcScalarTy = cast<PointerType>(C->getType()->getScalarType()); + PointerType *DstScalarTy = cast<PointerType>(DstTy->getScalarType()); + Type *DstElemTy = DstScalarTy->getElementType(); + if (SrcScalarTy->getElementType() != DstElemTy) { + Type *MidTy = PointerType::get(DstElemTy, SrcScalarTy->getAddressSpace()); + if (VectorType *VT = dyn_cast<VectorType>(DstTy)) { + // Handle vectors of pointers. 
+ MidTy = VectorType::get(MidTy, VT->getNumElements()); + } + C = getBitCast(C, MidTy); + } return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy); } diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp index 27ce503..87099a6 100644 --- a/lib/IR/Core.cpp +++ b/lib/IR/Core.cpp @@ -17,9 +17,9 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/DiagnosticPrinter.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IRBuilder.h" @@ -35,10 +35,10 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" #include <cassert> #include <cstdlib> #include <cstring> +#include <system_error> using namespace llvm; @@ -281,7 +281,11 @@ char *LLVMPrintTypeToString(LLVMTypeRef Ty) { std::string buf; raw_string_ostream os(buf); - unwrap(Ty)->print(os); + if (unwrap(Ty)) + unwrap(Ty)->print(os); + else + os << "Printing <null> Type"; + os.flush(); return strdup(buf.c_str()); @@ -531,7 +535,11 @@ char* LLVMPrintValueToString(LLVMValueRef Val) { std::string buf; raw_string_ostream os(buf); - unwrap(Val)->print(os); + if (unwrap(Val)) + unwrap(Val)->print(os); + else + os << "Printing <null> Value"; + os.flush(); return strdup(buf.c_str()); @@ -1286,7 +1294,7 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) { } const char *LLVMGetSection(LLVMValueRef Global) { - return unwrap<GlobalValue>(Global)->getSection().c_str(); + return unwrap<GlobalValue>(Global)->getSection(); } void LLVMSetSection(LLVMValueRef Global, const char *Section) { @@ -2598,28 +2606,24 @@ LLVMBool LLVMCreateMemoryBufferWithContentsOfFile( LLVMMemoryBufferRef *OutMemBuf, char **OutMessage) { - std::unique_ptr<MemoryBuffer> MB; - error_code ec; - if (!(ec = MemoryBuffer::getFile(Path, MB))) { - *OutMemBuf = wrap(MB.release()); - return 0; + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getFile(Path); + if (std::error_code EC = MBOrErr.getError()) { + *OutMessage = strdup(EC.message().c_str()); + return 1; } - - *OutMessage = strdup(ec.message().c_str()); - return 1; + *OutMemBuf = wrap(MBOrErr.get().release()); + return 0; } LLVMBool LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf, char **OutMessage) { - std::unique_ptr<MemoryBuffer> MB; - error_code ec; - if (!(ec = MemoryBuffer::getSTDIN(MB))) { - *OutMemBuf = wrap(MB.release()); - return 0; + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = MemoryBuffer::getSTDIN(); + if (std::error_code EC = MBOrErr.getError()) { + *OutMessage = strdup(EC.message().c_str()); + return 1; } - - *OutMessage = strdup(ec.message().c_str()); - return 1; + *OutMemBuf = wrap(MBOrErr.get().release()); + return 0; } LLVMMemoryBufferRef LLVMCreateMemoryBufferWithMemoryRange( @@ -2700,11 +2704,10 @@ void LLVMDisposePassManager(LLVMPassManagerRef PM) { /*===-- Threading ------------------------------------------------------===*/ LLVMBool LLVMStartMultithreaded() { - return llvm_start_multithreaded(); + return LLVMIsMultithreaded(); } void LLVMStopMultithreaded() { - llvm_stop_multithreaded(); } LLVMBool LLVMIsMultithreaded() { diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 92edacc..218787c 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -102,7 +102,8 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, StringRef 
Producer, bool isOptimized, StringRef Flags, unsigned RunTimeVer, StringRef SplitName, - DebugEmissionKind Kind) { + DebugEmissionKind Kind, + bool EmitDebugInfo) { assert(((Lang <= dwarf::DW_LANG_OCaml && Lang >= dwarf::DW_LANG_C89) || (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) && @@ -140,8 +141,14 @@ DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, MDNode *CUNode = MDNode::get(VMContext, Elts); // Create a named metadata so that it is easier to find cu in a module. - NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu"); - NMD->addOperand(CUNode); + // Note that we only generate this when the caller wants to actually + // emit debug information. When we are only interested in tracking + // source line locations throughout the backend, we prevent codegen from + // emitting debug info in the final output by not generating llvm.dbg.cu. + if (EmitDebugInfo) { + NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu"); + NMD->addOperand(CUNode); + } return DICompileUnit(CUNode); } @@ -1068,18 +1075,19 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, DITypeRef Ty, ArrayRef<Value *> Addr, unsigned ArgNo) { - SmallVector<Value *, 15> Elts; - Elts.push_back(GetTagConstant(VMContext, Tag)); - Elts.push_back(getNonCompileUnitScope(Scope)), - Elts.push_back(MDString::get(VMContext, Name)); - Elts.push_back(F); - Elts.push_back(ConstantInt::get(Type::getInt32Ty(VMContext), - (LineNo | (ArgNo << 24)))); - Elts.push_back(Ty); - Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))); - Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))); - Elts.append(Addr.begin(), Addr.end()); - + assert(Addr.size() > 0 && "complex address is empty"); + Value *Elts[] = { + GetTagConstant(VMContext, Tag), + getNonCompileUnitScope(Scope), + MDString::get(VMContext, Name), + F, + ConstantInt::get(Type::getInt32Ty(VMContext), + (LineNo | (ArgNo << 24))), + Ty, + Constant::getNullValue(Type::getInt32Ty(VMContext)), + Constant::getNullValue(Type::getInt32Ty(VMContext)), + MDNode::get(VMContext, Addr) + }; return DIVariable(MDNode::get(VMContext, Elts)); } diff --git a/lib/IR/DebugInfo.cpp b/lib/IR/DebugInfo.cpp index db9e56d..5e39b24 100644 --- a/lib/IR/DebugInfo.cpp +++ b/lib/IR/DebugInfo.cpp @@ -138,8 +138,14 @@ void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) { } } -unsigned DIVariable::getNumAddrElements() const { - return DbgNode->getNumOperands() - 8; +uint64_t DIVariable::getAddrElement(unsigned Idx) const { + DIDescriptor ComplexExpr = getDescriptorField(8); + if (Idx < ComplexExpr->getNumOperands()) + if (auto *CI = dyn_cast_or_null<ConstantInt>(ComplexExpr->getOperand(Idx))) + return CI->getZExtValue(); + + assert(false && "non-existing complex address element requested"); + return 0; } /// getInlinedAt - If this variable is inlined then return inline location. @@ -566,7 +572,13 @@ bool DIVariable::Verify() const { // Make sure that type @ field 5 is a DITypeRef. if (!fieldIsTypeRef(DbgNode, 5)) return false; - return DbgNode->getNumOperands() >= 8; + + // Variable without a complex expression. + if (DbgNode->getNumOperands() == 8) + return true; + + // Make sure the complex expression is an MDNode. + return (DbgNode->getNumOperands() == 9 && fieldIsMDNode(DbgNode, 8)); } /// Verify - Verify that a location descriptor is well formed. 
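// A DIVariable's complex address elements now live behind a single trailing
// MDNode (operand 8) instead of being appended inline, which is what
// getAddrElement() indexes into and what Verify() checks above. A minimal
// sketch of the new access pattern (DV assumed to be a verified DIVariable
// carrying a complex expression):
//
//   uint64_t FirstElt = DV.getAddrElement(0);  // asserts if no such element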
@@ -1514,3 +1526,23 @@ unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) { return 0; return cast<ConstantInt>(Val)->getZExtValue(); } + +llvm::DenseMap<const llvm::Function *, llvm::DISubprogram> +llvm::makeSubprogramMap(const Module &M) { + DenseMap<const Function *, DISubprogram> R; + + NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu"); + if (!CU_Nodes) + return R; + + for (MDNode *N : CU_Nodes->operands()) { + DICompileUnit CUNode(N); + DIArray SPs = CUNode.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) { + DISubprogram SP(SPs.getElement(i)); + if (Function *F = SP.getFunction()) + R.insert(std::make_pair(F, SP)); + } + } + return R; +} diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp index 43360d3..e8bdcce 100644 --- a/lib/IR/DebugLoc.cpp +++ b/lib/IR/DebugLoc.cpp @@ -76,7 +76,7 @@ MDNode *DebugLoc::getScopeNode(const LLVMContext &Ctx) const { return getScope(Ctx); } -DebugLoc DebugLoc::getFnDebugLoc(const LLVMContext &Ctx) { +DebugLoc DebugLoc::getFnDebugLoc(const LLVMContext &Ctx) const { const MDNode *Scope = getScopeNode(Ctx); DISubprogram SP = getDISubprogram(Scope); if (SP.isSubprogram()) { diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 6eeb162..2727063 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -128,7 +128,7 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const { } bool DiagnosticInfoOptimizationRemarkBase::isLocationAvailable() const { - return getFunction().getParent()->getNamedMetadata("llvm.dbg.cu") != nullptr; + return getDebugLoc().isUnknown() == false; } void DiagnosticInfoOptimizationRemarkBase::getLocation(StringRef *Filename, diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp index fe32c46..1443571 100644 --- a/lib/IR/Function.cpp +++ b/lib/IR/Function.cpp @@ -735,6 +735,11 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef<Type*> Tys) { #include "llvm/IR/Intrinsics.gen" #undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN +// This defines the "Intrinsic::getIntrinsicForMSBuiltin()" method. +#define GET_LLVM_INTRINSIC_FOR_MS_BUILTIN +#include "llvm/IR/Intrinsics.gen" +#undef GET_LLVM_INTRINSIC_FOR_MS_BUILTIN + /// hasAddressTaken - returns true if there are any uses of this function /// other than direct calls or invokes to it. 
bool Function::hasAddressTaken(const User* *PutOffender) const { diff --git a/lib/IR/GCOV.cpp b/lib/IR/GCOV.cpp index f2099d6..1667401 100644 --- a/lib/IR/GCOV.cpp +++ b/lib/IR/GCOV.cpp @@ -19,8 +19,8 @@ #include "llvm/Support/Format.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/Path.h" -#include "llvm/Support/system_error.h" #include <algorithm> +#include <system_error> using namespace llvm; //===----------------------------------------------------------------------===// @@ -438,11 +438,15 @@ class LineConsumer { StringRef Remaining; public: LineConsumer(StringRef Filename) { - if (error_code EC = MemoryBuffer::getFileOrSTDIN(Filename, Buffer)) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = BufferOrErr.getError()) { errs() << Filename << ": " << EC.message() << "\n"; Remaining = ""; - } else + } else { + Buffer = std::move(BufferOrErr.get()); Remaining = Buffer->getBuffer(); + } } bool empty() { return Remaining.empty(); } void printNext(raw_ostream &OS, uint32_t LineNum) { diff --git a/lib/IR/Globals.cpp b/lib/IR/Globals.cpp index c905cfe..244e3e4 100644 --- a/lib/IR/Globals.cpp +++ b/lib/IR/Globals.cpp @@ -20,6 +20,7 @@ #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/LeakDetector.h" #include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -59,9 +60,16 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) { } unsigned GlobalValue::getAlignment() const { - if (auto *GA = dyn_cast<GlobalAlias>(this)) - return GA->getAliasee()->getAlignment(); - + if (auto *GA = dyn_cast<GlobalAlias>(this)) { + // In general we cannot compute this at the IR level, but we try. + if (const GlobalObject *GO = GA->getBaseObject()) + return GO->getAlignment(); + + // FIXME: we should also be able to handle: + // Alias = Global + Offset + // Alias = Absolute + return 0; + } return cast<GlobalObject>(this)->getAlignment(); } @@ -80,12 +88,26 @@ void GlobalObject::copyAttributesFrom(const GlobalValue *Src) { setSection(GV->getSection()); } -const std::string &GlobalValue::getSection() const { - if (auto *GA = dyn_cast<GlobalAlias>(this)) - return GA->getAliasee()->getSection(); +const char *GlobalValue::getSection() const { + if (auto *GA = dyn_cast<GlobalAlias>(this)) { + // In general we cannot compute this at the IR level, but we try. + if (const GlobalObject *GO = GA->getBaseObject()) + return GO->getSection(); + return ""; + } return cast<GlobalObject>(this)->getSection(); } +Comdat *GlobalValue::getComdat() { + if (auto *GA = dyn_cast<GlobalAlias>(this)) { + // In general we cannot compute this at the IR level, but we try. 
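// "We try" means resolving the alias to its base object (roughly: looking
// through casts and trivial GEPs to an underlying GlobalObject). When that
// fails -- the "Alias = Global + Offset" and "Alias = Absolute" cases flagged
// in the FIXME above -- the conservative answers are 0 for alignment and ""
// for the section. A hedged sketch using the create() overload added below:
//
//   GlobalAlias *GA = GlobalAlias::create("a", SomeGlobal);
//   GA->getSection();   // == SomeGlobal->getSection()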
+ if (const GlobalObject *GO = GA->getBaseObject()) + return const_cast<GlobalObject *>(GO)->getComdat(); + return nullptr; + } + return cast<GlobalObject>(this)->getComdat(); +} + void GlobalObject::setSection(StringRef S) { Section = S; } bool GlobalValue::isDeclaration() const { @@ -113,8 +135,9 @@ GlobalVariable::GlobalVariable(Type *Ty, bool constant, LinkageTypes Link, : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, OperandTraits<GlobalVariable>::op_begin(this), InitVal != nullptr, Link, Name), - isConstantGlobal(constant), threadLocalMode(TLMode), + isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { + setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); @@ -132,8 +155,9 @@ GlobalVariable::GlobalVariable(Module &M, Type *Ty, bool constant, : GlobalObject(PointerType::get(Ty, AddressSpace), Value::GlobalVariableVal, OperandTraits<GlobalVariable>::op_begin(this), InitVal != nullptr, Link, Name), - isConstantGlobal(constant), threadLocalMode(TLMode), + isConstantGlobal(constant), isExternallyInitializedConstant(isExternallyInitialized) { + setThreadLocalMode(TLMode); if (InitVal) { assert(InitVal->getType() == Ty && "Initializer should be the same type as the GlobalVariable!"); @@ -214,7 +238,7 @@ void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) { //===----------------------------------------------------------------------===// GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link, - const Twine &Name, GlobalObject *Aliasee, + const Twine &Name, Constant *Aliasee, Module *ParentModule) : GlobalValue(PointerType::get(Ty, AddressSpace), Value::GlobalAliasVal, &Op<0>(), 1, Link, Name) { @@ -227,7 +251,7 @@ GlobalAlias::GlobalAlias(Type *Ty, unsigned AddressSpace, LinkageTypes Link, GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, LinkageTypes Link, const Twine &Name, - GlobalObject *Aliasee, Module *ParentModule) { + Constant *Aliasee, Module *ParentModule) { return new GlobalAlias(Ty, AddressSpace, Link, Name, Aliasee, ParentModule); } @@ -239,18 +263,18 @@ GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, GlobalAlias *GlobalAlias::create(Type *Ty, unsigned AddressSpace, LinkageTypes Linkage, const Twine &Name, - GlobalObject *Aliasee) { + GlobalValue *Aliasee) { return create(Ty, AddressSpace, Linkage, Name, Aliasee, Aliasee->getParent()); } GlobalAlias *GlobalAlias::create(LinkageTypes Link, const Twine &Name, - GlobalObject *Aliasee) { + GlobalValue *Aliasee) { PointerType *PTy = Aliasee->getType(); return create(PTy->getElementType(), PTy->getAddressSpace(), Link, Name, Aliasee); } -GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalObject *Aliasee) { +GlobalAlias *GlobalAlias::create(const Twine &Name, GlobalValue *Aliasee) { return create(Aliasee->getLinkage(), Name, Aliasee); } @@ -270,4 +294,8 @@ void GlobalAlias::eraseFromParent() { getParent()->getAliasList().erase(this); } -void GlobalAlias::setAliasee(GlobalObject *Aliasee) { setOperand(0, Aliasee); } +void GlobalAlias::setAliasee(Constant *Aliasee) { + assert((!Aliasee || Aliasee->getType() == getType()) && + "Alias and aliasee types should match!"); + setOperand(0, Aliasee); +} diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index 28cc4cb..86421c4 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -145,31 +145,31 @@ void Instruction::setFastMathFlags(FastMathFlags FMF) { 
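// Aliasees are now arbitrary Constants rather than GlobalObjects, so e.g. a
// bitcast of a function type-checks against the alias (see the new setAliasee
// assert above); a sketch with illustrative names:
//
//   auto *CE = ConstantExpr::getBitCast(F, PtrTy);   // F : Function*
//   GlobalAlias::create(PtrTy->getElementType(), 0,
//                       GlobalValue::ExternalLinkage, "f_alias", CE, &M);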
/// Determine whether the unsafe-algebra flag is set. bool Instruction::hasUnsafeAlgebra() const { - assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op"); + assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"); return cast<FPMathOperator>(this)->hasUnsafeAlgebra(); } /// Determine whether the no-NaNs flag is set. bool Instruction::hasNoNaNs() const { - assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op"); + assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"); return cast<FPMathOperator>(this)->hasNoNaNs(); } /// Determine whether the no-infs flag is set. bool Instruction::hasNoInfs() const { - assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op"); + assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"); return cast<FPMathOperator>(this)->hasNoInfs(); } /// Determine whether the no-signed-zeros flag is set. bool Instruction::hasNoSignedZeros() const { - assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op"); + assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"); return cast<FPMathOperator>(this)->hasNoSignedZeros(); } /// Determine whether the allow-reciprocal flag is set. bool Instruction::hasAllowReciprocal() const { - assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op"); + assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"); return cast<FPMathOperator>(this)->hasAllowReciprocal(); } @@ -177,7 +177,7 @@ bool Instruction::hasAllowReciprocal() const { /// operator which supports these flags. See LangRef.html for the meaning of /// these flags. FastMathFlags Instruction::getFastMathFlags() const { - assert(isa<FPMathOperator>(this) && "setting fast-math flag on invalid op"); + assert(isa<FPMathOperator>(this) && "getting fast-math flag on invalid op"); return cast<FPMathOperator>(this)->getFastMathFlags(); } @@ -300,6 +300,7 @@ static bool haveSameSpecialState(const Instruction *I1, const Instruction *I2, FI->getSynchScope() == cast<FenceInst>(I2)->getSynchScope(); if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I1)) return CXI->isVolatile() == cast<AtomicCmpXchgInst>(I2)->isVolatile() && + CXI->isWeak() == cast<AtomicCmpXchgInst>(I2)->isWeak() && CXI->getSuccessOrdering() == cast<AtomicCmpXchgInst>(I2)->getSuccessOrdering() && CXI->getFailureOrdering() == @@ -331,6 +332,10 @@ bool Instruction::isIdenticalToWhenDefined(const Instruction *I) const { getType() != I->getType()) return false; + // If both instructions have no operands, they are identical. + if (getNumOperands() == 0 && I->getNumOperands() == 0) + return haveSameSpecialState(this, I); + // We have two instructions of identical opcode and #operands. Check to see // if all operands are the same.
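// The zero-operand early-out matters for instructions whose identity lives
// entirely in their special state: two "fence seq_cst" instructions now
// compare identical, while "fence seq_cst" vs. "fence acquire" do not.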
if (!std::equal(op_begin(), op_end(), I->op_begin())) diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index 13c51b8..a5ceacb 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -248,7 +248,7 @@ void LandingPadInst::growOperands(unsigned Size) { Use::zap(OldOps, OldOps + e, true); } -void LandingPadInst::addClause(Value *Val) { +void LandingPadInst::addClause(Constant *Val) { unsigned OpNo = getNumOperands(); growOperands(1); assert(OpNo < ReservedSpace && "Growing didn't work!"); @@ -1251,10 +1251,11 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope, Instruction *InsertBefore) - : Instruction(Cmp->getType(), AtomicCmpXchg, - OperandTraits<AtomicCmpXchgInst>::op_begin(this), - OperandTraits<AtomicCmpXchgInst>::operands(this), - InsertBefore) { + : Instruction( + StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext()), + nullptr), + AtomicCmpXchg, OperandTraits<AtomicCmpXchgInst>::op_begin(this), + OperandTraits<AtomicCmpXchgInst>::operands(this), InsertBefore) { Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope); } @@ -1263,13 +1264,14 @@ AtomicCmpXchgInst::AtomicCmpXchgInst(Value *Ptr, Value *Cmp, Value *NewVal, AtomicOrdering FailureOrdering, SynchronizationScope SynchScope, BasicBlock *InsertAtEnd) - : Instruction(Cmp->getType(), AtomicCmpXchg, - OperandTraits<AtomicCmpXchgInst>::op_begin(this), - OperandTraits<AtomicCmpXchgInst>::operands(this), - InsertAtEnd) { + : Instruction( + StructType::get(Cmp->getType(), Type::getInt1Ty(Cmp->getContext()), + nullptr), + AtomicCmpXchg, OperandTraits<AtomicCmpXchgInst>::op_begin(this), + OperandTraits<AtomicCmpXchgInst>::operands(this), InsertAtEnd) { Init(Ptr, Cmp, NewVal, SuccessOrdering, FailureOrdering, SynchScope); } - + //===----------------------------------------------------------------------===// // AtomicRMWInst Implementation //===----------------------------------------------------------------------===// @@ -2331,18 +2333,12 @@ unsigned CastInst::isEliminableCastPair( // Allowed, use first cast's opcode return firstOp; case 14: - // FIXME: this state can be merged with (2), but the following assert - // is useful to check the correcteness of the sequence due to semantic - // change of bitcast. - assert( - SrcTy->isPtrOrPtrVectorTy() && - MidTy->isPtrOrPtrVectorTy() && - DstTy->isPtrOrPtrVectorTy() && - SrcTy->getPointerAddressSpace() == MidTy->getPointerAddressSpace() && - MidTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace() && - "Illegal bitcast, addrspacecast sequence!"); - // Allowed, use second cast's opcode - return secondOp; + // bitcast, addrspacecast -> addrspacecast if the element type of + // bitcast's source is the same as that of addrspacecast's destination. 
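// Concretely, for the pair
//   %b = bitcast i32* %p to i8*
//   %a = addrspacecast i8* %b to i32 addrspace(1)*
// the bitcast's source element type (i32) matches the addrspacecast's
// destination element type, so the pair folds to
//   addrspacecast i32* %p to i32 addrspace(1)*
// whereas had %a targeted, say, float addrspace(1)*, the mismatch now yields
// 0 (keep both casts) instead of tripping the old assert.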
+ if (SrcTy->getPointerElementType() == DstTy->getPointerElementType()) + return Instruction::AddrSpaceCast; + return 0; + case 15: // FIXME: this state can be merged with (1), but the following assert // is useful to check the correctness of the sequence due to semantic @@ -3610,6 +3606,7 @@ AtomicCmpXchgInst *AtomicCmpXchgInst::clone_impl() const { getSuccessOrdering(), getFailureOrdering(), getSynchScope()); Result->setVolatile(isVolatile()); + Result->setWeak(isWeak()); return Result; } diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp index 4d932d0..59137e4 100644 --- a/lib/IR/Metadata.cpp +++ b/lib/IR/Metadata.cpp @@ -663,7 +663,7 @@ void Instruction::setMetadata(unsigned KindID, MDNode *Node) { // Otherwise, we're removing metadata from an instruction. assert((hasMetadataHashEntry() == - getContext().pImpl->MetadataStore.count(this)) && + (getContext().pImpl->MetadataStore.count(this) > 0)) && "HasMetadata bit out of date!"); if (!hasMetadataHashEntry()) return; // Nothing to remove! diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index 5dbed69..f1b1f9a 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -24,6 +24,8 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/LeakDetector.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/RandomNumberGenerator.h" #include <algorithm> #include <cstdarg> #include <cstdlib> @@ -44,7 +46,7 @@ template class llvm::SymbolTableListTraits<GlobalAlias, Module>; // Module::Module(StringRef MID, LLVMContext &C) - : Context(C), Materializer(), ModuleID(MID), DL("") { + : Context(C), Materializer(), ModuleID(MID), RNG(nullptr), DL("") { ValSymTab = new ValueSymbolTable(); NamedMDSymTab = new StringMap<NamedMDNode *>(); Context.addModule(this); @@ -59,6 +61,7 @@ Module::~Module() { NamedMDList.clear(); delete ValSymTab; delete static_cast<StringMap<NamedMDNode *> *>(NamedMDSymTab); + delete RNG; } /// getNamedValue - Return the first global value in the module with @@ -355,6 +358,16 @@ const DataLayout *Module::getDataLayout() const { return &DL; } +// We want reproducible builds, but ModuleID may be a full path so we just use +// the filename to salt the RNG (although it is not guaranteed to be unique). +RandomNumberGenerator &Module::getRNG() const { + if (RNG == nullptr) { + StringRef Salt = sys::path::filename(ModuleID); + RNG = new RandomNumberGenerator(Salt); + } + return *RNG; +} + //===----------------------------------------------------------------------===// // Methods to control the materialization of GlobalValues in the Module.
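// Usage sketch for the new per-module generator (lazily constructed and
// salted with sys::path::filename(ModuleID), as above; RandomNumberGenerator's
// own interface is assumed from its header):
//
//   RandomNumberGenerator &RNG = M.getRNG();  // same instance on every call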
// @@ -381,7 +394,7 @@ bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) { if (!Materializer) return false; - error_code EC = Materializer->Materialize(GV); + std::error_code EC = Materializer->Materialize(GV); if (!EC) return false; if (ErrInfo) @@ -394,18 +407,21 @@ void Module::Dematerialize(GlobalValue *GV) { return Materializer->Dematerialize(GV); } -error_code Module::materializeAll() { +std::error_code Module::materializeAll() { if (!Materializer) - return error_code::success(); + return std::error_code(); return Materializer->MaterializeModule(this); } -error_code Module::materializeAllPermanently() { - if (error_code EC = materializeAll()) +std::error_code Module::materializeAllPermanently(bool ReleaseBuffer) { + if (std::error_code EC = materializeAll()) return EC; + if (ReleaseBuffer) + Materializer->releaseBuffer(); + Materializer.reset(); - return error_code::success(); + return std::error_code(); } //===----------------------------------------------------------------------===// @@ -421,14 +437,14 @@ error_code Module::materializeAllPermanently() { // has "dropped all references", except operator delete. // void Module::dropAllReferences() { - for(Module::iterator I = begin(), E = end(); I != E; ++I) - I->dropAllReferences(); + for (Function &F : *this) + F.dropAllReferences(); - for(Module::global_iterator I = global_begin(), E = global_end(); I != E; ++I) - I->dropAllReferences(); + for (GlobalVariable &GV : globals()) + GV.dropAllReferences(); - for(Module::alias_iterator I = alias_begin(), E = alias_end(); I != E; ++I) - I->dropAllReferences(); + for (GlobalAlias &GA : aliases()) + GA.dropAllReferences(); } unsigned Module::getDwarfVersion() const { @@ -437,3 +453,11 @@ unsigned Module::getDwarfVersion() const { return dwarf::DWARF_VERSION; return cast<ConstantInt>(Val)->getZExtValue(); } + +Comdat *Module::getOrInsertComdat(StringRef Name) { + Comdat C; + StringMapEntry<Comdat> &Entry = + ComdatSymTab.GetOrCreateValue(Name, std::move(C)); + Entry.second.Name = &Entry; + return &Entry.second; +} diff --git a/lib/IR/Pass.cpp b/lib/IR/Pass.cpp index bb55d2a..91d86ae 100644 --- a/lib/IR/Pass.cpp +++ b/lib/IR/Pass.cpp @@ -199,14 +199,6 @@ Pass *Pass::createPass(AnalysisID ID) { return PI->createPass(); } -Pass *PassInfo::createPass() const { - assert((!isAnalysisGroup() || NormalCtor) && - "No default implementation found for analysis group!"); - assert(NormalCtor && - "Cannot call createPass on PassInfo without default ctor!"); - return NormalCtor(); -} - //===----------------------------------------------------------------------===// // Analysis Group Implementation Code //===----------------------------------------------------------------------===// @@ -224,17 +216,6 @@ RegisterAGBase::RegisterAGBase(const char *Name, const void *InterfaceID, // PassRegistrationListener implementation // -// PassRegistrationListener ctor - Add the current object to the list of -// PassRegistrationListeners... -PassRegistrationListener::PassRegistrationListener() { - PassRegistry::getPassRegistry()->addRegistrationListener(this); -} - -// dtor - Remove object from list of listeners... -PassRegistrationListener::~PassRegistrationListener() { - PassRegistry::getPassRegistry()->removeRegistrationListener(this); -} - // enumeratePasses - Iterate over the registered passes, calling the // passEnumerate callback on each PassInfo object. 
// @@ -242,7 +223,16 @@ void PassRegistrationListener::enumeratePasses() { PassRegistry::getPassRegistry()->enumerateWith(this); } -PassNameParser::~PassNameParser() {} +PassNameParser::PassNameParser() + : Opt(nullptr) { + PassRegistry::getPassRegistry()->addRegistrationListener(this); +} + +PassNameParser::~PassNameParser() { + // This only gets called during static destruction, in which case the + // PassRegistry will have already been destroyed by llvm_shutdown(). So + // attempting to remove the registration listener is an error. +} //===----------------------------------------------------------------------===// // AnalysisUsage Class Implementation diff --git a/lib/IR/PassRegistry.cpp b/lib/IR/PassRegistry.cpp index 6a5bee2..91940a9 100644 --- a/lib/IR/PassRegistry.cpp +++ b/lib/IR/PassRegistry.cpp @@ -13,14 +13,10 @@ //===----------------------------------------------------------------------===// #include "llvm/PassRegistry.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/StringMap.h" #include "llvm/IR/Function.h" #include "llvm/PassSupport.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Mutex.h" #include "llvm/Support/RWMutex.h" #include <vector> @@ -36,62 +32,23 @@ PassRegistry *PassRegistry::getPassRegistry() { return &*PassRegistryObj; } -static ManagedStatic<sys::SmartRWMutex<true> > Lock; - -//===----------------------------------------------------------------------===// -// PassRegistryImpl -// - -namespace { -struct PassRegistryImpl { - /// PassInfoMap - Keep track of the PassInfo object for each registered pass. - typedef DenseMap<const void*, const PassInfo*> MapType; - MapType PassInfoMap; - - typedef StringMap<const PassInfo*> StringMapType; - StringMapType PassInfoStringMap; - - /// AnalysisGroupInfo - Keep track of information for each analysis group. - struct AnalysisGroupInfo { - SmallPtrSet<const PassInfo *, 8> Implementations; - }; - DenseMap<const PassInfo*, AnalysisGroupInfo> AnalysisGroupInfoMap; - - std::vector<std::unique_ptr<const PassInfo>> ToFree; - std::vector<PassRegistrationListener*> Listeners; -}; -} // end anonymous namespace - -void *PassRegistry::getImpl() const { - if (!pImpl) - pImpl = new PassRegistryImpl(); - return pImpl; -} - //===----------------------------------------------------------------------===// // Accessors // PassRegistry::~PassRegistry() { - sys::SmartScopedWriter<true> Guard(*Lock); - PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(pImpl); - delete Impl; - pImpl = nullptr; } const PassInfo *PassRegistry::getPassInfo(const void *TI) const { - sys::SmartScopedReader<true> Guard(*Lock); - PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl()); - PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI); - return I != Impl->PassInfoMap.end() ? I->second : nullptr; + sys::SmartScopedReader<true> Guard(Lock); + MapType::const_iterator I = PassInfoMap.find(TI); + return I != PassInfoMap.end() ? I->second : nullptr; } const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const { - sys::SmartScopedReader<true> Guard(*Lock); - PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl()); - PassRegistryImpl::StringMapType::const_iterator - I = Impl->PassInfoStringMap.find(Arg); - return I != Impl->PassInfoStringMap.end() ? I->second : nullptr; + sys::SmartScopedReader<true> Guard(Lock); + StringMapType::const_iterator I = PassInfoStringMap.find(Arg); + return I != PassInfoStringMap.end() ? 
I->second : nullptr; } //===----------------------------------------------------------------------===// @@ -99,39 +56,34 @@ const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const { // void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) { - sys::SmartScopedWriter<true> Guard(*Lock); - PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl()); + sys::SmartScopedWriter<true> Guard(Lock); bool Inserted = - Impl->PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second; + PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second; assert(Inserted && "Pass registered multiple times!"); (void)Inserted; - Impl->PassInfoStringMap[PI.getPassArgument()] = &PI; + PassInfoStringMap[PI.getPassArgument()] = &PI; // Notify any listeners. for (std::vector<PassRegistrationListener*>::iterator - I = Impl->Listeners.begin(), E = Impl->Listeners.end(); I != E; ++I) + I = Listeners.begin(), E = Listeners.end(); I != E; ++I) (*I)->passRegistered(&PI); - if (ShouldFree) Impl->ToFree.push_back(std::unique_ptr<const PassInfo>(&PI)); + if (ShouldFree) ToFree.push_back(std::unique_ptr<const PassInfo>(&PI)); } void PassRegistry::unregisterPass(const PassInfo &PI) { - sys::SmartScopedWriter<true> Guard(*Lock); - PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl()); - PassRegistryImpl::MapType::iterator I = - Impl->PassInfoMap.find(PI.getTypeInfo()); - assert(I != Impl->PassInfoMap.end() && "Pass registered but not in map!"); + sys::SmartScopedWriter<true> Guard(Lock); + MapType::iterator I = PassInfoMap.find(PI.getTypeInfo()); + assert(I != PassInfoMap.end() && "Pass registered but not in map!"); // Remove pass from the map. - Impl->PassInfoMap.erase(I); - Impl->PassInfoStringMap.erase(PI.getPassArgument()); + PassInfoMap.erase(I); + PassInfoStringMap.erase(PI.getPassArgument()); } void PassRegistry::enumerateWith(PassRegistrationListener *L) { - sys::SmartScopedReader<true> Guard(*Lock); - PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl()); - for (PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.begin(), - E = Impl->PassInfoMap.end(); I != E; ++I) + sys::SmartScopedReader<true> Guard(Lock); + for (auto I = PassInfoMap.begin(), E = PassInfoMap.end(); I != E; ++I) L->passEnumerate(I->second); } @@ -156,15 +108,13 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID, assert(ImplementationInfo && "Must register pass before adding to AnalysisGroup!"); - sys::SmartScopedWriter<true> Guard(*Lock); + sys::SmartScopedWriter<true> Guard(Lock); // Make sure we keep track of the fact that the implementation implements // the interface. 
ImplementationInfo->addInterfaceImplemented(InterfaceInfo); - PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl()); - PassRegistryImpl::AnalysisGroupInfo &AGI = - Impl->AnalysisGroupInfoMap[InterfaceInfo]; + AnalysisGroupInfo &AGI = AnalysisGroupInfoMap[InterfaceInfo]; assert(AGI.Implementations.count(ImplementationInfo) == 0 && "Cannot add a pass to the same analysis group more than once!"); AGI.Implementations.insert(ImplementationInfo); @@ -179,30 +129,18 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID, } } - PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl()); if (ShouldFree) - Impl->ToFree.push_back(std::unique_ptr<const PassInfo>(&Registeree)); + ToFree.push_back(std::unique_ptr<const PassInfo>(&Registeree)); } void PassRegistry::addRegistrationListener(PassRegistrationListener *L) { - sys::SmartScopedWriter<true> Guard(*Lock); - PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl()); - Impl->Listeners.push_back(L); + sys::SmartScopedWriter<true> Guard(Lock); + Listeners.push_back(L); } void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) { - sys::SmartScopedWriter<true> Guard(*Lock); - - // NOTE: This is necessary, because removeRegistrationListener() can be called - // as part of the llvm_shutdown sequence. Since we have no control over the - // order of that sequence, we need to gracefully handle the case where the - // PassRegistry is destructed before the object that triggers this call. - if (!pImpl) return; + sys::SmartScopedWriter<true> Guard(Lock); - PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl()); - std::vector<PassRegistrationListener*>::iterator I = - std::find(Impl->Listeners.begin(), Impl->Listeners.end(), L); - assert(I != Impl->Listeners.end() && - "PassRegistrationListener not registered!"); - Impl->Listeners.erase(I); + auto I = std::find(Listeners.begin(), Listeners.end(), L); + Listeners.erase(I); } diff --git a/lib/IR/Value.cpp b/lib/IR/Value.cpp index d734e4e..35c241a 100644 --- a/lib/IR/Value.cpp +++ b/lib/IR/Value.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/InstrTypes.h" @@ -38,13 +39,12 @@ using namespace llvm; static inline Type *checkType(Type *Ty) { assert(Ty && "Value defined with a null type: Error!"); - return const_cast<Type*>(Ty); + return Ty; } Value::Value(Type *ty, unsigned scid) - : SubclassID(scid), HasValueHandle(0), - SubclassOptionalData(0), SubclassData(0), VTy((Type*)checkType(ty)), - UseList(nullptr), Name(nullptr) { + : VTy(checkType(ty)), UseList(nullptr), Name(nullptr), SubclassID(scid), + HasValueHandle(0), SubclassOptionalData(0), SubclassData(0) { // FIXME: Why isn't this in the subclass gunk?? // Note, we cannot call isa<CallInst> before the CallInst has been // constructed. @@ -214,7 +214,7 @@ void Value::setName(const Twine &NewName) { // then reallocated. // Create the new name. 
- Name = ValueName::Create(NameRef.begin(), NameRef.end()); + Name = ValueName::Create(NameRef); Name->setValue(this); return; } @@ -301,27 +301,6 @@ void Value::takeName(Value *V) { ST->reinsertValue(this); } -static GlobalObject &findReplacementForAliasUse(Value &C) { - if (auto *GO = dyn_cast<GlobalObject>(&C)) - return *GO; - if (auto *GA = dyn_cast<GlobalAlias>(&C)) - return *GA->getAliasee(); - auto *CE = cast<ConstantExpr>(&C); - assert(CE->getOpcode() == Instruction::BitCast || - CE->getOpcode() == Instruction::GetElementPtr || - CE->getOpcode() == Instruction::AddrSpaceCast); - if (CE->getOpcode() == Instruction::GetElementPtr) - assert(cast<GEPOperator>(CE)->hasAllZeroIndices()); - return findReplacementForAliasUse(*CE->getOperand(0)); -} - -static void replaceAliasUseWith(Use &U, Value *New) { - GlobalObject &Replacement = findReplacementForAliasUse(*New); - assert(&cast<GlobalObject>(*U) != &Replacement && - "replaceAliasUseWith cannot form an alias cycle"); - U.set(&Replacement); -} - #ifndef NDEBUG static bool contains(SmallPtrSet<ConstantExpr *, 4> &Cache, ConstantExpr *Expr, Constant *C) { @@ -373,10 +352,6 @@ void Value::replaceAllUsesWith(Value *New) { // Must handle Constants specially, we cannot call replaceUsesOfWith on a // constant because they are uniqued. if (auto *C = dyn_cast<Constant>(U.getUser())) { - if (isa<GlobalAlias>(C)) { - replaceAliasUseWith(U, New); - continue; - } if (!isa<GlobalValue>(C)) { C->replaceUsesOfWithOnConstant(this, New, &U); continue; @@ -498,18 +473,33 @@ Value *Value::stripInBoundsOffsets() { /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. -static bool isDereferenceablePointer(const Value *V, +static bool isDereferenceablePointer(const Value *V, const DataLayout *DL, SmallPtrSet<const Value *, 32> &Visited) { // Note that it is not safe to speculate into a malloc'd region because // malloc may return null. - // It's also not always safe to follow a bitcast, for example: - // bitcast i8* (alloca i8) to i32* - // would result in a 4-byte load from a 1-byte alloca. Some cases could - // be handled using DataLayout to check sizes and alignments though. // These are obviously ok. if (isa<AllocaInst>(V)) return true; + // It's not always safe to follow a bitcast, for example: + // bitcast i8* (alloca i8) to i32* + // would result in a 4-byte load from a 1-byte alloca. However, + // if we're casting from a pointer from a type of larger size + // to a type of smaller size (or the same size), and the alignment + // is at least as large as for the resulting pointer type, then + // we can look through the bitcast. + if (DL) + if (const BitCastInst* BC = dyn_cast<BitCastInst>(V)) { + Type *STy = BC->getSrcTy()->getPointerElementType(), + *DTy = BC->getDestTy()->getPointerElementType(); + if (STy->isSized() && DTy->isSized() && + (DL->getTypeStoreSize(STy) >= + DL->getTypeStoreSize(DTy)) && + (DL->getABITypeAlignment(STy) >= + DL->getABITypeAlignment(DTy))) + return isDereferenceablePointer(BC->getOperand(0), DL, Visited); + } + // Global variables which can't collapse to null are ok. if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) return !GV->hasExternalWeakLinkage(); @@ -523,7 +513,7 @@ static bool isDereferenceablePointer(const Value *V, // Conservatively require that the base pointer be fully dereferenceable. 
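// (Re the bitcast case above: with a DataLayout in hand,
//   bitcast i32* %p to i8*
// is now followed through, since in any common data layout i32's store size
// (4) and ABI alignment cover i8's, while the reverse i8* -> i32* cast is
// still rejected as a potential 4-byte load from a 1-byte object.)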
if (!Visited.insert(GEP->getOperand(0))) return false; - if (!isDereferenceablePointer(GEP->getOperand(0), Visited)) + if (!isDereferenceablePointer(GEP->getOperand(0), DL, Visited)) return false; // Check the indices. gep_type_iterator GTI = gep_type_begin(GEP); @@ -559,9 +549,9 @@ static bool isDereferenceablePointer(const Value *V, /// isDereferenceablePointer - Test if this value is always a pointer to /// allocated and suitably aligned memory for a simple load or store. -bool Value::isDereferenceablePointer() const { +bool Value::isDereferenceablePointer(const DataLayout *DL) const { SmallPtrSet<const Value *, 32> Visited; - return ::isDereferenceablePointer(this, Visited); + return ::isDereferenceablePointer(this, DL, Visited); } /// DoPHITranslation - If this value is a PHI node with CurBB as its parent, diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index bcc38c1..314bad3 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -107,6 +107,12 @@ struct VerifierSupport { OS << ' ' << *T; } + void WriteComdat(const Comdat *C) { + if (!C) + return; + OS << *C; + } + // CheckFailed - A check failed, so print out the condition and the message // that failed. This provides a nice place to put a breakpoint if you want // to see why something is not correct. @@ -138,6 +144,12 @@ struct VerifierSupport { WriteType(T3); Broken = true; } + + void CheckFailed(const Twine &Message, const Comdat *C) { + OS << Message.str() << "\n"; + WriteComdat(C); + Broken = true; + } }; class Verifier : public InstVisitor<Verifier>, VerifierSupport { friend class InstVisitor<Verifier>; @@ -230,6 +242,9 @@ public: I != E; ++I) visitNamedMDNode(*I); + for (const StringMapEntry<Comdat> &SMEC : M.getComdatSymbolTable()) + visitComdat(SMEC.getValue()); + visitModuleFlags(M); visitModuleIdents(M); @@ -241,8 +256,12 @@ private: void visitGlobalValue(const GlobalValue &GV); void visitGlobalVariable(const GlobalVariable &GV); void visitGlobalAlias(const GlobalAlias &GA); + void visitAliaseeSubExpr(const GlobalAlias &A, const Constant &C); + void visitAliaseeSubExpr(SmallPtrSet<const GlobalAlias *, 4> &Visited, + const GlobalAlias &A, const Constant &C); void visitNamedMDNode(const NamedMDNode &NMD); void visitMDNode(MDNode &MD, Function *F); + void visitComdat(const Comdat &C); void visitModuleIdents(const Module &M); void visitModuleFlags(const Module &M); void visitModuleFlag(const MDNode *Op, @@ -384,6 +403,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { "'common' global must have a zero initializer!", &GV); Assert1(!GV.isConstant(), "'common' global may not be marked constant!", &GV); + Assert1(!GV.hasComdat(), "'common' global may not be in a Comdat!", &GV); } } else { Assert1(GV.hasExternalLinkage() || GV.hasExternalWeakLinkage(), @@ -474,36 +494,57 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { visitGlobalValue(GV); } +void Verifier::visitAliaseeSubExpr(const GlobalAlias &GA, const Constant &C) { + SmallPtrSet<const GlobalAlias*, 4> Visited; + Visited.insert(&GA); + visitAliaseeSubExpr(Visited, GA, C); +} + +void Verifier::visitAliaseeSubExpr(SmallPtrSet<const GlobalAlias *, 4> &Visited, + const GlobalAlias &GA, const Constant &C) { + if (const auto *GV = dyn_cast<GlobalValue>(&C)) { + Assert1(!GV->isDeclaration(), "Alias must point to a definition", &GA); + + if (const auto *GA2 = dyn_cast<GlobalAlias>(GV)) { + Assert1(Visited.insert(GA2), "Aliases cannot form a cycle", &GA); + + Assert1(!GA2->mayBeOverridden(), "Alias cannot point to a weak alias", + &GA); + 
} else { + // Only continue verifying subexpressions of GlobalAliases. + // Do not recurse into global initializers. + return; + } + } + + if (const auto *CE = dyn_cast<ConstantExpr>(&C)) + VerifyConstantExprBitcastType(CE); + + for (const Use &U : C.operands()) { + Value *V = &*U; + if (const auto *GA2 = dyn_cast<GlobalAlias>(V)) + visitAliaseeSubExpr(Visited, GA, *GA2->getAliasee()); + else if (const auto *C2 = dyn_cast<Constant>(V)) + visitAliaseeSubExpr(Visited, GA, *C2); + } +} + void Verifier::visitGlobalAlias(const GlobalAlias &GA) { Assert1(!GA.getName().empty(), "Alias name cannot be empty!", &GA); Assert1(GlobalAlias::isValidLinkage(GA.getLinkage()), - "Alias should have external or external weak linkage!", &GA); - Assert1(GA.getAliasee(), - "Aliasee cannot be NULL!", &GA); - Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA); - + "Alias should have private, internal, linkonce, weak, linkonce_odr, " + "weak_odr, or external linkage!", + &GA); const Constant *Aliasee = GA.getAliasee(); - const GlobalValue *GV = dyn_cast<GlobalValue>(Aliasee); - - if (!GV) { - const ConstantExpr *CE = dyn_cast<ConstantExpr>(Aliasee); - if (CE && (CE->getOpcode() == Instruction::BitCast || - CE->getOpcode() == Instruction::AddrSpaceCast || - CE->getOpcode() == Instruction::GetElementPtr)) - GV = dyn_cast<GlobalValue>(CE->getOperand(0)); + Assert1(Aliasee, "Aliasee cannot be NULL!", &GA); + Assert1(GA.getType() == Aliasee->getType(), + "Alias and aliasee types should match!", &GA); - Assert1(GV, "Aliasee should be either GlobalValue, bitcast or " - "addrspacecast of GlobalValue", - &GA); + Assert1(isa<GlobalValue>(Aliasee) || isa<ConstantExpr>(Aliasee), + "Aliasee should be either GlobalValue or ConstantExpr", &GA); - VerifyConstantExprBitcastType(CE); - } - Assert1(!GV->isDeclaration(), "Alias must point to a definition", &GA); - if (const GlobalAlias *GAAliasee = dyn_cast<GlobalAlias>(GV)) { - Assert1(!GAAliasee->mayBeOverridden(), "Alias cannot point to a weak alias", - &GA); - } + visitAliaseeSubExpr(GA, *Aliasee); visitGlobalValue(GA); } @@ -556,6 +597,22 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) { } } +void Verifier::visitComdat(const Comdat &C) { + // All Comdat::SelectionKind values other than Comdat::Any require a + // GlobalValue with the same name as the Comdat. + const GlobalValue *GV = M->getNamedValue(C.getName()); + if (C.getSelectionKind() != Comdat::Any) + Assert1(GV, + "comdat selection kind requires a global value with the same name", + &C); + // The Module is invalid if the GlobalValue has local linkage. Allowing + // otherwise opens us up to seeing the underlying global value get renamed if + // collisions occur. 
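
In outline, the rewritten alias verification above replaces the old single-level aliasee check with a recursive walk. A sketch of the entry point and the invariants it asserts, with identifiers as in the patch:

    // Seed the visited set with the alias itself so a direct self-cycle is
    // caught, then walk every constant subexpression of the aliasee.
    SmallPtrSet<const GlobalAlias *, 4> Visited;
    Visited.insert(&GA);
    visitAliaseeSubExpr(Visited, GA, *GA.getAliasee());
    // Along the walk: every GlobalValue reached must be a definition, no
    // GlobalAlias may be visited twice, and no target may be a weak alias.
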
+ if (GV) + Assert1(!GV->hasLocalLinkage(), "comdat global value has local linkage", + GV); +} + void Verifier::visitModuleIdents(const Module &M) { const NamedMDNode *Idents = M.getNamedMetadata("llvm.ident"); if (!Idents) @@ -716,7 +773,8 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, I->getKindAsEnum() == Attribute::Builtin || I->getKindAsEnum() == Attribute::NoBuiltin || I->getKindAsEnum() == Attribute::Cold || - I->getKindAsEnum() == Attribute::OptimizeNone) { + I->getKindAsEnum() == Attribute::OptimizeNone || + I->getKindAsEnum() == Attribute::JumpTable) { if (!isFunction) { CheckFailed("Attribute '" + I->getAsString() + "' only applies to functions!", V); @@ -890,6 +948,14 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, Attribute::MinSize), "Attributes 'minsize and optnone' are incompatible!", V); } + + if (Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::JumpTable)) { + const GlobalValue *GV = cast<GlobalValue>(V); + Assert1(GV->hasUnnamedAddr(), + "Attribute 'jumptable' requires 'unnamed_addr'", V); + + } } void Verifier::VerifyBitcastType(const Value *V, Type *DestTy, Type *SrcTy) { @@ -2058,8 +2124,7 @@ void Verifier::visitLandingPadInst(LandingPadInst &LPI) { Assert1(isa<Constant>(PersonalityFn), "Personality function is not constant!", &LPI); for (unsigned i = 0, e = LPI.getNumClauses(); i < e; ++i) { - Value *Clause = LPI.getClause(i); - Assert1(isa<Constant>(Clause), "Clause is not constant!", &LPI); + Constant *Clause = LPI.getClause(i); if (LPI.isCatch(i)) { Assert1(isa<PointerType>(Clause->getType()), "Catch operand does not have pointer type!", &LPI); @@ -2203,7 +2268,8 @@ void Verifier::visitInstruction(Instruction &I) { } MDNode *MD = I.getMetadata(LLVMContext::MD_range); - Assert1(!MD || isa<LoadInst>(I), "Ranges are only for loads!", &I); + Assert1(!MD || isa<LoadInst>(I) || isa<CallInst>(I) || isa<InvokeInst>(I), + "Ranges are only for loads, calls and invokes!", &I); InstsInThisBlock.insert(&I); } diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp index f4ed437..f8d2f5a 100644 --- a/lib/IRReader/IRReader.cpp +++ b/lib/IRReader/IRReader.cpp @@ -18,7 +18,7 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/Timer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" +#include <system_error> using namespace llvm; @@ -29,17 +29,16 @@ namespace llvm { static const char *const TimeIRParsingGroupName = "LLVM IR Parsing"; static const char *const TimeIRParsingName = "Parse IR"; - -Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err, - LLVMContext &Context) { +static Module *getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err, + LLVMContext &Context) { if (isBitcode((const unsigned char *)Buffer->getBufferStart(), (const unsigned char *)Buffer->getBufferEnd())) { std::string ErrMsg; ErrorOr<Module *> ModuleOrErr = getLazyBitcodeModule(Buffer, Context); - if (error_code EC = ModuleOrErr.getError()) { + if (std::error_code EC = ModuleOrErr.getError()) { Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, EC.message()); - // ParseBitcodeFile does not take ownership of the Buffer in the + // getLazyBitcodeModule does not take ownership of the Buffer in the // case of an error. 
delete Buffer; return nullptr; @@ -52,14 +51,15 @@ Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err, Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err, LLVMContext &Context) { - std::unique_ptr<MemoryBuffer> File; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = FileOrErr.getError()) { Err = SMDiagnostic(Filename, SourceMgr::DK_Error, - "Could not open input file: " + ec.message()); + "Could not open input file: " + EC.message()); return nullptr; } - return getLazyIRModule(File.release(), Err, Context); + return getLazyIRModule(FileOrErr.get().release(), Err, Context); } Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, @@ -70,29 +70,31 @@ Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, (const unsigned char *)Buffer->getBufferEnd())) { ErrorOr<Module *> ModuleOrErr = parseBitcodeFile(Buffer, Context); Module *M = nullptr; - if (error_code EC = ModuleOrErr.getError()) + if (std::error_code EC = ModuleOrErr.getError()) Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, EC.message()); else M = ModuleOrErr.get(); // parseBitcodeFile does not take ownership of the Buffer. - delete Buffer; return M; } - return ParseAssembly(Buffer, nullptr, Err, Context); + return ParseAssembly(MemoryBuffer::getMemBuffer( + Buffer->getBuffer(), Buffer->getBufferIdentifier()), + nullptr, Err, Context); } Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err, LLVMContext &Context) { - std::unique_ptr<MemoryBuffer> File; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFileOrSTDIN(Filename); + if (std::error_code EC = FileOrErr.getError()) { Err = SMDiagnostic(Filename, SourceMgr::DK_Error, - "Could not open input file: " + ec.message()); + "Could not open input file: " + EC.message()); return nullptr; } - return ParseIR(File.release(), Err, Context); + return ParseIR(FileOrErr.get().get(), Err, Context); } //===----------------------------------------------------------------------===// @@ -104,7 +106,8 @@ LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef, char **OutMessage) { SMDiagnostic Diag; - *OutM = wrap(ParseIR(unwrap(MemBuf), Diag, *unwrap(ContextRef))); + std::unique_ptr<MemoryBuffer> MB(unwrap(MemBuf)); + *OutM = wrap(ParseIR(MB.get(), Diag, *unwrap(ContextRef))); if(!*OutM) { if (OutMessage) { diff --git a/lib/LTO/LLVMBuild.txt b/lib/LTO/LLVMBuild.txt index c9b5212..29ed92c 100644 --- a/lib/LTO/LLVMBuild.txt +++ b/lib/LTO/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = LTO parent = Libraries -required_libraries = BitReader BitWriter Core IPA IPO InstCombine Linker MC MCParser ObjCARC Scalar Support Target TransformUtils +required_libraries = BitReader BitWriter Core IPA IPO InstCombine Linker MC MCParser ObjCARC Object Scalar Support Target TransformUtils diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 99236bd..335197a 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -44,7 +44,6 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" @@ -52,6 +51,7 @@ #include 
"llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" #include "llvm/Transforms/ObjCARC.h" +#include <system_error> using namespace llvm; const char* LTOCodeGenerator::getVersionString() { @@ -114,7 +114,7 @@ void LTOCodeGenerator::initializeLTOPasses() { } bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) { - bool ret = IRLinker.linkInModule(mod->getLLVVMModule(), &errMsg); + bool ret = IRLinker.linkInModule(&mod->getModule(), &errMsg); const std::vector<const char*> &undefs = mod->getAsmUndefinedRefs(); for (int i = 0, e = undefs.size(); i != e; ++i) @@ -124,23 +124,7 @@ bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) { } void LTOCodeGenerator::setTargetOptions(TargetOptions options) { - Options.LessPreciseFPMADOption = options.LessPreciseFPMADOption; - Options.NoFramePointerElim = options.NoFramePointerElim; - Options.AllowFPOpFusion = options.AllowFPOpFusion; - Options.UnsafeFPMath = options.UnsafeFPMath; - Options.NoInfsFPMath = options.NoInfsFPMath; - Options.NoNaNsFPMath = options.NoNaNsFPMath; - Options.HonorSignDependentRoundingFPMathOption = - options.HonorSignDependentRoundingFPMathOption; - Options.UseSoftFloat = options.UseSoftFloat; - Options.FloatABIType = options.FloatABIType; - Options.NoZerosInBSS = options.NoZerosInBSS; - Options.GuaranteedTailCallOpt = options.GuaranteedTailCallOpt; - Options.DisableTailCalls = options.DisableTailCalls; - Options.StackAlignmentOverride = options.StackAlignmentOverride; - Options.TrapFuncName = options.TrapFuncName; - Options.PositionIndependentExecutable = options.PositionIndependentExecutable; - Options.UseInitArray = options.UseInitArray; + Options = options; } void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) { @@ -208,7 +192,8 @@ bool LTOCodeGenerator::compile_to_file(const char** name, // make unique temp .o file to put generated object file SmallString<128> Filename; int FD; - error_code EC = sys::fs::createTemporaryFile("lto-llvm", "o", FD, Filename); + std::error_code EC = + sys::fs::createTemporaryFile("lto-llvm", "o", FD, Filename); if (EC) { errMsg = EC.message(); return false; @@ -251,13 +236,14 @@ const void* LTOCodeGenerator::compile(size_t* length, delete NativeObjectFile; // read .o file into memory buffer - std::unique_ptr<MemoryBuffer> BuffPtr; - if (error_code ec = MemoryBuffer::getFile(name, BuffPtr, -1, false)) { - errMsg = ec.message(); + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFile(name, -1, false); + if (std::error_code EC = BufferOrErr.getError()) { + errMsg = EC.message(); sys::fs::remove(NativeObjectPath); return nullptr; } - NativeObjectFile = BuffPtr.release(); + NativeObjectFile = BufferOrErr.get().release(); // remove temp files sys::fs::remove(NativeObjectPath); diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index d117514..844c0f2 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -24,7 +24,6 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCParser/MCAsmParser.h" #include "llvm/MC/MCSection.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCTargetAsmParser.h" @@ -37,21 +36,16 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetSelect.h" -#include "llvm/Support/system_error.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" #include 
"llvm/Transforms/Utils/GlobalStatus.h" +#include <system_error> using namespace llvm; -LTOModule::LTOModule(llvm::Module *m, llvm::TargetMachine *t) - : _module(m), _target(t), - _context(_target->getMCAsmInfo(), _target->getRegisterInfo(), &ObjFileInfo), - _mangler(t->getDataLayout()) { - ObjFileInfo.InitMCObjectFileInfo(t->getTargetTriple(), - t->getRelocationModel(), t->getCodeModel(), - _context); -} +LTOModule::LTOModule(std::unique_ptr<object::IRObjectFile> Obj, + llvm::TargetMachine *TM) + : IRFile(std::move(Obj)), _target(TM) {} /// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM /// bitcode. @@ -67,87 +61,63 @@ bool LTOModule::isBitcodeFile(const char *path) { return type == sys::fs::file_magic::bitcode; } -/// isBitcodeFileForTarget - Returns 'true' if the file (or memory contents) is -/// LLVM bitcode for the specified triple. -bool LTOModule::isBitcodeFileForTarget(const void *mem, size_t length, - const char *triplePrefix) { - MemoryBuffer *buffer = makeBuffer(mem, length); - if (!buffer) - return false; - return isTargetMatch(buffer, triplePrefix); -} - -bool LTOModule::isBitcodeFileForTarget(const char *path, - const char *triplePrefix) { - std::unique_ptr<MemoryBuffer> buffer; - if (MemoryBuffer::getFile(path, buffer)) - return false; - return isTargetMatch(buffer.release(), triplePrefix); -} - -/// isTargetMatch - Returns 'true' if the memory buffer is for the specified -/// target triple. -bool LTOModule::isTargetMatch(MemoryBuffer *buffer, const char *triplePrefix) { +bool LTOModule::isBitcodeForTarget(MemoryBuffer *buffer, + StringRef triplePrefix) { std::string Triple = getBitcodeTargetTriple(buffer, getGlobalContext()); - delete buffer; - return strncmp(Triple.c_str(), triplePrefix, strlen(triplePrefix)) == 0; + return StringRef(Triple).startswith(triplePrefix); } -/// makeLTOModule - Create an LTOModule. N.B. These methods take ownership of -/// the buffer. 
-LTOModule *LTOModule::makeLTOModule(const char *path, TargetOptions options, - std::string &errMsg) { - std::unique_ptr<MemoryBuffer> buffer; - if (error_code ec = MemoryBuffer::getFile(path, buffer)) { - errMsg = ec.message(); +LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options, + std::string &errMsg) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFile(path); + if (std::error_code EC = BufferOrErr.getError()) { + errMsg = EC.message(); return nullptr; } - return makeLTOModule(buffer.release(), options, errMsg); + return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg); } -LTOModule *LTOModule::makeLTOModule(int fd, const char *path, - size_t size, TargetOptions options, - std::string &errMsg) { - return makeLTOModule(fd, path, size, 0, options, errMsg); +LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size, + TargetOptions options, + std::string &errMsg) { + return createFromOpenFileSlice(fd, path, size, 0, options, errMsg); } -LTOModule *LTOModule::makeLTOModule(int fd, const char *path, - size_t map_size, - off_t offset, - TargetOptions options, - std::string &errMsg) { - std::unique_ptr<MemoryBuffer> buffer; - if (error_code ec = - MemoryBuffer::getOpenFileSlice(fd, path, buffer, map_size, offset)) { - errMsg = ec.message(); +LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path, + size_t map_size, off_t offset, + TargetOptions options, + std::string &errMsg) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset); + if (std::error_code EC = BufferOrErr.getError()) { + errMsg = EC.message(); return nullptr; } - return makeLTOModule(buffer.release(), options, errMsg); + return makeLTOModule(std::move(BufferOrErr.get()), options, errMsg); } -LTOModule *LTOModule::makeLTOModule(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, StringRef path) { +LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length, + TargetOptions options, + std::string &errMsg, StringRef path) { std::unique_ptr<MemoryBuffer> buffer(makeBuffer(mem, length, path)); if (!buffer) return nullptr; - return makeLTOModule(buffer.release(), options, errMsg); + return makeLTOModule(std::move(buffer), options, errMsg); } -LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, +LTOModule *LTOModule::makeLTOModule(std::unique_ptr<MemoryBuffer> Buffer, TargetOptions options, std::string &errMsg) { - // parse bitcode buffer - ErrorOr<Module *> ModuleOrErr = - getLazyBitcodeModule(buffer, getGlobalContext()); - if (error_code EC = ModuleOrErr.getError()) { + ErrorOr<Module *> MOrErr = + getLazyBitcodeModule(Buffer.get(), getGlobalContext()); + if (std::error_code EC = MOrErr.getError()) { errMsg = EC.message(); - delete buffer; return nullptr; } - std::unique_ptr<Module> m(ModuleOrErr.get()); + std::unique_ptr<Module> M(MOrErr.get()); - std::string TripleStr = m->getTargetTriple(); + std::string TripleStr = M->getTargetTriple(); if (TripleStr.empty()) TripleStr = sys::getDefaultTargetTriple(); llvm::Triple Triple(TripleStr); @@ -175,18 +145,13 @@ LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr, options); - m->materializeAllPermanently(); + M->materializeAllPermanently(true); + M->setDataLayout(target->getDataLayout()); - LTOModule *Ret = new LTOModule(m.release(), target); + std::unique_ptr<object::IRObjectFile> IRObj( + new 
object::IRObjectFile(std::move(Buffer), std::move(M))); - // We need a MCContext set up in order to get mangled names of private - // symbols. It is a bit odd that we need to report uses and definitions - // of private symbols, but it does look like ld64 expects to be informed - // of at least the ones with an 'l' prefix. - MCContext &Context = Ret->_context; - const TargetLoweringObjectFile &TLOF = - target->getTargetLowering()->getObjFileLowering(); - const_cast<TargetLoweringObjectFile &>(TLOF).Initialize(Context, *target); + LTOModule *Ret = new LTOModule(std::move(IRObj), target); if (Ret->parseSymbols(errMsg)) { delete Ret; @@ -305,10 +270,20 @@ void LTOModule::addObjCClassRef(const GlobalVariable *clgv) { entry.setValue(info); } -/// addDefinedDataSymbol - Add a data symbol as defined to the list. -void LTOModule::addDefinedDataSymbol(const GlobalValue *v) { +void LTOModule::addDefinedDataSymbol(const object::BasicSymbolRef &Sym) { + SmallString<64> Buffer; + { + raw_svector_ostream OS(Buffer); + Sym.printName(OS); + } + + const GlobalValue *V = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); + addDefinedDataSymbol(Buffer.c_str(), V); +} + +void LTOModule::addDefinedDataSymbol(const char *Name, const GlobalValue *v) { // Add to list of defined symbols. - addDefinedSymbol(v, false); + addDefinedSymbol(Name, v, false); if (!v->hasSection() /* || !isTargetDarwin */) return; @@ -334,31 +309,43 @@ void LTOModule::addDefinedDataSymbol(const GlobalValue *v) { // from the ObjC data structures generated by the front end. // special case if this data blob is an ObjC class definition - if (v->getSection().compare(0, 15, "__OBJC,__class,") == 0) { + std::string Section = v->getSection(); + if (Section.compare(0, 15, "__OBJC,__class,") == 0) { if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { addObjCClass(gv); } } // special case if this data blob is an ObjC category definition - else if (v->getSection().compare(0, 18, "__OBJC,__category,") == 0) { + else if (Section.compare(0, 18, "__OBJC,__category,") == 0) { if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { addObjCCategory(gv); } } // special case if this data blob is the list of referenced classes - else if (v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0) { + else if (Section.compare(0, 18, "__OBJC,__cls_refs,") == 0) { if (const GlobalVariable *gv = dyn_cast<GlobalVariable>(v)) { addObjCClassRef(gv); } } } -/// addDefinedFunctionSymbol - Add a function symbol as defined to the list. -void LTOModule::addDefinedFunctionSymbol(const Function *f) { +void LTOModule::addDefinedFunctionSymbol(const object::BasicSymbolRef &Sym) { + SmallString<64> Buffer; + { + raw_svector_ostream OS(Buffer); + Sym.printName(OS); + } + + const Function *F = + cast<Function>(IRFile->getSymbolGV(Sym.getRawDataRefImpl())); + addDefinedFunctionSymbol(Buffer.c_str(), F); +} + +void LTOModule::addDefinedFunctionSymbol(const char *Name, const Function *F) { // add to list of defined symbols - addDefinedSymbol(f, true); + addDefinedSymbol(Name, F, true); } static bool canBeHidden(const GlobalValue *GV) { @@ -385,16 +372,8 @@ static bool canBeHidden(const GlobalValue *GV) { return !GS.IsCompared; } -/// addDefinedSymbol - Add a defined symbol to the list. 
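
The new symbol-handling code repeats one pattern for recovering a symbol's mangled name; isolated here as a sketch, with Sym an object::BasicSymbolRef as in the surrounding functions:

    // Render the name into a stack buffer; the inner scope ends the stream's
    // lifetime so the buffer is flushed before c_str() is taken.
    SmallString<64> Buffer;
    {
      raw_svector_ostream OS(Buffer);
      Sym.printName(OS);
    }
    const char *Name = Buffer.c_str();
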
-void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) { - // ignore all llvm.* symbols - if (def->getName().startswith("llvm.")) - return; - - // string is owned by _defines - SmallString<64> Buffer; - _target->getNameWithPrefix(Buffer, def, _mangler); - +void LTOModule::addDefinedSymbol(const char *Name, const GlobalValue *def, + bool isFunction) { // set alignment part log2() can have rounding errors uint32_t align = def->getAlignment(); uint32_t attr = align ? countTrailingZeros(align) : 0; @@ -431,14 +410,14 @@ void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) { else attr |= LTO_SYMBOL_SCOPE_DEFAULT; - StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer); + StringSet::value_type &entry = _defines.GetOrCreateValue(Name); entry.setValue(1); // fill information structure NameAndAttributes info; - StringRef Name = entry.getKey(); - info.name = Name.data(); - assert(info.name[Name.size()] == '\0'); + StringRef NameRef = entry.getKey(); + info.name = NameRef.data(); + assert(info.name[NameRef.size()] == '\0'); info.attributes = attr; info.isFunction = isFunction; info.symbol = def; @@ -483,9 +462,9 @@ void LTOModule::addAsmGlobalSymbol(const char *name, } if (info.isFunction) - addDefinedFunctionSymbol(cast<Function>(info.symbol)); + addDefinedFunctionSymbol(info.name, cast<Function>(info.symbol)); else - addDefinedDataSymbol(info.symbol); + addDefinedDataSymbol(info.name, info.symbol); _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK; _symbols.back().attributes |= scope; @@ -514,20 +493,14 @@ void LTOModule::addAsmGlobalSymbolUndef(const char *name) { entry.setValue(info); } -/// addPotentialUndefinedSymbol - Add a symbol which isn't defined just yet to a -/// list to be resolved later. -void -LTOModule::addPotentialUndefinedSymbol(const GlobalValue *decl, bool isFunc) { - // ignore all llvm.* symbols - if (decl->getName().startswith("llvm.")) - return; - - // ignore all aliases - if (isa<GlobalAlias>(decl)) - return; - +/// Add a symbol which isn't defined just yet to a list to be resolved later. 
+void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym, + bool isFunc) { SmallString<64> name; - _target->getNameWithPrefix(name, decl, _mangler); + { + raw_svector_ostream OS(name); + Sym.printName(OS); + } StringMap<NameAndAttributes>::value_type &entry = _undefines.GetOrCreateValue(name); @@ -540,6 +513,8 @@ LTOModule::addPotentialUndefinedSymbol(const GlobalValue *decl, bool isFunc) { info.name = entry.getKey().data(); + const GlobalValue *decl = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); + if (decl->hasExternalWeakLinkage()) info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF; else @@ -551,259 +526,54 @@ LTOModule::addPotentialUndefinedSymbol(const GlobalValue *decl, bool isFunc) { entry.setValue(info); } -namespace { - - class RecordStreamer : public MCStreamer { - public: - enum State { NeverSeen, Global, Defined, DefinedGlobal, Used }; - - private: - StringMap<State> Symbols; - - void markDefined(const MCSymbol &Symbol) { - State &S = Symbols[Symbol.getName()]; - switch (S) { - case DefinedGlobal: - case Global: - S = DefinedGlobal; - break; - case NeverSeen: - case Defined: - case Used: - S = Defined; - break; - } - } - void markGlobal(const MCSymbol &Symbol) { - State &S = Symbols[Symbol.getName()]; - switch (S) { - case DefinedGlobal: - case Defined: - S = DefinedGlobal; - break; - - case NeverSeen: - case Global: - case Used: - S = Global; - break; - } - } - void markUsed(const MCSymbol &Symbol) { - State &S = Symbols[Symbol.getName()]; - switch (S) { - case DefinedGlobal: - case Defined: - case Global: - break; - - case NeverSeen: - case Used: - S = Used; - break; - } - } - - // FIXME: mostly copied for the obj streamer. - void AddValueSymbols(const MCExpr *Value) { - switch (Value->getKind()) { - case MCExpr::Target: - // FIXME: What should we do in here? - break; - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbols(BE->getLHS()); - AddValueSymbols(BE->getRHS()); - break; - } - - case MCExpr::SymbolRef: - markUsed(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr()); - break; +/// parseSymbols - Parse the symbols from the module and module-level ASM and add +/// them to either the defined or undefined lists. 
+bool LTOModule::parseSymbols(std::string &errMsg) { + for (auto &Sym : IRFile->symbols()) { + const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); + uint32_t Flags = Sym.getFlags(); + if (Flags & object::BasicSymbolRef::SF_FormatSpecific) + continue; + + bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined; + + if (!GV) { + SmallString<64> Buffer; + { + raw_svector_ostream OS(Buffer); + Sym.printName(OS); } + const char *Name = Buffer.c_str(); + + if (IsUndefined) + addAsmGlobalSymbolUndef(Name); + else if (Flags & object::BasicSymbolRef::SF_Global) + addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT); + else + addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL); + continue; } - public: - typedef StringMap<State>::const_iterator const_iterator; - - const_iterator begin() { - return Symbols.begin(); + auto *F = dyn_cast<Function>(GV); + if (IsUndefined) { + addPotentialUndefinedSymbol(Sym, F != nullptr); + continue; } - const_iterator end() { - return Symbols.end(); + if (F) { + addDefinedFunctionSymbol(Sym); + continue; } - RecordStreamer(MCContext &Context) : MCStreamer(Context) {} - - void EmitInstruction(const MCInst &Inst, - const MCSubtargetInfo &STI) override { - // Scan for values. - for (unsigned i = Inst.getNumOperands(); i--; ) - if (Inst.getOperand(i).isExpr()) - AddValueSymbols(Inst.getOperand(i).getExpr()); - } - void EmitLabel(MCSymbol *Symbol) override { - Symbol->setSection(*getCurrentSection().first); - markDefined(*Symbol); - } - void EmitDebugLabel(MCSymbol *Symbol) override { - EmitLabel(Symbol); - } - void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override { - // FIXME: should we handle aliases? - markDefined(*Symbol); - AddValueSymbols(Value); - } - bool EmitSymbolAttribute(MCSymbol *Symbol, - MCSymbolAttr Attribute) override { - if (Attribute == MCSA_Global) - markGlobal(*Symbol); - return true; + if (isa<GlobalVariable>(GV)) { + addDefinedDataSymbol(Sym); + continue; } - void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, - uint64_t Size , unsigned ByteAlignment) override { - markDefined(*Symbol); - } - void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) override { - markDefined(*Symbol); - } - - void EmitBundleAlignMode(unsigned AlignPow2) override {} - void EmitBundleLock(bool AlignToEnd) override {} - void EmitBundleUnlock() override {} - - // Noop calls. 
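
The classification order inside the new parseSymbols above, condensed into an annotated sketch; flag names come from object::BasicSymbolRef, and GV is the GlobalValue behind the symbol, null for inline-asm symbols:

    bool IsUndefined = Flags & object::BasicSymbolRef::SF_Undefined;
    if (Flags & object::BasicSymbolRef::SF_FormatSpecific)
      continue;                                  // not a visible symbol at all
    if (!GV) {                                   // module-level asm symbol
      if (IsUndefined)
        addAsmGlobalSymbolUndef(Name);
      else if (Flags & object::BasicSymbolRef::SF_Global)
        addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_DEFAULT);
      else
        addAsmGlobalSymbol(Name, LTO_SYMBOL_SCOPE_INTERNAL);
    } else if (IsUndefined)                      // declared but not defined here
      addPotentialUndefinedSymbol(Sym, isa<Function>(GV));
    else if (isa<Function>(GV))
      addDefinedFunctionSymbol(Sym);
    else                                         // GlobalVariables and aliases
      addDefinedDataSymbol(Sym);
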
- void ChangeSection(const MCSection *Section, - const MCExpr *Subsection) override {} - void EmitAssemblerFlag(MCAssemblerFlag Flag) override {} - void EmitThumbFunc(MCSymbol *Func) override {} - void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override {} - void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override {} - void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {} - void EmitCOFFSymbolStorageClass(int StorageClass) override {} - void EmitCOFFSymbolType(int Type) override {} - void EndCOFFSymbolDef() override {} - void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override {} - void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) override {} - void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, - uint64_t Size, unsigned ByteAlignment) override {} - void EmitBytes(StringRef Data) override {} - void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc) override {} - void EmitULEB128Value(const MCExpr *Value) override {} - void EmitSLEB128Value(const MCExpr *Value) override {} - void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, - unsigned ValueSize, - unsigned MaxBytesToEmit) override {} - void EmitCodeAlignment(unsigned ByteAlignment, - unsigned MaxBytesToEmit) override {} - bool EmitValueToOffset(const MCExpr *Offset, - unsigned char Value) override { return false; } - void EmitFileDirective(StringRef Filename) override {} - void FinishImpl() override {} - void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override { - RecordProcEnd(Frame); - } - }; -} // end anonymous namespace - -/// addAsmGlobalSymbols - Add global symbols from module-level ASM to the -/// defined or undefined lists. -bool LTOModule::addAsmGlobalSymbols(std::string &errMsg) { - const std::string &inlineAsm = _module->getModuleInlineAsm(); - if (inlineAsm.empty()) - return false; - - std::unique_ptr<RecordStreamer> Streamer(new RecordStreamer(_context)); - MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(inlineAsm); - SourceMgr SrcMgr; - SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); - std::unique_ptr<MCAsmParser> Parser( - createMCAsmParser(SrcMgr, _context, *Streamer, *_target->getMCAsmInfo())); - const Target &T = _target->getTarget(); - std::unique_ptr<MCInstrInfo> MCII(T.createMCInstrInfo()); - std::unique_ptr<MCSubtargetInfo> STI(T.createMCSubtargetInfo( - _target->getTargetTriple(), _target->getTargetCPU(), - _target->getTargetFeatureString())); - std::unique_ptr<MCTargetAsmParser> TAP( - T.createMCAsmParser(*STI, *Parser.get(), *MCII, - _target->Options.MCOptions)); - if (!TAP) { - errMsg = "target " + std::string(T.getName()) + - " does not define AsmParser."; - return true; - } - - Parser->setTargetParser(*TAP); - if (Parser->Run(false)) - return true; - for (RecordStreamer::const_iterator i = Streamer->begin(), - e = Streamer->end(); i != e; ++i) { - StringRef Key = i->first(); - RecordStreamer::State Value = i->second; - if (Value == RecordStreamer::DefinedGlobal) - addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_DEFAULT); - else if (Value == RecordStreamer::Defined) - addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_INTERNAL); - else if (Value == RecordStreamer::Global || - Value == RecordStreamer::Used) - addAsmGlobalSymbolUndef(Key.data()); + assert(isa<GlobalAlias>(GV)); + addDefinedDataSymbol(Sym); } - return false; -} - -/// isDeclaration - Return 'true' if the global value is a declaration. 
-static bool isDeclaration(const GlobalValue &V) { - if (V.hasAvailableExternallyLinkage()) - return true; - - if (V.isMaterializable()) - return false; - - return V.isDeclaration(); -} - -/// parseSymbols - Parse the symbols from the module and model-level ASM and add -/// them to either the defined or undefined lists. -bool LTOModule::parseSymbols(std::string &errMsg) { - // add functions - for (Module::iterator f = _module->begin(), e = _module->end(); f != e; ++f) { - if (isDeclaration(*f)) - addPotentialUndefinedSymbol(f, true); - else - addDefinedFunctionSymbol(f); - } - - // add data - for (Module::global_iterator v = _module->global_begin(), - e = _module->global_end(); v != e; ++v) { - if (isDeclaration(*v)) - addPotentialUndefinedSymbol(v, false); - else - addDefinedDataSymbol(v); - } - - // add asm globals - if (addAsmGlobalSymbols(errMsg)) - return true; - - // add aliases - for (const auto &Alias : _module->aliases()) - addDefinedDataSymbol(&Alias); - // make symbols for all undefines for (StringMap<NameAndAttributes>::iterator u =_undefines.begin(), e = _undefines.end(); u != e; ++u) { @@ -820,7 +590,7 @@ bool LTOModule::parseSymbols(std::string &errMsg) { /// parseMetadata - Parse metadata from the module void LTOModule::parseMetadata() { // Linker Options - if (Value *Val = _module->getModuleFlag("Linker Options")) { + if (Value *Val = getModule().getModuleFlag("Linker Options")) { MDNode *LinkerOptions = cast<MDNode>(Val); for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { MDNode *MDOptions = cast<MDNode>(LinkerOptions->getOperand(i)); diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 45f2d4e..5bb2862 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -24,6 +24,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" #include <cctype> +#include <tuple> using namespace llvm; @@ -389,8 +390,6 @@ namespace { /// actually need, but this allows us to reuse the ValueMapper code. ValueToValueMapTy ValueMap; - std::vector<std::pair<GlobalValue *, GlobalAlias *>> ReplaceWithAlias; - struct AppendingVarInfo { GlobalVariable *NewGV; // New aggregate global in dest module. Constant *DstInit; // Old initializer from dest module. @@ -428,6 +427,18 @@ namespace { return true; } + bool getComdatLeader(Module *M, StringRef ComdatName, + const GlobalVariable *&GVar); + bool computeResultingSelectionKind(StringRef ComdatName, + Comdat::SelectionKind Src, + Comdat::SelectionKind Dst, + Comdat::SelectionKind &Result, + bool &LinkFromSrc); + std::map<const Comdat *, std::pair<Comdat::SelectionKind, bool>> + ComdatsChosen; + bool getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &SK, + bool &LinkFromSrc); + /// getLinkageResult - This analyzes the two global values and determines /// what the result will look like in the destination module. bool getLinkageResult(GlobalValue *Dest, const GlobalValue *Src, @@ -536,6 +547,115 @@ Value *ValueMaterializerTy::materializeValueFor(Value *V) { return DF; } +bool ModuleLinker::getComdatLeader(Module *M, StringRef ComdatName, + const GlobalVariable *&GVar) { + const GlobalValue *GVal = M->getNamedValue(ComdatName); + if (const auto *GA = dyn_cast_or_null<GlobalAlias>(GVal)) { + GVal = GA->getBaseObject(); + if (!GVal) + // We cannot resolve the size of the aliasee yet. 
+ return emitError("Linking COMDATs named '" + ComdatName + + "': COMDAT key involves incomputable alias size."); + } + + GVar = dyn_cast_or_null<GlobalVariable>(GVal); + if (!GVar) + return emitError( + "Linking COMDATs named '" + ComdatName + + "': GlobalVariable required for data dependent selection!"); + + return false; +} + +bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, + Comdat::SelectionKind Src, + Comdat::SelectionKind Dst, + Comdat::SelectionKind &Result, + bool &LinkFromSrc) { + // The ability to mix Comdat::SelectionKind::Any with + // Comdat::SelectionKind::Largest is a behavior that comes from COFF. + bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any || + Dst == Comdat::SelectionKind::Largest; + bool SrcAnyOrLargest = Src == Comdat::SelectionKind::Any || + Src == Comdat::SelectionKind::Largest; + if (DstAnyOrLargest && SrcAnyOrLargest) { + if (Dst == Comdat::SelectionKind::Largest || + Src == Comdat::SelectionKind::Largest) + Result = Comdat::SelectionKind::Largest; + else + Result = Comdat::SelectionKind::Any; + } else if (Src == Dst) { + Result = Dst; + } else { + return emitError("Linking COMDATs named '" + ComdatName + + "': invalid selection kinds!"); + } + + switch (Result) { + case Comdat::SelectionKind::Any: + // Go with Dst. + LinkFromSrc = false; + break; + case Comdat::SelectionKind::NoDuplicates: + return emitError("Linking COMDATs named '" + ComdatName + + "': noduplicates has been violated!"); + case Comdat::SelectionKind::ExactMatch: + case Comdat::SelectionKind::Largest: + case Comdat::SelectionKind::SameSize: { + const GlobalVariable *DstGV; + const GlobalVariable *SrcGV; + if (getComdatLeader(DstM, ComdatName, DstGV) || + getComdatLeader(SrcM, ComdatName, SrcGV)) + return true; + + const DataLayout *DstDL = DstM->getDataLayout(); + const DataLayout *SrcDL = SrcM->getDataLayout(); + if (!DstDL || !SrcDL) { + return emitError( + "Linking COMDATs named '" + ComdatName + + "': can't do size dependent selection without DataLayout!"); + } + uint64_t DstSize = + DstDL->getTypeAllocSize(DstGV->getType()->getPointerElementType()); + uint64_t SrcSize = + SrcDL->getTypeAllocSize(SrcGV->getType()->getPointerElementType()); + if (Result == Comdat::SelectionKind::ExactMatch) { + if (SrcGV->getInitializer() != DstGV->getInitializer()) + return emitError("Linking COMDATs named '" + ComdatName + + "': ExactMatch violated!"); + LinkFromSrc = false; + } else if (Result == Comdat::SelectionKind::Largest) { + LinkFromSrc = SrcSize > DstSize; + } else if (Result == Comdat::SelectionKind::SameSize) { + if (SrcSize != DstSize) + return emitError("Linking COMDATs named '" + ComdatName + + "': SameSize violated!"); + LinkFromSrc = false; + } else { + llvm_unreachable("unknown selection kind"); + } + break; + } + } + + return false; +} + +bool ModuleLinker::getComdatResult(const Comdat *SrcC, + Comdat::SelectionKind &Result, + bool &LinkFromSrc) { + StringRef ComdatName = SrcC->getName(); + Module::ComdatSymTabType &ComdatSymTab = DstM->getComdatSymbolTable(); + Module::ComdatSymTabType::iterator DstCI = ComdatSymTab.find(ComdatName); + if (DstCI != ComdatSymTab.end()) { + const Comdat *DstC = &DstCI->second; + Comdat::SelectionKind SSK = SrcC->getSelectionKind(); + Comdat::SelectionKind DSK = DstC->getSelectionKind(); + if (computeResultingSelectionKind(ComdatName, SSK, DSK, Result, LinkFromSrc)) + return true; + } + return false; +} /// getLinkageResult - This analyzes the two global values and determines what /// the result will look like in the 
destination module. In particular, it @@ -723,7 +843,7 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, return emitError( "Appending variables with different unnamed_addr need to be linked!"); - if (DstGV->getSection() != SrcGV->getSection()) + if (StringRef(DstGV->getSection()) != SrcGV->getSection()) return emitError( "Appending variables with different section name need to be linked!"); @@ -766,34 +886,47 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) { llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility; bool HasUnnamedAddr = SGV->hasUnnamedAddr(); + bool LinkFromSrc = false; + Comdat *DC = nullptr; + if (const Comdat *SC = SGV->getComdat()) { + Comdat::SelectionKind SK; + std::tie(SK, LinkFromSrc) = ComdatsChosen[SC]; + DC = DstM->getOrInsertComdat(SC->getName()); + DC->setSelectionKind(SK); + } + if (DGV) { - // Concatenation of appending linkage variables is magic and handled later. - if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage()) - return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV); - - // Determine whether linkage of these two globals follows the source - // module's definition or the destination module's definition. - GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; - GlobalValue::VisibilityTypes NV; - bool LinkFromSrc = false; - if (getLinkageResult(DGV, SGV, NewLinkage, NV, LinkFromSrc)) - return true; - NewVisibility = NV; - HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr(); + if (!DC) { + // Concatenation of appending linkage variables is magic and handled later. + if (DGV->hasAppendingLinkage() || SGV->hasAppendingLinkage()) + return linkAppendingVarProto(cast<GlobalVariable>(DGV), SGV); + + // Determine whether linkage of these two globals follows the source + // module's definition or the destination module's definition. + GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; + GlobalValue::VisibilityTypes NV; + if (getLinkageResult(DGV, SGV, NewLinkage, NV, LinkFromSrc)) + return true; + NewVisibility = NV; + HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr(); + + // If we're not linking from the source, then keep the definition that we + // have. + if (!LinkFromSrc) { + // Special case for const propagation. + if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV)) + if (DGVar->isDeclaration() && SGV->isConstant() && + !DGVar->isConstant()) + DGVar->setConstant(true); + + // Set calculated linkage, visibility and unnamed_addr. + DGV->setLinkage(NewLinkage); + DGV->setVisibility(*NewVisibility); + DGV->setUnnamedAddr(HasUnnamedAddr); + } + } - // If we're not linking from the source, then keep the definition that we - // have. if (!LinkFromSrc) { - // Special case for const propagation. - if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV)) - if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant()) - DGVar->setConstant(true); - - // Set calculated linkage, visibility and unnamed_addr. - DGV->setLinkage(NewLinkage); - DGV->setVisibility(*NewVisibility); - DGV->setUnnamedAddr(HasUnnamedAddr); - // Make sure to remember this mapping. ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType())); @@ -805,6 +938,12 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) { } } + // If the Comdat this variable was inside of wasn't selected, skip it. 
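
Each of linkGlobalProto, linkFunctionProto, and linkAliasProto now opens with the same comdat preamble; shown once here as a sketch, with SGV standing for whichever source global is being linked:

    // Look up the selection decision precomputed in run(), then mirror the
    // comdat into the destination module with the chosen kind.
    bool LinkFromSrc = false;
    Comdat *DC = nullptr;
    if (const Comdat *SC = SGV->getComdat()) {
      Comdat::SelectionKind SK;
      std::tie(SK, LinkFromSrc) = ComdatsChosen[SC];
      DC = DstM->getOrInsertComdat(SC->getName());
      DC->setSelectionKind(SK);
    }
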
+ if (DC && !DGV && !LinkFromSrc) { + DoNotLinkFromSource.insert(SGV); + return false; + } + // No linking to be performed or linking from the source: simply create an // identical version of the symbol over in the dest module... the // initializer will be filled in later by LinkGlobalInits. @@ -820,6 +959,9 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) { NewDGV->setVisibility(*NewVisibility); NewDGV->setUnnamedAddr(HasUnnamedAddr); + if (DC) + NewDGV->setComdat(DC); + if (DGV) { DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType())); DGV->eraseFromParent(); @@ -837,21 +979,33 @@ bool ModuleLinker::linkFunctionProto(Function *SF) { llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility; bool HasUnnamedAddr = SF->hasUnnamedAddr(); + bool LinkFromSrc = false; + Comdat *DC = nullptr; + if (const Comdat *SC = SF->getComdat()) { + Comdat::SelectionKind SK; + std::tie(SK, LinkFromSrc) = ComdatsChosen[SC]; + DC = DstM->getOrInsertComdat(SC->getName()); + DC->setSelectionKind(SK); + } + if (DGV) { - GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; - bool LinkFromSrc = false; - GlobalValue::VisibilityTypes NV; - if (getLinkageResult(DGV, SF, NewLinkage, NV, LinkFromSrc)) - return true; - NewVisibility = NV; - HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr(); + if (!DC) { + GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; + GlobalValue::VisibilityTypes NV; + if (getLinkageResult(DGV, SF, NewLinkage, NV, LinkFromSrc)) + return true; + NewVisibility = NV; + HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr(); + + if (!LinkFromSrc) { + // Set calculated linkage + DGV->setLinkage(NewLinkage); + DGV->setVisibility(*NewVisibility); + DGV->setUnnamedAddr(HasUnnamedAddr); + } + } if (!LinkFromSrc) { - // Set calculated linkage - DGV->setLinkage(NewLinkage); - DGV->setVisibility(*NewVisibility); - DGV->setUnnamedAddr(HasUnnamedAddr); - // Make sure to remember this mapping. ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType())); @@ -871,6 +1025,12 @@ bool ModuleLinker::linkFunctionProto(Function *SF) { return false; } + // If the Comdat this function was inside of wasn't selected, skip it. + if (DC && !DGV && !LinkFromSrc) { + DoNotLinkFromSource.insert(SF); + return false; + } + // If there is no linkage to be performed or we are linking from the source, // bring SF over. Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()), @@ -880,6 +1040,9 @@ bool ModuleLinker::linkFunctionProto(Function *SF) { NewDF->setVisibility(*NewVisibility); NewDF->setUnnamedAddr(HasUnnamedAddr); + if (DC) + NewDF->setComdat(DC); + if (DGV) { // Any uses of DF need to change to NewDF, with cast. 
DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType())); @@ -895,20 +1058,35 @@ bool ModuleLinker::linkFunctionProto(Function *SF) { bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) { GlobalValue *DGV = getLinkedToGlobal(SGA); llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility; + bool HasUnnamedAddr = SGA->hasUnnamedAddr(); + + bool LinkFromSrc = false; + Comdat *DC = nullptr; + if (const Comdat *SC = SGA->getComdat()) { + Comdat::SelectionKind SK; + std::tie(SK, LinkFromSrc) = ComdatsChosen[SC]; + DC = DstM->getOrInsertComdat(SC->getName()); + DC->setSelectionKind(SK); + } if (DGV) { - GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; - GlobalValue::VisibilityTypes NV; - bool LinkFromSrc = false; - if (getLinkageResult(DGV, SGA, NewLinkage, NV, LinkFromSrc)) - return true; - NewVisibility = NV; + if (!DC) { + GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage; + GlobalValue::VisibilityTypes NV; + if (getLinkageResult(DGV, SGA, NewLinkage, NV, LinkFromSrc)) + return true; + NewVisibility = NV; + HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr(); + + if (!LinkFromSrc) { + // Set calculated linkage. + DGV->setLinkage(NewLinkage); + DGV->setVisibility(*NewVisibility); + DGV->setUnnamedAddr(HasUnnamedAddr); + } + } if (!LinkFromSrc) { - // Set calculated linkage. - DGV->setLinkage(NewLinkage); - DGV->setVisibility(*NewVisibility); - // Make sure to remember this mapping. ValueMap[SGA] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGA->getType())); @@ -919,6 +1097,12 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) { } } + // If the Comdat this alias was inside of wasn't selected, skip it. + if (DC && !DGV && !LinkFromSrc) { + DoNotLinkFromSource.insert(SGA); + return false; + } + // If there is no linkage to be performed or we're linking from the source, // bring over SGA. auto *PTy = cast<PointerType>(TypeMap.get(SGA->getType())); @@ -928,9 +1112,13 @@ bool ModuleLinker::linkAliasProto(GlobalAlias *SGA) { copyGVAttributes(NewDA, SGA); if (NewVisibility) NewDA->setVisibility(*NewVisibility); + NewDA->setUnnamedAddr(HasUnnamedAddr); - if (DGV) - ReplaceWithAlias.push_back(std::make_pair(DGV, NewDA)); + if (DGV) { + // Any uses of DGV need to change to NewDA, with cast. + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType())); + DGV->eraseFromParent(); + } ValueMap[SGA] = NewDA; return false; @@ -1016,19 +1204,6 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) { } -static GlobalObject &getGlobalObjectInExpr(Constant &C) { - auto *GO = dyn_cast<GlobalObject>(&C); - if (GO) - return *GO; - auto *GA = dyn_cast<GlobalAlias>(&C); - if (GA) - return *GA->getAliasee(); - auto &CE = cast<ConstantExpr>(C); - assert(CE.getOpcode() == Instruction::BitCast || - CE.getOpcode() == Instruction::AddrSpaceCast); - return getGlobalObjectInExpr(*CE.getOperand(0)); -} - /// linkAliasBodies - Insert all of the aliases in Src into the Dest module. void ModuleLinker::linkAliasBodies() { for (Module::alias_iterator I = SrcM->alias_begin(), E = SrcM->alias_end(); @@ -1039,24 +1214,8 @@ void ModuleLinker::linkAliasBodies() { GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]); Constant *Val = MapValue(Aliasee, ValueMap, RF_None, &TypeMap, &ValMaterializer); - DA->setAliasee(&getGlobalObjectInExpr(*Val)); - } - } - - // Any uses of DGV need to change to NewDA, with cast. 
- for (auto &Pair : ReplaceWithAlias) { - GlobalValue *DGV = Pair.first; - GlobalAlias *NewDA = Pair.second; - - for (auto *User : DGV->users()) { - if (auto *GA = dyn_cast<GlobalAlias>(User)) { - if (GA == NewDA) - report_fatal_error("Linking these modules creates an alias cycle."); - } + DA->setAliasee(Val); } - - DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDA, DGV->getType())); - DGV->eraseFromParent(); } } @@ -1165,7 +1324,7 @@ bool ModuleLinker::linkModuleFlagsMetadata() { // Perform the merge for standard behavior types. switch (SrcBehaviorValue) { case Module::Require: - case Module::Override: assert(0 && "not possible"); break; + case Module::Override: llvm_unreachable("not possible"); case Module::Error: { // Emit an error if the values differ. if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { @@ -1278,6 +1437,18 @@ bool ModuleLinker::run() { // Loop over all of the linked values to compute type mappings. computeTypeMapping(); + ComdatsChosen.clear(); + for (const StringMapEntry<llvm::Comdat> &SMEC : SrcM->getComdatSymbolTable()) { + const Comdat &C = SMEC.getValue(); + if (ComdatsChosen.count(&C)) + continue; + Comdat::SelectionKind SK; + bool LinkFromSrc; + if (getComdatResult(&C, SK, LinkFromSrc)) + return true; + ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc); + } + // Insert all of the globals in src into the DstM module... without linking // initializers (which could refer to functions not yet mapped over). for (Module::global_iterator I = SrcM->global_begin(), diff --git a/lib/MC/Android.mk b/lib/MC/Android.mk index 23ad1d3..fd587c4 100644 --- a/lib/MC/Android.mk +++ b/lib/MC/Android.mk @@ -1,6 +1,7 @@ LOCAL_PATH:= $(call my-dir) mc_SRC_FILES := \ + ConstantPools.cpp \ ELFObjectWriter.cpp \ MCAsmBackend.cpp \ MCAsmInfo.cpp \ @@ -9,7 +10,6 @@ mc_SRC_FILES := \ MCAsmInfoELF.cpp \ MCAsmStreamer.cpp \ MCAssembler.cpp \ - MCAtom.cpp \ MCCodeEmitter.cpp \ MCCodeGenInfo.cpp \ MCContext.cpp \ @@ -18,7 +18,6 @@ mc_SRC_FILES := \ MCELF.cpp \ MCELFObjectTargetWriter.cpp \ MCELFStreamer.cpp \ - MCFunction.cpp \ MCExpr.cpp \ MCExternalSymbolizer.cpp \ MCInst.cpp \ @@ -28,13 +27,9 @@ mc_SRC_FILES := \ MCLinkerOptimizationHint.cpp \ MCMachOStreamer.cpp \ MCMachObjectTargetWriter.cpp \ - MCModule.cpp \ - MCModuleYAML.cpp \ MCNullStreamer.cpp \ MCObjectFileInfo.cpp \ - MCObjectDisassembler.cpp \ MCObjectStreamer.cpp \ - MCObjectSymbolizer.cpp \ MCObjectWriter.cpp \ MCRegisterInfo.cpp \ MCRelocationInfo.cpp \ @@ -50,9 +45,11 @@ mc_SRC_FILES := \ MCValue.cpp \ MCWin64EH.cpp \ MachObjectWriter.cpp \ + StringTableBuilder.cpp \ SubtargetFeature.cpp \ WinCOFFObjectWriter.cpp \ WinCOFFStreamer.cpp \ + YAML.cpp # For the host # ===================================================== diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt index 6a384c1..330519e 100644 --- a/lib/MC/CMakeLists.txt +++ b/lib/MC/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMMC + ConstantPools.cpp ELFObjectWriter.cpp MCAsmBackend.cpp MCAsmInfo.cpp @@ -7,7 +8,6 @@ add_llvm_library(LLVMMC MCAsmInfoELF.cpp MCAsmStreamer.cpp MCAssembler.cpp - MCAtom.cpp MCCodeEmitter.cpp MCCodeGenInfo.cpp MCContext.cpp @@ -16,7 +16,6 @@ add_llvm_library(LLVMMC MCELF.cpp MCELFObjectTargetWriter.cpp MCELFStreamer.cpp - MCFunction.cpp MCExpr.cpp MCExternalSymbolizer.cpp MCInst.cpp @@ -26,13 +25,9 @@ add_llvm_library(LLVMMC MCLinkerOptimizationHint.cpp MCMachOStreamer.cpp MCMachObjectTargetWriter.cpp - MCModule.cpp - MCModuleYAML.cpp MCNullStreamer.cpp MCObjectFileInfo.cpp - MCObjectDisassembler.cpp MCObjectStreamer.cpp - 
MCObjectSymbolizer.cpp MCObjectWriter.cpp MCRegisterInfo.cpp MCRelocationInfo.cpp @@ -48,10 +43,13 @@ add_llvm_library(LLVMMC MCValue.cpp MCWin64EH.cpp MachObjectWriter.cpp + StringTableBuilder.cpp SubtargetFeature.cpp WinCOFFObjectWriter.cpp WinCOFFStreamer.cpp + YAML.cpp ) +add_subdirectory(MCAnalysis) add_subdirectory(MCParser) add_subdirectory(MCDisassembler) diff --git a/lib/MC/ConstantPools.cpp b/lib/MC/ConstantPools.cpp new file mode 100644 index 0000000..f979dad --- /dev/null +++ b/lib/MC/ConstantPools.cpp @@ -0,0 +1,95 @@ +//===- ConstantPools.cpp - ConstantPool class --*- C++ -*---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the ConstantPool and AssemblerConstantPools classes. +// +//===----------------------------------------------------------------------===// +#include "llvm/ADT/MapVector.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/ConstantPools.h" + +using namespace llvm; +// +// ConstantPool implementation +// +// Emit the contents of the constant pool using the provided streamer. +void ConstantPool::emitEntries(MCStreamer &Streamer) { + if (Entries.empty()) + return; + Streamer.EmitCodeAlignment(4); // align to 4-byte address + Streamer.EmitDataRegion(MCDR_DataRegion); + for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + Streamer.EmitLabel(I->first); + Streamer.EmitValue(I->second, 4); + } + Streamer.EmitDataRegion(MCDR_DataRegionEnd); + Entries.clear(); +} + +const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context) { + MCSymbol *CPEntryLabel = Context.CreateTempSymbol(); + + Entries.push_back(std::make_pair(CPEntryLabel, Value)); + return MCSymbolRefExpr::Create(CPEntryLabel, Context); +} + +bool ConstantPool::empty() { return Entries.empty(); } + +// +// AssemblerConstantPools implementation +// +ConstantPool * +AssemblerConstantPools::getConstantPool(const MCSection *Section) { + ConstantPoolMapTy::iterator CP = ConstantPools.find(Section); + if (CP == ConstantPools.end()) + return nullptr; + + return &CP->second; +} + +ConstantPool & +AssemblerConstantPools::getOrCreateConstantPool(const MCSection *Section) { + return ConstantPools[Section]; +} + +static void emitConstantPool(MCStreamer &Streamer, const MCSection *Section, + ConstantPool &CP) { + if (!CP.empty()) { + Streamer.SwitchSection(Section); + CP.emitEntries(Streamer); + } +} + +void AssemblerConstantPools::emitAll(MCStreamer &Streamer) { + // Dump contents of assembler constant pools. 
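
A sketch of how a client streamer might drive the new AssemblerConstantPools class; the trigger points named in the comments are assumptions about the intended use (an ARM-style ldr pseudo-instruction and an ltorg-style flush), not part of this file:

    AssemblerConstantPools Pools;
    // When an instruction needs a pool entry, record it and get a label ref:
    const MCExpr *Ref = Pools.addEntry(Streamer, Value);
    // At an explicit flush point for the current section:
    Pools.emitForCurrentSection(Streamer);
    // At the end of assembly, drain every per-section pool:
    Pools.emitAll(Streamer);
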
+ for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(), + CPE = ConstantPools.end(); + CPI != CPE; ++CPI) { + const MCSection *Section = CPI->first; + ConstantPool &CP = CPI->second; + + emitConstantPool(Streamer, Section, CP); + } +} + +void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) { + const MCSection *Section = Streamer.getCurrentSection().first; + if (ConstantPool *CP = getConstantPool(Section)) { + emitConstantPool(Streamer, Section, *CP); + } +} + +const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer, + const MCExpr *Expr) { + const MCSection *Section = Streamer.getCurrentSection().first; + return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext()); +} diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 0a54627..7fb9fae 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -28,7 +28,7 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/StringTableBuilder.h" +#include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/Compression.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" @@ -1179,7 +1179,7 @@ prependCompressionHeader(uint64_t Size, if (Size <= Magic.size() + sizeof(Size) + CompressedContents.size()) return false; if (sys::IsLittleEndianHost) - Size = sys::SwapByteOrder(Size); + sys::swapByteOrder(Size); CompressedContents.insert(CompressedContents.begin(), Magic.size() + sizeof(Size), 0); std::copy(Magic.begin(), Magic.end(), CompressedContents.begin()); @@ -1565,6 +1565,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, case ELF::SHT_X86_64_UNWIND: case ELF::SHT_MIPS_REGINFO: case ELF::SHT_MIPS_OPTIONS: + case ELF::SHT_MIPS_ABIFLAGS: // Nothing to do. 
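Stepping back to the ConstantPools.cpp file added above: a target streamer defers a literal through addEntry(), which returns a reference to the temporary label that emitEntries() will define when the pool is flushed. A minimal usage sketch (not part of this patch; deferLiteral and the flush points are illustrative):

#include "llvm/MC/ConstantPools.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCStreamer.h"
using namespace llvm;

// Stash a literal in the current section's pool; the returned expression
// refers to the temp label that emitEntries() defines at flush time.
static const MCExpr *deferLiteral(AssemblerConstantPools &Pools,
                                  MCStreamer &Streamer, const MCExpr *Value) {
  return Pools.addEntry(Streamer, Value);
}

// Flush points a target streamer might use:
//   Pools.emitForCurrentSection(Streamer); // e.g. at an .ltorg directive
//   Pools.emitAll(Streamer);               // at the end of assembly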
break; @@ -1574,8 +1575,7 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm, break; default: - assert(0 && "FIXME: sh_type value not supported!"); - break; + llvm_unreachable("FIXME: sh_type value not supported!"); } if (TargetObjectWriter->getEMachine() == ELF::EM_ARM && diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt index f35dbe4..3fcb50b 100644 --- a/lib/MC/LLVMBuild.txt +++ b/lib/MC/LLVMBuild.txt @@ -16,10 +16,10 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = MCDisassembler MCParser +subdirectories = MCAnalysis MCDisassembler MCParser [component_0] type = Library name = MC parent = Libraries -required_libraries = Object Support +required_libraries = Support diff --git a/lib/MC/MCAnalysis/Android.mk b/lib/MC/MCAnalysis/Android.mk new file mode 100644 index 0000000..27f848a --- /dev/null +++ b/lib/MC/MCAnalysis/Android.mk @@ -0,0 +1,37 @@ +LOCAL_PATH:= $(call my-dir) + +mc_analysis_SRC_FILES := \ + MCAtom.cpp \ + MCFunction.cpp \ + MCModule.cpp \ + MCModuleYAML.cpp \ + MCObjectDisassembler.cpp \ + MCObjectSymbolizer.cpp + +# For the host +# ===================================================== +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES) + +LOCAL_MODULE:= libLLVMMCAnalysis + +LOCAL_MODULE_TAGS := optional + +include $(LLVM_HOST_BUILD_MK) +include $(BUILD_HOST_STATIC_LIBRARY) + +# For the device +# ===================================================== +include $(CLEAR_VARS) +ifneq (true,$(DISABLE_LLVM_DEVICE_BUILDS)) + +LOCAL_SRC_FILES := $(mc_analysis_SRC_FILES) + +LOCAL_MODULE:= libLLVMMCAnalysis + +LOCAL_MODULE_TAGS := optional + +include $(LLVM_DEVICE_BUILD_MK) +include $(BUILD_STATIC_LIBRARY) +endif diff --git a/lib/MC/MCAnalysis/CMakeLists.txt b/lib/MC/MCAnalysis/CMakeLists.txt new file mode 100644 index 0000000..81eae2d --- /dev/null +++ b/lib/MC/MCAnalysis/CMakeLists.txt @@ -0,0 +1,8 @@ +add_llvm_library(LLVMMCAnalysis + MCAtom.cpp + MCFunction.cpp + MCModule.cpp + MCModuleYAML.cpp + MCObjectDisassembler.cpp + MCObjectSymbolizer.cpp +) diff --git a/lib/MC/MCAnalysis/LLVMBuild.txt b/lib/MC/MCAnalysis/LLVMBuild.txt new file mode 100644 index 0000000..1b58fec --- /dev/null +++ b/lib/MC/MCAnalysis/LLVMBuild.txt @@ -0,0 +1,5 @@ +[component_0] +type = Library +name = MCAnalysis +parent = Libraries +required_libraries = MC Object Support diff --git a/lib/MC/MCAtom.cpp b/lib/MC/MCAnalysis/MCAtom.cpp index bc353cd..82056ee 100644 --- a/lib/MC/MCAtom.cpp +++ b/lib/MC/MCAnalysis/MCAtom.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAtom.h" -#include "llvm/MC/MCModule.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCModule.h" #include "llvm/Support/ErrorHandling.h" #include <iterator> diff --git a/lib/MC/MCFunction.cpp b/lib/MC/MCAnalysis/MCFunction.cpp index 1ddc250..4e09d1a 100644 --- a/lib/MC/MCFunction.cpp +++ b/lib/MC/MCAnalysis/MCFunction.cpp @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCFunction.h" -#include "llvm/MC/MCAtom.h" -#include "llvm/MC/MCModule.h" +#include "llvm/MC/MCAnalysis/MCFunction.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCModule.h" #include <algorithm> using namespace llvm; diff --git a/lib/MC/MCModule.cpp b/lib/MC/MCAnalysis/MCModule.cpp index 3ed7356..7512299 100644 --- a/lib/MC/MCModule.cpp +++ b/lib/MC/MCAnalysis/MCModule.cpp @@ -7,10 +7,10 @@ // 
//===----------------------------------------------------------------------===// +#include "llvm/MC/MCAnalysis/MCModule.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/MC/MCModule.h" -#include "llvm/MC/MCAtom.h" -#include "llvm/MC/MCFunction.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCFunction.h" #include <algorithm> using namespace llvm; diff --git a/lib/MC/MCModuleYAML.cpp b/lib/MC/MCAnalysis/MCModuleYAML.cpp index f81cb14..876b06d 100644 --- a/lib/MC/MCModuleYAML.cpp +++ b/lib/MC/MCAnalysis/MCModuleYAML.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCModuleYAML.h" +#include "llvm/MC/MCAnalysis/MCModuleYAML.h" #include "llvm/ADT/StringMap.h" -#include "llvm/MC/MCAtom.h" -#include "llvm/MC/MCFunction.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCFunction.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" -#include "llvm/Object/YAML.h" +#include "llvm/MC/YAML.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/MathExtras.h" @@ -102,7 +102,7 @@ struct Atom { uint64_t Size; std::vector<Inst> Insts; - object::yaml::BinaryRef Data; + yaml::BinaryRef Data; }; struct BasicBlock { @@ -453,7 +453,7 @@ StringRef yaml2mcmodule(std::unique_ptr<MCModule> &MCM, StringRef YamlContent, InstrRegInfoHolder IRI(MII, MRI); yaml::Input YIn(YamlContent, (void *)&IRI); YIn >> YAMLModule; - if (error_code ec = YIn.error()) + if (std::error_code ec = YIn.error()) return ec.message(); StringRef err = Parser.parse(YAMLModule); if (!err.empty()) diff --git a/lib/MC/MCObjectDisassembler.cpp b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp index 8a258cb..0f789ff 100644 --- a/lib/MC/MCObjectDisassembler.cpp +++ b/lib/MC/MCAnalysis/MCObjectDisassembler.cpp @@ -13,11 +13,11 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" -#include "llvm/MC/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCAtom.h" +#include "llvm/MC/MCAnalysis/MCFunction.h" +#include "llvm/MC/MCAnalysis/MCModule.h" #include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCFunction.h" #include "llvm/MC/MCInstrAnalysis.h" -#include "llvm/MC/MCModule.h" #include "llvm/MC/MCObjectSymbolizer.h" #include "llvm/Object/MachO.h" #include "llvm/Object/ObjectFile.h" diff --git a/lib/MC/MCObjectSymbolizer.cpp b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp index b149596..b149596 100644 --- a/lib/MC/MCObjectSymbolizer.cpp +++ b/lib/MC/MCAnalysis/MCObjectSymbolizer.cpp diff --git a/lib/MC/MCAnalysis/Makefile b/lib/MC/MCAnalysis/Makefile new file mode 100644 index 0000000..add2dbd --- /dev/null +++ b/lib/MC/MCAnalysis/Makefile @@ -0,0 +1,14 @@ +##===- lib/MC/MCAnalysis/Makefile --------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../..
+LIBRARYNAME = LLVMMCAnalysis +BUILD_ARCHIVE := 1 + +include $(LEVEL)/Makefile.common diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index c0777a6..f8081ef 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -39,7 +39,7 @@ MCAsmInfo::MCAsmInfo() { SeparatorString = ";"; CommentString = "#"; LabelSuffix = ":"; - DebugLabelSuffix = ":"; + UseAssignmentForEHBegin = false; PrivateGlobalPrefix = "L"; LinkerPrivateGlobalPrefix = ""; InlineAsmStart = "APP"; @@ -82,6 +82,7 @@ MCAsmInfo::MCAsmInfo() { HasLEB128 = false; SupportsDebugInformation = false; ExceptionsType = ExceptionHandling::None; + WinEHEncodingType = WinEH::EncodingType::ET_Invalid; DwarfUsesRelocationsAcrossSections = true; DwarfFDESymbolsUseAbsDiff = false; DwarfRegNumForCFI = false; @@ -99,7 +100,7 @@ MCAsmInfo::MCAsmInfo() { // - MCAsmInfoDarwin is handling this case // - Generic_GCC toolchains enable the integrated assembler on a per // architecture basis. - // - The target subclasses for AArch64, ARM, and X86 handle these cases + // - The target subclasses for AArch64, ARM, and X86 handle these cases UseIntegratedAssembler = false; CompressDebugSections = false; diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp index 7f8ae54..6973bbb 100644 --- a/lib/MC/MCAsmStreamer.cpp +++ b/lib/MC/MCAsmStreamer.cpp @@ -120,7 +120,6 @@ public: void EmitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) override; void EmitLabel(MCSymbol *Symbol) override; - void EmitDebugLabel(MCSymbol *Symbol) override; void EmitAssemblerFlag(MCAssemblerFlag Flag) override; void EmitLinkerOptions(ArrayRef<std::string> Options) override; @@ -213,20 +212,20 @@ public: void EmitCFIRegister(int64_t Register1, int64_t Register2) override; void EmitCFIWindowSave() override; - void EmitWin64EHStartProc(const MCSymbol *Symbol) override; - void EmitWin64EHEndProc() override; - void EmitWin64EHStartChained() override; - void EmitWin64EHEndChained() override; - void EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind, - bool Except) override; - void EmitWin64EHHandlerData() override; - void EmitWin64EHPushReg(unsigned Register) override; - void EmitWin64EHSetFrame(unsigned Register, unsigned Offset) override; - void EmitWin64EHAllocStack(unsigned Size) override; - void EmitWin64EHSaveReg(unsigned Register, unsigned Offset) override; - void EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) override; - void EmitWin64EHPushFrame(bool Code) override; - void EmitWin64EHEndProlog() override; + void EmitWinCFIStartProc(const MCSymbol *Symbol) override; + void EmitWinCFIEndProc() override; + void EmitWinCFIStartChained() override; + void EmitWinCFIEndChained() override; + void EmitWinCFIPushReg(unsigned Register) override; + void EmitWinCFISetFrame(unsigned Register, unsigned Offset) override; + void EmitWinCFIAllocStack(unsigned Size) override; + void EmitWinCFISaveReg(unsigned Register, unsigned Offset) override; + void EmitWinCFISaveXMM(unsigned Register, unsigned Offset) override; + void EmitWinCFIPushFrame(bool Code) override; + void EmitWinCFIEndProlog() override; + + void EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, bool Except) override; + void EmitWinEHHandlerData() override; void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; @@ -334,14 +333,6 @@ void MCAsmStreamer::EmitLOHDirective(MCLOHType Kind, const MCLOHArgs &Args) { EmitEOL(); } -void MCAsmStreamer::EmitDebugLabel(MCSymbol *Symbol) { - assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - 
MCStreamer::EmitDebugLabel(Symbol); - - OS << *Symbol << MAI->getDebugLabelSuffix(); - EmitEOL(); -} - void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { switch (Flag) { case MCAF_SyntaxUnified: OS << "\t.syntax unified"; break; @@ -944,10 +935,7 @@ void MCAsmStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { } void MCAsmStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { - // Put a dummy non-null value in Frame.End to mark that this frame has been - // closed. - Frame.End = (MCSymbol *) 1; - + MCStreamer::EmitCFIEndProcImpl(Frame); OS << "\t.cfi_endproc"; EmitEOL(); } @@ -1061,37 +1049,37 @@ void MCAsmStreamer::EmitCFIWindowSave() { EmitEOL(); } -void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) { - MCStreamer::EmitWin64EHStartProc(Symbol); +void MCAsmStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) { + MCStreamer::EmitWinCFIStartProc(Symbol); OS << ".seh_proc " << *Symbol; EmitEOL(); } -void MCAsmStreamer::EmitWin64EHEndProc() { - MCStreamer::EmitWin64EHEndProc(); +void MCAsmStreamer::EmitWinCFIEndProc() { + MCStreamer::EmitWinCFIEndProc(); OS << "\t.seh_endproc"; EmitEOL(); } -void MCAsmStreamer::EmitWin64EHStartChained() { - MCStreamer::EmitWin64EHStartChained(); +void MCAsmStreamer::EmitWinCFIStartChained() { + MCStreamer::EmitWinCFIStartChained(); OS << "\t.seh_startchained"; EmitEOL(); } -void MCAsmStreamer::EmitWin64EHEndChained() { - MCStreamer::EmitWin64EHEndChained(); +void MCAsmStreamer::EmitWinCFIEndChained() { + MCStreamer::EmitWinCFIEndChained(); OS << "\t.seh_endchained"; EmitEOL(); } -void MCAsmStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind, - bool Except) { - MCStreamer::EmitWin64EHHandler(Sym, Unwind, Except); +void MCAsmStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, + bool Except) { + MCStreamer::EmitWinEHHandler(Sym, Unwind, Except); OS << "\t.seh_handler " << *Sym; if (Unwind) @@ -1114,8 +1102,8 @@ static const MCSection *getWin64EHTableSection(StringRef suffix, SectionKind::getDataRel()); } -void MCAsmStreamer::EmitWin64EHHandlerData() { - MCStreamer::EmitWin64EHHandlerData(); +void MCAsmStreamer::EmitWinEHHandlerData() { + MCStreamer::EmitWinEHHandlerData(); // Switch sections. Don't call SwitchSection directly, because that will // cause the section switch to be visible in the emitted assembly. 
@@ -1131,50 +1119,43 @@ void MCAsmStreamer::EmitWin64EHHandlerData() { EmitEOL(); } -void MCAsmStreamer::EmitWin64EHPushReg(unsigned Register) { - MCStreamer::EmitWin64EHPushReg(Register); +void MCAsmStreamer::EmitWinCFIPushReg(unsigned Register) { + MCStreamer::EmitWinCFIPushReg(Register); - OS << "\t.seh_pushreg "; - EmitRegisterName(Register); + OS << "\t.seh_pushreg " << Register; EmitEOL(); } -void MCAsmStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) { - MCStreamer::EmitWin64EHSetFrame(Register, Offset); +void MCAsmStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset) { + MCStreamer::EmitWinCFISetFrame(Register, Offset); - OS << "\t.seh_setframe "; - EmitRegisterName(Register); - OS << ", " << Offset; + OS << "\t.seh_setframe " << Register << ", " << Offset; EmitEOL(); } -void MCAsmStreamer::EmitWin64EHAllocStack(unsigned Size) { - MCStreamer::EmitWin64EHAllocStack(Size); +void MCAsmStreamer::EmitWinCFIAllocStack(unsigned Size) { + MCStreamer::EmitWinCFIAllocStack(Size); OS << "\t.seh_stackalloc " << Size; EmitEOL(); } -void MCAsmStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) { - MCStreamer::EmitWin64EHSaveReg(Register, Offset); +void MCAsmStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset) { + MCStreamer::EmitWinCFISaveReg(Register, Offset); - OS << "\t.seh_savereg "; - EmitRegisterName(Register); - OS << ", " << Offset; + OS << "\t.seh_savereg " << Register << ", " << Offset; EmitEOL(); } -void MCAsmStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) { - MCStreamer::EmitWin64EHSaveXMM(Register, Offset); +void MCAsmStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset) { + MCStreamer::EmitWinCFISaveXMM(Register, Offset); - OS << "\t.seh_savexmm "; - EmitRegisterName(Register); - OS << ", " << Offset; + OS << "\t.seh_savexmm " << Register << ", " << Offset; EmitEOL(); } -void MCAsmStreamer::EmitWin64EHPushFrame(bool Code) { - MCStreamer::EmitWin64EHPushFrame(Code); +void MCAsmStreamer::EmitWinCFIPushFrame(bool Code) { + MCStreamer::EmitWinCFIPushFrame(Code); OS << "\t.seh_pushframe"; if (Code) @@ -1182,8 +1163,8 @@ void MCAsmStreamer::EmitWin64EHPushFrame(bool Code) { EmitEOL(); } -void MCAsmStreamer::EmitWin64EHEndProlog(void) { - MCStreamer::EmitWin64EHEndProlog(); +void MCAsmStreamer::EmitWinCFIEndProlog(void) { + MCStreamer::EmitWinCFIEndProlog(); OS << "\t.seh_endprologue"; EmitEOL(); diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 886a5f5..a8aad71 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -27,6 +27,7 @@ #include "llvm/Support/LEB128.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/MC/MCSectionELF.h" #include <tuple> using namespace llvm; @@ -433,12 +434,27 @@ const MCSymbolData *MCAssembler::getAtom(const MCSymbolData *SD) const { return SD->getFragment()->getAtom(); } +// Try to fully compute Expr to an absolute value and if that fails produce +// a relocatable expr. +// FIXME: Should this be the behavior of EvaluateAsRelocatable itself? 
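// For illustration (a sketch, not part of this patch): the helper below
// folds fully-resolvable expressions to constants before falling back to a
// relocatable result, so for symbols A and B laid out in the same fragment:
//   MCValue V;
//   evaluate(*AMinusB, Layout, V);     // folds: V.isAbsolute() is true
//   evaluate(*ExternPlus4, Layout, V); // falls back: V stays relocatable
// where AMinusB and ExternPlus4 stand for (A - B) and (Extern + 4).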
+static bool evaluate(const MCExpr &Expr, const MCAsmLayout &Layout, + MCValue &Target) { + if (Expr.EvaluateAsValue(Target, &Layout)) + if (Target.isAbsolute()) + return true; + return Expr.EvaluateAsRelocatable(Target, &Layout); +} + bool MCAssembler::evaluateFixup(const MCAsmLayout &Layout, const MCFixup &Fixup, const MCFragment *DF, MCValue &Target, uint64_t &Value) const { ++stats::evaluateFixup; - if (!Fixup.getValue()->EvaluateAsRelocatable(Target, &Layout)) + // FIXME: This code has some duplication with RecordRelocation. We should + // probably merge the two into a single callback that tries to evaluate a + // fixup and records a relocation if one is needed. + const MCExpr *Expr = Fixup.getValue(); + if (!evaluate(*Expr, Layout, Target)) getContext().FatalError(Fixup.getLoc(), "expected relocatable expression"); bool IsPCRel = Backend.getFixupKindInfo( @@ -782,8 +798,13 @@ void MCAssembler::writeSectionData(const MCSectionData *SD, assert(DF.fixup_begin() == DF.fixup_end() && "Cannot have fixups in virtual section!"); for (unsigned i = 0, e = DF.getContents().size(); i != e; ++i) - assert(DF.getContents()[i] == 0 && - "Invalid data value for virtual section!"); + if (DF.getContents()[i]) { + if (auto *ELFSec = dyn_cast<const MCSectionELF>(&SD->getSection())) + report_fatal_error("non-zero initializer found in section '" + + ELFSec->getSectionName() + "'"); + else + report_fatal_error("non-zero initializer found in virtual section"); + } break; } case MCFragment::FT_Align: @@ -1222,7 +1243,7 @@ void MCSectionData::dump() { OS << "]>"; } -void MCSymbolData::dump() { +void MCSymbolData::dump() const { raw_ostream &OS = llvm::errs(); OS << "<MCSymbolData Symbol:" << getSymbol() diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index c163268..960a071 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -39,7 +39,7 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, AllowTemporaryLabels(true), DwarfCompileUnitID(0), AutoReset(DoAutoReset) { - error_code EC = llvm::sys::fs::current_path(CompilationDir); + std::error_code EC = llvm::sys::fs::current_path(CompilationDir); if (EC) CompilationDir.clear(); @@ -47,8 +47,9 @@ MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri, SecureLog = nullptr; SecureLogUsed = false; - if (SrcMgr && SrcMgr->getNumBuffers() > 0) - MainFileName = SrcMgr->getMemoryBuffer(0)->getBufferIdentifier(); + if (SrcMgr && SrcMgr->getNumBuffers()) + MainFileName = + SrcMgr->getMemoryBuffer(SrcMgr->getMainFileID())->getBufferIdentifier(); } MCContext::~MCContext() { @@ -277,14 +278,15 @@ const MCSectionELF *MCContext::CreateELFGroupSection() { return Result; } -const MCSectionCOFF * -MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, - SectionKind Kind, StringRef COMDATSymName, - int Selection, const MCSectionCOFF *Assoc) { +const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, + unsigned Characteristics, + SectionKind Kind, + StringRef COMDATSymName, + int Selection) { // Do the lookup, if we have a hit, return it. 
- SectionGroupPair P(Section, COMDATSymName); - auto IterBool = COFFUniquingMap.insert(std::make_pair(P, nullptr)); + SectionGroupTriple T(Section, COMDATSymName, Selection); + auto IterBool = COFFUniquingMap.insert(std::make_pair(T, nullptr)); auto Iter = IterBool.first; if (!IterBool.second) return Iter->second; @@ -293,9 +295,9 @@ MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, if (!COMDATSymName.empty()) COMDATSymbol = GetOrCreateSymbol(COMDATSymName); - StringRef CachedName = Iter->first.first; - MCSectionCOFF *Result = new (*this) MCSectionCOFF( - CachedName, Characteristics, COMDATSymbol, Selection, Assoc, Kind); + StringRef CachedName = std::get<0>(Iter->first); + MCSectionCOFF *Result = new (*this) + MCSectionCOFF(CachedName, Characteristics, COMDATSymbol, Selection, Kind); Iter->second = Result; return Result; @@ -308,8 +310,8 @@ MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, } const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) { - SectionGroupPair P(Section, ""); - auto Iter = COFFUniquingMap.find(P); + SectionGroupTriple T(Section, "", 0); + auto Iter = COFFUniquingMap.find(T); if (Iter == COFFUniquingMap.end()) return nullptr; return Iter->second; @@ -339,6 +341,29 @@ bool MCContext::isValidDwarfFileNumber(unsigned FileNumber, unsigned CUID) { return !MCDwarfFiles[FileNumber].Name.empty(); } +/// finalizeDwarfSections - Emit end symbols for each non-empty code section. +/// Also remove empty sections from SectionStartEndSyms, to avoid generating +/// useless debug info for them. +void MCContext::finalizeDwarfSections(MCStreamer &MCOS) { + MCContext &context = MCOS.getContext(); + + auto sec = SectionStartEndSyms.begin(); + while (sec != SectionStartEndSyms.end()) { + assert(sec->second.first && "Start symbol must be set by now"); + MCOS.SwitchSection(sec->first); + if (MCOS.mayHaveInstructions()) { + MCSymbol *SectionEndSym = context.CreateTempSymbol(); + MCOS.EmitLabel(SectionEndSym); + sec->second.second = SectionEndSym; + ++sec; + } else { + MapVector<const MCSection *, std::pair<MCSymbol *, MCSymbol *> >::iterator + to_erase = sec; + sec = SectionStartEndSyms.erase(to_erase); + } + } +} + void MCContext::FatalError(SMLoc Loc, const Twine &Msg) const { // If we have a source manager and a location, use it. Otherwise just // use the generic report_fatal_error(). 
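The finalizeDwarfSections hunk above walks a MapVector with the standard erase-or-advance idiom: emit an end label and step forward for sections that have instructions, erase the entry otherwise. The same control flow in a self-contained form (a generic sketch, not LLVM code):

#include <iostream>
#include <map>
#include <string>

int main() {
  // Key: section name; value: instruction count (0 means empty, drop it).
  std::map<std::string, int> Sections{{".text", 12}, {".dead", 0}};
  for (auto It = Sections.begin(); It != Sections.end();) {
    if (It->second == 0)
      It = Sections.erase(It); // erase returns the iterator that follows
    else
      ++It;                    // keep this entry and move on
  }
  for (const auto &S : Sections)
    std::cout << S.first << '\n'; // prints only ".text"
}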
diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index be6731a..0a3fab8 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -19,6 +19,7 @@ #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -518,8 +519,12 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { MCOS->EmitULEB128IntValue(dwarf::DW_TAG_compile_unit); MCOS->EmitIntValue(dwarf::DW_CHILDREN_yes, 1); EmitAbbrev(MCOS, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4); - EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); - EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); + if (MCOS->getContext().getGenDwarfSectionSyms().size() > 1) { + EmitAbbrev(MCOS, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4); + } else { + EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); + EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); + } EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string); if (!context.getCompilationDir().empty()) EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string); @@ -552,20 +557,14 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { } // When generating dwarf for assembly source files this emits the data for -// .debug_aranges section. Which contains a header and a table of pairs of -// PointerSize'ed values for the address and size of section(s) with line table -// entries (just the default .text in our case) and a terminating pair of zeros. +// .debug_aranges section. This section contains a header and a table of pairs +// of PointerSize'ed values for the address and size of section(s) with line +// table entries. static void EmitGenDwarfAranges(MCStreamer *MCOS, const MCSymbol *InfoSectionSymbol) { MCContext &context = MCOS->getContext(); - // Create a symbol at the end of the section that we are creating the dwarf - // debugging info to use later in here as part of the expression to calculate - // the size of the section for the table. - MCOS->SwitchSection(context.getGenDwarfSection()); - MCSymbol *SectionEndSym = context.CreateTempSymbol(); - MCOS->EmitLabel(SectionEndSym); - context.setGenDwarfSectionEndSym(SectionEndSym); + auto &Sections = context.getGenDwarfSectionSyms(); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); @@ -583,8 +582,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, Length += Pad; // Add the size of the pair of PointerSize'ed values for the address and size - // of the one default .text section we have in the table. - Length += 2 * AddrSize; + // of each section we have in the table. + Length += 2 * AddrSize * Sections.size(); // And the pair of terminating zeros. Length += 2 * AddrSize; @@ -608,14 +607,21 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, for(int i = 0; i < Pad; i++) MCOS->EmitIntValue(0, 1); - // Now emit the table of pairs of PointerSize'ed values for the section(s) - // address and size, in our case just the one default .text section. - const MCExpr *Addr = MCSymbolRefExpr::Create( - context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context); - const MCExpr *Size = MakeStartMinusEndExpr(*MCOS, - *context.getGenDwarfSectionStartSym(), *SectionEndSym, 0); - MCOS->EmitValue(Addr, AddrSize); - MCOS->EmitAbsValue(Size, AddrSize); + // Now emit the table of pairs of PointerSize'ed values for the section + // addresses and sizes. 
+ for (const auto &sec : Sections) { + MCSymbol *StartSymbol = sec.second.first; + MCSymbol *EndSymbol = sec.second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + const MCExpr *Addr = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + const MCExpr *Size = MakeStartMinusEndExpr(*MCOS, + *StartSymbol, *EndSymbol, 0); + MCOS->EmitValue(Addr, AddrSize); + MCOS->EmitAbsValue(Size, AddrSize); + } // And finally the pair of terminating zeros. MCOS->EmitIntValue(0, AddrSize); @@ -627,7 +633,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, // DIE and a list of label DIEs. static void EmitGenDwarfInfo(MCStreamer *MCOS, const MCSymbol *AbbrevSectionSymbol, - const MCSymbol *LineSectionSymbol) { + const MCSymbol *LineSectionSymbol, + const MCSymbol *RangesSectionSymbol) { MCContext &context = MCOS->getContext(); MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); @@ -674,15 +681,37 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitIntValue(0, 4); } - // AT_low_pc, the first address of the default .text section. - const MCExpr *Start = MCSymbolRefExpr::Create( - context.getGenDwarfSectionStartSym(), MCSymbolRefExpr::VK_None, context); - MCOS->EmitValue(Start, AddrSize); + if (RangesSectionSymbol) { + // There are multiple sections containing code, so we must use the + // .debug_ranges sections. - // AT_high_pc, the last address of the default .text section. - const MCExpr *End = MCSymbolRefExpr::Create( - context.getGenDwarfSectionEndSym(), MCSymbolRefExpr::VK_None, context); - MCOS->EmitValue(End, AddrSize); + // AT_ranges, the 4 byte offset from the start of the .debug_ranges section + // to the address range list for this compilation unit. + MCOS->EmitSymbolValue(RangesSectionSymbol, 4); + } else { + // If we only have one non-empty code section, we can use the simpler + // AT_low_pc and AT_high_pc attributes. + + // Find the first (and only) non-empty text section + auto &Sections = context.getGenDwarfSectionSyms(); + const auto TextSection = Sections.begin(); + assert(TextSection != Sections.end() && "No text section found"); + + MCSymbol *StartSymbol = TextSection->second.first; + MCSymbol *EndSymbol = TextSection->second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + // AT_low_pc, the first address of the default .text section. + const MCExpr *Start = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitValue(Start, AddrSize); + + // AT_high_pc, the last address of the default .text section. + const MCExpr *End = MCSymbolRefExpr::Create( + EndSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitValue(End, AddrSize); + } // AT_name, the name of the source file. Reconstruct from the first directory // and file table entries. @@ -766,13 +795,51 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitLabel(InfoEnd); } +// When generating dwarf for assembly source files this emits the data for +// .debug_ranges section. We only emit one range list, which spans all of the +// executable sections of this file. 
+static void EmitGenDwarfRanges(MCStreamer *MCOS) { + MCContext &context = MCOS->getContext(); + auto &Sections = context.getGenDwarfSectionSyms(); + + const MCAsmInfo *AsmInfo = context.getAsmInfo(); + int AddrSize = AsmInfo->getPointerSize(); + + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); + + for (const auto sec : Sections) { + + MCSymbol *StartSymbol = sec.second.first; + MCSymbol *EndSymbol = sec.second.second; + assert(StartSymbol && "StartSymbol must not be NULL"); + assert(EndSymbol && "EndSymbol must not be NULL"); + + // Emit a base address selection entry for the start of this section + const MCExpr *SectionStartAddr = MCSymbolRefExpr::Create( + StartSymbol, MCSymbolRefExpr::VK_None, context); + MCOS->EmitFill(AddrSize, 0xFF); + MCOS->EmitValue(SectionStartAddr, AddrSize); + + // Emit a range list entry spanning this section + const MCExpr *SectionSize = MakeStartMinusEndExpr(*MCOS, + *StartSymbol, *EndSymbol, 0); + MCOS->EmitIntValue(0, AddrSize); + MCOS->EmitAbsValue(SectionSize, AddrSize); + } + + // Emit end of list entry + MCOS->EmitIntValue(0, AddrSize); + MCOS->EmitIntValue(0, AddrSize); +} + // // When generating dwarf for assembly source files this emits the Dwarf // sections. // void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { - // Create the dwarf sections in this order (.debug_line already created). MCContext &context = MCOS->getContext(); + + // Create the dwarf sections in this order (.debug_line already created). const MCAsmInfo *AsmInfo = context.getAsmInfo(); bool CreateDwarfSectionSymbols = AsmInfo->doesDwarfUseRelocationsAcrossSections(); @@ -781,6 +848,22 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { LineSectionSymbol = MCOS->getDwarfLineTableSymbol(0); MCSymbol *AbbrevSectionSymbol = nullptr; MCSymbol *InfoSectionSymbol = nullptr; + MCSymbol *RangesSectionSymbol = NULL; + + // Create end symbols for each section, and remove empty sections + MCOS->getContext().finalizeDwarfSections(*MCOS); + + // If there are no sections to generate debug info for, we don't need + // to do anything + if (MCOS->getContext().getGenDwarfSectionSyms().empty()) + return; + + // We only need to use the .debug_ranges section if we have multiple + // code sections. + const bool UseRangesSection = + MCOS->getContext().getGenDwarfSectionSyms().size() > 1; + CreateDwarfSectionSymbols |= UseRangesSection; + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfInfoSection()); if (CreateDwarfSectionSymbols) { InfoSectionSymbol = context.CreateTempSymbol(); @@ -791,20 +874,30 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { AbbrevSectionSymbol = context.CreateTempSymbol(); MCOS->EmitLabel(AbbrevSectionSymbol); } - MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); + if (UseRangesSection) { + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfRangesSection()); + if (CreateDwarfSectionSymbols) { + RangesSectionSymbol = context.CreateTempSymbol(); + MCOS->EmitLabel(RangesSectionSymbol); + } + } - // If there are no line table entries then do not emit any section contents. - if (!context.hasMCLineSections()) - return; + assert((RangesSectionSymbol != NULL) || !UseRangesSection); + + MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfARangesSection()); // Output the data for .debug_aranges section. EmitGenDwarfAranges(MCOS, InfoSectionSymbol); + if (UseRangesSection) + EmitGenDwarfRanges(MCOS); + // Output the data for .debug_abbrev section. EmitGenDwarfAbbrev(MCOS); // Output the data for .debug_info section. 
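// For reference, a sketch of what EmitGenDwarfRanges above writes when
// AddrSize == 4 (values in angle brackets are per-section placeholders):
//   0xFFFFFFFF         // base address selection entry marker
//   <section start>    // the new base address
//   0x00000000         // range begin = base + 0
//   <section size>     // range end   = base + size
// repeated for each non-empty code section, then a 0, 0 end-of-list pair.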
- EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol); + EmitGenDwarfInfo(MCOS, AbbrevSectionSymbol, LineSectionSymbol, + RangesSectionSymbol); } // @@ -815,12 +908,13 @@ void MCGenDwarfInfo::Emit(MCStreamer *MCOS) { // void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, SourceMgr &SrcMgr, SMLoc &Loc) { - // We won't create dwarf labels for temporary symbols or symbols not in - // the default text. + // We won't create dwarf labels for temporary symbols. if (Symbol->isTemporary()) return; MCContext &context = MCOS->getContext(); - if (context.getGenDwarfSection() != MCOS->getCurrentSection().first) + // We won't create dwarf labels for symbols in sections that we are not + // generating debug info for. + if (!context.getGenDwarfSectionSyms().count(MCOS->getCurrentSection().first)) return; // The dwarf label's name does not have the symbol name's leading @@ -834,7 +928,7 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, // Finding the line number is the expensive part which is why we just don't // pass it in as for some symbols we won't create a dwarf label. - int CurBuffer = SrcMgr.FindBufferContainingLoc(Loc); + unsigned CurBuffer = SrcMgr.FindBufferContainingLoc(Loc); unsigned LineNumber = SrcMgr.FindLineNumber(Loc, CurBuffer); // We create a temporary symbol for use for the AT_high_pc and AT_low_pc @@ -1203,7 +1297,7 @@ void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, unsigned FDEEncoding = MOFI->getFDEEncoding(); unsigned Size = getSizeForEncoding(Streamer, FDEEncoding); if (VerboseAsm) Streamer.AddComment("Range Start"); - Streamer.EmitSymbolValue(Frame.Function, Size); + Streamer.EmitSymbolValue(Frame.Begin, Size); // Range Length const MCExpr *Range = MakeStartMinusEndExpr(Streamer, *Frame.Begin, @@ -1246,12 +1340,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, const MCObjectFileInfo *MOFI = context.getObjectFileInfo(); bool verboseAsm = streamer.isVerboseAsm(); - MCSymbol *sectionStart; - if (MOFI->isFunctionEHFrameSymbolPrivate() || !IsEH) - sectionStart = context.CreateTempSymbol(); - else - sectionStart = context.GetOrCreateSymbol(Twine("EH_frame") + Twine(CIENum)); - + MCSymbol *sectionStart = context.CreateTempSymbol(); streamer.EmitLabel(sectionStart); CIENum++; @@ -1270,7 +1359,10 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, // Version if (verboseAsm) streamer.AddComment("DW_CIE_VERSION"); - streamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1); + // For DWARF2, we use CIE version 1 + // For DWARF3+, we use CIE version 3 + uint8_t CIEVersion = context.getDwarfVersion() <= 2 ? 
1 : 3; + streamer.EmitIntValue(CIEVersion, 1); // Augmentation String SmallString<8> Augmentation; @@ -1298,7 +1390,14 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCObjectStreamer &streamer, // Return Address Register if (verboseAsm) streamer.AddComment("CIE Return Address Column"); - streamer.EmitULEB128IntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true)); + if (CIEVersion == 1) { + assert(MRI->getRARegister() <= 255 && + "DWARF 2 encodes return_address_register in one byte"); + streamer.EmitIntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true), 1); + } else { + streamer.EmitULEB128IntValue( + MRI->getDwarfRegNum(MRI->getRARegister(), true)); + } // Augmentation Data Length (optional) @@ -1360,13 +1459,6 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCObjectStreamer &streamer, const MCObjectFileInfo *MOFI = context.getObjectFileInfo(); bool verboseAsm = streamer.isVerboseAsm(); - if (IsEH && frame.Function && !MOFI->isFunctionEHFrameSymbolPrivate()) { - MCSymbol *EHSym = - context.GetOrCreateSymbol(frame.Function->getName() + Twine(".eh")); - streamer.EmitEHSymAttributes(frame.Function, EHSym); - streamer.EmitLabel(EHSym); - } - // Length const MCExpr *Length = MakeStartMinusEndExpr(streamer, *fdeStart, *fdeEnd, 0); if (verboseAsm) streamer.AddComment("FDE Length"); @@ -1435,13 +1527,12 @@ namespace { return CIEKey(nullptr, -1, 0, false, false); } - CIEKey(const MCSymbol* Personality_, unsigned PersonalityEncoding_, - unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_) : - Personality(Personality_), PersonalityEncoding(PersonalityEncoding_), - LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_), - IsSimple(IsSimple_) { - } - const MCSymbol* Personality; + CIEKey(const MCSymbol *Personality_, unsigned PersonalityEncoding_, + unsigned LsdaEncoding_, bool IsSignalFrame_, bool IsSimple_) + : Personality(Personality_), PersonalityEncoding(PersonalityEncoding_), + LsdaEncoding(LsdaEncoding_), IsSignalFrame(IsSignalFrame_), + IsSimple(IsSimple_) {} + const MCSymbol *Personality; unsigned PersonalityEncoding; unsigned LsdaEncoding; bool IsSignalFrame; @@ -1516,7 +1607,7 @@ void MCDwarfFrameEmitter::Emit(MCObjectStreamer &Streamer, MCAsmBackend *MAB, Emitter.setSectionStart(SectionStart); MCSymbol *FDEEnd = nullptr; - DenseMap<CIEKey, const MCSymbol*> CIEStarts; + DenseMap<CIEKey, const MCSymbol *> CIEStarts; const MCSymbol *DummyDebugKey = nullptr; NeedsEHFrameSection = !MOFI->getSupportsCompactUnwindWithoutEHFrame(); diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 767348c..7c70540 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -65,10 +65,6 @@ void MCELFStreamer::EmitLabel(MCSymbol *Symbol) { MCELF::SetType(SD, ELF::STT_TLS); } -void MCELFStreamer::EmitDebugLabel(MCSymbol *Symbol) { - EmitLabel(Symbol); -} - void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { // Let the target do whatever target specific stuff it needs to do. 
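On the CIE change above: DWARF 2 fixes the CIE version at 1 and encodes return_address_register in a single byte, so the ULEB128 form only applies from version 3 on. The version branch, restated as a minimal standalone sketch:

#include <cassert>
#include <cstdint>

// Mirrors EmitCIE: CIE version 1 for DWARF 2, version 3 for DWARF 3+.
static uint8_t cieVersionFor(unsigned DwarfVersion) {
  return DwarfVersion <= 2 ? 1 : 3;
}

int main() {
  assert(cieVersionFor(2) == 1); // one-byte return_address_register
  assert(cieVersionFor(4) == 3); // ULEB128 return_address_register
  return 0;
}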
getAssembler().getBackend().handleAssemblerFlag(Flag); diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp index 37d05e9..9e8bc94 100644 --- a/lib/MC/MCMachOStreamer.cpp +++ b/lib/MC/MCMachOStreamer.cpp @@ -60,7 +60,6 @@ public: void ChangeSection(const MCSection *Sect, const MCExpr *Subsect) override; void EmitLabel(MCSymbol *Symbol) override; - void EmitDebugLabel(MCSymbol *Symbol) override; void EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) override; void EmitAssemblerFlag(MCAssemblerFlag Flag) override; void EmitLinkerOptions(ArrayRef<std::string> Options) override; @@ -162,9 +161,6 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { SD.setFlags(SD.getFlags() & ~SF_ReferenceTypeMask); } -void MCMachOStreamer::EmitDebugLabel(MCSymbol *Symbol) { - EmitLabel(Symbol); -} void MCMachOStreamer::EmitDataRegion(DataRegionData::KindTy Kind) { if (!getAssembler().getBackend().hasDataInCodeSupport()) return; diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp index 4f2740e..d543402 100644 --- a/lib/MC/MCNullStreamer.cpp +++ b/lib/MC/MCNullStreamer.cpp @@ -24,83 +24,17 @@ namespace { /// @name MCStreamer Interface /// @{ - void ChangeSection(const MCSection *Section, - const MCExpr *Subsection) override { - } - - void EmitLabel(MCSymbol *Symbol) override { - assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); - assert(getCurrentSection().first &&"Cannot emit before setting section!"); - AssignSection(Symbol, getCurrentSection().first); - } - void EmitDebugLabel(MCSymbol *Symbol) override { - EmitLabel(Symbol); - } - void EmitAssemblerFlag(MCAssemblerFlag Flag) override {} - void EmitThumbFunc(MCSymbol *Func) override {} - - void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override {} - void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) override {} bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override { return true; } - void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) override {} - - void BeginCOFFSymbolDef(const MCSymbol *Symbol) override {} - void EmitCOFFSymbolStorageClass(int StorageClass) override {} - void EmitCOFFSymbolType(int Type) override {} - void EndCOFFSymbolDef() override {} void EmitCOFFSecRel32(MCSymbol const *Symbol) override {} - - void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) override {} void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) override {} - void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, - unsigned ByteAlignment) override {} void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = nullptr, uint64_t Size = 0, unsigned ByteAlignment = 0) override {} - void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, - uint64_t Size, unsigned ByteAlignment) override {} - void EmitBytes(StringRef Data) override {} - - void EmitValueImpl(const MCExpr *Value, unsigned Size, - const SMLoc &Loc = SMLoc()) override {} - void EmitULEB128Value(const MCExpr *Value) override {} - void EmitSLEB128Value(const MCExpr *Value) override {} void EmitGPRel32Value(const MCExpr *Value) override {} - void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, - unsigned ValueSize = 1, - unsigned MaxBytesToEmit = 0) override {} - - void EmitCodeAlignment(unsigned ByteAlignment, - unsigned MaxBytesToEmit = 0) override {} - - bool EmitValueToOffset(const MCExpr *Offset, - unsigned char Value = 0) override { return false; } - - void EmitFileDirective(StringRef Filename) override {} - unsigned 
EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, - StringRef Filename, - unsigned CUID = 0) override { - return 0; - } - void EmitDwarfLocDirective(unsigned FileNo, unsigned Line, - unsigned Column, unsigned Flags, - unsigned Isa, unsigned Discriminator, - StringRef FileName) override {} - void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo&) override {} - - void EmitBundleAlignMode(unsigned AlignPow2) override {} - void EmitBundleLock(bool AlignToEnd) override {} - void EmitBundleUnlock() override {} - - void FinishImpl() override {} - - void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) override { - RecordProcEnd(Frame); - } }; } diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 9d413af..d490ef3 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -18,9 +18,29 @@ #include "llvm/MC/MCSectionMachO.h" using namespace llvm; +static bool useCompactUnwind(const Triple &T) { + // Only on darwin. + if (!T.isOSDarwin()) + return false; + + // aarch64 always has it. + if (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64) + return true; + + // Use it on newer version of OS X. + if (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) + return true; + + // And the iOS simulator. + if (T.isiOS() && + (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86)) + return true; + + return false; +} + void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { // MachO - IsFunctionEHFrameSymbolPrivate = false; SupportsWeakOmittedEHFrame = false; if (T.isOSDarwin() && @@ -151,13 +171,10 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { COFFDebugSymbolsSection = nullptr; - if ((T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) || - (T.isOSDarwin() && - (T.getArch() == Triple::arm64 || T.getArch() == Triple::aarch64))) { + if (useCompactUnwind(T)) { CompactUnwindSection = - Ctx->getMachOSection("__LD", "__compact_unwind", - MachO::S_ATTR_DEBUG, - SectionKind::getReadOnly()); + Ctx->getMachOSection("__LD", "__compact_unwind", MachO::S_ATTR_DEBUG, + SectionKind::getReadOnly()); if (T.getArch() == Triple::x86_64 || T.getArch() == Triple::x86) CompactUnwindDwarfEHFrameOnly = 0x04000000; @@ -321,6 +338,13 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { TTypeEncoding = dwarf::DW_EH_PE_absptr; } break; + case Triple::mips: + case Triple::mipsel: + // MIPS uses indirect pointer to refer personality functions, so that the + // eh_frame section can be read-only. DW.ref.personality will be generated + // for relocation. + PersonalityEncoding = dwarf::DW_EH_PE_indirect; + break; case Triple::ppc64: case Triple::ppc64le: PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | @@ -562,6 +586,8 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { + bool IsWoA = T.getArch() == Triple::arm || T.getArch() == Triple::thumb; + // The object file format cannot represent common symbols with explicit // alignments. CommDirectiveSupportsAlignment = false; @@ -575,6 +601,8 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { SectionKind::getBSS()); TextSection = Ctx->getCOFFSection(".text", + (IsWoA ? 
COFF::IMAGE_SCN_MEM_16BIT + : (COFF::SectionCharacteristics)0) | COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_MEM_EXECUTE | COFF::IMAGE_SCN_MEM_READ, @@ -590,12 +618,18 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); - if (T.isKnownWindowsMSVCEnvironment()) { + + if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) { StaticCtorSection = Ctx->getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); + StaticDtorSection = + Ctx->getCOFFSection(".CRT$XTX", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); } else { StaticCtorSection = Ctx->getCOFFSection(".ctors", @@ -603,16 +637,6 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { COFF::IMAGE_SCN_MEM_READ | COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); - } - - - if (T.isKnownWindowsMSVCEnvironment()) { - StaticDtorSection = - Ctx->getCOFFSection(".CRT$XTX", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); - } else { StaticDtorSection = Ctx->getCOFFSection(".dtors", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | @@ -625,11 +649,16 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { // though it contains relocatable pointers. In PIC mode, this is probably a // big runtime hit for C++ apps. Either the contents of the LSDA need to be // adjusted or this should be a data section. - LSDASection = - Ctx->getCOFFSection(".gcc_except_table", - COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getReadOnly()); + assert(T.isOSWindows() && "Windows is the only supported COFF target"); + if (T.getArch() == Triple::x86_64) { + // On Windows 64 with SEH, the LSDA is emitted into the .xdata section + LSDASection = 0; + } else { + LSDASection = Ctx->getCOFFSection(".gcc_except_table", + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getReadOnly()); + } // Debug info. 
COFFDebugSymbolsSection = @@ -705,36 +734,46 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfInfoDWOSection = - Ctx->getCOFFSection(".debug_info.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, + Ctx->getCOFFSection(".debug_info.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfAbbrevDWOSection = - Ctx->getCOFFSection(".debug_abbrev.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, + Ctx->getCOFFSection(".debug_abbrev.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfStrDWOSection = - Ctx->getCOFFSection(".debug_str.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, + Ctx->getCOFFSection(".debug_str.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfLineDWOSection = - Ctx->getCOFFSection(".debug_line.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, + Ctx->getCOFFSection(".debug_line.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfLocDWOSection = - Ctx->getCOFFSection(".debug_loc.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, + Ctx->getCOFFSection(".debug_loc.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfStrOffDWOSection = - Ctx->getCOFFSection(".debug_str_offsets.dwo", COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, + Ctx->getCOFFSection(".debug_str_offsets.dwo", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); - DwarfAddrSection = Ctx->getCOFFSection( - ".debug_addr", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); + + DwarfAddrSection = + Ctx->getCOFFSection(".debug_addr", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DrectveSection = Ctx->getCOFFSection(".drectve", - COFF::IMAGE_SCN_LNK_INFO | COFF::IMAGE_SCN_LNK_REMOVE, + COFF::IMAGE_SCN_LNK_INFO | + COFF::IMAGE_SCN_LNK_REMOVE, SectionKind::getMetadata()); PDataSection = @@ -748,6 +787,7 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getDataRel()); + TLSDataSection = Ctx->getCOFFSection(".tls$", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | @@ -756,7 +796,7 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { SectionKind::getDataRel()); } -void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, +void MCObjectFileInfo::InitMCObjectFileInfo(StringRef T, Reloc::Model relocm, CodeModel::Model cm, MCContext &ctx) { RelocM = relocm; @@ -766,7 +806,6 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, // Common. CommDirectiveSupportsAlignment = true; SupportsWeakOmittedEHFrame = true; - IsFunctionEHFrameSymbolPrivate = true; SupportsCompactUnwindWithoutEHFrame = false; PersonalityEncoding = LSDAEncoding = FDECFIEncoding = TTypeEncoding = @@ -781,8 +820,9 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, DwarfAccelNamespaceSection = nullptr; // Used only by selected targets. DwarfAccelTypesSection = nullptr; // Used only by selected targets. 
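The hunk below replaces a function-local Triple with the cached TT member, so the triple string is parsed once during initialization and every later query reuses the parsed form. The pattern in isolation (a generic sketch, not the actual class):

#include "llvm/ADT/Triple.h"

namespace {
struct ObjectFileInfoLike {
  llvm::Triple TT; // parsed once in init, reused by every later query

  void init(llvm::StringRef TripleStr) {
    TT = llvm::Triple(TripleStr); // single parse of the triple string
  }
  bool targetsWindows() const { return TT.isOSWindows(); }
};
} // namespace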
- Triple T(TT); - Triple::ArchType Arch = T.getArch(); + TT = Triple(T); + + Triple::ArchType Arch = TT.getArch(); // FIXME: Checking for Arch here to filter out bogus triples such as // cellspu-apple-darwin. Perhaps we should fix in Triple? if ((Arch == Triple::x86 || Arch == Triple::x86_64 || @@ -790,17 +830,17 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, Arch == Triple::arm64 || Arch == Triple::aarch64 || Arch == Triple::ppc || Arch == Triple::ppc64 || Arch == Triple::UnknownArch) && - (T.isOSDarwin() || T.isOSBinFormatMachO())) { + (TT.isOSDarwin() || TT.isOSBinFormatMachO())) { Env = IsMachO; - InitMachOMCObjectFileInfo(T); + InitMachOMCObjectFileInfo(TT); } else if ((Arch == Triple::x86 || Arch == Triple::x86_64 || Arch == Triple::arm || Arch == Triple::thumb) && - (T.isOSWindows() && T.getObjectFormat() == Triple::COFF)) { + (TT.isOSWindows() && TT.getObjectFormat() == Triple::COFF)) { Env = IsCOFF; - InitCOFFMCObjectFileInfo(T); + InitCOFFMCObjectFileInfo(TT); } else { Env = IsELF; - InitELFMCObjectFileInfo(T); + InitELFMCObjectFileInfo(TT); } } diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp index a1aa602..a721b59 100644 --- a/lib/MC/MCObjectStreamer.cpp +++ b/lib/MC/MCObjectStreamer.cpp @@ -83,32 +83,8 @@ MCDataFragment *MCObjectStreamer::getOrCreateDataFragment() const { return F; } -const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) { - switch (Value->getKind()) { - case MCExpr::Target: - cast<MCTargetExpr>(Value)->AddValueSymbols(Assembler); - break; - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbols(BE->getLHS()); - AddValueSymbols(BE->getRHS()); - break; - } - - case MCExpr::SymbolRef: - Assembler->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbols(cast<MCUnaryExpr>(Value)->getSubExpr()); - break; - } - - return Value; +void MCObjectStreamer::visitUsedSymbol(const MCSymbol &Sym) { + Assembler->getOrCreateSymbolData(Sym); } void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) { @@ -119,13 +95,14 @@ void MCObjectStreamer::EmitCFISections(bool EH, bool Debug) { void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, const SMLoc &Loc) { + MCStreamer::EmitValueImpl(Value, Size, Loc); MCDataFragment *DF = getOrCreateDataFragment(); MCLineEntry::Make(this, getCurrentSection().first); // Avoid fixups when possible. int64_t AbsValue; - if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue, getAssembler())) { + if (Value->EvaluateAsAbsolute(AbsValue, getAssembler())) { EmitIntValue(AbsValue, Size); return; } @@ -136,11 +113,14 @@ void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, } void MCObjectStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { - RecordProcStart(Frame); + // We need to create a local symbol to avoid relocations. 
+ Frame.Begin = getContext().CreateTempSymbol(); + EmitLabel(Frame.Begin); } void MCObjectStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { - RecordProcEnd(Frame); + Frame.End = getContext().CreateTempSymbol(); + EmitLabel(Frame.End); } void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) { @@ -158,10 +138,6 @@ void MCObjectStreamer::EmitLabel(MCSymbol *Symbol) { SD.setOffset(F->getContents().size()); } -void MCObjectStreamer::EmitDebugLabel(MCSymbol *Symbol) { - EmitLabel(Symbol); -} - void MCObjectStreamer::EmitULEB128Value(const MCExpr *Value) { int64_t IntValue; if (Value->EvaluateAsAbsolute(IntValue, getAssembler())) { @@ -205,15 +181,12 @@ void MCObjectStreamer::ChangeSection(const MCSection *Section, void MCObjectStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { getAssembler().getOrCreateSymbolData(*Symbol); - AddValueSymbols(Value); MCStreamer::EmitAssignment(Symbol, Value); } -void MCObjectStreamer::EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) { - // Scan for values. - for (unsigned i = Inst.getNumOperands(); i--; ) - if (Inst.getOperand(i).isExpr()) - AddValueSymbols(Inst.getOperand(i).getExpr()); +void MCObjectStreamer::EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) { + MCStreamer::EmitInstruction(Inst, STI); MCSectionData *SD = getCurrentSectionData(); SD->setHasInstructions(true); diff --git a/lib/MC/MCParser/AsmLexer.cpp b/lib/MC/MCParser/AsmLexer.cpp index bca516e..145ad4a 100644 --- a/lib/MC/MCParser/AsmLexer.cpp +++ b/lib/MC/MCParser/AsmLexer.cpp @@ -22,7 +22,6 @@ using namespace llvm; AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { - CurBuf = nullptr; CurPtr = nullptr; isAtStartOfLine = true; AllowAtInIdentifier = !StringRef(MAI.getCommentString()).startswith("@"); @@ -31,13 +30,13 @@ AsmLexer::AsmLexer(const MCAsmInfo &_MAI) : MAI(_MAI) { AsmLexer::~AsmLexer() { } -void AsmLexer::setBuffer(const MemoryBuffer *buf, const char *ptr) { - CurBuf = buf; +void AsmLexer::setBuffer(StringRef Buf, const char *ptr) { + CurBuf = Buf; if (ptr) CurPtr = ptr; else - CurPtr = CurBuf->getBufferStart(); + CurPtr = CurBuf.begin(); TokStart = nullptr; } @@ -58,7 +57,7 @@ int AsmLexer::getNextChar() { case 0: // A nul character in the stream is either the end of the current buffer or // a random nul in the file. Disambiguate that here. - if (CurPtr-1 != CurBuf->getBufferEnd()) + if (CurPtr - 1 != CurBuf.end()) return 0; // Just whitespace. // Otherwise, return end of file. @@ -201,8 +200,8 @@ AsmToken AsmLexer::LexLineComment() { CurChar = getNextChar(); if (CurChar == EOF) - return AsmToken(AsmToken::Eof, StringRef(CurPtr, 0)); - return AsmToken(AsmToken::EndOfStatement, StringRef(CurPtr, 0)); + return AsmToken(AsmToken::Eof, StringRef(TokStart, 0)); + return AsmToken(AsmToken::EndOfStatement, StringRef(TokStart, 0)); } static void SkipIgnoredIntegerSuffix(const char *&CurPtr) { @@ -420,9 +419,8 @@ StringRef AsmLexer::LexUntilEndOfStatement() { while (!isAtStartOfComment(*CurPtr) && // Start of line comment. !isAtStatementSeparator(CurPtr) && // End of statement marker. 
- *CurPtr != '\n' && - *CurPtr != '\r' && - (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { + *CurPtr != '\n' && *CurPtr != '\r' && + (*CurPtr != 0 || CurPtr != CurBuf.end())) { ++CurPtr; } return StringRef(TokStart, CurPtr-TokStart); @@ -431,9 +429,8 @@ StringRef AsmLexer::LexUntilEndOfStatement() { StringRef AsmLexer::LexUntilEndOfLine() { TokStart = CurPtr; - while (*CurPtr != '\n' && - *CurPtr != '\r' && - (*CurPtr != 0 || CurPtr != CurBuf->getBufferEnd())) { + while (*CurPtr != '\n' && *CurPtr != '\r' && + (*CurPtr != 0 || CurPtr != CurBuf.end())) { ++CurPtr; } return StringRef(TokStart, CurPtr-TokStart); diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 168597f..62ab4a5 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -102,7 +102,7 @@ public: struct ParseStatementInfo { /// \brief The parsed operands from the last parsed statement. - SmallVector<MCParsedAsmOperand*, 8> ParsedOperands; + SmallVector<std::unique_ptr<MCParsedAsmOperand>, 8> ParsedOperands; /// \brief The opcode from the last parsed instruction. unsigned Opcode; @@ -115,13 +115,6 @@ struct ParseStatementInfo { ParseStatementInfo() : Opcode(~0U), ParseError(false), AsmRewrites(nullptr) {} ParseStatementInfo(SmallVectorImpl<AsmRewrite> *rewrites) : Opcode(~0), ParseError(false), AsmRewrites(rewrites) {} - - ~ParseStatementInfo() { - // Free any parsed operands. - for (unsigned i = 0, e = ParsedOperands.size(); i != e; ++i) - delete ParsedOperands[i]; - ParsedOperands.clear(); - } }; /// \brief The concrete assembly parser instance. @@ -140,7 +133,7 @@ private: /// This is the current buffer index we're lexing from as managed by the /// SourceMgr object. - int CurBuffer; + unsigned CurBuffer; AsmCond TheCondState; std::vector<AsmCond> TheCondStack; @@ -169,13 +162,13 @@ private: StringRef CppHashFilename; int64_t CppHashLineNumber; SMLoc CppHashLoc; - int CppHashBuf; + unsigned CppHashBuf; /// When generating dwarf for assembly source files we need to calculate the /// logical line number based on the last parsed cpp hash file line comment /// and current line. Since this is slow and messes up the SourceMgr's /// cache we save the last info we queried with SrcMgr.FindLineNumber(). SMLoc LastQueryIDLoc; - int LastQueryBuffer; + unsigned LastQueryBuffer; unsigned LastQueryLine; /// AssemblerDialect. ~OU means unset value and use value provided by MAI. @@ -317,9 +310,9 @@ private: /// current token is not set; clients should ensure Lex() is called /// subsequently. /// - /// \param InBuffer If not -1, should be the known buffer id that contains the + /// \param InBuffer If not 0, should be the known buffer id that contains the /// location. 
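// The loop conditions above treat a nul byte as end-of-input only when it
// coincides with the end of the buffer; an embedded nul is ordinary data. A
// std-only sketch of that disambiguation (assumes null-terminated backing
// storage, as LLVM's MemoryBuffer guarantees):
#include <string_view>
inline bool atBufferEnd(std::string_view Buf, const char *P) {
  return *P == '\0' && P == Buf.data() + Buf.size(); // nul AND at the end
}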
- void jumpToLoc(SMLoc Loc, int InBuffer=-1); + void jumpToLoc(SMLoc Loc, unsigned InBuffer = 0); /// \brief Parse up to the end of statement and return the contents from the /// current token until the end of the statement; the current token on exit @@ -352,8 +345,9 @@ private: DK_REFERENCE, DK_WEAK_DEFINITION, DK_WEAK_REFERENCE, DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT, DK_INCLUDE, DK_INCBIN, DK_CODE16, DK_CODE16GCC, DK_REPT, DK_IRP, DK_IRPC, - DK_IF, DK_IFNE, DK_IFB, DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, - DK_IFNDEF, DK_IFNOTDEF, DK_ELSEIF, DK_ELSE, DK_ENDIF, + DK_IF, DK_IFEQ, DK_IFGE, DK_IFGT, DK_IFLE, DK_IFLT, DK_IFNE, DK_IFB, + DK_IFNB, DK_IFC, DK_IFEQS, DK_IFNC, DK_IFDEF, DK_IFNDEF, DK_IFNOTDEF, + DK_ELSEIF, DK_ELSE, DK_ENDIF, DK_SPACE, DK_SKIP, DK_FILE, DK_LINE, DK_LOC, DK_STABS, DK_CFI_SECTIONS, DK_CFI_STARTPROC, DK_CFI_ENDPROC, DK_CFI_DEF_CFA, DK_CFI_DEF_CFA_OFFSET, DK_CFI_ADJUST_CFA_OFFSET, DK_CFI_DEF_CFA_REGISTER, @@ -440,8 +434,8 @@ private: bool parseDirectiveInclude(); // ".include" bool parseDirectiveIncbin(); // ".incbin" - // ".if" or ".ifne" - bool parseDirectiveIf(SMLoc DirectiveLoc); + // ".if", ".ifeq", ".ifge", ".ifgt", ".ifle", ".iflt" or ".ifne" + bool parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind); // ".ifb" or ".ifnb", depending on ExpectBlank. bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank); // ".ifc" or ".ifnc", depending on ExpectEqual. @@ -497,15 +491,15 @@ enum { DEFAULT_ADDRSPACE = 0 }; AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, const MCAsmInfo &_MAI) : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM), - PlatformParser(nullptr), CurBuffer(0), MacrosEnabledFlag(true), - HadError(false), CppHashLineNumber(0), AssemblerDialect(~0U), - IsDarwin(false), ParsingInlineAsm(false) { + PlatformParser(nullptr), CurBuffer(_SM.getMainFileID()), + MacrosEnabledFlag(true), HadError(false), CppHashLineNumber(0), + AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) { // Save the old handler. SavedDiagHandler = SrcMgr.getDiagHandler(); SavedDiagContext = SrcMgr.getDiagContext(); // Set our own handler which calls the saved handler. SrcMgr.setDiagHandler(DiagHandler, this); - Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); // Initialize the platform / file format parser. switch (_Ctx.getObjectFileInfo()->getObjectFileType()) { @@ -572,14 +566,13 @@ bool AsmParser::Error(SMLoc L, const Twine &Msg, ArrayRef<SMRange> Ranges) { bool AsmParser::enterIncludeFile(const std::string &Filename) { std::string IncludedFile; - int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile); - if (NewBuf == -1) + unsigned NewBuf = + SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile); + if (!NewBuf) return true; CurBuffer = NewBuf; - - Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); - + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); return false; } @@ -588,8 +581,9 @@ bool AsmParser::enterIncludeFile(const std::string &Filename) { /// returns true on failure. bool AsmParser::processIncbinFile(const std::string &Filename) { std::string IncludedFile; - int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile); - if (NewBuf == -1) + unsigned NewBuf = + SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile); + if (!NewBuf) return true; // Pick up the bytes from the file and emit them.
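// enterIncludeFile and processIncbinFile above rely on the new convention
// that buffer id 0 means "failed to load". A minimal stand-in with the same
// shape (loadFile is a hypothetical helper, assumed to return 0 on failure):
#include <string>
unsigned loadFile(const std::string &Path); // assumption: 0 signals failure
inline bool enterInclude(const std::string &Path, unsigned &CurBuf) {
  unsigned NewBuf = loadFile(Path);
  if (!NewBuf)
    return true;   // AsmParser convention: returning true reports an error
  CurBuf = NewBuf; // continue lexing from the included buffer
  return false;
}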
@@ -597,13 +591,10 @@ bool AsmParser::processIncbinFile(const std::string &Filename) { return false; } -void AsmParser::jumpToLoc(SMLoc Loc, int InBuffer) { - if (InBuffer != -1) { - CurBuffer = InBuffer; - } else { - CurBuffer = SrcMgr.FindBufferContainingLoc(Loc); - } - Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer), Loc.getPointer()); +void AsmParser::jumpToLoc(SMLoc Loc, unsigned InBuffer) { + CurBuffer = InBuffer ? InBuffer : SrcMgr.FindBufferContainingLoc(Loc); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(), + Loc.getPointer()); } const AsmToken &AsmParser::Lex() { @@ -639,10 +630,12 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // If we are generating dwarf for assembly source files save the initial text // section and generate a .file directive. if (getContext().getGenDwarfForAssembly()) { - getContext().setGenDwarfSection(getStreamer().getCurrentSection().first); MCSymbol *SectionStartSym = getContext().CreateTempSymbol(); getStreamer().EmitLabel(SectionStartSym); - getContext().setGenDwarfSectionStartSym(SectionStartSym); + auto InsertResult = getContext().addGenDwarfSection( + getStreamer().getCurrentSection().first); + assert(InsertResult.second && ".text section should not have debug info yet"); + InsertResult.first->second.first = SectionStartSym; getContext().setGenDwarfFileNumber(getStreamer().EmitDwarfFileDirective( 0, StringRef(), getContext().getMainFileName())); } @@ -818,7 +811,19 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { // Parse symbol variant std::pair<StringRef, StringRef> Split; if (!MAI.useParensForSymbolVariant()) { - Split = Identifier.split('@'); + if (FirstTokenKind == AsmToken::String) { + if (Lexer.is(AsmToken::At)) { + Lexer.Lex(); // eat @ + SMLoc AtLoc = getLexer().getLoc(); + StringRef VName; + if (parseIdentifier(VName)) + return Error(AtLoc, "expected symbol variant after '@'"); + + Split = std::make_pair(Identifier, VName); + } + } else { + Split = Identifier.split('@'); + } } else if (Lexer.is(AsmToken::LParen)) { Lexer.Lex(); // eat ( StringRef VName; @@ -1236,8 +1241,13 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) { default: break; case DK_IF: + case DK_IFEQ: + case DK_IFGE: + case DK_IFGT: + case DK_IFLE: + case DK_IFLT: case DK_IFNE: - return parseDirectiveIf(IDLoc); + return parseDirectiveIf(IDLoc, DirKind); case DK_IFB: return parseDirectiveIfb(IDLoc, true); case DK_IFNB: @@ -1581,12 +1591,11 @@ bool AsmParser::parseStatement(ParseStatementInfo &Info) { printMessage(IDLoc, SourceMgr::DK_Note, OS.str()); } - // If we are generating dwarf for assembly source files and the current - // section is the initial text section then generate a .loc directive for - // the instruction. + // If we are generating dwarf for the current section then generate a .loc + // directive for the instruction. 
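// parseDirectiveIf (further below) maps each new .if variant onto a
// comparison of the already-evaluated expression against zero. The mapping in
// isolation, as a std-only sketch:
#include <cstdint>
enum class IfKind { If, IfEq, IfGe, IfGt, IfLe, IfLt, IfNe };
inline bool condMet(IfKind K, int64_t V) {
  switch (K) {
  case IfKind::If:
  case IfKind::IfNe: return V != 0; // .if and .ifne share truthiness semantics
  case IfKind::IfEq: return V == 0;
  case IfKind::IfGe: return V >= 0;
  case IfKind::IfGt: return V > 0;
  case IfKind::IfLe: return V <= 0;
  case IfKind::IfLt: return V < 0;
  }
  return false; // unreachable with a well-formed IfKind
}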
if (!HadError && getContext().getGenDwarfForAssembly() && - getContext().getGenDwarfSection() == - getStreamer().getCurrentSection().first) { + getContext().getGenDwarfSectionSyms().count( + getStreamer().getCurrentSection().first)) { unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer); @@ -1685,13 +1694,15 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr(); const SMLoc &DiagLoc = Diag.getLoc(); - int DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); - int CppHashBuf = Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc); + unsigned DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); + unsigned CppHashBuf = + Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc); // Like SourceMgr::printMessage() we need to print the include stack if any // before printing the message. - int DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); - if (!Parser->SavedDiagHandler && DiagCurBuffer > 0) { + unsigned DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); + if (!Parser->SavedDiagHandler && DiagCurBuffer && + DiagCurBuffer != DiagSrcMgr.getMainFileID()) { SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer); DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS); } @@ -2018,7 +2029,7 @@ bool AsmParser::parseMacroArguments(const MCAsmMacro *M, break; if (FAI >= NParameters) { - assert(M && "expected macro to be defined"); + assert(M && "expected macro to be defined"); Error(IDLoc, "parameter named '" + FA.Name + "' does not exist for macro '" + M->Name + "'"); @@ -2117,7 +2128,7 @@ bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) { // Jump to the macro instantiation and prime the lexer. CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc()); - Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); Lex(); return false; @@ -3799,9 +3810,8 @@ bool AsmParser::parseDirectiveIncbin() { } /// parseDirectiveIf -/// ::= .if expression -/// ::= .ifne expression -bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) { +/// ::= .if{,eq,ge,gt,le,lt,ne} expression +bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc, DirectiveKind DirKind) { TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; if (TheCondState.Ignore) { @@ -3816,6 +3826,29 @@ bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) { Lex(); + switch (DirKind) { + default: + llvm_unreachable("unsupported directive"); + case DK_IF: + case DK_IFNE: + break; + case DK_IFEQ: + ExprValue = ExprValue == 0; + break; + case DK_IFGE: + ExprValue = ExprValue >= 0; + break; + case DK_IFGT: + ExprValue = ExprValue > 0; + break; + case DK_IFLE: + ExprValue = ExprValue <= 0; + break; + case DK_IFLT: + ExprValue = ExprValue < 0; + break; + } + TheCondState.CondMet = ExprValue; TheCondState.Ignore = !TheCondState.CondMet; } @@ -4118,6 +4151,11 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".bundle_lock"] = DK_BUNDLE_LOCK; DirectiveKindMap[".bundle_unlock"] = DK_BUNDLE_UNLOCK; DirectiveKindMap[".if"] = DK_IF; + DirectiveKindMap[".ifeq"] = DK_IFEQ; + DirectiveKindMap[".ifge"] = DK_IFGE; + DirectiveKindMap[".ifgt"] = DK_IFGT; + DirectiveKindMap[".ifle"] = DK_IFLE; + DirectiveKindMap[".iflt"] = DK_IFLT; DirectiveKindMap[".ifne"] = DK_IFNE; DirectiveKindMap[".ifb"] = DK_IFB; DirectiveKindMap[".ifnb"] = DK_IFNB; @@ -4227,7 +4265,7 @@ void AsmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, // 
Jump to the macro instantiation and prime the lexer. CurBuffer = SrcMgr.AddNewSourceBuffer(MI->Instantiation, SMLoc()); - Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)); + Lexer.setBuffer(SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer()); Lex(); } @@ -4465,27 +4503,27 @@ bool AsmParser::parseMSInlineAsm( // Build the list of clobbers, outputs and inputs. for (unsigned i = 1, e = Info.ParsedOperands.size(); i != e; ++i) { - MCParsedAsmOperand *Operand = Info.ParsedOperands[i]; + MCParsedAsmOperand &Operand = *Info.ParsedOperands[i]; // Immediate. - if (Operand->isImm()) + if (Operand.isImm()) continue; // Register operand. - if (Operand->isReg() && !Operand->needAddressOf()) { + if (Operand.isReg() && !Operand.needAddressOf()) { unsigned NumDefs = Desc.getNumDefs(); // Clobber. - if (NumDefs && Operand->getMCOperandNum() < NumDefs) - ClobberRegs.push_back(Operand->getReg()); + if (NumDefs && Operand.getMCOperandNum() < NumDefs) + ClobberRegs.push_back(Operand.getReg()); continue; } // Expr/Input or Output. - StringRef SymName = Operand->getSymName(); + StringRef SymName = Operand.getSymName(); if (SymName.empty()) continue; - void *OpDecl = Operand->getOpDecl(); + void *OpDecl = Operand.getOpDecl(); if (!OpDecl) continue; @@ -4494,21 +4532,21 @@ bool AsmParser::parseMSInlineAsm( if (isOutput) { ++InputIdx; OutputDecls.push_back(OpDecl); - OutputDeclsAddressOf.push_back(Operand->needAddressOf()); - OutputConstraints.push_back('=' + Operand->getConstraint().str()); + OutputDeclsAddressOf.push_back(Operand.needAddressOf()); + OutputConstraints.push_back('=' + Operand.getConstraint().str()); AsmStrRewrites.push_back(AsmRewrite(AOK_Output, Start, SymName.size())); } else { InputDecls.push_back(OpDecl); - InputDeclsAddressOf.push_back(Operand->needAddressOf()); - InputConstraints.push_back(Operand->getConstraint().str()); + InputDeclsAddressOf.push_back(Operand.needAddressOf()); + InputConstraints.push_back(Operand.getConstraint().str()); AsmStrRewrites.push_back(AsmRewrite(AOK_Input, Start, SymName.size())); } } // Consider implicit defs to be clobbers. Think of cpuid and push. - const uint16_t *ImpDefs = Desc.getImplicitDefs(); - for (unsigned I = 0, E = Desc.getNumImplicitDefs(); I != E; ++I) - ClobberRegs.push_back(ImpDefs[I]); + ArrayRef<uint16_t> ImpDefs(Desc.getImplicitDefs(), + Desc.getNumImplicitDefs()); + ClobberRegs.insert(ClobberRegs.end(), ImpDefs.begin(), ImpDefs.end()); } // Set the number of Outputs and Inputs. @@ -4543,27 +4581,26 @@ bool AsmParser::parseMSInlineAsm( // Build the IR assembly string. std::string AsmStringIR; raw_string_ostream OS(AsmStringIR); - const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart(); - const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd(); + StringRef ASMString = + SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer(); + const char *AsmStart = ASMString.begin(); + const char *AsmEnd = ASMString.end(); array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort); - for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(), - E = AsmStrRewrites.end(); - I != E; ++I) { - AsmRewriteKind Kind = (*I).Kind; + for (const AsmRewrite &AR : AsmStrRewrites) { + AsmRewriteKind Kind = AR.Kind; if (Kind == AOK_Delete) continue; - const char *Loc = (*I).Loc.getPointer(); + const char *Loc = AR.Loc.getPointer(); assert(Loc >= AsmStart && "Expected Loc to be at or after Start!"); // Emit everything up to the immediate/expression. 
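// The parseMSInlineAsm loop above buckets operands into clobbers, outputs and
// inputs. The same classification over a toy operand type (std-only sketch;
// field names are illustrative, not LLVM's MCParsedAsmOperand):
#include <string>
#include <vector>
struct ToyOperand {
  bool IsImm = false, IsReg = false, NeedAddrOf = false, IsDef = false;
  unsigned Reg = 0;
  std::string Sym; // empty when the operand names no symbol
};
struct AsmBuckets {
  std::vector<unsigned> Clobbers;
  std::vector<std::string> Outputs, Inputs;
};
inline void classifyOperands(const std::vector<ToyOperand> &Ops, AsmBuckets &B) {
  for (const ToyOperand &Op : Ops) {
    if (Op.IsImm)
      continue;                       // immediates impose no constraint
    if (Op.IsReg && !Op.NeedAddrOf) {
      if (Op.IsDef)
        B.Clobbers.push_back(Op.Reg); // written registers become clobbers
      continue;
    }
    if (Op.Sym.empty())
      continue;                       // nothing to tie back to a declaration
    (Op.IsDef ? B.Outputs : B.Inputs).push_back(Op.Sym);
  }
}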
- unsigned Len = Loc - AsmStart; - if (Len) + if (unsigned Len = Loc - AsmStart) OS << StringRef(AsmStart, Len); // Skip the original expression. if (Kind == AOK_Skip) { - AsmStart = Loc + (*I).Len; + AsmStart = Loc + AR.Len; continue; } @@ -4573,7 +4610,7 @@ bool AsmParser::parseMSInlineAsm( default: break; case AOK_Imm: - OS << "$$" << (*I).Val; + OS << "$$" << AR.Val; break; case AOK_ImmPrefix: OS << "$$"; @@ -4585,7 +4622,7 @@ bool AsmParser::parseMSInlineAsm( OS << '$' << OutputIdx++; break; case AOK_SizeDirective: - switch ((*I).Val) { + switch (AR.Val) { default: break; case 8: OS << "byte ptr "; break; case 16: OS << "word ptr "; break; @@ -4600,7 +4637,7 @@ bool AsmParser::parseMSInlineAsm( OS << ".byte"; break; case AOK_Align: { - unsigned Val = (*I).Val; + unsigned Val = AR.Val; OS << ".align " << Val; // Skip the original immediate. @@ -4613,12 +4650,12 @@ bool AsmParser::parseMSInlineAsm( OS.flush(); if (AsmStringIR.back() != '.') OS << '.'; - OS << (*I).Val; + OS << AR.Val; break; } // Skip the original expression. - AsmStart = Loc + (*I).Len + AdditionalSkip; + AsmStart = Loc + AR.Len + AdditionalSkip; } // Emit the remainder of the asm string. diff --git a/lib/MC/MCParser/COFFAsmParser.cpp b/lib/MC/MCParser/COFFAsmParser.cpp index decf01c..5ecf9e5 100644 --- a/lib/MC/MCParser/COFFAsmParser.cpp +++ b/lib/MC/MCParser/COFFAsmParser.cpp @@ -13,6 +13,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSectionCOFF.h" @@ -37,7 +38,7 @@ class COFFAsmParser : public MCAsmParserExtension { bool ParseSectionSwitch(StringRef Section, unsigned Characteristics, SectionKind Kind, StringRef COMDATSymName, - COFF::COMDATType Type, const MCSectionCOFF *Assoc); + COFF::COMDATType Type); bool ParseSectionName(StringRef &SectionName); bool ParseSectionFlags(StringRef FlagsString, unsigned* Flags); @@ -117,8 +118,7 @@ class COFFAsmParser : public MCAsmParserExtension { bool ParseDirectiveEndef(StringRef, SMLoc); bool ParseDirectiveSecRel32(StringRef, SMLoc); bool ParseDirectiveSecIdx(StringRef, SMLoc); - bool parseCOMDATTypeAndAssoc(COFF::COMDATType &Type, - const MCSectionCOFF *&Assoc); + bool parseCOMDATType(COFF::COMDATType &Type); bool ParseDirectiveLinkOnce(StringRef, SMLoc); // Win64 EH directives. @@ -170,8 +170,8 @@ bool COFFAsmParser::ParseSectionFlags(StringRef FlagsString, unsigned* Flags) { bool ReadOnlyRemoved = false; unsigned SecFlags = None; - for (unsigned i = 0; i < FlagsString.size(); ++i) { - switch (FlagsString[i]) { + for (char FlagChar : FlagsString) { + switch (FlagChar) { case 'a': // Ignored. 
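// ParseSectionFlags above walks the flag string one character at a time and
// ORs section characteristics into a mask. A std-only sketch with toy flag
// values (the real COFF characteristics differ):
#include <cstdint>
enum : uint32_t { ToySecCode = 1u << 0, ToySecWrite = 1u << 1 };
inline bool parseSectionFlagsSketch(const char *S, uint32_t &Out) {
  Out = 0;
  for (; *S; ++S) {
    switch (*S) {
    case 'a': break;                     // accepted and ignored, as above
    case 'x': Out |= ToySecCode;  break; // executable
    case 'w': Out |= ToySecWrite; break; // writable
    default:  return true;               // unknown flag letter is an error
    }
  }
  return false;
}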
break; @@ -292,22 +292,20 @@ bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { bool COFFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Characteristics, SectionKind Kind) { - return ParseSectionSwitch(Section, Characteristics, Kind, "", - COFF::IMAGE_COMDAT_SELECT_ANY, nullptr); + return ParseSectionSwitch(Section, Characteristics, Kind, "", (COFF::COMDATType)0); } bool COFFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Characteristics, SectionKind Kind, StringRef COMDATSymName, - COFF::COMDATType Type, - const MCSectionCOFF *Assoc) { + COFF::COMDATType Type) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in section switching directive"); Lex(); getStreamer().SwitchSection(getContext().getCOFFSection( - Section, Characteristics, Kind, COMDATSymName, Type, Assoc)); + Section, Characteristics, Kind, COMDATSymName, Type)); return false; } @@ -358,15 +356,15 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { return true; } - COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY; - const MCSectionCOFF *Assoc = nullptr; + COFF::COMDATType Type = (COFF::COMDATType)0; StringRef COMDATSymName; if (getLexer().is(AsmToken::Comma)) { + Type = COFF::IMAGE_COMDAT_SELECT_ANY; Lex(); Flags |= COFF::IMAGE_SCN_LNK_COMDAT; - if (parseCOMDATTypeAndAssoc(Type, Assoc)) + if (parseCOMDATType(Type)) return true; if (getLexer().isNot(AsmToken::Comma)) @@ -381,7 +379,12 @@ bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { return TokError("unexpected token in directive"); SectionKind Kind = computeSectionKind(Flags); - ParseSectionSwitch(SectionName, Flags, Kind, COMDATSymName, Type, Assoc); + if (Kind.isText()) { + const Triple &T = getContext().getObjectFileInfo()->getTargetTriple(); + if (T.getArch() == Triple::arm || T.getArch() == Triple::thumb) + Flags |= COFF::IMAGE_SCN_MEM_16BIT; + } + ParseSectionSwitch(SectionName, Flags, Kind, COMDATSymName, Type); return false; } @@ -461,9 +464,8 @@ bool COFFAsmParser::ParseDirectiveSecIdx(StringRef, SMLoc) { return false; } -/// ::= [ identifier [ identifier ] ] -bool COFFAsmParser::parseCOMDATTypeAndAssoc(COFF::COMDATType &Type, - const MCSectionCOFF *&Assoc) { +/// ::= [ identifier ] +bool COFFAsmParser::parseCOMDATType(COFF::COMDATType &Type) { StringRef TypeId = getTok().getIdentifier(); Type = StringSwitch<COFF::COMDATType>(TypeId) @@ -481,48 +483,28 @@ bool COFFAsmParser::parseCOMDATTypeAndAssoc(COFF::COMDATType &Type, Lex(); - if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) { - SMLoc Loc = getTok().getLoc(); - StringRef AssocName; - if (ParseSectionName(AssocName)) - return TokError("expected associated section name"); - - Assoc = static_cast<const MCSectionCOFF*>( - getContext().getCOFFSection(AssocName)); - if (!Assoc) - return Error(Loc, "cannot associate unknown section '" + AssocName + "'"); - if (!(Assoc->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT)) - return Error(Loc, "associated section must be a COMDAT section"); - if (Assoc->getSelection() == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) - return Error(Loc, "associated section cannot be itself associative"); - } - return false; } /// ParseDirectiveLinkOnce -/// ::= .linkonce [ identifier [ identifier ] ] +/// ::= .linkonce [ identifier ] bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) { COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY; - const MCSectionCOFF *Assoc = nullptr; if (getLexer().is(AsmToken::Identifier)) - if (parseCOMDATTypeAndAssoc(Type, Assoc)) + if
(parseCOMDATType(Type)) return true; const MCSectionCOFF *Current = static_cast<const MCSectionCOFF*>( getStreamer().getCurrentSection().first); - - if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) { - if (Assoc == Current) - return Error(Loc, "cannot associate a section with itself"); - } + if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) + return Error(Loc, "cannot make section associative with .linkonce"); if (Current->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) return Error(Loc, Twine("section '") + Current->getSectionName() + "' is already linkonce"); - Current->setSelection(Type, Assoc); + Current->setSelection(Type); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in directive"); @@ -541,25 +523,25 @@ bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) { MCSymbol *Symbol = getContext().GetOrCreateSymbol(SymbolID); Lex(); - getStreamer().EmitWin64EHStartProc(Symbol); + getStreamer().EmitWinCFIStartProc(Symbol); return false; } bool COFFAsmParser::ParseSEHDirectiveEndProc(StringRef, SMLoc) { Lex(); - getStreamer().EmitWin64EHEndProc(); + getStreamer().EmitWinCFIEndProc(); return false; } bool COFFAsmParser::ParseSEHDirectiveStartChained(StringRef, SMLoc) { Lex(); - getStreamer().EmitWin64EHStartChained(); + getStreamer().EmitWinCFIStartChained(); return false; } bool COFFAsmParser::ParseSEHDirectiveEndChained(StringRef, SMLoc) { Lex(); - getStreamer().EmitWin64EHEndChained(); + getStreamer().EmitWinCFIEndChained(); return false; } @@ -585,13 +567,13 @@ bool COFFAsmParser::ParseSEHDirectiveHandler(StringRef, SMLoc) { MCSymbol *handler = getContext().GetOrCreateSymbol(SymbolID); Lex(); - getStreamer().EmitWin64EHHandler(handler, unwind, except); + getStreamer().EmitWinEHHandler(handler, unwind, except); return false; } bool COFFAsmParser::ParseSEHDirectiveHandlerData(StringRef, SMLoc) { Lex(); - getStreamer().EmitWin64EHHandlerData(); + getStreamer().EmitWinEHHandlerData(); return false; } @@ -604,7 +586,7 @@ bool COFFAsmParser::ParseSEHDirectivePushReg(StringRef, SMLoc L) { return TokError("unexpected token in directive"); Lex(); - getStreamer().EmitWin64EHPushReg(Reg); + getStreamer().EmitWinCFIPushReg(Reg); return false; } @@ -628,7 +610,7 @@ bool COFFAsmParser::ParseSEHDirectiveSetFrame(StringRef, SMLoc L) { return TokError("unexpected token in directive"); Lex(); - getStreamer().EmitWin64EHSetFrame(Reg, Off); + getStreamer().EmitWinCFISetFrame(Reg, Off); return false; } @@ -645,7 +627,7 @@ bool COFFAsmParser::ParseSEHDirectiveAllocStack(StringRef, SMLoc) { return TokError("unexpected token in directive"); Lex(); - getStreamer().EmitWin64EHAllocStack(Size); + getStreamer().EmitWinCFIAllocStack(Size); return false; } @@ -670,7 +652,7 @@ bool COFFAsmParser::ParseSEHDirectiveSaveReg(StringRef, SMLoc L) { Lex(); // FIXME: Err on %xmm* registers - getStreamer().EmitWin64EHSaveReg(Reg, Off); + getStreamer().EmitWinCFISaveReg(Reg, Off); return false; } @@ -697,7 +679,7 @@ bool COFFAsmParser::ParseSEHDirectiveSaveXMM(StringRef, SMLoc L) { Lex(); // FIXME: Err on non-%xmm* registers - getStreamer().EmitWin64EHSaveXMM(Reg, Off); + getStreamer().EmitWinCFISaveXMM(Reg, Off); return false; } @@ -718,13 +700,13 @@ bool COFFAsmParser::ParseSEHDirectivePushFrame(StringRef, SMLoc) { return TokError("unexpected token in directive"); Lex(); - getStreamer().EmitWin64EHPushFrame(Code); + getStreamer().EmitWinCFIPushFrame(Code); return false; } bool COFFAsmParser::ParseSEHDirectiveEndProlog(StringRef, SMLoc) { Lex(); - 
getStreamer().EmitWin64EHEndProlog(); + getStreamer().EmitWinCFIEndProlog(); return false; } diff --git a/lib/MC/MCParser/DarwinAsmParser.cpp b/lib/MC/MCParser/DarwinAsmParser.cpp index f74b30a..b2a6785 100644 --- a/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/lib/MC/MCParser/DarwinAsmParser.cpp @@ -650,7 +650,7 @@ bool DarwinAsmParser::parseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { } // Write the message. - int CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc); + unsigned CurBuf = getSourceManager().FindBufferContainingLoc(IDLoc); *OS << getSourceManager().getBufferInfo(CurBuf).Buffer->getBufferIdentifier() << ":" << getSourceManager().FindLineNumber(IDLoc, CurBuf) << ":" << LogMessage + "\n"; diff --git a/lib/MC/MCParser/ELFAsmParser.cpp b/lib/MC/MCParser/ELFAsmParser.cpp index 95c4971..98b2b3b 100644 --- a/lib/MC/MCParser/ELFAsmParser.cpp +++ b/lib/MC/MCParser/ELFAsmParser.cpp @@ -150,7 +150,7 @@ public: private: bool ParseSectionName(StringRef &SectionName); - bool ParseSectionArguments(bool IsPush); + bool ParseSectionArguments(bool IsPush, SMLoc loc); unsigned parseSunStyleSectionFlags(); }; @@ -382,7 +382,7 @@ unsigned ELFAsmParser::parseSunStyleSectionFlags() { bool ELFAsmParser::ParseDirectivePushSection(StringRef s, SMLoc loc) { getStreamer().PushSection(); - if (ParseSectionArguments(/*IsPush=*/true)) { + if (ParseSectionArguments(/*IsPush=*/true, loc)) { getStreamer().PopSection(); return true; } @@ -397,11 +397,11 @@ bool ELFAsmParser::ParseDirectivePopSection(StringRef, SMLoc) { } // FIXME: This is a work in progress. -bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { - return ParseSectionArguments(/*IsPush=*/false); +bool ELFAsmParser::ParseDirectiveSection(StringRef, SMLoc loc) { + return ParseSectionArguments(/*IsPush=*/false, loc); } -bool ELFAsmParser::ParseSectionArguments(bool IsPush) { +bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { StringRef SectionName; if (ParseSectionName(SectionName)) @@ -545,10 +545,24 @@ EndStmt: } SectionKind Kind = computeSectionKind(Flags, Size); - getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type, - Flags, Kind, Size, - GroupName), - Subsection); + const MCSection *ELFSection = getContext().getELFSection( + SectionName, Type, Flags, Kind, Size, GroupName); + getStreamer().SwitchSection(ELFSection, Subsection); + + if (getContext().getGenDwarfForAssembly()) { + auto &Sections = getContext().getGenDwarfSectionSyms(); + auto InsertResult = Sections.insert( + std::make_pair(ELFSection, std::make_pair(nullptr, nullptr))); + if (InsertResult.second) { + if (getContext().getDwarfVersion() <= 2) + Error(loc, "DWARF2 only supports one section per compilation unit"); + + MCSymbol *SectionStartSymbol = getContext().CreateTempSymbol(); + getStreamer().EmitLabel(SectionStartSymbol); + InsertResult.first->second.first = SectionStartSymbol; + } + } + return false; } @@ -561,6 +575,19 @@ bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { return false; } +static MCSymbolAttr MCAttrForString(StringRef Type) { + return StringSwitch<MCSymbolAttr>(Type) + .Cases("STT_FUNC", "function", MCSA_ELF_TypeFunction) + .Cases("STT_OBJECT", "object", MCSA_ELF_TypeObject) + .Cases("STT_TLS", "tls_object", MCSA_ELF_TypeTLS) + .Cases("STT_COMMON", "common", MCSA_ELF_TypeCommon) + .Cases("STT_NOTYPE", "notype", MCSA_ELF_TypeNoType) + .Cases("STT_GNU_IFUNC", "gnu_indirect_function", + MCSA_ELF_TypeIndFunction) + .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject) + 
.Default(MCSA_Invalid); +} + /// ParseDirectiveELFType /// ::= .type identifier , STT_<TYPE_IN_UPPER_CASE> /// ::= .type identifier , #attribute @@ -575,53 +602,36 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) { // Handle the identifier as the key symbol. MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in '.type' directive"); - Lex(); - - StringRef Type; - SMLoc TypeLoc; - MCSymbolAttr Attr; - if (getLexer().is(AsmToken::Identifier)) { - TypeLoc = getLexer().getLoc(); - if (getParser().parseIdentifier(Type)) - return TokError("expected symbol type in directive"); - Attr = StringSwitch<MCSymbolAttr>(Type) - .Case("STT_FUNC", MCSA_ELF_TypeFunction) - .Case("STT_OBJECT", MCSA_ELF_TypeObject) - .Case("STT_TLS", MCSA_ELF_TypeTLS) - .Case("STT_COMMON", MCSA_ELF_TypeCommon) - .Case("STT_NOTYPE", MCSA_ELF_TypeNoType) - .Case("STT_GNU_IFUNC", MCSA_ELF_TypeIndFunction) - .Default(MCSA_Invalid); - } else if (getLexer().is(AsmToken::Hash) || getLexer().is(AsmToken::At) || - getLexer().is(AsmToken::Percent) || - getLexer().is(AsmToken::String)) { - if (!getLexer().is(AsmToken::String)) - Lex(); + // NOTE the comma is optional in all cases. It is only documented as being + // optional in the first case, however, GAS will silently treat the comma as + // optional in all cases. Furthermore, although the documentation states that + // the first form only accepts STT_<TYPE_IN_UPPER_CASE>, in reality, GAS + // accepts both the upper case name as well as the lower case aliases. + if (getLexer().is(AsmToken::Comma)) + Lex(); - TypeLoc = getLexer().getLoc(); - if (getParser().parseIdentifier(Type)) - return TokError("expected symbol type in directive"); - Attr = StringSwitch<MCSymbolAttr>(Type) - .Case("function", MCSA_ELF_TypeFunction) - .Case("object", MCSA_ELF_TypeObject) - .Case("tls_object", MCSA_ELF_TypeTLS) - .Case("common", MCSA_ELF_TypeCommon) - .Case("notype", MCSA_ELF_TypeNoType) - .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject) - .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction) - .Default(MCSA_Invalid); - } else + if (getLexer().isNot(AsmToken::Identifier) && + getLexer().isNot(AsmToken::Hash) && getLexer().isNot(AsmToken::At) && + getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::String)) return TokError("expected STT_<TYPE_IN_UPPER_CASE>, '#<type>', '@<type>', " "'%<type>' or \"<type>\""); + if (getLexer().isNot(AsmToken::String) && + getLexer().isNot(AsmToken::Identifier)) + Lex(); + + SMLoc TypeLoc = getLexer().getLoc(); + + StringRef Type; + if (getParser().parseIdentifier(Type)) + return TokError("expected symbol type in directive"); + + MCSymbolAttr Attr = MCAttrForString(Type); if (Attr == MCSA_Invalid) return Error(TypeLoc, "unsupported attribute in '.type' directive"); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.type' directive"); - Lex(); getStreamer().EmitSymbolAttribute(Sym, Attr); diff --git a/lib/MC/MCSectionCOFF.cpp b/lib/MC/MCSectionCOFF.cpp index 335b8cd..fc2bd36 100644 --- a/lib/MC/MCSectionCOFF.cpp +++ b/lib/MC/MCSectionCOFF.cpp @@ -30,14 +30,9 @@ bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name, return false; } -void MCSectionCOFF::setSelection(int Selection, - const MCSectionCOFF *Assoc) const { +void MCSectionCOFF::setSelection(int Selection) const { assert(Selection != 0 && "invalid COMDAT selection type"); - assert((Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) == - (Assoc != nullptr) && 
- "associative COMDAT section must have an associated section"); this->Selection = Selection; - this->Assoc = Assoc; Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; } @@ -82,7 +77,7 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << "same_contents,"; break; case COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE: - OS << "associative " << Assoc->getSectionName() << ","; + OS << "associative,"; break; case COFF::IMAGE_COMDAT_SELECT_LARGEST: OS << "largest,"; diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 7dccf0d..bdcdb97 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -37,7 +37,7 @@ void MCTargetStreamer::finish() {} void MCTargetStreamer::emitAssignment(MCSymbol *Symbol, const MCExpr *Value) {} MCStreamer::MCStreamer(MCContext &Ctx) - : Context(Ctx), CurrentW64UnwindInfo(nullptr), LastSymbol(nullptr) { + : Context(Ctx), CurrentW64UnwindInfo(nullptr) { SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>()); } @@ -51,7 +51,6 @@ void MCStreamer::reset() { delete W64UnwindInfos[i]; W64UnwindInfos.clear(); CurrentW64UnwindInfo = nullptr; - LastSymbol = nullptr; SectionStack.clear(); SectionStack.push_back(std::pair<MCSectionSubPair, MCSectionSubPair>()); } @@ -234,20 +233,12 @@ void MCStreamer::EmitLabel(MCSymbol *Symbol) { assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(getCurrentSection().first && "Cannot emit before setting section!"); AssignSection(Symbol, getCurrentSection().first); - LastSymbol = Symbol; MCTargetStreamer *TS = getTargetStreamer(); if (TS) TS->emitLabel(Symbol); } -void MCStreamer::EmitDebugLabel(MCSymbol *Symbol) { - assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); - assert(getCurrentSection().first && "Cannot emit before setting section!"); - AssignSection(Symbol, getCurrentSection().first); - LastSymbol = Symbol; -} - void MCStreamer::EmitCompactUnwindEncoding(uint32_t CompactUnwindEncoding) { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); @@ -273,17 +264,6 @@ void MCStreamer::EmitCFIStartProc(bool IsSimple) { void MCStreamer::EmitCFIStartProcImpl(MCDwarfFrameInfo &Frame) { } -void MCStreamer::RecordProcStart(MCDwarfFrameInfo &Frame) { - // Report an error if we haven't seen a symbol yet where we'd bind - // .cfi_startproc. - if (!LastSymbol) - report_fatal_error("No symbol to start a frame"); - Frame.Function = LastSymbol; - // We need to create a local symbol to avoid relocations. - Frame.Begin = getContext().CreateTempSymbol(); - EmitLabel(Frame.Begin); -} - void MCStreamer::EmitCFIEndProc() { EnsureValidFrame(); MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); @@ -291,11 +271,9 @@ void MCStreamer::EmitCFIEndProc() { } void MCStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { -} - -void MCStreamer::RecordProcEnd(MCDwarfFrameInfo &Frame) { - Frame.End = getContext().CreateTempSymbol(); - EmitLabel(Frame.End); + // Put a dummy non-null value in Frame.End to mark that this frame has been + // closed. 
+ Frame.End = (MCSymbol *) 1; } MCSymbol *MCStreamer::EmitCFICommon() { @@ -447,7 +425,7 @@ void MCStreamer::EnsureValidW64UnwindInfo() { report_fatal_error("No open Win64 EH frame function!"); } -void MCStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) { +void MCStreamer::EmitWinCFIStartProc(const MCSymbol *Symbol) { MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo; if (CurFrame && !CurFrame->End) report_fatal_error("Starting a function before ending the previous one!"); @@ -458,7 +436,7 @@ void MCStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) { setCurrentW64UnwindInfo(Frame); } -void MCStreamer::EmitWin64EHEndProc() { +void MCStreamer::EmitWinCFIEndProc() { EnsureValidW64UnwindInfo(); MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo; if (CurFrame->ChainedParent) @@ -467,7 +445,7 @@ void MCStreamer::EmitWin64EHEndProc() { EmitLabel(CurFrame->End); } -void MCStreamer::EmitWin64EHStartChained() { +void MCStreamer::EmitWinCFIStartChained() { EnsureValidW64UnwindInfo(); MCWin64EHUnwindInfo *Frame = new MCWin64EHUnwindInfo; MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo; @@ -478,7 +456,7 @@ void MCStreamer::EmitWin64EHStartChained() { setCurrentW64UnwindInfo(Frame); } -void MCStreamer::EmitWin64EHEndChained() { +void MCStreamer::EmitWinCFIEndChained() { EnsureValidW64UnwindInfo(); MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo; if (!CurFrame->ChainedParent) @@ -488,8 +466,8 @@ void MCStreamer::EmitWin64EHEndChained() { CurrentW64UnwindInfo = CurFrame->ChainedParent; } -void MCStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind, - bool Except) { +void MCStreamer::EmitWinEHHandler(const MCSymbol *Sym, bool Unwind, + bool Except) { EnsureValidW64UnwindInfo(); MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo; if (CurFrame->ChainedParent) @@ -503,14 +481,14 @@ void MCStreamer::EmitWin64EHHandler(const MCSymbol *Sym, bool Unwind, CurFrame->HandlesExceptions = true; } -void MCStreamer::EmitWin64EHHandlerData() { +void MCStreamer::EmitWinEHHandlerData() { EnsureValidW64UnwindInfo(); MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo; if (CurFrame->ChainedParent) report_fatal_error("Chained unwind areas can't have handlers!"); } -void MCStreamer::EmitWin64EHPushReg(unsigned Register) { +void MCStreamer::EmitWinCFIPushReg(unsigned Register) { EnsureValidW64UnwindInfo(); MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo; MCSymbol *Label = getContext().CreateTempSymbol(); @@ -519,13 +497,15 @@ void MCStreamer::EmitWin64EHPushReg(unsigned Register) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) { +void MCStreamer::EmitWinCFISetFrame(unsigned Register, unsigned Offset) { EnsureValidW64UnwindInfo(); MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo; if (CurFrame->LastFrameInst >= 0) report_fatal_error("Frame register and offset already specified!"); if (Offset & 0x0F) report_fatal_error("Misaligned frame pointer offset!"); + if (Offset > 240) + report_fatal_error("Frame offset must be less than or equal to 240!"); MCSymbol *Label = getContext().CreateTempSymbol(); MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, Label, Register, Offset); EmitLabel(Label); @@ -533,8 +513,10 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWin64EHAllocStack(unsigned Size) { +void MCStreamer::EmitWinCFIAllocStack(unsigned Size) { EnsureValidW64UnwindInfo(); + if (Size == 0) + report_fatal_error("Allocation size 
must be non-zero!"); if (Size & 7) report_fatal_error("Misaligned stack allocation!"); MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo; @@ -544,7 +526,7 @@ void MCStreamer::EmitWin64EHAllocStack(unsigned Size) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) { +void MCStreamer::EmitWinCFISaveReg(unsigned Register, unsigned Offset) { EnsureValidW64UnwindInfo(); if (Offset & 7) report_fatal_error("Misaligned saved register offset!"); @@ -557,7 +539,7 @@ void MCStreamer::EmitWin64EHSaveReg(unsigned Register, unsigned Offset) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) { +void MCStreamer::EmitWinCFISaveXMM(unsigned Register, unsigned Offset) { EnsureValidW64UnwindInfo(); if (Offset & 0x0F) report_fatal_error("Misaligned saved vector register offset!"); @@ -570,7 +552,7 @@ void MCStreamer::EmitWin64EHSaveXMM(unsigned Register, unsigned Offset) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWin64EHPushFrame(bool Code) { +void MCStreamer::EmitWinCFIPushFrame(bool Code) { EnsureValidW64UnwindInfo(); MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo; if (CurFrame->Instructions.size() > 0) @@ -581,7 +563,7 @@ void MCStreamer::EmitWin64EHPushFrame(bool Code) { CurFrame->Instructions.push_back(Inst); } -void MCStreamer::EmitWin64EHEndProlog() { +void MCStreamer::EmitWinCFIEndProlog() { EnsureValidW64UnwindInfo(); MCWin64EHUnwindInfo *CurFrame = CurrentW64UnwindInfo; CurFrame->PrologEnd = getContext().CreateTempSymbol(); @@ -589,11 +571,9 @@ void MCStreamer::EmitWin64EHEndProlog() { } void MCStreamer::EmitCOFFSectionIndex(MCSymbol const *Symbol) { - llvm_unreachable("This file format doesn't support this directive"); } void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) { - llvm_unreachable("This file format doesn't support this directive"); } /// EmitRawText - If this file is backed by an assembly streamer, this dumps @@ -629,9 +609,82 @@ void MCStreamer::Finish() { } void MCStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + visitUsedExpr(*Value); Symbol->setVariableValue(Value); MCTargetStreamer *TS = getTargetStreamer(); if (TS) TS->emitAssignment(Symbol, Value); } + +void MCStreamer::visitUsedSymbol(const MCSymbol &Sym) { +} + +void MCStreamer::visitUsedExpr(const MCExpr &Expr) { + switch (Expr.getKind()) { + case MCExpr::Target: + cast<MCTargetExpr>(Expr).visitUsedExpr(*this); + break; + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr); + visitUsedExpr(*BE.getLHS()); + visitUsedExpr(*BE.getRHS()); + break; + } + + case MCExpr::SymbolRef: + visitUsedSymbol(cast<MCSymbolRefExpr>(Expr).getSymbol()); + break; + + case MCExpr::Unary: + visitUsedExpr(*cast<MCUnaryExpr>(Expr).getSubExpr()); + break; + } +} + +void MCStreamer::EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) { + // Scan for values. 
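// visitUsedExpr above is a straightforward recursive walk that funnels every
// symbol reference into a single hook. The same shape over a toy expression
// tree (std-only sketch, not LLVM's MCExpr):
#include <functional>
#include <string>
struct ToyMCExpr {
  enum Kind { Constant, SymbolRef, Unary, Binary } K = Constant;
  std::string Sym;                                // set when K == SymbolRef
  const ToyMCExpr *LHS = nullptr, *RHS = nullptr; // children for Unary/Binary
};
inline void visitUsedSketch(const ToyMCExpr &E,
                            const std::function<void(const std::string &)> &OnSym) {
  switch (E.K) {
  case ToyMCExpr::Constant: break;                // nothing to register
  case ToyMCExpr::SymbolRef: OnSym(E.Sym); break; // the one interesting case
  case ToyMCExpr::Unary: visitUsedSketch(*E.LHS, OnSym); break;
  case ToyMCExpr::Binary:
    visitUsedSketch(*E.LHS, OnSym);
    visitUsedSketch(*E.RHS, OnSym);
    break;
  }
}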
+ for (unsigned i = Inst.getNumOperands(); i--;) + if (Inst.getOperand(i).isExpr()) + visitUsedExpr(*Inst.getOperand(i).getExpr()); +} + +void MCStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {} +void MCStreamer::EmitThumbFunc(MCSymbol *Func) {} +void MCStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} +void MCStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {} +void MCStreamer::EndCOFFSymbolDef() {} +void MCStreamer::EmitFileDirective(StringRef Filename) {} +void MCStreamer::EmitCOFFSymbolStorageClass(int StorageClass) {} +void MCStreamer::EmitCOFFSymbolType(int Type) {} +void MCStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} +void MCStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} +void MCStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) {} +void MCStreamer::ChangeSection(const MCSection *, const MCExpr *) {} +void MCStreamer::EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {} +void MCStreamer::EmitBytes(StringRef Data) {} +void MCStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, + const SMLoc &Loc) { + visitUsedExpr(*Value); +} +void MCStreamer::EmitULEB128Value(const MCExpr *Value) {} +void MCStreamer::EmitSLEB128Value(const MCExpr *Value) {} +void MCStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) {} +void MCStreamer::EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) {} +bool MCStreamer::EmitValueToOffset(const MCExpr *Offset, unsigned char Value) { + return false; +} +void MCStreamer::EmitBundleAlignMode(unsigned AlignPow2) {} +void MCStreamer::EmitBundleLock(bool AlignToEnd) {} +void MCStreamer::FinishImpl() {} +void MCStreamer::EmitBundleUnlock() {} diff --git a/lib/MC/MCTargetOptions.cpp b/lib/MC/MCTargetOptions.cpp index 8e946d5..efd724a 100644 --- a/lib/MC/MCTargetOptions.cpp +++ b/lib/MC/MCTargetOptions.cpp @@ -14,6 +14,7 @@ namespace llvm { MCTargetOptions::MCTargetOptions() : SanitizeAddress(false), MCRelaxAll(false), MCNoExecStack(false), MCSaveTempLabels(false), MCUseDwarfDirectory(false), - ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false) {} + ShowMCEncoding(false), ShowMCInst(false), AsmVerbose(false), + DwarfVersion(0) {} } // end namespace llvm diff --git a/lib/MC/MCWin64EH.cpp b/lib/MC/MCWin64EH.cpp index b8b07d3..bb651647 100644 --- a/lib/MC/MCWin64EH.cpp +++ b/lib/MC/MCWin64EH.cpp @@ -20,34 +20,30 @@ namespace llvm { // NOTE: All relocations generated here are 4-byte image-relative. -static uint8_t CountOfUnwindCodes(std::vector<MCWin64EHInstruction> &instArray){ - uint8_t count = 0; - for (std::vector<MCWin64EHInstruction>::const_iterator I = instArray.begin(), - E = instArray.end(); I != E; ++I) { - switch (I->getOperation()) { +static uint8_t CountOfUnwindCodes(std::vector<MCWin64EHInstruction> &Insns) { + uint8_t Count = 0; + for (const auto &I : Insns) { + switch (I.getOperation()) { case Win64EH::UOP_PushNonVol: case Win64EH::UOP_AllocSmall: case Win64EH::UOP_SetFPReg: case Win64EH::UOP_PushMachFrame: - count += 1; + Count += 1; break; case Win64EH::UOP_SaveNonVol: case Win64EH::UOP_SaveXMM128: - count += 2; + Count += 2; break; case Win64EH::UOP_SaveNonVolBig: case Win64EH::UOP_SaveXMM128Big: - count += 3; + Count += 3; break; case Win64EH::UOP_AllocLarge: - if (I->getSize() > 512*1024-8) - count += 3; - else - count += 2; + Count += (I.getSize() > 512 * 1024 - 8) ? 
3 : 2; break; } } - return count; + return Count; } static void EmitAbsDifference(MCStreamer &streamer, MCSymbol *lhs, @@ -274,23 +270,23 @@ void MCWin64EHUnwindEmitter::EmitUnwindInfo(MCStreamer &streamer, llvm::EmitUnwindInfo(streamer, info); } -void MCWin64EHUnwindEmitter::Emit(MCStreamer &streamer) { - MCContext &context = streamer.getContext(); +void MCWin64EHUnwindEmitter::Emit(MCStreamer &Streamer) { + MCContext &Context = Streamer.getContext(); + // Emit the unwind info structs first. - for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) { - MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i); - const MCSection *xdataSect = - getWin64EHTableSection(GetSectionSuffix(info.Function), context); - streamer.SwitchSection(xdataSect); - llvm::EmitUnwindInfo(streamer, &info); + for (const auto &CFI : Streamer.getW64UnwindInfos()) { + const MCSection *XData = + getWin64EHTableSection(GetSectionSuffix(CFI->Function), Context); + Streamer.SwitchSection(XData); + EmitUnwindInfo(Streamer, CFI); } + // Now emit RUNTIME_FUNCTION entries. - for (unsigned i = 0; i < streamer.getNumW64UnwindInfos(); ++i) { - MCWin64EHUnwindInfo &info = streamer.getW64UnwindInfo(i); - const MCSection *pdataSect = - getWin64EHFuncTableSection(GetSectionSuffix(info.Function), context); - streamer.SwitchSection(pdataSect); - EmitRuntimeFunction(streamer, &info); + for (const auto &CFI : Streamer.getW64UnwindInfos()) { + const MCSection *PData = + getWin64EHFuncTableSection(GetSectionSuffix(CFI->Function), Context); + Streamer.SwitchSection(PData); + EmitRuntimeFunction(Streamer, CFI); } } diff --git a/lib/MC/MachObjectWriter.cpp b/lib/MC/MachObjectWriter.cpp index cbaf0b8..5214398 100644 --- a/lib/MC/MachObjectWriter.cpp +++ b/lib/MC/MachObjectWriter.cpp @@ -303,20 +303,50 @@ void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, assert(OS.tell() - Start == sizeof(MachO::dysymtab_command)); } +MachObjectWriter::MachSymbolData * +MachObjectWriter::findSymbolData(const MCSymbol &Sym) { + for (auto &Entry : LocalSymbolData) + if (&Entry.SymbolData->getSymbol() == &Sym) + return &Entry; + + for (auto &Entry : ExternalSymbolData) + if (&Entry.SymbolData->getSymbol() == &Sym) + return &Entry; + + for (auto &Entry : UndefinedSymbolData) + if (&Entry.SymbolData->getSymbol() == &Sym) + return &Entry; + + return nullptr; +} + void MachObjectWriter::WriteNlist(MachSymbolData &MSD, const MCAsmLayout &Layout) { MCSymbolData &Data = *MSD.SymbolData; - const MCSymbol &Symbol = Data.getSymbol(); + const MCSymbol *Symbol = &Data.getSymbol(); + const MCSymbol *AliasedSymbol = &Symbol->AliasedSymbol(); + uint8_t SectionIndex = MSD.SectionIndex; uint8_t Type = 0; uint16_t Flags = Data.getFlags(); uint64_t Address = 0; + bool IsAlias = Symbol != AliasedSymbol; + + MachSymbolData *AliaseeInfo; + if (IsAlias) { + AliaseeInfo = findSymbolData(*AliasedSymbol); + if (AliaseeInfo) + SectionIndex = AliaseeInfo->SectionIndex; + Symbol = AliasedSymbol; + } // Set the N_TYPE bits. See <mach-o/nlist.h>. // // FIXME: Are the prebound or indirect fields possible here? - if (Symbol.isUndefined()) + if (IsAlias && Symbol->isUndefined()) + Type = MachO::N_INDR; + else if (Symbol->isUndefined()) Type = MachO::N_UNDF; - else if (Symbol.isAbsolute()) + else if (Symbol->isAbsolute()) Type = MachO::N_ABS; else Type = MachO::N_SECT; @@ -327,13 +357,15 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD, Type |= MachO::N_PEXT; // Set external bit. 
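// WriteNlist above derives the Mach-O n_type from the symbol's state, with
// N_INDR newly used for aliases of undefined symbols. The selection logic in
// isolation (values as in <mach-o/nlist.h>, to the best of my knowledge):
#include <cstdint>
enum : uint8_t { ToyN_UNDF = 0x0, ToyN_ABS = 0x2, ToyN_SECT = 0xe, ToyN_INDR = 0xa };
inline uint8_t nlistTypeSketch(bool IsAlias, bool Undefined, bool Absolute) {
  if (IsAlias && Undefined) return ToyN_INDR; // alias of an undefined symbol
  if (Undefined)            return ToyN_UNDF;
  if (Absolute)             return ToyN_ABS;
  return ToyN_SECT;                           // defined in some section
}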
- if (Data.isExternal() || Symbol.isUndefined()) + if (Data.isExternal() || (!IsAlias && Symbol->isUndefined())) Type |= MachO::N_EXT; // Compute the symbol address. - if (Symbol.isDefined()) { + if (IsAlias && Symbol->isUndefined()) + Address = AliaseeInfo->StringIndex; + else if (Symbol->isDefined()) Address = getSymbolAddress(&Data, Layout); - } else if (Data.isCommon()) { + else if (Data.isCommon()) { // Common symbols are encoded with the size in the address // field, and their alignment in the flags. Address = Data.getCommonSize(); @@ -344,21 +376,21 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD, assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); if (Log2Size > 15) report_fatal_error("invalid 'common' alignment '" + - Twine(Align) + "' for '" + Symbol.getName() + "'", + Twine(Align) + "' for '" + Symbol->getName() + "'", false); // FIXME: Keep this mask with the SymbolFlags enumeration. Flags = (Flags & 0xF0FF) | (Log2Size << 8); } } - if (Layout.getAssembler().isThumbFunc(&Symbol)) + if (Layout.getAssembler().isThumbFunc(Symbol)) Flags |= SF_ThumbFunc; // struct nlist (12 bytes) Write32(MSD.StringIndex); Write8(Type); - Write8(MSD.SectionIndex); + Write8(SectionIndex); // The Mach-O streamer uses the lowest 16-bits of the flags for the 'desc' // value. diff --git a/lib/MC/Makefile b/lib/MC/Makefile index bf8b7c0..a10f17e 100644 --- a/lib/MC/Makefile +++ b/lib/MC/Makefile @@ -10,7 +10,7 @@ LEVEL = ../.. LIBRARYNAME = LLVMMC BUILD_ARCHIVE := 1 -PARALLEL_DIRS := MCParser MCDisassembler +PARALLEL_DIRS := MCAnalysis MCParser MCDisassembler include $(LEVEL)/Makefile.common diff --git a/lib/Object/StringTableBuilder.cpp b/lib/MC/StringTableBuilder.cpp index 9152834..db58ece 100644 --- a/lib/Object/StringTableBuilder.cpp +++ b/lib/MC/StringTableBuilder.cpp @@ -7,8 +7,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/MC/StringTableBuilder.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Object/StringTableBuilder.h" using namespace llvm; diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index 961cbc6..a462c0d 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -347,6 +347,14 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const &SectionData) { COFFSection *coff_section = createSection(Sec.getSectionName()); COFFSymbol *coff_symbol = createSymbol(Sec.getSectionName()); + if (Sec.getSelection() != COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) { + if (const MCSymbol *S = Sec.getCOMDATSymbol()) { + COFFSymbol *COMDATSymbol = GetOrCreateCOFFSymbol(S); + if (COMDATSymbol->Section) + report_fatal_error("two sections have the same comdat"); + COMDATSymbol->Section = coff_section; + } + } coff_section->Symbol = coff_symbol; coff_symbol->Section = coff_section; @@ -458,9 +466,15 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, coff_symbol->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE; } else { const MCSymbolData &BaseData = Assembler.getSymbolData(*Base); - if (BaseData.Fragment) - coff_symbol->Section = + if (BaseData.Fragment) { + COFFSection *Sec = SectionMap[&BaseData.Fragment->getParent()->getSection()]; + + if (coff_symbol->Section && coff_symbol->Section != Sec) + report_fatal_error("conflicting sections for symbol"); + + coff_symbol->Section = Sec; + } } coff_symbol->MCData = &ResSymData; @@ -537,7 +551,7 @@ bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData, // This doesn't seem to be right. 
Strings referred to from the .data section // need symbols so they can be linked to code in the .text section right? - // return Asm.isSymbolLinkerVisible (&SymbolData); + // return Asm.isSymbolLinkerVisible(SymbolData.getSymbol()); // For now, all non-variable symbols are exported, // the linker will sort the rest out for us. @@ -819,13 +833,9 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, DenseMap<COFFSection *, uint16_t> SectionIndices; for (auto & Section : Sections) { - if (Layout.getSectionAddressSize(Section->MCData) > 0) { - size_t Number = ++Header.NumberOfSections; - SectionIndices[Section.get()] = Number; - MakeSectionReal(*Section, Number); - } else { - Section->Number = -1; - } + size_t Number = ++Header.NumberOfSections; + SectionIndices[Section.get()] = Number; + MakeSectionReal(*Section, Number); } Header.NumberOfSymbols = 0; @@ -865,11 +875,15 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, const MCSectionCOFF &MCSec = static_cast<const MCSectionCOFF &>(Section->MCData->getSection()); - COFFSection *Assoc = SectionMap.lookup(MCSec.getAssocSection()); + const MCSymbol *COMDAT = MCSec.getCOMDATSymbol(); + assert(COMDAT); + COFFSymbol *COMDATSymbol = GetOrCreateCOFFSymbol(COMDAT); + assert(COMDATSymbol); + COFFSection *Assoc = COMDATSymbol->Section; if (!Assoc) - report_fatal_error(Twine("Missing associated COMDAT section ") + - MCSec.getAssocSection()->getSectionName() + - " for section " + MCSec.getSectionName()); + report_fatal_error( + Twine("Missing associated COMDAT section for section ") + + MCSec.getSectionName()); // Skip this section if the associated section is unused. if (Assoc->Number == -1) diff --git a/lib/MC/WinCOFFStreamer.cpp b/lib/MC/WinCOFFStreamer.cpp index e6df465..d391a3f 100644 --- a/lib/MC/WinCOFFStreamer.cpp +++ b/lib/MC/WinCOFFStreamer.cpp @@ -82,10 +82,6 @@ void MCWinCOFFStreamer::EmitLabel(MCSymbol *Symbol) { MCObjectStreamer::EmitLabel(Symbol); } -void MCWinCOFFStreamer::EmitDebugLabel(MCSymbol *Symbol) { - EmitLabel(Symbol); -} - void MCWinCOFFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { llvm_unreachable("not implemented"); } @@ -242,7 +238,7 @@ void MCWinCOFFStreamer::EmitIdent(StringRef IdentString) { llvm_unreachable("not implemented"); } -void MCWinCOFFStreamer::EmitWin64EHHandlerData() { +void MCWinCOFFStreamer::EmitWinEHHandlerData() { llvm_unreachable("not implemented"); } diff --git a/lib/Object/YAML.cpp b/lib/MC/YAML.cpp index 61e9da3..067e91a 100644 --- a/lib/Object/YAML.cpp +++ b/lib/MC/YAML.cpp @@ -12,21 +12,20 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Object/YAML.h" +#include "llvm/MC/YAML.h" #include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" #include <cctype> using namespace llvm; -using namespace object::yaml; -void yaml::ScalarTraits<object::yaml::BinaryRef>::output( - const object::yaml::BinaryRef &Val, void *, llvm::raw_ostream &Out) { +void yaml::ScalarTraits<yaml::BinaryRef>::output( + const yaml::BinaryRef &Val, void *, llvm::raw_ostream &Out) { Val.writeAsHex(Out); } -StringRef yaml::ScalarTraits<object::yaml::BinaryRef>::input( - StringRef Scalar, void *, object::yaml::BinaryRef &Val) { +StringRef yaml::ScalarTraits<yaml::BinaryRef>::input(StringRef Scalar, void *, + yaml::BinaryRef &Val) { if (Scalar.size() % 2 != 0) return "BinaryRef hex string must contain an even number of nybbles."; // TODO: Can we improve YAMLIO to permit a more accurate diagnostic here? 
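// BinaryRef::input, continued below, accepts only an even-length string of
// hex digits. The same validation in isolation (std-only sketch returning a
// diagnostic string, or nullptr on success):
#include <cctype>
#include <string>
inline const char *validateHexSketch(const std::string &S) {
  if (S.size() % 2 != 0)
    return "hex string must contain an even number of nybbles";
  for (unsigned char C : S)
    if (!std::isxdigit(C))
      return "hex string must contain only hex digits";
  return nullptr; // well-formed input, no diagnostic
}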
@@ -34,11 +33,11 @@ StringRef yaml::ScalarTraits<object::yaml::BinaryRef>::input( for (unsigned I = 0, N = Scalar.size(); I != N; ++I) if (!isxdigit(Scalar[I])) return "BinaryRef hex string must contain only hex digits."; - Val = object::yaml::BinaryRef(Scalar); + Val = yaml::BinaryRef(Scalar); return StringRef(); } -void BinaryRef::writeAsBinary(raw_ostream &OS) const { +void yaml::BinaryRef::writeAsBinary(raw_ostream &OS) const { if (!DataIsHexString) { OS.write((const char *)Data.data(), Data.size()); return; @@ -50,7 +49,7 @@ void BinaryRef::writeAsBinary(raw_ostream &OS) const { } } -void BinaryRef::writeAsHex(raw_ostream &OS) const { +void yaml::BinaryRef::writeAsHex(raw_ostream &OS) const { if (binary_size() == 0) return; if (DataIsHexString) { diff --git a/lib/Object/Android.mk b/lib/Object/Android.mk index 4385f5a..acda4f2 100644 --- a/lib/Object/Android.mk +++ b/lib/Object/Android.mk @@ -7,17 +7,15 @@ object_SRC_FILES := \ COFFYAML.cpp \ ELF.cpp \ ELFObjectFile.cpp \ + ELFYAML.cpp \ Error.cpp \ IRObjectFile.cpp \ MachOObjectFile.cpp \ MachOUniversal.cpp \ Object.cpp \ ObjectFile.cpp \ - StringTableBuilder.cpp \ - SymbolicFile.cpp \ - YAML.cpp \ - ELFYAML.cpp \ - + RecordStreamer.cpp \ + SymbolicFile.cpp # For the host # ===================================================== diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index 304ca47..6d09bdb 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -115,18 +115,14 @@ Archive::Child Archive::Child::getNext() const { return Child(Parent, NextLoc); } -error_code Archive::Child::getName(StringRef &Result) const { +ErrorOr<StringRef> Archive::Child::getName() const { StringRef name = getRawName(); // Check if it's a special name. if (name[0] == '/') { - if (name.size() == 1) { // Linker member. - Result = name; - return object_error::success; - } - if (name.size() == 2 && name[1] == '/') { // String table. - Result = name; - return object_error::success; - } + if (name.size() == 1) // Linker member. + return name; + if (name.size() == 2 && name[1] == '/') // String table. + return name; // It's a long name. // Get the offset. std::size_t offset; @@ -147,68 +143,62 @@ error_code Archive::Child::getName(StringRef &Result) const { // GNU long file names end with a /. if (Parent->kind() == K_GNU) { StringRef::size_type End = StringRef(addr).find('/'); - Result = StringRef(addr, End); - } else { - Result = addr; + return StringRef(addr, End); } - return object_error::success; + return StringRef(addr); } else if (name.startswith("#1/")) { uint64_t name_size; if (name.substr(3).rtrim(" ").getAsInteger(10, name_size)) llvm_unreachable("Long name length is not an integer"); - Result = Data.substr(sizeof(ArchiveMemberHeader), name_size) + return Data.substr(sizeof(ArchiveMemberHeader), name_size) .rtrim(StringRef("\0", 1)); - return object_error::success; } // It's a simple name.
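// getName above distinguishes the special members ("/" and "//"), long-name
// table references ("/<offset>"), BSD inline names ("#1/<length>") and plain
// names. A std-only classifier sketch of those cases:
#include <string>
enum class ArNameKind { LinkerMember, StringTable, LongNameRef, BsdInline, Simple };
inline ArNameKind classifyArName(const std::string &N) {
  if (N == "/")  return ArNameKind::LinkerMember;
  if (N == "//") return ArNameKind::StringTable;
  if (!N.empty() && N[0] == '/') return ArNameKind::LongNameRef; // "/123"
  if (N.rfind("#1/", 0) == 0)    return ArNameKind::BsdInline;   // "#1/20"
  return ArNameKind::Simple; // may still carry a trailing '/' to strip
}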
if (name[name.size() - 1] == '/') - Result = name.substr(0, name.size() - 1); - else - Result = name; - return object_error::success; + return name.substr(0, name.size() - 1); + return name; } -error_code Archive::Child::getMemoryBuffer(std::unique_ptr<MemoryBuffer> &Result, - bool FullPath) const { - StringRef Name; - if (error_code ec = getName(Name)) - return ec; +ErrorOr<std::unique_ptr<MemoryBuffer>> +Archive::Child::getMemoryBuffer(bool FullPath) const { + ErrorOr<StringRef> NameOrErr = getName(); + if (std::error_code EC = NameOrErr.getError()) + return EC; + StringRef Name = NameOrErr.get(); SmallString<128> Path; - Result.reset(MemoryBuffer::getMemBuffer( - getBuffer(), FullPath ? (Twine(Parent->getFileName()) + "(" + Name + ")") - .toStringRef(Path) - : Name, + std::unique_ptr<MemoryBuffer> Ret(MemoryBuffer::getMemBuffer( + getBuffer(), + FullPath + ? (Twine(Parent->getFileName()) + "(" + Name + ")").toStringRef(Path) + : Name, false)); - return error_code::success(); + return std::move(Ret); } -error_code Archive::Child::getAsBinary(std::unique_ptr<Binary> &Result, - LLVMContext *Context) const { +ErrorOr<std::unique_ptr<Binary>> +Archive::Child::getAsBinary(LLVMContext *Context) const { std::unique_ptr<Binary> ret; - std::unique_ptr<MemoryBuffer> Buff; - if (error_code ec = getMemoryBuffer(Buff)) - return ec; - ErrorOr<Binary *> BinaryOrErr = createBinary(Buff.release(), Context); - if (error_code EC = BinaryOrErr.getError()) + ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr = getMemoryBuffer(); + if (std::error_code EC = BuffOrErr.getError()) return EC; - Result.reset(BinaryOrErr.get()); - return object_error::success; + + std::unique_ptr<MemoryBuffer> Buff(BuffOrErr.get().release()); + return createBinary(Buff, Context); } -ErrorOr<Archive*> Archive::create(MemoryBuffer *Source) { - error_code EC; - std::unique_ptr<Archive> Ret(new Archive(Source, EC)); +ErrorOr<Archive *> Archive::create(std::unique_ptr<MemoryBuffer> Source) { + std::error_code EC; + std::unique_ptr<Archive> Ret(new Archive(std::move(Source), EC)); if (EC) return EC; return Ret.release(); } -Archive::Archive(MemoryBuffer *source, error_code &ec) - : Binary(Binary::ID_Archive, source), SymbolTable(child_end()) { +Archive::Archive(std::unique_ptr<MemoryBuffer> Source, std::error_code &ec) + : Binary(Binary::ID_Archive, std::move(Source)), SymbolTable(child_end()) { // Check for sufficient magic. - assert(source); - if (source->getBufferSize() < 8 || - StringRef(source->getBufferStart(), 8) != Magic) { + if (Data->getBufferSize() < 8 || + StringRef(Data->getBufferStart(), 8) != Magic) { ec = object_error::invalid_file_type; return; } @@ -255,9 +245,11 @@ Archive::Archive(MemoryBuffer *source, error_code &ec) if (Name.startswith("#1/")) { Format = K_BSD; // We know this is BSD, so getName will work since there is no string table. 
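// (Editorial sketch, not from this patch: in the BSD "#1/N" scheme the digits
// after "#1/" give the name length, and the name itself occupies the first N
// bytes of the member data. For example, a header name field of "#1/20" means
// the 20 bytes following the header are the member name, which is exactly what
// Data.substr(sizeof(ArchiveMemberHeader), name_size) above recovers.)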
- ec = i->getName(Name); + ErrorOr<StringRef> NameOrErr = i->getName(); + ec = NameOrErr.getError(); if (ec) return; + Name = NameOrErr.get(); if (Name == "__.SYMDEF SORTED") { SymbolTable = i; ++i; @@ -335,12 +327,11 @@ Archive::child_iterator Archive::child_end() const { return Child(this, nullptr); } -error_code Archive::Symbol::getName(StringRef &Result) const { - Result = StringRef(Parent->SymbolTable->getBuffer().begin() + StringIndex); - return object_error::success; +StringRef Archive::Symbol::getName() const { + return Parent->SymbolTable->getBuffer().begin() + StringIndex; } -error_code Archive::Symbol::getMember(child_iterator &Result) const { +ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const { const char *Buf = Parent->SymbolTable->getBuffer().begin(); const char *Offsets = Buf + 4; uint32_t Offset = 0; @@ -348,7 +339,14 @@ error_code Archive::Symbol::getMember(child_iterator &Result) const { Offset = *(reinterpret_cast<const support::ubig32_t*>(Offsets) + SymbolIndex); } else if (Parent->kind() == K_BSD) { - llvm_unreachable("BSD format is not supported"); + // The SymbolIndex is an index into the ranlib structs that start at + // Offsets (the first uint32_t is the number of bytes of the ranlib + // structs). The ranlib structs are a pair of uint32_t's, the first + // being a string table offset and the second being the offset into + // the archive of the member that defines the symbol, which is what + // is needed here. + Offset = *(reinterpret_cast<const support::ulittle32_t *>(Offsets) + + (SymbolIndex * 2) + 1); } else { uint32_t MemberCount = *reinterpret_cast<const support::ulittle32_t*>(Buf); @@ -380,16 +378,49 @@ error_code Archive::Symbol::getMember(child_iterator &Result) const { } const char *Loc = Parent->getData().begin() + Offset; - Result = Child(Parent, Loc); - - return object_error::success; + child_iterator Iter(Child(Parent, Loc)); + return Iter; } Archive::Symbol Archive::Symbol::getNext() const { Symbol t(*this); - // Go to one past next null. - t.StringIndex = - Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1; + if (Parent->kind() == K_BSD) { + // t.StringIndex is an offset from the start of the __.SYMDEF or + // "__.SYMDEF SORTED" member into the string table for the ranlib + // struct indexed by t.SymbolIndex. To change t.StringIndex to the + // offset in the string table for t.SymbolIndex+1, we subtract its + // offset from the start of the string table for t.SymbolIndex + // and add the offset of the string table for t.SymbolIndex+1. + + // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t + // which is the number of bytes of ranlib structs that follow. The ranlib + // structs are a pair of uint32_t's, the first being a string table offset + // and the second being the offset into the archive of the member that + // defines the symbol. After that, the next uint32_t is the byte count of + // the string table followed by the string table. + const char *Buf = Parent->SymbolTable->getBuffer().begin(); + uint32_t RanlibCount = 0; + RanlibCount = (*reinterpret_cast<const support::ulittle32_t *>(Buf)) / + (sizeof(uint32_t) * 2); + // If t.SymbolIndex + 1 would be past the count of symbols (the RanlibCount), + // don't change t.StringIndex, as we don't want to reference a ranlib + // past RanlibCount.
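// (Editorial sketch of the table layout assumed above; the struct and field
// names follow the classic <mach-o/ranlib.h> convention and are illustrative:
//   uint32_t nbytes;                // byte count of the ranlib array
//   struct ranlib {
//     uint32_t ran_strx;            // offset into the string table
//     uint32_t ran_off;             // archive offset of the defining member
//   } ranlibs[nbytes / 8];
//   uint32_t strsize;               // byte count of the string table
//   char strtab[strsize];
// Stepping to symbol I+1 therefore re-derives StringIndex from
// ranlibs[I+1].ran_strx, which is what the code below does.)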
+ if (t.SymbolIndex + 1 < RanlibCount) { + const char *Ranlibs = Buf + 4; + uint32_t CurRanStrx = 0; + uint32_t NextRanStrx = 0; + CurRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) + + (t.SymbolIndex * 2)); + NextRanStrx = *(reinterpret_cast<const support::ulittle32_t *>(Ranlibs) + + ((t.SymbolIndex + 1) * 2)); + t.StringIndex -= CurRanStrx; + t.StringIndex += NextRanStrx; + } + } else { + // Go to one past next null. + t.StringIndex = + Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1; + } ++t.SymbolIndex; return t; } @@ -404,7 +435,22 @@ Archive::symbol_iterator Archive::symbol_begin() const { symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf); buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); } else if (kind() == K_BSD) { - llvm_unreachable("BSD archive format is not supported"); + // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t + // which is the number of bytes of ranlib structs that follow. The ranlib + // structs are a pair of uint32_t's, the first being a string table offset + // and the second being the offset into the archive of the member that + // defines the symbol. After that, the next uint32_t is the byte count of + // the string table followed by the string table. + uint32_t ranlib_count = 0; + ranlib_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) / + (sizeof(uint32_t) * 2); + const char *ranlibs = buf + 4; + uint32_t ran_strx = 0; + ran_strx = *(reinterpret_cast<const support::ulittle32_t *>(ranlibs)); + buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t)))); + // Skip the byte count of the string table. + buf += sizeof(uint32_t); + buf += ran_strx; } else { uint32_t member_count = 0; uint32_t symbol_count = 0; @@ -426,7 +472,8 @@ Archive::symbol_iterator Archive::symbol_end() const { if (kind() == K_GNU) { symbol_count = *reinterpret_cast<const support::ubig32_t*>(buf); } else if (kind() == K_BSD) { - llvm_unreachable("BSD archive format is not supported"); + symbol_count = (*reinterpret_cast<const support::ulittle32_t *>(buf)) / + (sizeof(uint32_t) * 2); } else { uint32_t member_count = 0; member_count = *reinterpret_cast<const support::ulittle32_t*>(buf); @@ -440,16 +487,15 @@ Archive::child_iterator Archive::findSym(StringRef name) const { Archive::symbol_iterator bs = symbol_begin(); Archive::symbol_iterator es = symbol_end(); - Archive::child_iterator result; - - StringRef symname; + for (; bs != es; ++bs) { - if (bs->getName(symname)) - return child_end(); - if (symname == name) { - if (bs->getMember(result)) + StringRef SymName = bs->getName(); + if (SymName == name) { + ErrorOr<Archive::child_iterator> ResultOrErr = bs->getMember(); + // FIXME: Should we really eat the error?
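// (Editorial note: propagating rather than eating it would mean giving
// findSym an ErrorOr<Archive::child_iterator> result too, so callers could
// tell "symbol not found" apart from a malformed symbol table; this is a
// sketch of an alternative, not what the patch does.)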
+ if (ResultOrErr.getError()) return child_end(); - return result; + return ResultOrErr.get(); } } return child_end(); diff --git a/lib/Object/Binary.cpp b/lib/Object/Binary.cpp index 63fd3ed..9f6a685 100644 --- a/lib/Object/Binary.cpp +++ b/lib/Object/Binary.cpp @@ -25,13 +25,10 @@ using namespace llvm; using namespace object; -Binary::~Binary() { - if (BufferOwned) - delete Data; -} +Binary::~Binary() {} -Binary::Binary(unsigned int Type, MemoryBuffer *Source, bool BufferOwned) - : TypeID(Type), BufferOwned(BufferOwned), Data(Source) {} +Binary::Binary(unsigned int Type, std::unique_ptr<MemoryBuffer> Source) + : TypeID(Type), Data(std::move(Source)) {} StringRef Binary::getData() const { return Data->getBuffer(); @@ -41,14 +38,13 @@ StringRef Binary::getFileName() const { return Data->getBufferIdentifier(); } -ErrorOr<Binary *> object::createBinary(MemoryBuffer *Source, +ErrorOr<Binary *> object::createBinary(std::unique_ptr<MemoryBuffer> &Buffer, LLVMContext *Context) { - std::unique_ptr<MemoryBuffer> scopedSource(Source); - sys::fs::file_magic Type = sys::fs::identify_magic(Source->getBuffer()); + sys::fs::file_magic Type = sys::fs::identify_magic(Buffer->getBuffer()); switch (Type) { case sys::fs::file_magic::archive: - return Archive::create(scopedSource.release()); + return Archive::create(std::move(Buffer)); case sys::fs::file_magic::elf_relocatable: case sys::fs::file_magic::elf_executable: case sys::fs::file_magic::elf_shared_object: @@ -67,10 +63,9 @@ ErrorOr<Binary *> object::createBinary(MemoryBuffer *Source, case sys::fs::file_magic::coff_import_library: case sys::fs::file_magic::pecoff_executable: case sys::fs::file_magic::bitcode: - return ObjectFile::createSymbolicFile(scopedSource.release(), true, Type, - Context); + return ObjectFile::createSymbolicFile(Buffer, Type, Context); case sys::fs::file_magic::macho_universal_binary: - return MachOUniversalBinary::create(scopedSource.release()); + return MachOUniversalBinary::create(std::move(Buffer)); case sys::fs::file_magic::unknown: case sys::fs::file_magic::windows_resource: // Unrecognized object file format. @@ -80,8 +75,9 @@ ErrorOr<Binary *> object::createBinary(MemoryBuffer *Source, } ErrorOr<Binary *> object::createBinary(StringRef Path) { - std::unique_ptr<MemoryBuffer> File; - if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, File)) + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFileOrSTDIN(Path); + if (std::error_code EC = FileOrErr.getError()) return EC; - return createBinary(File.release()); + return createBinary(FileOrErr.get()); } diff --git a/lib/Object/CMakeLists.txt b/lib/Object/CMakeLists.txt index cd8c9ef..5b08e42 100644 --- a/lib/Object/CMakeLists.txt +++ b/lib/Object/CMakeLists.txt @@ -12,7 +12,6 @@ add_llvm_library(LLVMObject MachOUniversal.cpp Object.cpp ObjectFile.cpp - StringTableBuilder.cpp + RecordStreamer.cpp SymbolicFile.cpp - YAML.cpp ) diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 262c040..46ef87d 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -31,8 +31,9 @@ using support::ulittle32_t; using support::little16_t; // Returns false, and sets EC, if Size is greater than the buffer size.
-static bool checkSize(const MemoryBuffer *M, error_code &EC, uint64_t Size) { - if (M->getBufferSize() < Size) { +static bool checkSize(const MemoryBuffer &M, std::error_code &EC, + uint64_t Size) { + if (M.getBufferSize() < Size) { EC = object_error::unexpected_eof; return false; } @@ -41,13 +42,13 @@ static bool checkSize(const MemoryBuffer *M, error_code &EC, uint64_t Size) { // Sets Obj unless any bytes in [addr, addr + size) fall outside of M. // Returns unexpected_eof on error. -template<typename T> -static error_code getObject(const T *&Obj, const MemoryBuffer *M, - const uint8_t *Ptr, const size_t Size = sizeof(T)) { +template <typename T> +static std::error_code getObject(const T *&Obj, const MemoryBuffer &M, + const uint8_t *Ptr, + const size_t Size = sizeof(T)) { uintptr_t Addr = uintptr_t(Ptr); - if (Addr + Size < Addr || - Addr + Size < Size || - Addr + Size > uintptr_t(M->getBufferEnd())) { + if (Addr + Size < Addr || Addr + Size < Size || + Addr + Size > uintptr_t(M.getBufferEnd())) { return object_error::unexpected_eof; } Obj = reinterpret_cast<const T *>(Addr); @@ -129,17 +130,17 @@ void COFFObjectFile::moveSymbolNext(DataRefImpl &Ref) const { Ref.p = reinterpret_cast<uintptr_t>(Symb); } -error_code COFFObjectFile::getSymbolName(DataRefImpl Ref, - StringRef &Result) const { +std::error_code COFFObjectFile::getSymbolName(DataRefImpl Ref, + StringRef &Result) const { const coff_symbol *Symb = toSymb(Ref); return getSymbolName(Symb, Result); } -error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref, - uint64_t &Result) const { +std::error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref, + uint64_t &Result) const { const coff_symbol *Symb = toSymb(Ref); const coff_section *Section = nullptr; - if (error_code EC = getSection(Symb->SectionNumber, Section)) + if (std::error_code EC = getSection(Symb->SectionNumber, Section)) return EC; if (Symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) @@ -151,8 +152,8 @@ error_code COFFObjectFile::getSymbolAddress(DataRefImpl Ref, return object_error::success; } -error_code COFFObjectFile::getSymbolType(DataRefImpl Ref, - SymbolRef::Type &Result) const { +std::error_code COFFObjectFile::getSymbolType(DataRefImpl Ref, + SymbolRef::Type &Result) const { const coff_symbol *Symb = toSymb(Ref); Result = SymbolRef::ST_Other; if (Symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL && @@ -164,7 +165,7 @@ error_code COFFObjectFile::getSymbolType(DataRefImpl Ref, uint32_t Characteristics = 0; if (!COFF::isReservedSectionNumber(Symb->SectionNumber)) { const coff_section *Section = nullptr; - if (error_code EC = getSection(Symb->SectionNumber, Section)) + if (std::error_code EC = getSection(Symb->SectionNumber, Section)) return EC; Characteristics = Section->Characteristics; } @@ -202,14 +203,14 @@ uint32_t COFFObjectFile::getSymbolFlags(DataRefImpl Ref) const { return Result; } -error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref, - uint64_t &Result) const { +std::error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref, + uint64_t &Result) const { // FIXME: Return the correct size. This requires looking at all the symbols // in the same section as this symbol, and looking for either the next // symbol, or the end of the section.
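// (One possible approach, sketched editorially and not implemented here:
// collect the Value of every symbol whose SectionNumber matches Symb's, sort
// those values, and report NextValue - Symb->Value, falling back to
// Section->VirtualAddress + Section->SizeOfRawData - Symb->Value for the last
// symbol in the section.)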
const coff_symbol *Symb = toSymb(Ref); const coff_section *Section = nullptr; - if (error_code EC = getSection(Symb->SectionNumber, Section)) + if (std::error_code EC = getSection(Symb->SectionNumber, Section)) return EC; if (Symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) @@ -221,14 +222,16 @@ error_code COFFObjectFile::getSymbolSize(DataRefImpl Ref, return object_error::success; } -error_code COFFObjectFile::getSymbolSection(DataRefImpl Ref, - section_iterator &Result) const { +std::error_code +COFFObjectFile::getSymbolSection(DataRefImpl Ref, + section_iterator &Result) const { const coff_symbol *Symb = toSymb(Ref); if (COFF::isReservedSectionNumber(Symb->SectionNumber)) { Result = section_end(); } else { const coff_section *Sec = nullptr; - if (error_code EC = getSection(Symb->SectionNumber, Sec)) return EC; + if (std::error_code EC = getSection(Symb->SectionNumber, Sec)) + return EC; DataRefImpl Ref; Ref.p = reinterpret_cast<uintptr_t>(Sec); Result = section_iterator(SectionRef(Ref, this)); @@ -242,37 +245,37 @@ void COFFObjectFile::moveSectionNext(DataRefImpl &Ref) const { Ref.p = reinterpret_cast<uintptr_t>(Sec); } -error_code COFFObjectFile::getSectionName(DataRefImpl Ref, - StringRef &Result) const { +std::error_code COFFObjectFile::getSectionName(DataRefImpl Ref, + StringRef &Result) const { const coff_section *Sec = toSec(Ref); return getSectionName(Sec, Result); } -error_code COFFObjectFile::getSectionAddress(DataRefImpl Ref, - uint64_t &Result) const { +std::error_code COFFObjectFile::getSectionAddress(DataRefImpl Ref, + uint64_t &Result) const { const coff_section *Sec = toSec(Ref); Result = Sec->VirtualAddress; return object_error::success; } -error_code COFFObjectFile::getSectionSize(DataRefImpl Ref, - uint64_t &Result) const { +std::error_code COFFObjectFile::getSectionSize(DataRefImpl Ref, + uint64_t &Result) const { const coff_section *Sec = toSec(Ref); Result = Sec->SizeOfRawData; return object_error::success; } -error_code COFFObjectFile::getSectionContents(DataRefImpl Ref, - StringRef &Result) const { +std::error_code COFFObjectFile::getSectionContents(DataRefImpl Ref, + StringRef &Result) const { const coff_section *Sec = toSec(Ref); ArrayRef<uint8_t> Res; - error_code EC = getSectionContents(Sec, Res); + std::error_code EC = getSectionContents(Sec, Res); Result = StringRef(reinterpret_cast<const char*>(Res.data()), Res.size()); return EC; } -error_code COFFObjectFile::getSectionAlignment(DataRefImpl Ref, - uint64_t &Res) const { +std::error_code COFFObjectFile::getSectionAlignment(DataRefImpl Ref, + uint64_t &Res) const { const coff_section *Sec = toSec(Ref); if (!Sec) return object_error::parse_failed; @@ -280,62 +283,64 @@ error_code COFFObjectFile::getSectionAlignment(DataRefImpl Ref, return object_error::success; } -error_code COFFObjectFile::isSectionText(DataRefImpl Ref, - bool &Result) const { +std::error_code COFFObjectFile::isSectionText(DataRefImpl Ref, + bool &Result) const { const coff_section *Sec = toSec(Ref); Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_CODE; return object_error::success; } -error_code COFFObjectFile::isSectionData(DataRefImpl Ref, - bool &Result) const { +std::error_code COFFObjectFile::isSectionData(DataRefImpl Ref, + bool &Result) const { const coff_section *Sec = toSec(Ref); Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; return object_error::success; } -error_code COFFObjectFile::isSectionBSS(DataRefImpl Ref, - bool &Result) const { +std::error_code COFFObjectFile::isSectionBSS(DataRefImpl Ref, + 
bool &Result) const { const coff_section *Sec = toSec(Ref); Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; return object_error::success; } -error_code COFFObjectFile::isSectionRequiredForExecution(DataRefImpl Ref, - bool &Result) const { +std::error_code +COFFObjectFile::isSectionRequiredForExecution(DataRefImpl Ref, + bool &Result) const { // FIXME: Unimplemented Result = true; return object_error::success; } -error_code COFFObjectFile::isSectionVirtual(DataRefImpl Ref, - bool &Result) const { +std::error_code COFFObjectFile::isSectionVirtual(DataRefImpl Ref, + bool &Result) const { const coff_section *Sec = toSec(Ref); Result = Sec->Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; return object_error::success; } -error_code COFFObjectFile::isSectionZeroInit(DataRefImpl Ref, - bool &Result) const { +std::error_code COFFObjectFile::isSectionZeroInit(DataRefImpl Ref, + bool &Result) const { // FIXME: Unimplemented. Result = false; return object_error::success; } -error_code COFFObjectFile::isSectionReadOnlyData(DataRefImpl Ref, - bool &Result) const { +std::error_code COFFObjectFile::isSectionReadOnlyData(DataRefImpl Ref, + bool &Result) const { // FIXME: Unimplemented. Result = false; return object_error::success; } -error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef, - DataRefImpl SymbRef, - bool &Result) const { +std::error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl SecRef, + DataRefImpl SymbRef, + bool &Result) const { const coff_section *Sec = toSec(SecRef); const coff_symbol *Symb = toSymb(SymbRef); const coff_section *SymbSec = nullptr; - if (error_code EC = getSection(Symb->SectionNumber, SymbSec)) return EC; + if (std::error_code EC = getSection(Symb->SectionNumber, SymbSec)) + return EC; if (SymbSec == Sec) Result = true; else @@ -390,9 +395,9 @@ relocation_iterator COFFObjectFile::section_rel_end(DataRefImpl Ref) const { } // Initialize the pointer to the symbol table. -error_code COFFObjectFile::initSymbolTablePtr() { - if (error_code EC = getObject( - SymbolTable, Data, base() + COFFHeader->PointerToSymbolTable, +std::error_code COFFObjectFile::initSymbolTablePtr() { + if (std::error_code EC = getObject( + SymbolTable, *Data, base() + COFFHeader->PointerToSymbolTable, COFFHeader->NumberOfSymbols * sizeof(coff_symbol))) return EC; @@ -403,11 +408,12 @@ error_code COFFObjectFile::initSymbolTablePtr() { base() + COFFHeader->PointerToSymbolTable + COFFHeader->NumberOfSymbols * sizeof(coff_symbol); const ulittle32_t *StringTableSizePtr; - if (error_code EC = getObject(StringTableSizePtr, Data, StringTableAddr)) + if (std::error_code EC = + getObject(StringTableSizePtr, *Data, StringTableAddr)) return EC; StringTableSize = *StringTableSizePtr; - if (error_code EC = - getObject(StringTable, Data, StringTableAddr, StringTableSize)) + if (std::error_code EC = + getObject(StringTable, *Data, StringTableAddr, StringTableSize)) return EC; // Treat table sizes < 4 as empty because contrary to the PECOFF spec, some @@ -422,7 +428,7 @@ error_code COFFObjectFile::initSymbolTablePtr() { } // Returns the file offset for the given VA. -error_code COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const { +std::error_code COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const { uint64_t ImageBase = PE32Header ? 
(uint64_t)PE32Header->ImageBase : (uint64_t)PE32PlusHeader->ImageBase; uint64_t Rva = Addr - ImageBase; @@ -431,7 +437,7 @@ error_code COFFObjectFile::getVaPtr(uint64_t Addr, uintptr_t &Res) const { } // Returns the file offset for the given RVA. -error_code COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const { +std::error_code COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const { for (const SectionRef &S : sections()) { const coff_section *Section = getCOFFSection(S); uint32_t SectionStart = Section->VirtualAddress; @@ -447,10 +453,10 @@ error_code COFFObjectFile::getRvaPtr(uint32_t Addr, uintptr_t &Res) const { // Returns hint and name fields, assuming \p Rva is pointing to a Hint/Name // table entry. -error_code COFFObjectFile:: -getHintName(uint32_t Rva, uint16_t &Hint, StringRef &Name) const { +std::error_code COFFObjectFile::getHintName(uint32_t Rva, uint16_t &Hint, + StringRef &Name) const { uintptr_t IntPtr = 0; - if (error_code EC = getRvaPtr(Rva, IntPtr)) + if (std::error_code EC = getRvaPtr(Rva, IntPtr)) return EC; const uint8_t *Ptr = reinterpret_cast<const uint8_t *>(IntPtr); Hint = *reinterpret_cast<const ulittle16_t *>(Ptr); @@ -459,7 +465,7 @@ getHintName(uint32_t Rva, uint16_t &Hint, StringRef &Name) const { } // Find the import table. -error_code COFFObjectFile::initImportTablePtr() { +std::error_code COFFObjectFile::initImportTablePtr() { // First, we get the RVA of the import table. If the file lacks a pointer to // the import table, do nothing. const data_directory *DataEntry; @@ -477,7 +483,7 @@ error_code COFFObjectFile::initImportTablePtr() { // Find the section that contains the RVA. This is needed because the RVA is // the import table's memory address which is different from its file offset. uintptr_t IntPtr = 0; - if (error_code EC = getRvaPtr(ImportTableRva, IntPtr)) + if (std::error_code EC = getRvaPtr(ImportTableRva, IntPtr)) return EC; ImportDirectory = reinterpret_cast< const import_directory_table_entry *>(IntPtr); @@ -485,7 +491,7 @@ error_code COFFObjectFile::initImportTablePtr() { } // Find the export table. -error_code COFFObjectFile::initExportTablePtr() { +std::error_code COFFObjectFile::initExportTablePtr() { // First, we get the RVA of the export table. If the file lacks a pointer to // the export table, do nothing. const data_directory *DataEntry; @@ -498,22 +504,23 @@ error_code COFFObjectFile::initExportTablePtr() { uint32_t ExportTableRva = DataEntry->RelativeVirtualAddress; uintptr_t IntPtr = 0; - if (error_code EC = getRvaPtr(ExportTableRva, IntPtr)) + if (std::error_code EC = getRvaPtr(ExportTableRva, IntPtr)) return EC; ExportDirectory = reinterpret_cast<const export_directory_table_entry *>(IntPtr); return object_error::success; } -COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &EC, - bool BufferOwned) - : ObjectFile(Binary::ID_COFF, Object, BufferOwned), COFFHeader(nullptr), +COFFObjectFile::COFFObjectFile(std::unique_ptr<MemoryBuffer> Object, + std::error_code &EC) + : ObjectFile(Binary::ID_COFF, std::move(Object)), COFFHeader(nullptr), PE32Header(nullptr), PE32PlusHeader(nullptr), DataDirectory(nullptr), SectionTable(nullptr), SymbolTable(nullptr), StringTable(nullptr), StringTableSize(0), ImportDirectory(nullptr), NumberOfImportDirectory(0), ExportDirectory(nullptr) { // Check that we at least have enough room for a header. 
- if (!checkSize(Data, EC, sizeof(coff_file_header))) return; + if (!checkSize(*Data, EC, sizeof(coff_file_header))) + return; // The current location in the file that we are looking at. uint64_t CurPtr = 0; @@ -526,7 +533,8 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &EC, if (base()[0] == 0x4d && base()[1] == 0x5a) { // PE/COFF, seek through MS-DOS compatibility stub and 4-byte // PE signature to find 'normal' COFF header. - if (!checkSize(Data, EC, 0x3c + 8)) return; + if (!checkSize(*Data, EC, 0x3c + 8)) + return; CurPtr = *reinterpret_cast<const ulittle16_t *>(base() + 0x3c); // Check the PE magic bytes. ("PE\0\0") if (std::memcmp(base() + CurPtr, "PE\0\0", 4) != 0) { @@ -537,13 +545,13 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &EC, HasPEHeader = true; } - if ((EC = getObject(COFFHeader, Data, base() + CurPtr))) + if ((EC = getObject(COFFHeader, *Data, base() + CurPtr))) return; CurPtr += sizeof(coff_file_header); if (HasPEHeader) { const pe32_header *Header; - if ((EC = getObject(Header, Data, base() + CurPtr))) + if ((EC = getObject(Header, *Data, base() + CurPtr))) return; const uint8_t *DataDirAddr; @@ -561,7 +569,7 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &EC, EC = object_error::parse_failed; return; } - if ((EC = getObject(DataDirectory, Data, DataDirAddr, DataDirSize))) + if ((EC = getObject(DataDirectory, *Data, DataDirAddr, DataDirSize))) return; CurPtr += COFFHeader->SizeOfOptionalHeader; } @@ -569,7 +577,7 @@ COFFObjectFile::COFFObjectFile... if (COFFHeader->isImportLibrary()) return; - if ((EC = getObject(SectionTable, Data, base() + CurPtr, + if ((EC = getObject(SectionTable, *Data, base() + CurPtr, COFFHeader->NumberOfSections * sizeof(coff_section)))) return; @@ -686,28 +694,30 @@ unsigned COFFObjectFile::getArch() const { // This method is kept here because lld uses this. As soon as we make // lld use getCOFFHeader, this method will be removed. -error_code COFFObjectFile::getHeader(const coff_file_header *&Res) const { +std::error_code COFFObjectFile::getHeader(const coff_file_header *&Res) const { return getCOFFHeader(Res); } -error_code COFFObjectFile::getCOFFHeader(const coff_file_header *&Res) const { +std::error_code +COFFObjectFile::getCOFFHeader(const coff_file_header *&Res) const { Res = COFFHeader; return object_error::success; } -error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const { +std::error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const { Res = PE32Header; return object_error::success; } -error_code +std::error_code COFFObjectFile::getPE32PlusHeader(const pe32plus_header *&Res) const { Res = PE32PlusHeader; return object_error::success; } -error_code COFFObjectFile::getDataDirectory(uint32_t Index, - const data_directory *&Res) const { +std::error_code +COFFObjectFile::getDataDirectory(uint32_t Index, + const data_directory *&Res) const { // Error if there's no data directory or the index is out of range. if (!DataDirectory) return object_error::parse_failed; @@ -720,8 +730,8 @@ error_code COFFObjectFile::getDataDirectory(uint32_t Index, return object_error::success; } -error_code COFFObjectFile::getSection(int32_t Index, - const coff_section *&Result) const { +std::error_code COFFObjectFile::getSection(int32_t Index, + const coff_section *&Result) const { // Check for special index values.
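// (Editorial note: COFF section numbers are 1-based; 0 (IMAGE_SYM_UNDEFINED)
// and the negative values IMAGE_SYM_ABSOLUTE (-1) and IMAGE_SYM_DEBUG (-2) are
// reserved, which is why the code below maps reserved numbers to a null
// section rather than into SectionTable.)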
if (COFF::isReservedSectionNumber(Index)) Result = nullptr; @@ -733,8 +743,8 @@ error_code COFFObjectFile::getSection, -error_code COFFObjectFile::getString(uint32_t Offset, - StringRef &Result) const { +std::error_code COFFObjectFile::getString(uint32_t Offset, + StringRef &Result) const { if (StringTableSize <= 4) // Tried to get a string from an empty string table. return object_error::parse_failed; @@ -744,8 +754,8 @@ error_code COFFObjectFile::getString, -error_code COFFObjectFile::getSymbol(uint32_t Index, - const coff_symbol *&Result) const { +std::error_code COFFObjectFile::getSymbol(uint32_t Index, + const coff_symbol *&Result) const { if (Index < COFFHeader->NumberOfSymbols) Result = SymbolTable + Index; else @@ -753,12 +763,12 @@ error_code COFFObjectFile::getSymbol, -error_code COFFObjectFile::getSymbolName(const coff_symbol *Symbol, - StringRef &Res) const { +std::error_code COFFObjectFile::getSymbolName(const coff_symbol *Symbol, + StringRef &Res) const { // Check for string table entry. First 4 bytes are 0. if (Symbol->Name.Offset.Zeroes == 0) { uint32_t Offset = Symbol->Name.Offset.Offset; - if (error_code EC = getString(Offset, Res)) + if (std::error_code EC = getString(Offset, Res)) return EC; return object_error::success; } @@ -795,8 +805,8 @@ ArrayRef<uint8_t> COFFObjectFile::getSymbolAuxData( Symbol->NumberOfAuxSymbols * sizeof(coff_symbol)); } -error_code COFFObjectFile::getSectionName(const coff_section *Sec, - StringRef &Res) const { +std::error_code COFFObjectFile::getSectionName(const coff_section *Sec, + StringRef &Res) const { StringRef Name; if (Sec->Name[7] == 0) // Null terminated, let ::strlen figure out the length. @@ -815,7 +825,7 @@ error_code COFFObjectFile::getSectionName(const coff_section *Sec, if (Name.substr(1).getAsInteger(10, Offset)) return object_error::parse_failed; } - if (error_code EC = getString(Offset, Name)) + if (std::error_code EC = getString(Offset, Name)) return EC; } @@ -823,8 +833,9 @@ error_code COFFObjectFile::getSectionName(const coff_section *Sec, return object_error::success; } -error_code COFFObjectFile::getSectionContents(const coff_section *Sec, - ArrayRef<uint8_t> &Res) const { +std::error_code +COFFObjectFile::getSectionContents(const coff_section *Sec, + ArrayRef<uint8_t> &Res) const { // The only thing that we need to verify is that the contents are contained // within the file bounds. We don't need to make sure they don't cover other // data, as there's nothing that says that is not allowed.
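// (A minimal sketch of the containment check described above, using
// coff_section fields that appear elsewhere in this file; it mirrors, rather
// than reproduces, the patch's code:
//   uintptr_t Begin = uintptr_t(base()) + Sec->PointerToRawData;
//   if (Begin + Sec->SizeOfRawData > uintptr_t(Data->getBufferEnd()))
//     return object_error::unexpected_eof;
//   Res = ArrayRef<uint8_t>(reinterpret_cast<const uint8_t *>(Begin),
//                           Sec->SizeOfRawData);
// )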
@@ -846,13 +857,13 @@ void COFFObjectFile::moveRelocationNext(DataRefImpl &Rel) const { reinterpret_cast<const coff_relocation*>(Rel.p) + 1); } -error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel, - uint64_t &Res) const { +std::error_code COFFObjectFile::getRelocationAddress(DataRefImpl Rel, + uint64_t &Res) const { report_fatal_error("getRelocationAddress not implemented in COFFObjectFile"); } -error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel, - uint64_t &Res) const { +std::error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel, + uint64_t &Res) const { Res = toRel(Rel)->VirtualAddress; return object_error::success; } @@ -864,8 +875,8 @@ symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const { return symbol_iterator(SymbolRef(Ref, this)); } -error_code COFFObjectFile::getRelocationType(DataRefImpl Rel, - uint64_t &Res) const { +std::error_code COFFObjectFile::getRelocationType(DataRefImpl Rel, + uint64_t &Res) const { const coff_relocation* R = toRel(Rel); Res = R->Type; return object_error::success; @@ -891,8 +902,9 @@ COFFObjectFile::getCOFFRelocation(const RelocationRef &Reloc) const { Res = #reloc_type; \ break; -error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel, - SmallVectorImpl<char> &Result) const { +std::error_code +COFFObjectFile::getRelocationTypeName(DataRefImpl Rel, + SmallVectorImpl<char> &Result) const { const coff_relocation *Reloc = toRel(Rel); StringRef Res; switch (COFFHeader->Machine) { @@ -966,26 +978,29 @@ error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel, #undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME -error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel, - SmallVectorImpl<char> &Result) const { +std::error_code +COFFObjectFile::getRelocationValueString(DataRefImpl Rel, + SmallVectorImpl<char> &Result) const { const coff_relocation *Reloc = toRel(Rel); const coff_symbol *Symb = nullptr; - if (error_code EC = getSymbol(Reloc->SymbolTableIndex, Symb)) return EC; + if (std::error_code EC = getSymbol(Reloc->SymbolTableIndex, Symb)) + return EC; DataRefImpl Sym; Sym.p = reinterpret_cast<uintptr_t>(Symb); StringRef SymName; - if (error_code EC = getSymbolName(Sym, SymName)) return EC; + if (std::error_code EC = getSymbolName(Sym, SymName)) + return EC; Result.append(SymName.begin(), SymName.end()); return object_error::success; } -error_code COFFObjectFile::getLibraryNext(DataRefImpl LibData, - LibraryRef &Result) const { +std::error_code COFFObjectFile::getLibraryNext(DataRefImpl LibData, + LibraryRef &Result) const { report_fatal_error("getLibraryNext not implemented in COFFObjectFile"); } -error_code COFFObjectFile::getLibraryPath(DataRefImpl LibData, - StringRef &Result) const { +std::error_code COFFObjectFile::getLibraryPath(DataRefImpl LibData, + StringRef &Result) const { report_fatal_error("getLibraryPath not implemented in COFFObjectFile"); } @@ -998,24 +1013,25 @@ void ImportDirectoryEntryRef::moveNext() { ++Index; } -error_code ImportDirectoryEntryRef:: -getImportTableEntry(const import_directory_table_entry *&Result) const { +std::error_code ImportDirectoryEntryRef::getImportTableEntry( + const import_directory_table_entry *&Result) const { Result = ImportTable; return object_error::success; } -error_code ImportDirectoryEntryRef::getName(StringRef &Result) const { +std::error_code ImportDirectoryEntryRef::getName(StringRef &Result) const { uintptr_t IntPtr = 0; - if (error_code EC = OwningObject->getRvaPtr(ImportTable->NameRVA, IntPtr)) + if (std::error_code EC = + 
OwningObject->getRvaPtr(ImportTable->NameRVA, IntPtr)) return EC; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); return object_error::success; } -error_code ImportDirectoryEntryRef::getImportLookupEntry( +std::error_code ImportDirectoryEntryRef::getImportLookupEntry( const import_lookup_table_entry32 *&Result) const { uintptr_t IntPtr = 0; - if (error_code EC = + if (std::error_code EC = OwningObject->getRvaPtr(ImportTable->ImportLookupTableRVA, IntPtr)) return EC; Result = reinterpret_cast<const import_lookup_table_entry32 *>(IntPtr); @@ -1033,31 +1049,33 @@ void ExportDirectoryEntryRef::moveNext() { ++Index; } // Returns the name of the DLL that this export directory table belongs to. -error_code ExportDirectoryEntryRef::getDllName(StringRef &Result) const { +std::error_code ExportDirectoryEntryRef::getDllName(StringRef &Result) const { uintptr_t IntPtr = 0; - if (error_code EC = OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr)) + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->NameRVA, IntPtr)) return EC; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); return object_error::success; } // Returns the starting ordinal number. -error_code ExportDirectoryEntryRef::getOrdinalBase(uint32_t &Result) const { +std::error_code +ExportDirectoryEntryRef::getOrdinalBase(uint32_t &Result) const { Result = ExportTable->OrdinalBase; return object_error::success; } // Returns the export ordinal of the current export symbol. -error_code ExportDirectoryEntryRef::getOrdinal(uint32_t &Result) const { +std::error_code ExportDirectoryEntryRef::getOrdinal(uint32_t &Result) const { Result = ExportTable->OrdinalBase + Index; return object_error::success; } // Returns the address of the current export symbol. -error_code ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const { +std::error_code ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const { uintptr_t IntPtr = 0; - if (error_code EC = OwningObject->getRvaPtr( - ExportTable->ExportAddressTableRVA, IntPtr)) + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->ExportAddressTableRVA, IntPtr)) return EC; const export_address_table_entry *entry = reinterpret_cast<const export_address_table_entry *>(IntPtr); @@ -1067,10 +1085,11 @@ error_code ExportDirectoryEntryRef::getExportRVA(uint32_t &Result) const { // Returns the name of the current export symbol. If the symbol is exported only // by ordinal, the empty string is set as a result.
-error_code ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { +std::error_code +ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { uintptr_t IntPtr = 0; - if (error_code EC = OwningObject->getRvaPtr( - ExportTable->OrdinalTableRVA, IntPtr)) + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->OrdinalTableRVA, IntPtr)) return EC; const ulittle16_t *Start = reinterpret_cast<const ulittle16_t *>(IntPtr); @@ -1080,11 +1099,11 @@ error_code ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { I < E; ++I, ++Offset) { if (*I != Index) continue; - if (error_code EC = OwningObject->getRvaPtr( - ExportTable->NamePointerRVA, IntPtr)) + if (std::error_code EC = + OwningObject->getRvaPtr(ExportTable->NamePointerRVA, IntPtr)) return EC; const ulittle32_t *NamePtr = reinterpret_cast<const ulittle32_t *>(IntPtr); - if (error_code EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr)) + if (std::error_code EC = OwningObject->getRvaPtr(NamePtr[Offset], IntPtr)) return EC; Result = StringRef(reinterpret_cast<const char *>(IntPtr)); return object_error::success; @@ -1093,11 +1112,11 @@ error_code ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const { return object_error::success; } -ErrorOr<ObjectFile *> ObjectFile::createCOFFObjectFile(MemoryBuffer *Object, - bool BufferOwned) { - error_code EC; +ErrorOr<ObjectFile *> +ObjectFile::createCOFFObjectFile(std::unique_ptr<MemoryBuffer> Object) { + std::error_code EC; std::unique_ptr<COFFObjectFile> Ret( - new COFFObjectFile(Object, EC, BufferOwned)); + new COFFObjectFile(std::move(Object), EC)); if (EC) return EC; return Ret.release(); diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index a2c4df2..4f0f60b 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -17,65 +17,66 @@ namespace llvm { using namespace object; -ErrorOr<ObjectFile *> ObjectFile::createELFObjectFile(MemoryBuffer *Obj, - bool BufferOwned) { - std::pair<unsigned char, unsigned char> Ident = getElfArchType(Obj); +ErrorOr<ObjectFile *> +ObjectFile::createELFObjectFile(std::unique_ptr<MemoryBuffer> &Obj) { + std::pair<unsigned char, unsigned char> Ident = + getElfArchType(Obj->getBuffer()); std::size_t MaxAlignment = 1ULL << countTrailingZeros(uintptr_t(Obj->getBufferStart())); - error_code EC; + std::error_code EC; std::unique_ptr<ObjectFile> R; if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) #if !LLVM_IS_UNALIGNED_ACCESS_FAST if (MaxAlignment >= 4) - R.reset(new ELFObjectFile<ELFType<support::little, 4, false> >( - Obj, EC, BufferOwned)); + R.reset(new ELFObjectFile<ELFType<support::little, 4, false>>( + std::move(Obj), EC)); else #endif if (MaxAlignment >= 2) - R.reset(new ELFObjectFile<ELFType<support::little, 2, false> >( - Obj, EC, BufferOwned)); + R.reset(new ELFObjectFile<ELFType<support::little, 2, false>>( + std::move(Obj), EC)); else - llvm_unreachable("Invalid alignment for ELF file!"); + return object_error::parse_failed; else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) #if !LLVM_IS_UNALIGNED_ACCESS_FAST if (MaxAlignment >= 4) - R.reset(new ELFObjectFile<ELFType<support::big, 4, false> >(Obj, EC, - BufferOwned)); + R.reset(new ELFObjectFile<ELFType<support::big, 4, false>>(std::move(Obj), + EC)); else #endif if (MaxAlignment >= 2) - R.reset(new ELFObjectFile<ELFType<support::big, 2, false> >(Obj, EC, - BufferOwned)); + R.reset(new ELFObjectFile<ELFType<support::big, 2, false>>(std::move(Obj), + EC)); else - 
llvm_unreachable("Invalid alignment for ELF file!"); + return object_error::parse_failed; else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) #if !LLVM_IS_UNALIGNED_ACCESS_FAST if (MaxAlignment >= 8) - R.reset(new ELFObjectFile<ELFType<support::big, 8, true> >(Obj, EC, - BufferOwned)); + R.reset(new ELFObjectFile<ELFType<support::big, 8, true>>(std::move(Obj), + EC)); else #endif if (MaxAlignment >= 2) - R.reset(new ELFObjectFile<ELFType<support::big, 2, true> >(Obj, EC, - BufferOwned)); + R.reset(new ELFObjectFile<ELFType<support::big, 2, true>>(std::move(Obj), + EC)); else - llvm_unreachable("Invalid alignment for ELF file!"); + return object_error::parse_failed; else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) { #if !LLVM_IS_UNALIGNED_ACCESS_FAST if (MaxAlignment >= 8) - R.reset(new ELFObjectFile<ELFType<support::little, 8, true> >( - Obj, EC, BufferOwned)); + R.reset(new ELFObjectFile<ELFType<support::little, 8, true>>( + std::move(Obj), EC)); else #endif if (MaxAlignment >= 2) - R.reset(new ELFObjectFile<ELFType<support::little, 2, true> >( - Obj, EC, BufferOwned)); + R.reset(new ELFObjectFile<ELFType<support::little, 2, true>>( + std::move(Obj), EC)); else - llvm_unreachable("Invalid alignment for ELF file!"); + return object_error::parse_failed; } else - report_fatal_error("Buffer is not an ELF object file!"); + llvm_unreachable("Buffer is not an ELF object file!"); if (EC) return EC; diff --git a/lib/Object/ELFYAML.cpp b/lib/Object/ELFYAML.cpp index 7d50f23..dc3d467 100644 --- a/lib/Object/ELFYAML.cpp +++ b/lib/Object/ELFYAML.cpp @@ -368,6 +368,16 @@ void ScalarEnumerationTraits<ELFYAML::ELF_STT>::enumeration( #undef ECase } +void ScalarEnumerationTraits<ELFYAML::ELF_STV>::enumeration( + IO &IO, ELFYAML::ELF_STV &Value) { +#define ECase(X) IO.enumCase(Value, #X, ELF::X); + ECase(STV_DEFAULT) + ECase(STV_INTERNAL) + ECase(STV_HIDDEN) + ECase(STV_PROTECTED) +#undef ECase +} + void ScalarEnumerationTraits<ELFYAML::ELF_REL>::enumeration( IO &IO, ELFYAML::ELF_REL &Value) { const auto *Object = static_cast<ELFYAML::Object *>(IO.getContext()); @@ -649,6 +659,7 @@ void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) { IO.mapOptional("Section", Symbol.Section, StringRef()); IO.mapOptional("Value", Symbol.Value, Hex64(0)); IO.mapOptional("Size", Symbol.Size, Hex64(0)); + IO.mapOptional("Visibility", Symbol.Visibility, ELFYAML::ELF_STV(0)); } void MappingTraits<ELFYAML::LocalGlobalWeakSymbols>::mapping( @@ -664,7 +675,6 @@ static void commonSectionMapping(IO &IO, ELFYAML::Section &Section) { IO.mapOptional("Flags", Section.Flags, ELFYAML::ELF_SHF(0)); IO.mapOptional("Address", Section.Address, Hex64(0)); IO.mapOptional("Link", Section.Link, StringRef()); - IO.mapOptional("Info", Section.Info, StringRef()); IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0)); } @@ -676,6 +686,7 @@ static void sectionMapping(IO &IO, ELFYAML::RawContentSection &Section) { static void sectionMapping(IO &IO, ELFYAML::RelocationSection &Section) { commonSectionMapping(IO, Section); + IO.mapOptional("Info", Section.Info, StringRef()); IO.mapOptional("Relocations", Section.Relocations); } diff --git a/lib/Object/Error.cpp b/lib/Object/Error.cpp index 8e50869..9d25269 100644 --- a/lib/Object/Error.cpp +++ b/lib/Object/Error.cpp @@ -18,11 +18,10 @@ using namespace llvm; using namespace object; namespace { -class _object_error_category : public error_category { +class _object_error_category : public std::error_category { public: - 
const char* name() const override; + const char* name() const LLVM_NOEXCEPT override; std::string message(int ev) const override; - error_condition default_error_condition(int ev) const override; }; } @@ -30,8 +29,8 @@ const char *_object_error_category::name() const { return "llvm.object"; } -std::string _object_error_category::message(int ev) const { - object_error::Impl E = static_cast<object_error::Impl>(ev); +std::string _object_error_category::message(int EV) const { + object_error E = static_cast<object_error>(EV); switch (E) { case object_error::success: return "Success"; case object_error::arch_not_found: @@ -47,13 +46,7 @@ std::string _object_error_category::message(int ev) const { "defined."); } -error_condition _object_error_category::default_error_condition(int ev) const { - if (ev == object_error::success) - return errc::success; - return errc::invalid_argument; -} - -const error_category &object::object_category() { +const std::error_category &object::object_category() { static _object_error_category o; return o; } diff --git a/lib/Object/IRObjectFile.cpp b/lib/Object/IRObjectFile.cpp index a8aba26..5323d92 100644 --- a/lib/Object/IRObjectFile.cpp +++ b/lib/Object/IRObjectFile.cpp @@ -11,34 +11,119 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Object/IRObjectFile.h" +#include "RecordStreamer.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/LLVMContext.h" +#include "llvm/IR/GVMaterializer.h" #include "llvm/IR/Mangler.h" #include "llvm/IR/Module.h" -#include "llvm/Object/IRObjectFile.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCObjectFileInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; using namespace object; -IRObjectFile::IRObjectFile(MemoryBuffer *Object, error_code &EC, - LLVMContext &Context, bool BufferOwned) - : SymbolicFile(Binary::ID_IR, Object, BufferOwned) { - ErrorOr<Module*> MOrErr = parseBitcodeFile(Object, Context); - if ((EC = MOrErr.getError())) - return; - - M.reset(MOrErr.get()); - +IRObjectFile::IRObjectFile(std::unique_ptr<MemoryBuffer> Object, + std::unique_ptr<Module> Mod) + : SymbolicFile(Binary::ID_IR, std::move(Object)), M(std::move(Mod)) { // If we have a DataLayout, setup a mangler. 
const DataLayout *DL = M->getDataLayout(); if (!DL) return; Mang.reset(new Mangler(DL)); + + const std::string &InlineAsm = M->getModuleInlineAsm(); + if (InlineAsm.empty()) + return; + + StringRef Triple = M->getTargetTriple(); + std::string Err; + const Target *T = TargetRegistry::lookupTarget(Triple, Err); + if (!T) + return; + + std::unique_ptr<MCRegisterInfo> MRI(T->createMCRegInfo(Triple)); + if (!MRI) + return; + + std::unique_ptr<MCAsmInfo> MAI(T->createMCAsmInfo(*MRI, Triple)); + if (!MAI) + return; + + std::unique_ptr<MCSubtargetInfo> STI( + T->createMCSubtargetInfo(Triple, "", "")); + if (!STI) + return; + + std::unique_ptr<MCInstrInfo> MCII(T->createMCInstrInfo()); + if (!MCII) + return; + + MCObjectFileInfo MOFI; + MCContext MCCtx(MAI.get(), MRI.get(), &MOFI); + MOFI.InitMCObjectFileInfo(Triple, Reloc::Default, CodeModel::Default, MCCtx); + std::unique_ptr<RecordStreamer> Streamer(new RecordStreamer(MCCtx)); + + std::unique_ptr<MemoryBuffer> Buffer(MemoryBuffer::getMemBuffer(InlineAsm)); + SourceMgr SrcMgr; + SrcMgr.AddNewSourceBuffer(Buffer.release(), SMLoc()); + std::unique_ptr<MCAsmParser> Parser( + createMCAsmParser(SrcMgr, MCCtx, *Streamer, *MAI)); + + MCTargetOptions MCOptions; + std::unique_ptr<MCTargetAsmParser> TAP( + T->createMCAsmParser(*STI, *Parser, *MCII, MCOptions)); + if (!TAP) + return; + + Parser->setTargetParser(*TAP); + if (Parser->Run(false)) + return; + + for (auto &KV : *Streamer) { + StringRef Key = KV.first(); + RecordStreamer::State Value = KV.second; + uint32_t Res = BasicSymbolRef::SF_None; + switch (Value) { + case RecordStreamer::NeverSeen: + llvm_unreachable("foo"); + case RecordStreamer::DefinedGlobal: + Res |= BasicSymbolRef::SF_Global; + break; + case RecordStreamer::Defined: + break; + case RecordStreamer::Global: + case RecordStreamer::Used: + Res |= BasicSymbolRef::SF_Undefined; + Res |= BasicSymbolRef::SF_Global; + break; + } + AsmSymbols.push_back( + std::make_pair<std::string, uint32_t>(Key, std::move(Res))); + } } -static const GlobalValue &getGV(DataRefImpl &Symb) { - return *reinterpret_cast<GlobalValue*>(Symb.p & ~uintptr_t(3)); +IRObjectFile::~IRObjectFile() { + GVMaterializer *GVM = M->getMaterializer(); + if (GVM) + GVM->releaseBuffer(); + } + +static const GlobalValue *getGV(DataRefImpl &Symb) { + if ((Symb.p & 3) == 3) + return nullptr; + + return reinterpret_cast<GlobalValue*>(Symb.p & ~uintptr_t(3)); } static uintptr_t skipEmpty(Module::const_alias_iterator I, const Module &M) { @@ -62,68 +147,109 @@ static uintptr_t skipEmpty(Module::const_iterator I, const Module &M) { return reinterpret_cast<uintptr_t>(GV) | 0; } +static unsigned getAsmSymIndex(DataRefImpl Symb) { + assert((Symb.p & uintptr_t(3)) == 3); + uintptr_t Index = Symb.p & ~uintptr_t(3); + Index >>= 2; + return Index; +} + void IRObjectFile::moveSymbolNext(DataRefImpl &Symb) const { - const GlobalValue *GV = &getGV(Symb); - const Module &M = *GV->getParent(); + const GlobalValue *GV = getGV(Symb); uintptr_t Res; + switch (Symb.p & 3) { case 0: { Module::const_iterator Iter(static_cast<const Function*>(GV)); ++Iter; - Res = skipEmpty(Iter, M); + Res = skipEmpty(Iter, *M); break; } case 1: { Module::const_global_iterator Iter(static_cast<const GlobalVariable*>(GV)); ++Iter; - Res = skipEmpty(Iter, M); + Res = skipEmpty(Iter, *M); break; } case 2: { Module::const_alias_iterator Iter(static_cast<const GlobalAlias*>(GV)); ++Iter; - Res = skipEmpty(Iter, M); + Res = skipEmpty(Iter, *M); + break; + } + case 3: { + unsigned Index = getAsmSymIndex(Symb); + assert(Index < 
AsmSymbols.size()); + ++Index; + Res = (Index << 2) | 3; break; } - case 3: - llvm_unreachable("Invalid symbol reference"); } Symb.p = Res; } -error_code IRObjectFile::printSymbolName(raw_ostream &OS, - DataRefImpl Symb) const { - const GlobalValue &GV = getGV(Symb); +std::error_code IRObjectFile::printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const { + const GlobalValue *GV = getGV(Symb); + if (!GV) { + unsigned Index = getAsmSymIndex(Symb); + assert(Index < AsmSymbols.size()); + OS << AsmSymbols[Index].first; + return object_error::success; + } if (Mang) - Mang->getNameWithPrefix(OS, &GV, false); + Mang->getNameWithPrefix(OS, GV, false); else - OS << GV.getName(); + OS << GV->getName(); return object_error::success; } +static bool isDeclaration(const GlobalValue &V) { + if (V.hasAvailableExternallyLinkage()) + return true; + + if (V.isMaterializable()) + return false; + + return V.isDeclaration(); +} + uint32_t IRObjectFile::getSymbolFlags(DataRefImpl Symb) const { - const GlobalValue &GV = getGV(Symb); + const GlobalValue *GV = getGV(Symb); + + if (!GV) { + unsigned Index = getAsmSymIndex(Symb); + assert(Index < AsmSymbols.size()); + return AsmSymbols[Index].second; + } uint32_t Res = BasicSymbolRef::SF_None; - if (GV.isDeclaration() || GV.hasAvailableExternallyLinkage()) + if (isDeclaration(*GV)) Res |= BasicSymbolRef::SF_Undefined; - if (GV.hasPrivateLinkage()) + if (GV->hasPrivateLinkage()) Res |= BasicSymbolRef::SF_FormatSpecific; - if (!GV.hasLocalLinkage()) + if (!GV->hasLocalLinkage()) Res |= BasicSymbolRef::SF_Global; - if (GV.hasCommonLinkage()) + if (GV->hasCommonLinkage()) Res |= BasicSymbolRef::SF_Common; - if (GV.hasLinkOnceLinkage() || GV.hasWeakLinkage()) + if (GV->hasLinkOnceLinkage() || GV->hasWeakLinkage()) Res |= BasicSymbolRef::SF_Weak; + if (GV->getName().startswith("llvm.")) + Res |= BasicSymbolRef::SF_FormatSpecific; + else if (auto *Var = dyn_cast<GlobalVariable>(GV)) { + if (Var->getSection() == StringRef("llvm.metadata")) + Res |= BasicSymbolRef::SF_FormatSpecific; + } + return Res; } -const GlobalValue &IRObjectFile::getSymbolGV(DataRefImpl Symb) const { - const GlobalValue &GV = getGV(Symb); +const GlobalValue *IRObjectFile::getSymbolGV(DataRefImpl Symb) const { + const GlobalValue *GV = getGV(Symb); return GV; } @@ -136,16 +262,18 @@ basic_symbol_iterator IRObjectFile::symbol_begin_impl() const { basic_symbol_iterator IRObjectFile::symbol_end_impl() const { DataRefImpl Ret; - Ret.p = 3; + uint64_t NumAsm = AsmSymbols.size(); + NumAsm <<= 2; + Ret.p = 3 | NumAsm; return basic_symbol_iterator(BasicSymbolRef(Ret, this)); } -ErrorOr<SymbolicFile *> llvm::object::SymbolicFile::createIRObjectFile( - MemoryBuffer *Object, LLVMContext &Context, bool BufferOwned) { - error_code EC; - std::unique_ptr<IRObjectFile> Ret( - new IRObjectFile(Object, EC, Context, BufferOwned)); - if (EC) +ErrorOr<IRObjectFile *> llvm::object::IRObjectFile::createIRObjectFile( + std::unique_ptr<MemoryBuffer> Object, LLVMContext &Context) { + ErrorOr<Module *> MOrErr = getLazyBitcodeModule(Object.get(), Context); + if (std::error_code EC = MOrErr.getError()) return EC; - return Ret.release(); + + std::unique_ptr<Module> M(MOrErr.get()); + return new IRObjectFile(std::move(Object), std::move(M)); } diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt index 7813832..8acacba 100644 --- a/lib/Object/LLVMBuild.txt +++ b/lib/Object/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Object parent = Libraries -required_libraries = BitReader Core Support +required_libraries =
BitReader Core Support MC MCParser diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index c6bab03..4919114 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -28,6 +28,7 @@ using namespace llvm; using namespace object; namespace llvm { + namespace object { struct nlist_base { @@ -43,190 +44,195 @@ struct section_base { }; template<typename T> -static void SwapValue(T &Value) { - Value = sys::SwapByteOrder(Value); -} - -template<typename T> static void SwapStruct(T &Value); template<> void SwapStruct(MachO::any_relocation_info &H) { - SwapValue(H.r_word0); - SwapValue(H.r_word1); + sys::swapByteOrder(H.r_word0); + sys::swapByteOrder(H.r_word1); } template<> void SwapStruct(MachO::load_command &L) { - SwapValue(L.cmd); - SwapValue(L.cmdsize); + sys::swapByteOrder(L.cmd); + sys::swapByteOrder(L.cmdsize); } template<> void SwapStruct(nlist_base &S) { - SwapValue(S.n_strx); - SwapValue(S.n_desc); + sys::swapByteOrder(S.n_strx); + sys::swapByteOrder(S.n_desc); } template<> void SwapStruct(MachO::section &S) { - SwapValue(S.addr); - SwapValue(S.size); - SwapValue(S.offset); - SwapValue(S.align); - SwapValue(S.reloff); - SwapValue(S.nreloc); - SwapValue(S.flags); - SwapValue(S.reserved1); - SwapValue(S.reserved2); + sys::swapByteOrder(S.addr); + sys::swapByteOrder(S.size); + sys::swapByteOrder(S.offset); + sys::swapByteOrder(S.align); + sys::swapByteOrder(S.reloff); + sys::swapByteOrder(S.nreloc); + sys::swapByteOrder(S.flags); + sys::swapByteOrder(S.reserved1); + sys::swapByteOrder(S.reserved2); } template<> void SwapStruct(MachO::section_64 &S) { - SwapValue(S.addr); - SwapValue(S.size); - SwapValue(S.offset); - SwapValue(S.align); - SwapValue(S.reloff); - SwapValue(S.nreloc); - SwapValue(S.flags); - SwapValue(S.reserved1); - SwapValue(S.reserved2); - SwapValue(S.reserved3); + sys::swapByteOrder(S.addr); + sys::swapByteOrder(S.size); + sys::swapByteOrder(S.offset); + sys::swapByteOrder(S.align); + sys::swapByteOrder(S.reloff); + sys::swapByteOrder(S.nreloc); + sys::swapByteOrder(S.flags); + sys::swapByteOrder(S.reserved1); + sys::swapByteOrder(S.reserved2); + sys::swapByteOrder(S.reserved3); } template<> void SwapStruct(MachO::nlist &S) { - SwapValue(S.n_strx); - SwapValue(S.n_desc); - SwapValue(S.n_value); + sys::swapByteOrder(S.n_strx); + sys::swapByteOrder(S.n_desc); + sys::swapByteOrder(S.n_value); } template<> void SwapStruct(MachO::nlist_64 &S) { - SwapValue(S.n_strx); - SwapValue(S.n_desc); - SwapValue(S.n_value); + sys::swapByteOrder(S.n_strx); + sys::swapByteOrder(S.n_desc); + sys::swapByteOrder(S.n_value); } template<> void SwapStruct(MachO::mach_header &H) { - SwapValue(H.magic); - SwapValue(H.cputype); - SwapValue(H.cpusubtype); - SwapValue(H.filetype); - SwapValue(H.ncmds); - SwapValue(H.sizeofcmds); - SwapValue(H.flags); + sys::swapByteOrder(H.magic); + sys::swapByteOrder(H.cputype); + sys::swapByteOrder(H.cpusubtype); + sys::swapByteOrder(H.filetype); + sys::swapByteOrder(H.ncmds); + sys::swapByteOrder(H.sizeofcmds); + sys::swapByteOrder(H.flags); } template<> void SwapStruct(MachO::mach_header_64 &H) { - SwapValue(H.magic); - SwapValue(H.cputype); - SwapValue(H.cpusubtype); - SwapValue(H.filetype); - SwapValue(H.ncmds); - SwapValue(H.sizeofcmds); - SwapValue(H.flags); - SwapValue(H.reserved); + sys::swapByteOrder(H.magic); + sys::swapByteOrder(H.cputype); + sys::swapByteOrder(H.cpusubtype); + sys::swapByteOrder(H.filetype); + sys::swapByteOrder(H.ncmds); + sys::swapByteOrder(H.sizeofcmds); + sys::swapByteOrder(H.flags); 
+ sys::swapByteOrder(H.reserved); } template<> void SwapStruct(MachO::symtab_command &C) { - SwapValue(C.cmd); - SwapValue(C.cmdsize); - SwapValue(C.symoff); - SwapValue(C.nsyms); - SwapValue(C.stroff); - SwapValue(C.strsize); + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.symoff); + sys::swapByteOrder(C.nsyms); + sys::swapByteOrder(C.stroff); + sys::swapByteOrder(C.strsize); } template<> void SwapStruct(MachO::dysymtab_command &C) { - SwapValue(C.cmd); - SwapValue(C.cmdsize); - SwapValue(C.ilocalsym); - SwapValue(C.nlocalsym); - SwapValue(C.iextdefsym); - SwapValue(C.nextdefsym); - SwapValue(C.iundefsym); - SwapValue(C.nundefsym); - SwapValue(C.tocoff); - SwapValue(C.ntoc); - SwapValue(C.modtaboff); - SwapValue(C.nmodtab); - SwapValue(C.extrefsymoff); - SwapValue(C.nextrefsyms); - SwapValue(C.indirectsymoff); - SwapValue(C.nindirectsyms); - SwapValue(C.extreloff); - SwapValue(C.nextrel); - SwapValue(C.locreloff); - SwapValue(C.nlocrel); + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.ilocalsym); + sys::swapByteOrder(C.nlocalsym); + sys::swapByteOrder(C.iextdefsym); + sys::swapByteOrder(C.nextdefsym); + sys::swapByteOrder(C.iundefsym); + sys::swapByteOrder(C.nundefsym); + sys::swapByteOrder(C.tocoff); + sys::swapByteOrder(C.ntoc); + sys::swapByteOrder(C.modtaboff); + sys::swapByteOrder(C.nmodtab); + sys::swapByteOrder(C.extrefsymoff); + sys::swapByteOrder(C.nextrefsyms); + sys::swapByteOrder(C.indirectsymoff); + sys::swapByteOrder(C.nindirectsyms); + sys::swapByteOrder(C.extreloff); + sys::swapByteOrder(C.nextrel); + sys::swapByteOrder(C.locreloff); + sys::swapByteOrder(C.nlocrel); } template<> void SwapStruct(MachO::linkedit_data_command &C) { - SwapValue(C.cmd); - SwapValue(C.cmdsize); - SwapValue(C.dataoff); - SwapValue(C.datasize); + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.dataoff); + sys::swapByteOrder(C.datasize); } template<> void SwapStruct(MachO::segment_command &C) { - SwapValue(C.cmd); - SwapValue(C.cmdsize); - SwapValue(C.vmaddr); - SwapValue(C.vmsize); - SwapValue(C.fileoff); - SwapValue(C.filesize); - SwapValue(C.maxprot); - SwapValue(C.initprot); - SwapValue(C.nsects); - SwapValue(C.flags); + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.vmaddr); + sys::swapByteOrder(C.vmsize); + sys::swapByteOrder(C.fileoff); + sys::swapByteOrder(C.filesize); + sys::swapByteOrder(C.maxprot); + sys::swapByteOrder(C.initprot); + sys::swapByteOrder(C.nsects); + sys::swapByteOrder(C.flags); } template<> void SwapStruct(MachO::segment_command_64 &C) { - SwapValue(C.cmd); - SwapValue(C.cmdsize); - SwapValue(C.vmaddr); - SwapValue(C.vmsize); - SwapValue(C.fileoff); - SwapValue(C.filesize); - SwapValue(C.maxprot); - SwapValue(C.initprot); - SwapValue(C.nsects); - SwapValue(C.flags); + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.vmaddr); + sys::swapByteOrder(C.vmsize); + sys::swapByteOrder(C.fileoff); + sys::swapByteOrder(C.filesize); + sys::swapByteOrder(C.maxprot); + sys::swapByteOrder(C.initprot); + sys::swapByteOrder(C.nsects); + sys::swapByteOrder(C.flags); } template<> void SwapStruct(uint32_t &C) { - SwapValue(C); + sys::swapByteOrder(C); } template<> void SwapStruct(MachO::linker_options_command &C) { - SwapValue(C.cmd); - SwapValue(C.cmdsize); - SwapValue(C.count); + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.count); } template<> void 
SwapStruct(MachO::version_min_command&C) { - SwapValue(C.cmd); - SwapValue(C.cmdsize); - SwapValue(C.version); - SwapValue(C.reserved); + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.version); + sys::swapByteOrder(C.reserved); +} + +template<> +void SwapStruct(MachO::dylib_command&C) { + sys::swapByteOrder(C.cmd); + sys::swapByteOrder(C.cmdsize); + sys::swapByteOrder(C.dylib.name); + sys::swapByteOrder(C.dylib.timestamp); + sys::swapByteOrder(C.dylib.current_version); + sys::swapByteOrder(C.dylib.compatibility_version); } template<> void SwapStruct(MachO::data_in_code_entry &C) { - SwapValue(C.offset); - SwapValue(C.length); - SwapValue(C.kind); + sys::swapByteOrder(C.offset); + sys::swapByteOrder(C.length); + sys::swapByteOrder(C.kind); } template<typename T> @@ -306,7 +312,7 @@ static void printRelocationTargetName(const MachOObjectFile *O, uint32_t Val = O->getPlainRelocationSymbolNum(RE); for (const SymbolRef &Symbol : O->symbols()) { - error_code ec; + std::error_code ec; uint64_t Addr; StringRef Name; @@ -323,7 +329,7 @@ static void printRelocationTargetName(const MachOObjectFile *O, // If we couldn't find a symbol that this relocation refers to, try // to find a section beginning instead. for (const SectionRef &Section : O->sections()) { - error_code ec; + std::error_code ec; uint64_t Addr; StringRef Name; @@ -416,10 +422,10 @@ static uint32_t getSectionFlags(const MachOObjectFile *O, return Sect.flags; } -MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, bool IsLittleEndian, - bool Is64bits, error_code &EC, - bool BufferOwned) - : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object, BufferOwned), +MachOObjectFile::MachOObjectFile(std::unique_ptr<MemoryBuffer> Object, + bool IsLittleEndian, bool Is64bits, + std::error_code &EC) + : ObjectFile(getMachOType(IsLittleEndian, Is64bits), std::move(Object)), SymtabLoadCmd(nullptr), DysymtabLoadCmd(nullptr), DataInCodeLoadCmd(nullptr) { uint32_t LoadCommandCount = this->getHeader().ncmds; @@ -443,6 +449,12 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, bool IsLittleEndian, const char *Sec = getSectionPtr(this, Load, J); Sections.push_back(Sec); } + } else if (Load.C.cmd == MachO::LC_LOAD_DYLIB || + Load.C.cmd == MachO::LC_LOAD_WEAK_DYLIB || + Load.C.cmd == MachO::LC_LAZY_LOAD_DYLIB || + Load.C.cmd == MachO::LC_REEXPORT_DYLIB || + Load.C.cmd == MachO::LC_LOAD_UPWARD_DYLIB) { + Libraries.push_back(Load.Ptr); } if (I == LoadCommandCount - 1) @@ -459,8 +471,8 @@ void MachOObjectFile::moveSymbolNext(DataRefImpl &Symb) const { Symb.p += SymbolTableEntrySize; } -error_code MachOObjectFile::getSymbolName(DataRefImpl Symb, - StringRef &Res) const { +std::error_code MachOObjectFile::getSymbolName(DataRefImpl Symb, + StringRef &Res) const { StringRef StringTable = getStringTableData(); nlist_base Entry = getSymbolTableEntryBase(this, Symb); const char *Start = &StringTable.data()[Entry.n_strx]; @@ -468,8 +480,32 @@ error_code MachOObjectFile::getSymbolName(DataRefImpl Symb, return object_error::success; } -error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, - uint64_t &Res) const { +// getIndirectName() returns the name of the aliased symbol whose string table +// index is in the n_value field.
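A minimal caller-side sketch of how this lookup might be used, assuming a MachOObjectFile *MachOObj and a SymbolRef Sym are already in hand (the snippet is illustrative and not part of the patch):

    StringRef Name, Target;
    if (Sym.getFlags() & SymbolRef::SF_Indirect) {
      Sym.getName(Name); // error handling elided for brevity
      // N_INDR symbols keep the string-table index of the aliased name in
      // n_value; getIndirectName() decodes it into Target.
      if (std::error_code EC =
              MachOObj->getIndirectName(Sym.getRawDataRefImpl(), Target))
        report_fatal_error(EC.message());
      outs() << Name << " -> " << Target << "\n";
    }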
+std::error_code MachOObjectFile::getIndirectName(DataRefImpl Symb, + StringRef &Res) const { + StringRef StringTable = getStringTableData(); + uint64_t NValue; + if (is64Bit()) { + MachO::nlist_64 Entry = getSymbol64TableEntry(Symb); + NValue = Entry.n_value; + if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR) + return object_error::parse_failed; + } else { + MachO::nlist Entry = getSymbolTableEntry(Symb); + NValue = Entry.n_value; + if ((Entry.n_type & MachO::N_TYPE) != MachO::N_INDR) + return object_error::parse_failed; + } + if (NValue >= StringTable.size()) + return object_error::parse_failed; + const char *Start = &StringTable.data()[NValue]; + Res = StringRef(Start); + return object_error::success; +} + +std::error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, + uint64_t &Res) const { if (is64Bit()) { MachO::nlist_64 Entry = getSymbol64TableEntry(Symb); if ((Entry.n_type & MachO::N_TYPE) == MachO::N_UNDF && @@ -488,8 +524,8 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, return object_error::success; } -error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI, - uint32_t &Result) const { +std::error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI, + uint32_t &Result) const { uint32_t flags = getSymbolFlags(DRI); if (flags & SymbolRef::SF_Common) { nlist_base Entry = getSymbolTableEntryBase(this, DRI); @@ -500,8 +536,8 @@ error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI, return object_error::success; } -error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, - uint64_t &Result) const { +std::error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, + uint64_t &Result) const { uint64_t BeginOffset; uint64_t EndOffset = 0; uint8_t SectionIndex; @@ -549,8 +585,8 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, return object_error::success; } -error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, - SymbolRef::Type &Res) const { +std::error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, + SymbolRef::Type &Res) const { nlist_base Entry = getSymbolTableEntryBase(this, Symb); uint8_t n_type = Entry.n_type; @@ -584,6 +620,9 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) Result |= SymbolRef::SF_Undefined; + if ((MachOType & MachO::N_TYPE) == MachO::N_INDR) + Result |= SymbolRef::SF_Indirect; + if (MachOType & MachO::N_STAB) Result |= SymbolRef::SF_FormatSpecific; @@ -606,9 +645,8 @@ uint32_t MachOObjectFile::getSymbolFlags(DataRefImpl DRI) const { return Result; } -error_code -MachOObjectFile::getSymbolSection(DataRefImpl Symb, - section_iterator &Res) const { +std::error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb, + section_iterator &Res) const { nlist_base Entry = getSymbolTableEntryBase(this, Symb); uint8_t index = Entry.n_sect; @@ -627,15 +665,15 @@ void MachOObjectFile::moveSectionNext(DataRefImpl &Sec) const { Sec.d.a++; } -error_code -MachOObjectFile::getSectionName(DataRefImpl Sec, StringRef &Result) const { +std::error_code MachOObjectFile::getSectionName(DataRefImpl Sec, + StringRef &Result) const { ArrayRef<char> Raw = getSectionRawName(Sec); Result = parseSegmentOrSectionName(Raw.data()); return object_error::success; } -error_code -MachOObjectFile::getSectionAddress(DataRefImpl Sec, uint64_t &Res) const { +std::error_code MachOObjectFile::getSectionAddress(DataRefImpl Sec, + uint64_t &Res) const { if (is64Bit()) { MachO::section_64 Sect = getSection64(Sec); Res = Sect.addr; @@ -646,8 +684,8 @@ 
MachOObjectFile::getSectionAddress(DataRefImpl Sec, uint64_t &Res) const { return object_error::success; } -error_code -MachOObjectFile::getSectionSize(DataRefImpl Sec, uint64_t &Res) const { +std::error_code MachOObjectFile::getSectionSize(DataRefImpl Sec, + uint64_t &Res) const { if (is64Bit()) { MachO::section_64 Sect = getSection64(Sec); Res = Sect.size; @@ -659,8 +697,8 @@ MachOObjectFile::getSectionSize(DataRefImpl Sec, uint64_t &Res) const { return object_error::success; } -error_code -MachOObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Res) const { +std::error_code MachOObjectFile::getSectionContents(DataRefImpl Sec, + StringRef &Res) const { uint32_t Offset; uint64_t Size; @@ -678,8 +716,8 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Res) const { return object_error::success; } -error_code -MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const { +std::error_code MachOObjectFile::getSectionAlignment(DataRefImpl Sec, + uint64_t &Res) const { uint32_t Align; if (is64Bit()) { MachO::section_64 Sect = getSection64(Sec); @@ -693,14 +731,15 @@ MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const { return object_error::success; } -error_code -MachOObjectFile::isSectionText(DataRefImpl Sec, bool &Res) const { +std::error_code MachOObjectFile::isSectionText(DataRefImpl Sec, + bool &Res) const { uint32_t Flags = getSectionFlags(this, Sec); Res = Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; return object_error::success; } -error_code MachOObjectFile::isSectionData(DataRefImpl Sec, bool &Result) const { +std::error_code MachOObjectFile::isSectionData(DataRefImpl Sec, + bool &Result) const { uint32_t Flags = getSectionFlags(this, Sec); unsigned SectionType = Flags & MachO::SECTION_TYPE; Result = !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) && @@ -709,7 +748,8 @@ error_code MachOObjectFile::isSectionData(DataRefImpl Sec, bool &Result) const { return object_error::success; } -error_code MachOObjectFile::isSectionBSS(DataRefImpl Sec, bool &Result) const { +std::error_code MachOObjectFile::isSectionBSS(DataRefImpl Sec, + bool &Result) const { uint32_t Flags = getSectionFlags(this, Sec); unsigned SectionType = Flags & MachO::SECTION_TYPE; Result = !(Flags & MachO::S_ATTR_PURE_INSTRUCTIONS) && @@ -718,7 +758,7 @@ error_code MachOObjectFile::isSectionBSS(DataRefImpl Sec, bool &Result) const { return object_error::success; } -error_code +std::error_code MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec, bool &Result) const { // FIXME: Unimplemented. @@ -726,15 +766,15 @@ MachOObjectFile::isSectionRequiredForExecution(DataRefImpl Sec, return object_error::success; } -error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec, - bool &Result) const { +std::error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec, + bool &Result) const { // FIXME: Unimplemented. 
Result = false; return object_error::success; } -error_code -MachOObjectFile::isSectionZeroInit(DataRefImpl Sec, bool &Res) const { +std::error_code MachOObjectFile::isSectionZeroInit(DataRefImpl Sec, + bool &Res) const { uint32_t Flags = getSectionFlags(this, Sec); unsigned SectionType = Flags & MachO::SECTION_TYPE; Res = SectionType == MachO::S_ZEROFILL || @@ -742,8 +782,8 @@ MachOObjectFile::isSectionZeroInit(DataRefImpl Sec, bool &Res) const { return object_error::success; } -error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec, - bool &Result) const { +std::error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec, + bool &Result) const { // Consider using the code from isSectionText to look for __const sections. // Alternately, emit S_ATTR_PURE_INSTRUCTIONS and/or S_ATTR_SOME_INSTRUCTIONS // to use section attributes to distinguish code from data. @@ -753,9 +793,9 @@ error_code MachOObjectFile::isSectionReadOnlyData(DataRefImpl Sec, return object_error::success; } -error_code -MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, - bool &Result) const { +std::error_code MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, + DataRefImpl Symb, + bool &Result) const { SymbolRef::Type ST; this->getSymbolType(Symb, ST); if (ST == SymbolRef::ST_Unknown) { @@ -803,8 +843,8 @@ void MachOObjectFile::moveRelocationNext(DataRefImpl &Rel) const { ++Rel.d.b; } -error_code -MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const { +std::error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel, + uint64_t &Res) const { uint64_t Offset; getRelocationOffset(Rel, Offset); @@ -816,8 +856,8 @@ MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const { return object_error::success; } -error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel, - uint64_t &Res) const { +std::error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel, + uint64_t &Res) const { assert(getHeader().filetype == MachO::MH_OBJECT && "Only implemented for MH_OBJECT"); MachO::any_relocation_info RE = getRelocation(Rel); @@ -828,6 +868,9 @@ error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel, symbol_iterator MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const { MachO::any_relocation_info RE = getRelocation(Rel); + if (isRelocationScattered(RE)) + return symbol_end(); + uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE); bool isExtern = getPlainRelocationExternal(RE); if (!isExtern) @@ -843,14 +886,14 @@ MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const { return symbol_iterator(SymbolRef(Sym, this)); } -error_code MachOObjectFile::getRelocationType(DataRefImpl Rel, - uint64_t &Res) const { +std::error_code MachOObjectFile::getRelocationType(DataRefImpl Rel, + uint64_t &Res) const { MachO::any_relocation_info RE = getRelocation(Rel); Res = getAnyRelocationType(RE); return object_error::success; } -error_code +std::error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, SmallVectorImpl<char> &Result) const { StringRef res; @@ -963,7 +1006,7 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, return object_error::success; } -error_code +std::error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, SmallVectorImpl<char> &Result) const { MachO::any_relocation_info RE = getRelocation(Rel); @@ -1139,8 +1182,8 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel, return object_error::success; } -error_code -MachOObjectFile::getRelocationHidden(DataRefImpl Rel, bool 
&Result) const { +std::error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel, + bool &Result) const { unsigned Arch = getArch(); uint64_t Type; getRelocationType(Rel, Type); @@ -1167,16 +1210,199 @@ MachOObjectFile::getRelocationHidden(DataRefImpl Rel, bool &Result) const { return object_error::success; } -error_code MachOObjectFile::getLibraryNext(DataRefImpl LibData, - LibraryRef &Res) const { +std::error_code MachOObjectFile::getLibraryNext(DataRefImpl LibData, + LibraryRef &Res) const { report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); } -error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData, - StringRef &Res) const { +std::error_code MachOObjectFile::getLibraryPath(DataRefImpl LibData, + StringRef &Res) const { report_fatal_error("Needed libraries unimplemented in MachOObjectFile"); } +// +// guessLibraryShortName() is passed a name of a dynamic library and returns a +// guess at what the short name is. The name is returned as a substring of the +// StringRef Name passed in. The name of the dynamic library is recognized as +// a framework if it has one of the two following forms: +// Foo.framework/Versions/A/Foo +// Foo.framework/Foo +// where A and Foo can be any string, and the name may contain a trailing +// suffix starting with an underbar. If the Name is recognized as a framework +// then isFramework is set to true; otherwise it is set to false. If the Name +// has a suffix then Suffix is set to the substring in Name that contains the +// suffix; otherwise it is set to a NULL StringRef. +// +// The Name of the dynamic library is recognized as a library name if it has +// one of the two following forms: +// libFoo.A.dylib +// libFoo.dylib +// The library may have a suffix trailing the name Foo of the form: +// libFoo_profile.A.dylib +// libFoo_profile.dylib +// +// The Name of the dynamic library is also recognized as a library name if it +// has the following form: +// Foo.qtx +// +// If the Name of the dynamic library matches none of the forms above, a NULL +// StringRef is returned.
+// +StringRef MachOObjectFile::guessLibraryShortName(StringRef Name, + bool &isFramework, + StringRef &Suffix) { + StringRef Foo, F, DotFramework, V, Dylib, Lib, Dot, Qtx; + size_t a, b, c, d, Idx; + + isFramework = false; + Suffix = StringRef(); + + // Pull off the last component and make Foo point to it + a = Name.rfind('/'); + if (a == Name.npos || a == 0) + goto guess_library; + Foo = Name.slice(a+1, Name.npos); + + // Look for a suffix starting with a '_' + Idx = Foo.rfind('_'); + if (Idx != Foo.npos && Foo.size() >= 2) { + Suffix = Foo.slice(Idx, Foo.npos); + Foo = Foo.slice(0, Idx); + } + + // First look for the form Foo.framework/Foo + b = Name.rfind('/', a); + if (b == Name.npos) + Idx = 0; + else + Idx = b+1; + F = Name.slice(Idx, Idx + Foo.size()); + DotFramework = Name.slice(Idx + Foo.size(), + Idx + Foo.size() + sizeof(".framework/")-1); + if (F == Foo && DotFramework == ".framework/") { + isFramework = true; + return Foo; + } + + // Next look for the form Foo.framework/Versions/A/Foo + if (b == Name.npos) + goto guess_library; + c = Name.rfind('/', b); + if (c == Name.npos || c == 0) + goto guess_library; + V = Name.slice(c+1, Name.npos); + if (!V.startswith("Versions/")) + goto guess_library; + d = Name.rfind('/', c); + if (d == Name.npos) + Idx = 0; + else + Idx = d+1; + F = Name.slice(Idx, Idx + Foo.size()); + DotFramework = Name.slice(Idx + Foo.size(), + Idx + Foo.size() + sizeof(".framework/")-1); + if (F == Foo && DotFramework == ".framework/") { + isFramework = true; + return Foo; + } + +guess_library: + // Pull off the suffix after the "." and make Dylib point to it + a = Name.rfind('.'); + if (a == Name.npos || a == 0) + return StringRef(); + Dylib = Name.slice(a, Name.npos); + if (Dylib != ".dylib") + goto guess_qtx; + + // First pull off the version letter for the form Foo.A.dylib if any. + if (a >= 3) { + Dot = Name.slice(a-2, a-1); + if (Dot == ".") + a = a - 2; + } + + b = Name.rfind('/', a); + if (b == Name.npos) + b = 0; + else + b = b+1; + // Ignore any suffix after an underbar, like Foo_profile.A.dylib + Idx = Name.find('_', b); + if (Idx != Name.npos && Idx != b) { + Lib = Name.slice(b, Idx); + Suffix = Name.slice(Idx, a); + } + else + Lib = Name.slice(b, a); + // There are incorrect library names of the form: + // libATS.A_profile.dylib, so check for these. + if (Lib.size() >= 3) { + Dot = Lib.slice(Lib.size()-2, Lib.size()-1); + if (Dot == ".") + Lib = Lib.slice(0, Lib.size()-2); + } + return Lib; + +guess_qtx: + Qtx = Name.slice(a, Name.npos); + if (Qtx != ".qtx") + return StringRef(); + b = Name.rfind('/', a); + if (b == Name.npos) + Lib = Name.slice(0, a); + else + Lib = Name.slice(b+1, a); + // There are library names of the form: QT.A.qtx, so check for these. + if (Lib.size() >= 3) { + Dot = Lib.slice(Lib.size()-2, Lib.size()-1); + if (Dot == ".") + Lib = Lib.slice(0, Lib.size()-2); + } + return Lib; +} + +// getLibraryShortNameByIndex() is used to get the short name of the library +// for an undefined symbol in a linked Mach-O binary that was linked with the +// normal two-level namespace default (that is MH_TWOLEVEL in the header). +// It is passed the index (0-based) of the library as translated from +// GET_LIBRARY_ORDINAL (1-based).
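To make the naming rules above concrete, a few expected results, followed by a hypothetical call to the per-ordinal lookup implemented next (the inputs, expectations, and the MachOObj pointer are illustrative, not from the patch):

    bool IsFramework;
    StringRef Suffix;
    // "Foo.framework/Versions/A/Foo"  -> "Foo"    (IsFramework == true)
    // "/usr/lib/libFoo.A.dylib"       -> "libFoo" (IsFramework == false)
    // "/usr/lib/libFoo_profile.dylib" -> "libFoo" (Suffix == "_profile")
    StringRef Short = MachOObjectFile::guessLibraryShortName(
        "/usr/lib/libFoo_profile.dylib", IsFramework, Suffix);

    // Short name of the first two-level-namespace import of MachOObj:
    StringRef ShortName;
    if (std::error_code EC = MachOObj->getLibraryShortNameByIndex(0, ShortName))
      report_fatal_error(EC.message());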
+std::error_code MachOObjectFile::getLibraryShortNameByIndex(unsigned Index, + StringRef &Res) { + if (Index >= Libraries.size()) + return object_error::parse_failed; + + MachO::dylib_command D = + getStruct<MachO::dylib_command>(this, Libraries[Index]); + if (D.dylib.name >= D.cmdsize) + return object_error::parse_failed; + + // If the cache of LibrariesShortNames is not built up do that first for + // all the Libraries. + if (LibrariesShortNames.size() == 0) { + for (unsigned i = 0; i < Libraries.size(); i++) { + MachO::dylib_command D = + getStruct<MachO::dylib_command>(this, Libraries[i]); + if (D.dylib.name >= D.cmdsize) { + LibrariesShortNames.push_back(StringRef()); + continue; + } + const char *P = (const char *)(Libraries[i]) + D.dylib.name; + StringRef Name = StringRef(P); + StringRef Suffix; + bool isFramework; + StringRef shortName = guessLibraryShortName(Name, isFramework, Suffix); + if (shortName == StringRef()) + LibrariesShortNames.push_back(Name); + else + LibrariesShortNames.push_back(shortName); + } + } + + Res = LibrariesShortNames[Index]; + return object_error::success; +} + basic_symbol_iterator MachOObjectFile::symbol_begin_impl() const { return getSymbolByIndex(0); } @@ -1288,6 +1514,108 @@ Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) { } } +Triple MachOObjectFile::getArch(uint32_t CPUType, uint32_t CPUSubType) { + switch (CPUType) { + case MachO::CPU_TYPE_I386: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_I386_ALL: + return Triple("i386-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_X86_64: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_X86_64_ALL: + return Triple("x86_64-apple-darwin"); + case MachO::CPU_SUBTYPE_X86_64_H: + return Triple("x86_64h-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_ARM: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_ARM_V4T: + return Triple("armv4t-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V5TEJ: + return Triple("armv5e-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V6: + return Triple("armv6-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V6M: + return Triple("armv6m-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7EM: + return Triple("armv7em-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7K: + return Triple("armv7k-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7M: + return Triple("armv7m-apple-darwin"); + case MachO::CPU_SUBTYPE_ARM_V7S: + return Triple("armv7s-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_ARM64: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_ARM64_ALL: + return Triple("arm64-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_POWERPC: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_POWERPC_ALL: + return Triple("ppc-apple-darwin"); + default: + return Triple(); + } + case MachO::CPU_TYPE_POWERPC64: + switch (CPUSubType & ~MachO::CPU_SUBTYPE_MASK) { + case MachO::CPU_SUBTYPE_POWERPC_ALL: + return Triple("ppc64-apple-darwin"); + default: + return Triple(); + } + default: + return Triple(); + } +} + +Triple MachOObjectFile::getHostArch() { + return Triple(sys::getDefaultTargetTriple()); +} + +Triple MachOObjectFile::getArch(StringRef ArchFlag) { + if (ArchFlag == "i386") + return Triple("i386-apple-darwin"); + else if (ArchFlag == "x86_64") + return Triple("x86_64-apple-darwin"); + else if (ArchFlag == "x86_64h") + return Triple("x86_64h-apple-darwin"); + 
else if (ArchFlag == "armv4t" || ArchFlag == "arm") + return Triple("armv4t-apple-darwin"); + else if (ArchFlag == "armv5e") + return Triple("armv5e-apple-darwin"); + else if (ArchFlag == "armv6") + return Triple("armv6-apple-darwin"); + else if (ArchFlag == "armv6m") + return Triple("armv6m-apple-darwin"); + else if (ArchFlag == "armv7em") + return Triple("armv7em-apple-darwin"); + else if (ArchFlag == "armv7k") + return Triple("armv7k-apple-darwin"); + else if (ArchFlag == "armv7m") + return Triple("armv7m-apple-darwin"); + else if (ArchFlag == "armv7s") + return Triple("armv7s-apple-darwin"); + else if (ArchFlag == "arm64") + return Triple("arm64-apple-darwin"); + else if (ArchFlag == "ppc") + return Triple("ppc-apple-darwin"); + else if (ArchFlag == "ppc64") + return Triple("ppc64-apple-darwin"); + else + return Triple(); +} + unsigned MachOObjectFile::getArch() const { return getArch(getCPUType(this)); } @@ -1498,6 +1826,12 @@ MachOObjectFile::getVersionMinLoadCommand(const LoadCommandInfo &L) const { return getStruct<MachO::version_min_command>(this, L.Ptr); } +MachO::dylib_command +MachOObjectFile::getDylibIDLoadCommand(const LoadCommandInfo &L) const { + return getStruct<MachO::dylib_command>(this, L.Ptr); +} + + MachO::any_relocation_info MachOObjectFile::getRelocation(DataRefImpl Rel) const { DataRefImpl Sec; @@ -1574,7 +1908,7 @@ StringRef MachOObjectFile::getStringTableData() const { bool MachOObjectFile::is64Bit() const { return getType() == getMachOType(false, true) || - getType() == getMachOType(true, true); + getType() == getMachOType(true, true); } void MachOObjectFile::ReadULEB128s(uint64_t Index, @@ -1589,23 +1923,25 @@ void MachOObjectFile::ReadULEB128s(uint64_t Index, } } -ErrorOr<ObjectFile *> ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer, - bool BufferOwned) { +const char *MachOObjectFile::getSectionPointer(DataRefImpl Rel) const { + return Sections[Rel.d.a]; +} + +ErrorOr<ObjectFile *> +ObjectFile::createMachOObjectFile(std::unique_ptr<MemoryBuffer> &Buffer) { StringRef Magic = Buffer->getBuffer().slice(0, 4); - error_code EC; + std::error_code EC; std::unique_ptr<MachOObjectFile> Ret; if (Magic == "\xFE\xED\xFA\xCE") - Ret.reset(new MachOObjectFile(Buffer, false, false, EC, BufferOwned)); + Ret.reset(new MachOObjectFile(std::move(Buffer), false, false, EC)); else if (Magic == "\xCE\xFA\xED\xFE") - Ret.reset(new MachOObjectFile(Buffer, true, false, EC, BufferOwned)); + Ret.reset(new MachOObjectFile(std::move(Buffer), true, false, EC)); else if (Magic == "\xFE\xED\xFA\xCF") - Ret.reset(new MachOObjectFile(Buffer, false, true, EC, BufferOwned)); + Ret.reset(new MachOObjectFile(std::move(Buffer), false, true, EC)); else if (Magic == "\xCF\xFA\xED\xFE") - Ret.reset(new MachOObjectFile(Buffer, true, true, EC, BufferOwned)); - else { - delete Buffer; + Ret.reset(new MachOObjectFile(std::move(Buffer), true, true, EC)); + else return object_error::parse_failed; - } if (EC) return EC; diff --git a/lib/Object/MachOUniversal.cpp b/lib/Object/MachOUniversal.cpp index 5085efd..4ba5d96 100644 --- a/lib/Object/MachOUniversal.cpp +++ b/lib/Object/MachOUniversal.cpp @@ -23,26 +23,21 @@ using namespace llvm; using namespace object; template<typename T> -static void SwapValue(T &Value) { - Value = sys::SwapByteOrder(Value); -} - -template<typename T> static void SwapStruct(T &Value); template<> void SwapStruct(MachO::fat_header &H) { - SwapValue(H.magic); - SwapValue(H.nfat_arch); + sys::swapByteOrder(H.magic); + sys::swapByteOrder(H.nfat_arch); } template<> void 
SwapStruct(MachO::fat_arch &H) { - SwapValue(H.cputype); - SwapValue(H.cpusubtype); - SwapValue(H.offset); - SwapValue(H.size); - SwapValue(H.align); + sys::swapByteOrder(H.cputype); + sys::swapByteOrder(H.cpusubtype); + sys::swapByteOrder(H.offset); + sys::swapByteOrder(H.size); + sys::swapByteOrder(H.align); } template<typename T> @@ -58,7 +53,7 @@ static T getUniversalBinaryStruct(const char *Ptr) { MachOUniversalBinary::ObjectForArch::ObjectForArch( const MachOUniversalBinary *Parent, uint32_t Index) : Parent(Parent), Index(Index) { - if (!Parent || Index > Parent->getNumberOfObjects()) { + if (!Parent || Index >= Parent->getNumberOfObjects()) { clear(); } else { // Parse object header. @@ -72,37 +67,29 @@ MachOUniversalBinary::ObjectForArch::ObjectForArch( } } -error_code MachOUniversalBinary::ObjectForArch::getAsObjectFile( - std::unique_ptr<ObjectFile> &Result) const { +ErrorOr<std::unique_ptr<ObjectFile>> +MachOUniversalBinary::ObjectForArch::getAsObjectFile() const { if (Parent) { StringRef ParentData = Parent->getData(); StringRef ObjectData = ParentData.substr(Header.offset, Header.size); - std::string ObjectName = - Parent->getFileName().str() + ":" + - Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); - MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer( - ObjectData, ObjectName, false); - ErrorOr<ObjectFile *> Obj = ObjectFile::createMachOObjectFile(ObjBuffer); - if (error_code EC = Obj.getError()) - return EC; - Result.reset(Obj.get()); - return object_error::success; + std::string ObjectName = Parent->getFileName().str(); + std::unique_ptr<MemoryBuffer> ObjBuffer( + MemoryBuffer::getMemBuffer(ObjectData, ObjectName, false)); + return ObjectFile::createMachOObjectFile(ObjBuffer); } return object_error::parse_failed; } -error_code MachOUniversalBinary::ObjectForArch::getAsArchive( +std::error_code MachOUniversalBinary::ObjectForArch::getAsArchive( std::unique_ptr<Archive> &Result) const { if (Parent) { StringRef ParentData = Parent->getData(); StringRef ObjectData = ParentData.substr(Header.offset, Header.size); - std::string ObjectName = - Parent->getFileName().str() + ":" + - Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); - MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer( - ObjectData, ObjectName, false); - ErrorOr<Archive *> Obj = Archive::create(ObjBuffer); - if (error_code EC = Obj.getError()) + std::string ObjectName = Parent->getFileName().str(); + std::unique_ptr<MemoryBuffer> ObjBuffer( + MemoryBuffer::getMemBuffer(ObjectData, ObjectName, false)); + ErrorOr<Archive *> Obj = Archive::create(std::move(ObjBuffer)); + if (std::error_code EC = Obj.getError()) return EC; Result.reset(Obj.get()); return object_error::success; @@ -113,20 +100,20 @@ error_code MachOUniversalBinary::ObjectForArch::getAsArchive( void MachOUniversalBinary::anchor() { } ErrorOr<MachOUniversalBinary *> -MachOUniversalBinary::create(MemoryBuffer *Source) { - error_code EC; +MachOUniversalBinary::create(std::unique_ptr<MemoryBuffer> Source) { + std::error_code EC; std::unique_ptr<MachOUniversalBinary> Ret( - new MachOUniversalBinary(Source, EC)); + new MachOUniversalBinary(std::move(Source), EC)); if (EC) return EC; return Ret.release(); } -MachOUniversalBinary::MachOUniversalBinary(MemoryBuffer *Source, - error_code &ec) - : Binary(Binary::ID_MachOUniversalBinary, Source), - NumberOfObjects(0) { - if (Source->getBufferSize() < sizeof(MachO::fat_header)) { +MachOUniversalBinary::MachOUniversalBinary(std::unique_ptr<MemoryBuffer> Source, + std::error_code 
&ec) + : Binary(Binary::ID_MachOUniversalBinary, std::move(Source)), + NumberOfObjects(0) { + if (Data->getBufferSize() < sizeof(MachO::fat_header)) { ec = object_error::invalid_file_type; return; } @@ -155,14 +142,14 @@ static bool getCTMForArch(Triple::ArchType Arch, MachO::CPUType &CTM) { } } -error_code MachOUniversalBinary::getObjectForArch( - Triple::ArchType Arch, std::unique_ptr<ObjectFile> &Result) const { +ErrorOr<std::unique_ptr<ObjectFile>> +MachOUniversalBinary::getObjectForArch(Triple::ArchType Arch) const { MachO::CPUType CTM; if (!getCTMForArch(Arch, CTM)) return object_error::arch_not_found; for (object_iterator I = begin_objects(), E = end_objects(); I != E; ++I) { if (I->getCPUType() == static_cast<uint32_t>(CTM)) - return I->getAsObjectFile(Result); + return I->getAsObjectFile(); } return object_error::arch_not_found; } diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index b0068a8..567d87f 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -59,7 +59,9 @@ wrap(const relocation_iterator *SI) { // ObjectFile creation LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) { - ErrorOr<ObjectFile*> ObjOrErr(ObjectFile::createObjectFile(unwrap(MemBuf))); + std::unique_ptr<MemoryBuffer> Buf(unwrap(MemBuf)); + ErrorOr<ObjectFile *> ObjOrErr(ObjectFile::createObjectFile(Buf)); + Buf.release(); ObjectFile *Obj = ObjOrErr ? ObjOrErr.get() : nullptr; return wrap(Obj); } @@ -89,7 +91,7 @@ void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect, LLVMSymbolIteratorRef Sym) { - if (error_code ec = (*unwrap(Sym))->getSection(*unwrap(Sect))) + if (std::error_code ec = (*unwrap(Sym))->getSection(*unwrap(Sect))) report_fatal_error(ec.message()); } @@ -115,28 +117,28 @@ void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) { // SectionRef accessors const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) { StringRef ret; - if (error_code ec = (*unwrap(SI))->getName(ret)) + if (std::error_code ec = (*unwrap(SI))->getName(ret)) report_fatal_error(ec.message()); return ret.data(); } uint64_t LLVMGetSectionSize(LLVMSectionIteratorRef SI) { uint64_t ret; - if (error_code ec = (*unwrap(SI))->getSize(ret)) + if (std::error_code ec = (*unwrap(SI))->getSize(ret)) report_fatal_error(ec.message()); return ret; } const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) { StringRef ret; - if (error_code ec = (*unwrap(SI))->getContents(ret)) + if (std::error_code ec = (*unwrap(SI))->getContents(ret)) report_fatal_error(ec.message()); return ret.data(); } uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) { uint64_t ret; - if (error_code ec = (*unwrap(SI))->getAddress(ret)) + if (std::error_code ec = (*unwrap(SI))->getAddress(ret)) report_fatal_error(ec.message()); return ret; } @@ -144,7 +146,7 @@ uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) { LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI, LLVMSymbolIteratorRef Sym) { bool ret; - if (error_code ec = (*unwrap(SI))->containsSymbol(**unwrap(Sym), ret)) + if (std::error_code ec = (*unwrap(SI))->containsSymbol(**unwrap(Sym), ret)) report_fatal_error(ec.message()); return ret; } @@ -172,21 +174,21 @@ void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef SI) { // SymbolRef accessors const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) { StringRef ret; - if (error_code ec = (*unwrap(SI))->getName(ret)) + if (std::error_code ec = (*unwrap(SI))->getName(ret)) report_fatal_error(ec.message()); return ret.data(); } uint64_t 
LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) { uint64_t ret; - if (error_code ec = (*unwrap(SI))->getAddress(ret)) + if (std::error_code ec = (*unwrap(SI))->getAddress(ret)) report_fatal_error(ec.message()); return ret; } uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { uint64_t ret; - if (error_code ec = (*unwrap(SI))->getSize(ret)) + if (std::error_code ec = (*unwrap(SI))->getSize(ret)) report_fatal_error(ec.message()); return ret; } @@ -194,14 +196,14 @@ uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { // RelocationRef accessors uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) { uint64_t ret; - if (error_code ec = (*unwrap(RI))->getAddress(ret)) + if (std::error_code ec = (*unwrap(RI))->getAddress(ret)) report_fatal_error(ec.message()); return ret; } uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) { uint64_t ret; - if (error_code ec = (*unwrap(RI))->getOffset(ret)) + if (std::error_code ec = (*unwrap(RI))->getOffset(ret)) report_fatal_error(ec.message()); return ret; } @@ -213,7 +215,7 @@ LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) { uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) { uint64_t ret; - if (error_code ec = (*unwrap(RI))->getType(ret)) + if (std::error_code ec = (*unwrap(RI))->getType(ret)) report_fatal_error(ec.message()); return ret; } @@ -221,7 +223,7 @@ uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) { // NOTE: Caller takes ownership of returned string. const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) { SmallVector<char, 0> ret; - if (error_code ec = (*unwrap(RI))->getTypeName(ret)) + if (std::error_code ec = (*unwrap(RI))->getTypeName(ret)) report_fatal_error(ec.message()); char *str = static_cast<char*>(malloc(ret.size())); @@ -232,7 +234,7 @@ const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) { // NOTE: Caller takes ownership of returned string. 
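A caller-side sketch of the ownership contract these NOTEs describe (hypothetical usage, not from the patch; the buffer is allocated with malloc() by the C API, so the caller releases it with free()):

    const char *Val = LLVMGetRelocationValueString(RI);
    outs() << Val << "\n";
    free(const_cast<char *>(Val)); // caller owns the returned string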
const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI) { SmallVector<char, 0> ret; - if (error_code ec = (*unwrap(RI))->getValueString(ret)) + if (std::error_code ec = (*unwrap(RI))->getValueString(ret)) report_fatal_error(ec.message()); char *str = static_cast<char*>(malloc(ret.size())); diff --git a/lib/Object/ObjectFile.cpp b/lib/Object/ObjectFile.cpp index d30f0cc..f5488c6 100644 --- a/lib/Object/ObjectFile.cpp +++ b/lib/Object/ObjectFile.cpp @@ -16,28 +16,27 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" +#include <system_error> using namespace llvm; using namespace object; void ObjectFile::anchor() { } -ObjectFile::ObjectFile(unsigned int Type, MemoryBuffer *Source, - bool BufferOwned) - : SymbolicFile(Type, Source, BufferOwned) {} +ObjectFile::ObjectFile(unsigned int Type, std::unique_ptr<MemoryBuffer> Source) + : SymbolicFile(Type, std::move(Source)) {} -error_code ObjectFile::printSymbolName(raw_ostream &OS, - DataRefImpl Symb) const { +std::error_code ObjectFile::printSymbolName(raw_ostream &OS, + DataRefImpl Symb) const { StringRef Name; - if (error_code EC = getSymbolName(Symb, Name)) + if (std::error_code EC = getSymbolName(Symb, Name)) return EC; OS << Name; return object_error::success; } -error_code ObjectFile::getSymbolAlignment(DataRefImpl DRI, - uint32_t &Result) const { +std::error_code ObjectFile::getSymbolAlignment(DataRefImpl DRI, + uint32_t &Result) const { Result = 0; return object_error::success; } @@ -46,9 +45,9 @@ section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { return section_iterator(SectionRef(Sec, this)); } -ErrorOr<ObjectFile *> ObjectFile::createObjectFile(MemoryBuffer *Object, - bool BufferOwned, - sys::fs::file_magic Type) { +ErrorOr<ObjectFile *> +ObjectFile::createObjectFile(std::unique_ptr<MemoryBuffer> &Object, + sys::fs::file_magic Type) { if (Type == sys::fs::file_magic::unknown) Type = sys::fs::identify_magic(Object->getBuffer()); @@ -58,14 +57,12 @@ ErrorOr<ObjectFile *> ObjectFile::createObjectFile(MemoryBuffer *Object, case sys::fs::file_magic::archive: case sys::fs::file_magic::macho_universal_binary: case sys::fs::file_magic::windows_resource: - if (BufferOwned) - delete Object; return object_error::invalid_file_type; case sys::fs::file_magic::elf_relocatable: case sys::fs::file_magic::elf_executable: case sys::fs::file_magic::elf_shared_object: case sys::fs::file_magic::elf_core: - return createELFObjectFile(Object, BufferOwned); + return createELFObjectFile(Object); case sys::fs::file_magic::macho_object: case sys::fs::file_magic::macho_executable: case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: @@ -76,18 +73,19 @@ ErrorOr<ObjectFile *> ObjectFile::createObjectFile(MemoryBuffer *Object, case sys::fs::file_magic::macho_bundle: case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: case sys::fs::file_magic::macho_dsym_companion: - return createMachOObjectFile(Object, BufferOwned); + return createMachOObjectFile(Object); case sys::fs::file_magic::coff_object: case sys::fs::file_magic::coff_import_library: case sys::fs::file_magic::pecoff_executable: - return createCOFFObjectFile(Object, BufferOwned); + return createCOFFObjectFile(std::move(Object)); } llvm_unreachable("Unexpected Object File Type"); } ErrorOr<ObjectFile *> ObjectFile::createObjectFile(StringRef ObjectPath) { - std::unique_ptr<MemoryBuffer> File; - if (error_code EC = 
MemoryBuffer::getFile(ObjectPath, File)) + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFile(ObjectPath); + if (std::error_code EC = FileOrErr.getError()) return EC; - return createObjectFile(File.release()); + return createObjectFile(FileOrErr.get()); } diff --git a/lib/Object/RecordStreamer.cpp b/lib/Object/RecordStreamer.cpp new file mode 100644 index 0000000..081fadd --- /dev/null +++ b/lib/Object/RecordStreamer.cpp @@ -0,0 +1,100 @@ +//===-- RecordStreamer.cpp - Record asm defined and used symbols ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "RecordStreamer.h" +#include "llvm/MC/MCSymbol.h" +using namespace llvm; + +void RecordStreamer::markDefined(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Global: + S = DefinedGlobal; + break; + case NeverSeen: + case Defined: + case Used: + S = Defined; + break; + } +} + +void RecordStreamer::markGlobal(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + S = DefinedGlobal; + break; + + case NeverSeen: + case Global: + case Used: + S = Global; + break; + } +} + +void RecordStreamer::markUsed(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + case Global: + break; + + case NeverSeen: + case Used: + S = Used; + break; + } +} + +void RecordStreamer::visitUsedSymbol(const MCSymbol &Sym) { markUsed(Sym); } + +RecordStreamer::const_iterator RecordStreamer::begin() { + return Symbols.begin(); +} + +RecordStreamer::const_iterator RecordStreamer::end() { return Symbols.end(); } + +RecordStreamer::RecordStreamer(MCContext &Context) : MCStreamer(Context) {} + +void RecordStreamer::EmitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) { + MCStreamer::EmitInstruction(Inst, STI); +} + +void RecordStreamer::EmitLabel(MCSymbol *Symbol) { + MCStreamer::EmitLabel(Symbol); + markDefined(*Symbol); +} + +void RecordStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + markDefined(*Symbol); + MCStreamer::EmitAssignment(Symbol, Value); +} + +bool RecordStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { + if (Attribute == MCSA_Global) + markGlobal(*Symbol); + return true; +} + +void RecordStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) { + markDefined(*Symbol); +} + +void RecordStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + markDefined(*Symbol); +} diff --git a/lib/Object/RecordStreamer.h b/lib/Object/RecordStreamer.h new file mode 100644 index 0000000..10e70ef --- /dev/null +++ b/lib/Object/RecordStreamer.h @@ -0,0 +1,42 @@ +//===-- RecordStreamer.h - Record asm defined and used symbols ---*- C++ -*===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_OBJECT_RECORD_STREAMER +#define LLVM_OBJECT_RECORD_STREAMER + +#include "llvm/MC/MCStreamer.h" + +namespace llvm { +class RecordStreamer : public MCStreamer { +public: + enum State { NeverSeen, Global, Defined, DefinedGlobal, Used }; + +private: + StringMap<State> Symbols; + void markDefined(const MCSymbol &Symbol); + void markGlobal(const MCSymbol &Symbol); + void markUsed(const MCSymbol &Symbol); + void visitUsedSymbol(const MCSymbol &Sym) override; + +public: + typedef StringMap<State>::const_iterator const_iterator; + const_iterator begin(); + const_iterator end(); + RecordStreamer(MCContext &Context); + void EmitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override; + void EmitLabel(MCSymbol *Symbol) override; + void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) override; + bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override; + void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) override; + void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) override; +}; +} +#endif diff --git a/lib/Object/SymbolicFile.cpp b/lib/Object/SymbolicFile.cpp index 495f0b6..30cf1a0 100644 --- a/lib/Object/SymbolicFile.cpp +++ b/lib/Object/SymbolicFile.cpp @@ -19,14 +19,14 @@ using namespace llvm; using namespace object; -SymbolicFile::SymbolicFile(unsigned int Type, MemoryBuffer *Source, - bool BufferOwned) - : Binary(Type, Source, BufferOwned) {} +SymbolicFile::SymbolicFile(unsigned int Type, + std::unique_ptr<MemoryBuffer> Source) + : Binary(Type, std::move(Source)) {} SymbolicFile::~SymbolicFile() {} ErrorOr<SymbolicFile *> -SymbolicFile::createSymbolicFile(MemoryBuffer *Object, bool BufferOwned, +SymbolicFile::createSymbolicFile(std::unique_ptr<MemoryBuffer> &Object, sys::fs::file_magic Type, LLVMContext *Context) { if (Type == sys::fs::file_magic::unknown) @@ -35,14 +35,12 @@ SymbolicFile::createSymbolicFile(MemoryBuffer *Object, bool BufferOwned, switch (Type) { case sys::fs::file_magic::bitcode: if (Context) - return IRObjectFile::createIRObjectFile(Object, *Context, BufferOwned); + return IRObjectFile::createIRObjectFile(std::move(Object), *Context); // Fallthrough case sys::fs::file_magic::unknown: case sys::fs::file_magic::archive: case sys::fs::file_magic::macho_universal_binary: case sys::fs::file_magic::windows_resource: - if (BufferOwned) - delete Object; return object_error::invalid_file_type; case sys::fs::file_magic::elf_relocatable: case sys::fs::file_magic::elf_executable: @@ -61,7 +59,7 @@ SymbolicFile::createSymbolicFile(MemoryBuffer *Object, bool BufferOwned, case sys::fs::file_magic::coff_object: case sys::fs::file_magic::coff_import_library: case sys::fs::file_magic::pecoff_executable: - return ObjectFile::createObjectFile(Object, BufferOwned, Type); + return ObjectFile::createObjectFile(Object, Type); } llvm_unreachable("Unexpected Binary File Type"); } diff --git a/lib/Option/ArgList.cpp b/lib/Option/ArgList.cpp index a5ab8d7..5848bb1 100644 --- a/lib/Option/ArgList.cpp +++ b/lib/Option/ArgList.cpp @@ -234,44 +234,40 @@ void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0, void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const { - for (arg_iterator it = filtered_begin(Id0, Id1, Id2), - ie = filtered_end(); it != ie; ++it) { - (*it)->claim(); - (*it)->render(*this, Output); + for (auto Arg: 
filtered(Id0, Id1, Id2)) { + Arg->claim(); + Arg->render(*this, Output); } } void ArgList::AddAllArgValues(ArgStringList &Output, OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const { - for (arg_iterator it = filtered_begin(Id0, Id1, Id2), - ie = filtered_end(); it != ie; ++it) { - (*it)->claim(); - for (unsigned i = 0, e = (*it)->getNumValues(); i != e; ++i) - Output.push_back((*it)->getValue(i)); + for (auto Arg : filtered(Id0, Id1, Id2)) { + Arg->claim(); + for (unsigned i = 0, e = Arg->getNumValues(); i != e; ++i) + Output.push_back(Arg->getValue(i)); } } void ArgList::AddAllArgsTranslated(ArgStringList &Output, OptSpecifier Id0, const char *Translation, bool Joined) const { - for (arg_iterator it = filtered_begin(Id0), - ie = filtered_end(); it != ie; ++it) { - (*it)->claim(); + for (auto Arg: filtered(Id0)) { + Arg->claim(); if (Joined) { Output.push_back(MakeArgString(StringRef(Translation) + - (*it)->getValue(0))); + Arg->getValue(0))); } else { Output.push_back(Translation); - Output.push_back((*it)->getValue(0)); + Output.push_back(Arg->getValue(0)); } } } void ArgList::ClaimAllArgs(OptSpecifier Id0) const { - for (arg_iterator it = filtered_begin(Id0), - ie = filtered_end(); it != ie; ++it) - (*it)->claim(); + for (auto Arg : filtered(Id0)) + Arg->claim(); } void ArgList::ClaimAllArgs() const { @@ -350,30 +346,27 @@ void DerivedArgList::AddSynthesizedArg(Arg *A) { } Arg *DerivedArgList::MakeFlagArg(const Arg *BaseArg, const Option Opt) const { - SynthesizedArgs.push_back(make_unique<Arg>( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - BaseArgs.MakeIndex(Opt.getName()), BaseArg)); + SynthesizedArgs.push_back( + make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + BaseArgs.MakeIndex(Opt.getName()), BaseArg)); return SynthesizedArgs.back().get(); } Arg *DerivedArgList::MakePositionalArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Value); - SynthesizedArgs.push_back(make_unique<Arg>( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index), BaseArg)); + SynthesizedArgs.push_back( + make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + Index, BaseArgs.getArgString(Index), BaseArg)); return SynthesizedArgs.back().get(); } Arg *DerivedArgList::MakeSeparateArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Opt.getName(), Value); - SynthesizedArgs.push_back(make_unique<Arg>( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index + 1), BaseArg)); + SynthesizedArgs.push_back( + make_unique<Arg>(Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), + Index, BaseArgs.getArgString(Index + 1), BaseArg)); return SynthesizedArgs.back().get(); } @@ -381,8 +374,7 @@ Arg *DerivedArgList::MakeJoinedArg(const Arg *BaseArg, const Option Opt, StringRef Value) const { unsigned Index = BaseArgs.MakeIndex(Opt.getName().str() + Value.str()); SynthesizedArgs.push_back(make_unique<Arg>( - Opt, - ArgList::MakeArgString(Twine(Opt.getPrefix()) + Twine(Opt.getName())), - Index, BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); + Opt, MakeArgString(Opt.getPrefix() + Opt.getName()), Index, + BaseArgs.getArgString(Index) + Opt.getName().size(), BaseArg)); return SynthesizedArgs.back().get(); } diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index de2b13d..0121222 100644 
--- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -18,10 +18,10 @@ using namespace llvm; namespace { -class InstrProfErrorCategoryType : public error_category { - const char *name() const override { return "llvm.instrprof"; } +class InstrProfErrorCategoryType : public std::error_category { + const char *name() const LLVM_NOEXCEPT override { return "llvm.instrprof"; } std::string message(int IE) const override { - instrprof_error::ErrorType E = static_cast<instrprof_error::ErrorType>(IE); + instrprof_error E = static_cast<instrprof_error>(IE); switch (E) { case instrprof_error::success: return "Success"; @@ -52,15 +52,10 @@ class InstrProfErrorCategoryType : public error_category { } llvm_unreachable("A value of instrprof_error has no message."); } - error_condition default_error_condition(int EV) const override { - if (EV == instrprof_error::success) - return errc::success; - return errc::invalid_argument; - } }; } -const error_category &llvm::instrprof_category() { +const std::error_category &llvm::instrprof_category() { static InstrProfErrorCategoryType C; return C; } diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index 7014f5e..0b36728 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -21,10 +21,13 @@ using namespace llvm; -static error_code setupMemoryBuffer(std::string Path, - std::unique_ptr<MemoryBuffer> &Buffer) { - if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer)) +static std::error_code +setupMemoryBuffer(std::string Path, std::unique_ptr<MemoryBuffer> &Buffer) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFileOrSTDIN(Path); + if (std::error_code EC = BufferOrErr.getError()) return EC; + Buffer = std::move(BufferOrErr.get()); // Sanity check the file. if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max()) @@ -32,15 +35,16 @@ static error_code setupMemoryBuffer(std::string Path, return instrprof_error::success; } -static error_code initializeReader(InstrProfReader &Reader) { +static std::error_code initializeReader(InstrProfReader &Reader) { return Reader.readHeader(); } -error_code InstrProfReader::create(std::string Path, - std::unique_ptr<InstrProfReader> &Result) { +std::error_code +InstrProfReader::create(std::string Path, + std::unique_ptr<InstrProfReader> &Result) { // Set up the buffer to read. std::unique_ptr<MemoryBuffer> Buffer; - if (error_code EC = setupMemoryBuffer(Path, Buffer)) + if (std::error_code EC = setupMemoryBuffer(Path, Buffer)) return EC; // Create the reader. @@ -57,11 +61,11 @@ error_code InstrProfReader::create(std::string Path, return initializeReader(*Result); } -error_code IndexedInstrProfReader::create( +std::error_code IndexedInstrProfReader::create( std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) { // Set up the buffer to read. std::unique_ptr<MemoryBuffer> Buffer; - if (error_code EC = setupMemoryBuffer(Path, Buffer)) + if (std::error_code EC = setupMemoryBuffer(Path, Buffer)) return EC; // Create the reader. @@ -78,7 +82,7 @@ void InstrProfIterator::Increment() { *this = InstrProfIterator(); } -error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { +std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { // Skip empty lines. 
while (!Line.is_at_end() && Line->empty()) ++Line; @@ -157,11 +161,11 @@ bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) { uint64_t Magic = *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart()); return getRawMagic<IntPtrT>() == Magic || - sys::SwapByteOrder(getRawMagic<IntPtrT>()) == Magic; + sys::getSwappedBytes(getRawMagic<IntPtrT>()) == Magic; } template <class IntPtrT> -error_code RawInstrProfReader<IntPtrT>::readHeader() { +std::error_code RawInstrProfReader<IntPtrT>::readHeader() { if (!hasFormat(*DataBuffer)) return error(instrprof_error::bad_magic); if (DataBuffer->getBufferSize() < sizeof(RawHeader)) @@ -173,7 +177,8 @@ error_code RawInstrProfReader<IntPtrT>::readHeader() { } template <class IntPtrT> -error_code RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { +std::error_code +RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) { const char *End = DataBuffer->getBufferEnd(); // Skip zero padding between profiles. while (CurrentPos != End && *CurrentPos == 0) @@ -200,7 +205,8 @@ static uint64_t getRawVersion() { } template <class IntPtrT> -error_code RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) { +std::error_code +RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) { if (swap(Header.Version) != getRawVersion()) return error(instrprof_error::unsupported_version); @@ -229,10 +235,10 @@ error_code RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) { } template <class IntPtrT> -error_code +std::error_code RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) { if (Data == DataEnd) - if (error_code EC = readNextHeader(ProfileEnd)) + if (std::error_code EC = readNextHeader(ProfileEnd)) return EC; // Get the raw data. @@ -286,7 +292,7 @@ bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) { return Magic == IndexedInstrProf::Magic; } -error_code IndexedInstrProfReader::readHeader() { +std::error_code IndexedInstrProfReader::readHeader() { const unsigned char *Start = (const unsigned char *)DataBuffer->getBufferStart(); const unsigned char *Cur = Start; @@ -324,7 +330,7 @@ error_code IndexedInstrProfReader::readHeader() { return success(); } -error_code IndexedInstrProfReader::getFunctionCounts( +std::error_code IndexedInstrProfReader::getFunctionCounts( StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) { const auto &Iter = Index->find(FuncName); if (Iter == Index->end()) @@ -339,7 +345,8 @@ error_code IndexedInstrProfReader::getFunctionCounts( return success(); } -error_code IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { +std::error_code +IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) { // Are we out of records? if (RecordIterator == Index->data_end()) return error(instrprof_error::eof); diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp index 83c41d9..e55c299 100644 --- a/lib/ProfileData/InstrProfWriter.cpp +++ b/lib/ProfileData/InstrProfWriter.cpp @@ -66,9 +66,10 @@ public: }; } -error_code InstrProfWriter::addFunctionCounts(StringRef FunctionName, - uint64_t FunctionHash, - ArrayRef<uint64_t> Counters) { +std::error_code +InstrProfWriter::addFunctionCounts(StringRef FunctionName, + uint64_t FunctionHash, + ArrayRef<uint64_t> Counters) { auto Where = FunctionData.find(FunctionName); if (Where == FunctionData.end()) { // If this is the first time we've seen this function, just add it. 
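For orientation, a rough reader-side sketch under the new std::error_code API shown in the ProfileData hunks above (the profile path is illustrative, and the record field names are assumed from this release's InstrProfRecord):

    std::unique_ptr<InstrProfReader> Reader;
    if (std::error_code EC = InstrProfReader::create("default.profdata", Reader))
      report_fatal_error(EC.message());
    for (const InstrProfRecord &Rec : *Reader) // InstrProfIterator drives this
      outs() << Rec.Name << ": " << Rec.Counts.size() << " counters\n";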
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index f9fe095..7989e30 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -1372,7 +1372,9 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) case PackCategoriesIntoKey(fcZero, fcNaN): case PackCategoriesIntoKey(fcNormal, fcNaN): case PackCategoriesIntoKey(fcInfinity, fcNaN): - sign = false; + // We need to be sure to flip the sign here for subtraction because we + // don't have a separate negate operation so -NaN becomes 0 - NaN here. + sign = rhs.sign ^ subtract; category = fcNaN; copySignificand(rhs); return opOK; diff --git a/lib/Support/ARMWinEH.cpp b/lib/Support/ARMWinEH.cpp new file mode 100644 index 0000000..03c150f --- /dev/null +++ b/lib/Support/ARMWinEH.cpp @@ -0,0 +1,38 @@ +//===-- ARMWinEH.cpp - Windows on ARM EH Support Functions ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ARMWinEH.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { +namespace ARM { +namespace WinEH { +std::pair<uint16_t, uint32_t> SavedRegisterMask(const RuntimeFunction &RF) { + uint8_t NumRegisters = RF.Reg(); + uint8_t RegistersVFP = RF.R(); + uint8_t LinkRegister = RF.L(); + uint8_t ChainedFrame = RF.C(); + + uint16_t GPRMask = (ChainedFrame << 11) | (LinkRegister << 14); + uint32_t VFPMask = 0; + + if (RegistersVFP) + VFPMask |= (((1 << ((NumRegisters + 1) % 8)) - 1) << 8); + else + GPRMask |= (((1 << (NumRegisters + 1)) - 1) << 4); + + if (PrologueFolding(RF)) + GPRMask |= (((1 << (NumRegisters + 1)) - 1) << (~RF.StackAdjust() & 0x3)); + + return std::make_pair(GPRMask, VFPMask); +} +} +} +} + diff --git a/lib/Support/Android.mk b/lib/Support/Android.mk index 6efccf5..5de8d3f 100644 --- a/lib/Support/Android.mk +++ b/lib/Support/Android.mk @@ -6,6 +6,7 @@ support_SRC_FILES := \ APInt.cpp \ APSInt.cpp \ ARMBuildAttrs.cpp \ + ARMWinEH.cpp \ Atomic.cpp \ BlockFrequency.cpp \ BranchProbability.cpp \ @@ -49,13 +50,16 @@ support_SRC_FILES := \ PrettyStackTrace.cpp \ Process.cpp \ Program.cpp \ + RandomNumberGenerator.cpp \ Regex.cpp \ RWMutex.cpp \ + ScaledNumber.cpp \ SearchForAddressOfSpecialSymbol.cpp \ Signals.cpp \ SmallPtrSet.cpp \ SmallVector.cpp \ SourceMgr.cpp \ + SpecialCaseList.cpp \ Statistic.cpp \ StreamableMemoryObject.cpp \ StringExtras.cpp \ @@ -84,8 +88,7 @@ support_SRC_FILES := \ regerror.c \ regexec.c \ regfree.c \ - regstrlcpy.c \ - system_error.cpp + regstrlcpy.c # For the host diff --git a/lib/Support/Atomic.cpp b/lib/Support/Atomic.cpp index 2ef32b0..ac4ff3e 100644 --- a/lib/Support/Atomic.cpp +++ b/lib/Support/Atomic.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements atomic operations. +// This file implements atomic operations. 
// //===----------------------------------------------------------------------===// diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt index b4c674d..9ecd559 100644 --- a/lib/Support/CMakeLists.txt +++ b/lib/Support/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMSupport APInt.cpp APSInt.cpp ARMBuildAttrs.cpp + ARMWinEH.cpp Allocator.cpp BlockFrequency.cpp BranchProbability.cpp @@ -40,10 +41,13 @@ add_llvm_library(LLVMSupport MD5.cpp PluginLoader.cpp PrettyStackTrace.cpp + RandomNumberGenerator.cpp Regex.cpp + ScaledNumber.cpp SmallPtrSet.cpp SmallVector.cpp SourceMgr.cpp + SpecialCaseList.cpp Statistic.cpp StreamableMemoryObject.cpp StringExtras.cpp @@ -82,7 +86,6 @@ add_llvm_library(LLVMSupport RWMutex.cpp SearchForAddressOfSpecialSymbol.cpp Signals.cpp - system_error.cpp TargetRegistry.cpp ThreadLocal.cpp Threading.cpp @@ -99,7 +102,6 @@ add_llvm_library(LLVMSupport Unix/Program.inc Unix/RWMutex.inc Unix/Signals.inc - Unix/system_error.inc Unix/ThreadLocal.inc Unix/TimeValue.inc Unix/Watchdog.inc @@ -112,7 +114,6 @@ add_llvm_library(LLVMSupport Windows/Program.inc Windows/RWMutex.inc Windows/Signals.inc - Windows/system_error.inc Windows/ThreadLocal.inc Windows/TimeValue.inc Windows/Watchdog.inc diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 37bbf48..87348f7 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -31,10 +31,10 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" #include <cerrno> #include <cstdlib> #include <map> +#include <system_error> using namespace llvm; using namespace cl; @@ -145,6 +145,7 @@ void OptionCategory::registerCategory() { static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts, SmallVectorImpl<Option*> &SinkOpts, StringMap<Option*> &OptionsMap) { + bool HadErrors = false; SmallVector<const char*, 16> OptionNames; Option *CAOpt = nullptr; // The ConsumeAfter option if it exists. for (Option *O = RegisteredOptionList; O; O = O->getNextRegisteredOption()) { @@ -158,8 +159,9 @@ static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts, for (size_t i = 0, e = OptionNames.size(); i != e; ++i) { // Add argument to the argument map! if (OptionsMap.GetOrCreateValue(OptionNames[i], O).second != O) { - errs() << ProgramName << ": CommandLine Error: Argument '" - << OptionNames[i] << "' defined more than once!\n"; + errs() << ProgramName << ": CommandLine Error: Option '" + << OptionNames[i] << "' registered more than once!\n"; + HadErrors = true; } } @@ -171,8 +173,10 @@ static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts, else if (O->getMiscFlags() & cl::Sink) // Remember sink options SinkOpts.push_back(O); else if (O->getNumOccurrencesFlag() == cl::ConsumeAfter) { - if (CAOpt) + if (CAOpt) { O->error("Cannot specify more than one option with cl::ConsumeAfter!"); + HadErrors = true; + } CAOpt = O; } } @@ -182,6 +186,12 @@ static void GetOptionInfo(SmallVectorImpl<Option*> &PositionalOpts, // Make sure that they are in order of registration not backwards. std::reverse(PositionalOpts.begin(), PositionalOpts.end()); + + // Fail hard if there were errors. These are strictly unrecoverable and + // indicate serious issues such as conflicting option names or an incorrectly + // linked LLVM distribution. 
+ if (HadErrors) + report_fatal_error("inconsistency in registered CommandLine options"); } @@ -621,9 +631,11 @@ void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, static bool ExpandResponseFile(const char *FName, StringSaver &Saver, TokenizerCallback Tokenizer, SmallVectorImpl<const char *> &NewArgv) { - std::unique_ptr<MemoryBuffer> MemBuf; - if (MemoryBuffer::getFile(FName, MemBuf)) + ErrorOr<std::unique_ptr<MemoryBuffer>> MemBufOrErr = + MemoryBuffer::getFile(FName); + if (!MemBufOrErr) return false; + std::unique_ptr<MemoryBuffer> MemBuf = std::move(MemBufOrErr.get()); StringRef Str(MemBuf->getBufferStart(), MemBuf->getBufferSize()); // If we have a UTF-16 byte order mark, convert to UTF-8 for parsing. @@ -1699,7 +1711,7 @@ public: OS << "LLVM (http://llvm.org/):\n" << " " << PACKAGE_NAME << " version " << PACKAGE_VERSION; #ifdef LLVM_VERSION_INFO - OS << LLVM_VERSION_INFO; + OS << " " << LLVM_VERSION_INFO; #endif OS << "\n "; #ifndef __OPTIMIZE__ diff --git a/lib/Support/ConvertUTF.c b/lib/Support/ConvertUTF.c index 23f17ca..128459a 100644 --- a/lib/Support/ConvertUTF.c +++ b/lib/Support/ConvertUTF.c @@ -51,6 +51,7 @@ #ifdef CVTUTF_DEBUG #include <stdio.h> #endif +#include <assert.h> static const int halfShift = 10; /* used for shifting by 10 bits */ @@ -392,6 +393,99 @@ Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) { /* --------------------------------------------------------------------- */ +static unsigned +findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source, + const UTF8 *sourceEnd) { + UTF8 b1, b2, b3; + + assert(!isLegalUTF8Sequence(source, sourceEnd)); + + /* + * Unicode 6.3.0, D93b: + * + * Maximal subpart of an ill-formed subsequence: The longest code unit + * subsequence starting at an unconvertible offset that is either: + * a. the initial subsequence of a well-formed code unit sequence, or + * b. a subsequence of length one. + */ + + if (source == sourceEnd) + return 0; + + /* + * Perform case analysis. See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8 + * Byte Sequences. + */ + + b1 = *source; + ++source; + if (b1 >= 0xC2 && b1 <= 0xDF) { + /* + * First byte is valid, but we know that this code unit sequence is + * invalid, so the maximal subpart has to end after the first byte. + */ + return 1; + } + + if (source == sourceEnd) + return 1; + + b2 = *source; + ++source; + + if (b1 == 0xE0) { + return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1; + } + if (b1 >= 0xE1 && b1 <= 0xEC) { + return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1; + } + if (b1 == 0xED) { + return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1; + } + if (b1 >= 0xEE && b1 <= 0xEF) { + return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1; + } + if (b1 == 0xF0) { + if (b2 >= 0x90 && b2 <= 0xBF) { + if (source == sourceEnd) + return 2; + + b3 = *source; + return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2; + } + return 1; + } + if (b1 >= 0xF1 && b1 <= 0xF3) { + if (b2 >= 0x80 && b2 <= 0xBF) { + if (source == sourceEnd) + return 2; + + b3 = *source; + return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2; + } + return 1; + } + if (b1 == 0xF4) { + if (b2 >= 0x80 && b2 <= 0x8F) { + if (source == sourceEnd) + return 2; + + b3 = *source; + return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2; + } + return 1; + } + + assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5); + /* + * There are no valid sequences that start with these bytes. Maximal subpart + * is defined to have length 1 in these cases. 
+ */ + return 1; +} + +/* --------------------------------------------------------------------- */ + /* * Exported function to return the total number of bytes in a codepoint * represented in UTF-8, given the value of the first byte. @@ -491,9 +585,10 @@ ConversionResult ConvertUTF8toUTF16 ( /* --------------------------------------------------------------------- */ -ConversionResult ConvertUTF8toUTF32 ( +static ConversionResult ConvertUTF8toUTF32Impl( const UTF8** sourceStart, const UTF8* sourceEnd, - UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) { + UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags, + Boolean InputIsPartial) { ConversionResult result = conversionOK; const UTF8* source = *sourceStart; UTF32* target = *targetStart; @@ -501,12 +596,42 @@ ConversionResult ConvertUTF8toUTF32 ( UTF32 ch = 0; unsigned short extraBytesToRead = trailingBytesForUTF8[*source]; if (extraBytesToRead >= sourceEnd - source) { - result = sourceExhausted; break; + if (flags == strictConversion || InputIsPartial) { + result = sourceExhausted; + break; + } else { + result = sourceIllegal; + + /* + * Replace the maximal subpart of ill-formed sequence with + * replacement character. + */ + source += findMaximalSubpartOfIllFormedUTF8Sequence(source, + sourceEnd); + *target++ = UNI_REPLACEMENT_CHAR; + continue; + } } + if (target >= targetEnd) { + result = targetExhausted; break; + } + /* Do this check whether lenient or strict */ if (!isLegalUTF8(source, extraBytesToRead+1)) { result = sourceIllegal; - break; + if (flags == strictConversion) { + /* Abort conversion. */ + break; + } else { + /* + * Replace the maximal subpart of ill-formed sequence with + * replacement character. + */ + source += findMaximalSubpartOfIllFormedUTF8Sequence(source, + sourceEnd); + *target++ = UNI_REPLACEMENT_CHAR; + continue; + } } /* * The cases all fall through. See "Note A" below. @@ -521,10 +646,6 @@ ConversionResult ConvertUTF8toUTF32 ( } ch -= offsetsFromUTF8[extraBytesToRead]; - if (target >= targetEnd) { - source -= (extraBytesToRead+1); /* Back up the source pointer! */ - result = targetExhausted; break; - } if (ch <= UNI_MAX_LEGAL_UTF32) { /* * UTF-16 surrogate values are illegal in UTF-32, and anything @@ -551,6 +672,22 @@ ConversionResult ConvertUTF8toUTF32 ( return result; } +ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart, + const UTF8 *sourceEnd, + UTF32 **targetStart, + UTF32 *targetEnd, + ConversionFlags flags) { + return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd, + flags, /*InputIsPartial=*/true); +} + +ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart, + const UTF8 *sourceEnd, UTF32 **targetStart, + UTF32 *targetEnd, ConversionFlags flags) { + return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd, + flags, /*InputIsPartial=*/false); +} + /* --------------------------------------------------------------------- Note A. 
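The ConvertUTF.c changes above give lenient conversion real substance: instead of aborting at the first bad byte, ConvertUTF8toUTF32 now replaces each maximal ill-formed subpart (Unicode 6.3.0, D93b) with U+FFFD and continues, while the new ConvertUTF8toUTF32Partial reports a truncated trailing sequence as sourceExhausted so a streaming caller can supply more input. A small usage sketch of the lenient path; the decodeLenient wrapper and its buffering strategy are illustrative, the entry point and flags are from the header:

#include "llvm/Support/ConvertUTF.h"
#include <cstddef>
#include <vector>

// Decode possibly ill-formed UTF-8, substituting U+FFFD for each maximal
// ill-formed subpart. UTF-32 output never needs more code units than the
// input has bytes, so a single up-front size suffices.
static std::vector<UTF32> decodeLenient(const UTF8 *Src, size_t Len) {
  std::vector<UTF32> Out(Len);
  const UTF8 *SrcPos = Src;
  UTF32 *DstPos = Out.data();
  ConversionResult R = ConvertUTF8toUTF32(&SrcPos, Src + Len, &DstPos,
                                          Out.data() + Out.size(),
                                          lenientConversion);
  // Under lenientConversion, sourceIllegal now means "replacements were
  // made", not "conversion stopped early", so both results are usable.
  (void)R;
  Out.resize(DstPos - Out.data());
  return Out;
}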
diff --git a/lib/Support/CrashRecoveryContext.cpp b/lib/Support/CrashRecoveryContext.cpp index a426377..9b0e443 100644 --- a/lib/Support/CrashRecoveryContext.cpp +++ b/lib/Support/CrashRecoveryContext.cpp @@ -22,7 +22,8 @@ namespace { struct CrashRecoveryContextImpl; -static ManagedStatic<sys::ThreadLocal<const CrashRecoveryContextImpl> > CurrentContext; +static ManagedStatic< + sys::ThreadLocal<const CrashRecoveryContextImpl> > CurrentContext; struct CrashRecoveryContextImpl { CrashRecoveryContext *CRC; @@ -231,7 +232,8 @@ void CrashRecoveryContext::Disable() { #include <signal.h> -static const int Signals[] = { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP }; +static const int Signals[] = + { SIGABRT, SIGBUS, SIGFPE, SIGILL, SIGSEGV, SIGTRAP }; static const unsigned NumSignals = sizeof(Signals) / sizeof(Signals[0]); static struct sigaction PrevActions[NumSignals]; @@ -330,12 +332,26 @@ const std::string &CrashRecoveryContext::getBacktrace() const { return CRC->Backtrace; } -// +// FIXME: Portability. +static void setThreadBackgroundPriority() { +#ifdef __APPLE__ + setpriority(PRIO_DARWIN_THREAD, 0, PRIO_DARWIN_BG); +#endif +} + +static bool hasThreadBackgroundPriority() { +#ifdef __APPLE__ + return getpriority(PRIO_DARWIN_THREAD, 0) == 1; +#else + return false; +#endif +} namespace { struct RunSafelyOnThreadInfo { function_ref<void()> Fn; CrashRecoveryContext *CRC; + bool UseBackgroundPriority; bool Result; }; } @@ -343,11 +359,16 @@ struct RunSafelyOnThreadInfo { static void RunSafelyOnThread_Dispatch(void *UserData) { RunSafelyOnThreadInfo *Info = reinterpret_cast<RunSafelyOnThreadInfo*>(UserData); + + if (Info->UseBackgroundPriority) + setThreadBackgroundPriority(); + Info->Result = Info->CRC->RunSafely(Info->Fn); } bool CrashRecoveryContext::RunSafelyOnThread(function_ref<void()> Fn, unsigned RequestedStackSize) { - RunSafelyOnThreadInfo Info = { Fn, this, false }; + bool UseBackgroundPriority = hasThreadBackgroundPriority(); + RunSafelyOnThreadInfo Info = { Fn, this, UseBackgroundPriority, false }; llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize); if (CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *)Impl) CRC->setSwitchedThread(); diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp index 7b82921..5d6d60a 100644 --- a/lib/Support/DataExtractor.cpp +++ b/lib/Support/DataExtractor.cpp @@ -21,7 +21,7 @@ static T getU(uint32_t *offset_ptr, const DataExtractor *de, if (de->isValidOffsetForDataOfSize(offset, sizeof(val))) { std::memcpy(&val, &Data[offset], sizeof(val)); if (sys::IsLittleEndianHost != isLittleEndian) - val = sys::SwapByteOrder(val); + sys::swapByteOrder(val); // Advance the offset *offset_ptr += sizeof(val); diff --git a/lib/Support/DataStream.cpp b/lib/Support/DataStream.cpp index eec8584..32653de 100644 --- a/lib/Support/DataStream.cpp +++ b/lib/Support/DataStream.cpp @@ -18,10 +18,10 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/Program.h" -#include "llvm/Support/system_error.h" #include <cerrno> #include <cstdio> #include <string> +#include <system_error> #if !defined(_MSC_VER) && !defined(__MINGW32__) #include <unistd.h> #else @@ -64,11 +64,11 @@ public: return read(Fd, buf, len); } - error_code OpenFile(const std::string &Filename) { + std::error_code OpenFile(const std::string &Filename) { if (Filename == "-") { Fd = 0; sys::ChangeStdinToBinary(); - return error_code::success(); + return std::error_code(); } return sys::fs::openFileForRead(Filename, 
Fd); @@ -81,7 +81,7 @@ namespace llvm { DataStreamer *getDataFileStreamer(const std::string &Filename, std::string *StrError) { DataFileStreamer *s = new DataFileStreamer(); - if (error_code e = s->OpenFile(Filename)) { + if (std::error_code e = s->OpenFile(Filename)) { *StrError = std::string("Could not open ") + Filename + ": " + e.message() + "\n"; return nullptr; diff --git a/lib/Support/DynamicLibrary.cpp b/lib/Support/DynamicLibrary.cpp index 82d7c0c..d2b551e 100644 --- a/lib/Support/DynamicLibrary.cpp +++ b/lib/Support/DynamicLibrary.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system DynamicLibrary concept. +// This file implements the operating system DynamicLibrary concept. // // FIXME: This file leaks ExplicitSymbols and OpenedHandles! // diff --git a/lib/Support/ErrorHandling.cpp b/lib/Support/ErrorHandling.cpp index 342c4f0..c36007f 100644 --- a/lib/Support/ErrorHandling.cpp +++ b/lib/Support/ErrorHandling.cpp @@ -18,8 +18,12 @@ #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Signals.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Threading.h" +#include "llvm/Support/WindowsError.h" #include "llvm/Support/raw_ostream.h" #include <cassert> #include <cstdlib> @@ -37,17 +41,20 @@ using namespace llvm; static fatal_error_handler_t ErrorHandler = nullptr; static void *ErrorHandlerUserData = nullptr; +static sys::Mutex ErrorHandlerMutex; + void llvm::install_fatal_error_handler(fatal_error_handler_t handler, void *user_data) { - assert(!llvm_is_multithreaded() && - "Cannot register error handlers after starting multithreaded mode!\n"); + llvm::MutexGuard Lock(ErrorHandlerMutex); assert(!ErrorHandler && "Error handler already registered!\n"); ErrorHandler = handler; ErrorHandlerUserData = user_data; } void llvm::remove_fatal_error_handler() { + llvm::MutexGuard Lock(ErrorHandlerMutex); ErrorHandler = nullptr; + ErrorHandlerUserData = nullptr; } void llvm::report_fatal_error(const char *Reason, bool GenCrashDiag) { @@ -63,8 +70,18 @@ void llvm::report_fatal_error(StringRef Reason, bool GenCrashDiag) { } void llvm::report_fatal_error(const Twine &Reason, bool GenCrashDiag) { - if (ErrorHandler) { - ErrorHandler(ErrorHandlerUserData, Reason.str(), GenCrashDiag); + llvm::fatal_error_handler_t handler = nullptr; + void* handlerData = nullptr; + { + // Only acquire the mutex while reading the handler, so as not to invoke a + // user-supplied callback under a lock. + llvm::MutexGuard Lock(ErrorHandlerMutex); + handler = ErrorHandler; + handlerData = ErrorHandlerUserData; + } + + if (handler) { + handler(handlerData, Reason.str(), GenCrashDiag); } else { // Blast the result out to stderr. We don't try hard to make sure this // succeeds (e.g. handling EINTR) and we can't use errs() here because @@ -119,3 +136,70 @@ void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler) { void LLVMResetFatalErrorHandler() { remove_fatal_error_handler(); } + +#ifdef LLVM_ON_WIN32 + +#include <winerror.h> + +// I'd rather not double the line count of the following. 
+#define MAP_ERR_TO_COND(x, y) \ + case x: \ + return make_error_code(errc::y) + +std::error_code llvm::mapWindowsError(unsigned EV) { + switch (EV) { + MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied); + MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists); + MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device); + MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long); + MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied); + MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error); + MAP_ERR_TO_COND(ERROR_CANTREAD, io_error); + MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error); + MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied); + MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device); + MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty); + MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument); + MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device); + MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists); + MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device); + MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied); + MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device); + MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported); + MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument); + MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument); + MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available); + MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available); + MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument); + MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied); + MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory); + MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again); + MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error); + MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy); + MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory); + MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory); + MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error); + MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again); + MAP_ERR_TO_COND(ERROR_SEEK, io_error); + MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied); + MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open); + MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error); + MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied); + MAP_ERR_TO_COND(WSAEACCES, permission_denied); + MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor); + MAP_ERR_TO_COND(WSAEFAULT, bad_address); + MAP_ERR_TO_COND(WSAEINTR, interrupted); + MAP_ERR_TO_COND(WSAEINVAL, invalid_argument); + MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open); + MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long); + default: + return std::error_code(EV, std::system_category()); + } +} + +#endif diff --git a/lib/Support/FileOutputBuffer.cpp b/lib/Support/FileOutputBuffer.cpp index 49311c2..2e740ca 100644 --- a/lib/Support/FileOutputBuffer.cpp +++ b/lib/Support/FileOutputBuffer.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Errc.h" #include "llvm/Support/FileOutputBuffer.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" +#include <system_error> using llvm::sys::fs::mapped_file_region; @@ -30,13 +31,13 @@ 
FileOutputBuffer::~FileOutputBuffer() { sys::fs::remove(Twine(TempPath)); } -error_code FileOutputBuffer::create(StringRef FilePath, - size_t Size, - std::unique_ptr<FileOutputBuffer> &Result, - unsigned Flags) { +std::error_code +FileOutputBuffer::create(StringRef FilePath, size_t Size, + std::unique_ptr<FileOutputBuffer> &Result, + unsigned Flags) { // If file already exists, it must be a regular file (to be mappable). sys::fs::file_status Stat; - error_code EC = sys::fs::status(FilePath, Stat); + std::error_code EC = sys::fs::status(FilePath, Stat); switch (Stat.type()) { case sys::fs::file_type::file_not_found: // If file does not exist, we'll create one. @@ -81,16 +82,16 @@ error_code FileOutputBuffer::create(StringRef FilePath, if (Result) MappedFile.release(); - return error_code::success(); + return std::error_code(); } -error_code FileOutputBuffer::commit(int64_t NewSmallerSize) { +std::error_code FileOutputBuffer::commit(int64_t NewSmallerSize) { // Unmap buffer, letting OS flush dirty pages to file on disk. Region.reset(nullptr); // If requested, resize file as part of commit. if ( NewSmallerSize != -1 ) { - error_code EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize); + std::error_code EC = sys::fs::resize_file(Twine(TempPath), NewSmallerSize); if (EC) return EC; } diff --git a/lib/Support/FileUtilities.cpp b/lib/Support/FileUtilities.cpp index b2dc47d..8a23491 100644 --- a/lib/Support/FileUtilities.cpp +++ b/lib/Support/FileUtilities.cpp @@ -17,10 +17,10 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" #include <cctype> #include <cstdlib> #include <cstring> +#include <system_error> using namespace llvm; static bool isSignedChar(char C) { @@ -176,18 +176,21 @@ int llvm::DiffFilesWithTolerance(StringRef NameA, std::string *Error) { // Now its safe to mmap the files into memory because both files // have a non-zero size. - std::unique_ptr<MemoryBuffer> F1; - if (error_code ec = MemoryBuffer::getFile(NameA, F1)) { + ErrorOr<std::unique_ptr<MemoryBuffer>> F1OrErr = MemoryBuffer::getFile(NameA); + if (std::error_code EC = F1OrErr.getError()) { if (Error) - *Error = ec.message(); + *Error = EC.message(); return 2; } - std::unique_ptr<MemoryBuffer> F2; - if (error_code ec = MemoryBuffer::getFile(NameB, F2)) { + std::unique_ptr<MemoryBuffer> F1 = std::move(F1OrErr.get()); + + ErrorOr<std::unique_ptr<MemoryBuffer>> F2OrErr = MemoryBuffer::getFile(NameB); + if (std::error_code EC = F2OrErr.getError()) { if (Error) - *Error = ec.message(); + *Error = EC.message(); return 2; } + std::unique_ptr<MemoryBuffer> F2 = std::move(F2OrErr.get()); // Okay, now that we opened the files, scan them for the first difference. const char *File1Start = F1->getBufferStart(); diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp index f5b2943..e68ee43 100644 --- a/lib/Support/GraphWriter.cpp +++ b/lib/Support/GraphWriter.cpp @@ -68,7 +68,7 @@ StringRef llvm::DOT::getColorString(unsigned ColorNumber) { std::string llvm::createGraphFilename(const Twine &Name, int &FD) { FD = -1; SmallString<128> Filename; - error_code EC = sys::fs::createTemporaryFile(Name, "dot", FD, Filename); + std::error_code EC = sys::fs::createTemporaryFile(Name, "dot", FD, Filename); if (EC) { errs() << "Error: " << EC.message() << "\n"; return ""; @@ -78,148 +78,165 @@ std::string llvm::createGraphFilename(const Twine &Name, int &FD) { return Filename.str(); } -// Execute the graph viewer. 
Return true if successful. -static bool LLVM_ATTRIBUTE_UNUSED -ExecGraphViewer(StringRef ExecPath, std::vector<const char*> &args, - StringRef Filename, bool wait, std::string &ErrMsg) { +// Execute the graph viewer. Return true if there were errors. +static bool ExecGraphViewer(StringRef ExecPath, std::vector<const char *> &args, + StringRef Filename, bool wait, + std::string &ErrMsg) { + assert(args.back() == nullptr); if (wait) { - if (sys::ExecuteAndWait(ExecPath, &args[0],nullptr,nullptr,0,0,&ErrMsg)) { + if (sys::ExecuteAndWait(ExecPath, args.data(), nullptr, nullptr, 0, 0, + &ErrMsg)) { errs() << "Error: " << ErrMsg << "\n"; - return false; + return true; } sys::fs::remove(Filename); errs() << " done. \n"; - } - else { - sys::ExecuteNoWait(ExecPath, &args[0],nullptr,nullptr,0,&ErrMsg); + } else { + sys::ExecuteNoWait(ExecPath, args.data(), nullptr, nullptr, 0, &ErrMsg); errs() << "Remember to erase graph file: " << Filename.str() << "\n"; } - return true; + return false; +} + +struct GraphSession { + std::string LogBuffer; + bool TryFindProgram(StringRef Names, std::string &ProgramPath) { + raw_string_ostream Log(LogBuffer); + SmallVector<StringRef, 8> parts; + Names.split(parts, "|"); + for (auto Name : parts) { + ProgramPath = sys::FindProgramByName(Name); + if (!ProgramPath.empty()) + return true; + Log << " Tried '" << Name << "'\n"; + } + return false; + } +}; + +static const char *getProgramName(GraphProgram::Name program) { + switch (program) { + case GraphProgram::DOT: + return "dot"; + case GraphProgram::FDP: + return "fdp"; + case GraphProgram::NEATO: + return "neato"; + case GraphProgram::TWOPI: + return "twopi"; + case GraphProgram::CIRCO: + return "circo"; + } + llvm_unreachable("bad kind"); } -void llvm::DisplayGraph(StringRef FilenameRef, bool wait, +bool llvm::DisplayGraph(StringRef FilenameRef, bool wait, GraphProgram::Name program) { std::string Filename = FilenameRef; wait &= !ViewBackground; std::string ErrMsg; -#if HAVE_GRAPHVIZ - std::string Graphviz(LLVM_PATH_GRAPHVIZ); - - std::vector<const char*> args; - args.push_back(Graphviz.c_str()); - args.push_back(Filename.c_str()); - args.push_back(nullptr); - - errs() << "Running 'Graphviz' program... "; - if (!ExecGraphViewer(Graphviz, args, Filename, wait, ErrMsg)) - return; - -#elif HAVE_XDOT - std::vector<const char*> args; - args.push_back(LLVM_PATH_XDOT); - args.push_back(Filename.c_str()); - - switch (program) { - case GraphProgram::DOT: args.push_back("-f"); args.push_back("dot"); break; - case GraphProgram::FDP: args.push_back("-f"); args.push_back("fdp"); break; - case GraphProgram::NEATO: args.push_back("-f"); args.push_back("neato");break; - case GraphProgram::TWOPI: args.push_back("-f"); args.push_back("twopi");break; - case GraphProgram::CIRCO: args.push_back("-f"); args.push_back("circo");break; + std::string ViewerPath; + GraphSession S; + + // Graphviz + if (S.TryFindProgram("Graphviz", ViewerPath)) { + std::vector<const char *> args; + args.push_back(ViewerPath.c_str()); + args.push_back(Filename.c_str()); + args.push_back(nullptr); + + errs() << "Running 'Graphviz' program... "; + return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg); } - args.push_back(0); + // xdot + if (S.TryFindProgram("xdot|xdot.py", ViewerPath)) { + std::vector<const char *> args; + args.push_back(ViewerPath.c_str()); + args.push_back(Filename.c_str()); - errs() << "Running 'xdot.py' program... 
"; - if (!ExecGraphViewer(LLVM_PATH_XDOT, args, Filename, wait, ErrMsg)) - return; + args.push_back("-f"); + args.push_back(getProgramName(program)); -#elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \ - HAVE_TWOPI || HAVE_CIRCO)) - std::string PSFilename = Filename + ".ps"; - std::string prog; + args.push_back(nullptr); - // Set default grapher -#if HAVE_CIRCO - prog = LLVM_PATH_CIRCO; -#endif -#if HAVE_TWOPI - prog = LLVM_PATH_TWOPI; -#endif -#if HAVE_NEATO - prog = LLVM_PATH_NEATO; -#endif -#if HAVE_FDP - prog = LLVM_PATH_FDP; -#endif -#if HAVE_DOT - prog = LLVM_PATH_DOT; -#endif + errs() << "Running 'xdot.py' program... "; + return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg); + } - // Find which program the user wants -#if HAVE_DOT - if (program == GraphProgram::DOT) - prog = LLVM_PATH_DOT; -#endif -#if (HAVE_FDP) - if (program == GraphProgram::FDP) - prog = LLVM_PATH_FDP; -#endif -#if (HAVE_NEATO) - if (program == GraphProgram::NEATO) - prog = LLVM_PATH_NEATO; -#endif -#if (HAVE_TWOPI) - if (program == GraphProgram::TWOPI) - prog = LLVM_PATH_TWOPI; -#endif -#if (HAVE_CIRCO) - if (program == GraphProgram::CIRCO) - prog = LLVM_PATH_CIRCO; + enum PSViewerKind { PSV_None, PSV_OSXOpen, PSV_XDGOpen, PSV_Ghostview }; + PSViewerKind PSViewer = PSV_None; +#ifdef __APPLE__ + if (!PSViewer && S.TryFindProgram("open", ViewerPath)) + PSViewer = PSV_OSXOpen; #endif + if (!PSViewer && S.TryFindProgram("gv", ViewerPath)) + PSViewer = PSV_Ghostview; + if (!PSViewer && S.TryFindProgram("xdg-open", ViewerPath)) + PSViewer = PSV_XDGOpen; + + // PostScript graph generator + PostScript viewer + std::string GeneratorPath; + if (PSViewer && + (S.TryFindProgram(getProgramName(program), GeneratorPath) || + S.TryFindProgram("dot|fdp|neato|twopi|circo", GeneratorPath))) { + std::string PSFilename = Filename + ".ps"; + + std::vector<const char *> args; + args.push_back(GeneratorPath.c_str()); + args.push_back("-Tps"); + args.push_back("-Nfontname=Courier"); + args.push_back("-Gsize=7.5,10"); + args.push_back(Filename.c_str()); + args.push_back("-o"); + args.push_back(PSFilename.c_str()); + args.push_back(nullptr); + + errs() << "Running '" << GeneratorPath << "' program... "; + + if (ExecGraphViewer(GeneratorPath, args, Filename, wait, ErrMsg)) + return true; + + args.clear(); + args.push_back(ViewerPath.c_str()); + switch (PSViewer) { + case PSV_OSXOpen: + args.push_back("-W"); + args.push_back(PSFilename.c_str()); + break; + case PSV_XDGOpen: + wait = false; + args.push_back(PSFilename.c_str()); + break; + case PSV_Ghostview: + args.push_back("--spartan"); + args.push_back(PSFilename.c_str()); + break; + case PSV_None: + llvm_unreachable("Invalid viewer"); + } + args.push_back(nullptr); - std::vector<const char*> args; - args.push_back(prog.c_str()); - args.push_back("-Tps"); - args.push_back("-Nfontname=Courier"); - args.push_back("-Gsize=7.5,10"); - args.push_back(Filename.c_str()); - args.push_back("-o"); - args.push_back(PSFilename.c_str()); - args.push_back(0); - - errs() << "Running '" << prog << "' program... 
"; - - if (!ExecGraphViewer(prog, args, Filename, wait, ErrMsg)) - return; - - std::string gv(LLVM_PATH_GV); - args.clear(); - args.push_back(gv.c_str()); - args.push_back(PSFilename.c_str()); - args.push_back("--spartan"); - args.push_back(0); - - ErrMsg.clear(); - if (!ExecGraphViewer(gv, args, PSFilename, wait, ErrMsg)) - return; - -#elif HAVE_DOTTY - std::string dotty(LLVM_PATH_DOTTY); + ErrMsg.clear(); + return ExecGraphViewer(ViewerPath, args, PSFilename, wait, ErrMsg); + } - std::vector<const char*> args; - args.push_back(dotty.c_str()); - args.push_back(Filename.c_str()); - args.push_back(0); + // dotty + if (S.TryFindProgram("dotty", ViewerPath)) { + std::vector<const char *> args; + args.push_back(ViewerPath.c_str()); + args.push_back(Filename.c_str()); + args.push_back(nullptr); // Dotty spawns another app and doesn't wait until it returns -#if defined (__MINGW32__) || defined (_WINDOWS) - wait = false; -#endif - errs() << "Running 'dotty' program... "; - if (!ExecGraphViewer(dotty, args, Filename, wait, ErrMsg)) - return; -#else - (void)Filename; - (void)ErrMsg; +#ifdef LLVM_ON_WIN32 + wait = false; #endif + errs() << "Running 'dotty' program... "; + return ExecGraphViewer(ViewerPath, args, Filename, wait, ErrMsg); + } + + errs() << "Error: Couldn't find a usable graph viewer program:\n"; + errs() << S.LogBuffer << "\n"; + return true; } diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp index fd0472e..e2dd6d5 100644 --- a/lib/Support/Host.cpp +++ b/lib/Support/Host.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Host concept. +// This file implements the operating system Host concept. // //===----------------------------------------------------------------------===// @@ -570,6 +570,8 @@ StringRef sys::getHostCPUName() { .Case("A2", "a2") .Case("POWER6", "pwr6") .Case("POWER7", "pwr7") + .Case("POWER8", "pwr8") + .Case("POWER8E", "pwr8") .Default(generic); } #elif defined(__linux__) && defined(__arm__) @@ -744,7 +746,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) { .Default(""); #if defined(__aarch64__) - // We need to check crypto seperately since we need all of the crypto + // We need to check crypto separately since we need all of the crypto // extensions to enable the subtarget feature if (CPUFeatures[I] == "aes") crypto |= CAP_AES; diff --git a/lib/Support/LockFileManager.cpp b/lib/Support/LockFileManager.cpp index 9b4bfbe..3f224e0 100644 --- a/lib/Support/LockFileManager.cpp +++ b/lib/Support/LockFileManager.cpp @@ -9,6 +9,7 @@ #include "llvm/Support/LockFileManager.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" @@ -32,11 +33,13 @@ Optional<std::pair<std::string, int> > LockFileManager::readLockFile(StringRef LockFileName) { // Read the owning host and PID out of the lock file. If it appears that the // owning process is dead, the lock file is invalid. 
- std::unique_ptr<MemoryBuffer> MB; - if (MemoryBuffer::getFile(LockFileName, MB)) { + ErrorOr<std::unique_ptr<MemoryBuffer>> MBOrErr = + MemoryBuffer::getFile(LockFileName); + if (!MBOrErr) { sys::fs::remove(LockFileName); return None; } + std::unique_ptr<MemoryBuffer> MB = std::move(MBOrErr.get()); StringRef Hostname; StringRef PIDStr; @@ -71,7 +74,7 @@ bool LockFileManager::processStillExecuting(StringRef Hostname, int PID) { LockFileManager::LockFileManager(StringRef FileName) { this->FileName = FileName; - if (error_code EC = sys::fs::make_absolute(this->FileName)) { + if (std::error_code EC = sys::fs::make_absolute(this->FileName)) { Error = EC; return; } @@ -87,10 +90,8 @@ LockFileManager::LockFileManager(StringRef FileName) UniqueLockFileName = LockFileName; UniqueLockFileName += "-%%%%%%%%"; int UniqueLockFileID; - if (error_code EC - = sys::fs::createUniqueFile(UniqueLockFileName.str(), - UniqueLockFileID, - UniqueLockFileName)) { + if (std::error_code EC = sys::fs::createUniqueFile( + UniqueLockFileName.str(), UniqueLockFileID, UniqueLockFileName)) { Error = EC; return; } @@ -122,9 +123,9 @@ LockFileManager::LockFileManager(StringRef FileName) while (1) { // Create a link from the lock file name. If this succeeds, we're done. - error_code EC = + std::error_code EC = sys::fs::create_link(UniqueLockFileName.str(), LockFileName.str()); - if (EC == errc::success) + if (!EC) return; if (EC != errc::file_exists) { diff --git a/lib/Support/Makefile b/lib/Support/Makefile index 4a2185d..39426aa 100644 --- a/lib/Support/Makefile +++ b/lib/Support/Makefile @@ -17,3 +17,7 @@ include $(LEVEL)/Makefile.common CompileCommonOpts := $(filter-out -pedantic,$(CompileCommonOpts)) CompileCommonOpts := $(filter-out -Wno-long-long,$(CompileCommonOpts)) + +ifdef LLVM_VERSION_INFO +CompileCommonOpts += -DLLVM_VERSION_INFO='"$(LLVM_VERSION_INFO)"' +endif diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index 6a1c2a5..b8fb284 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -14,16 +14,26 @@ #include "llvm/Support/ManagedStatic.h" #include "llvm/Config/config.h" #include "llvm/Support/Atomic.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" #include <cassert> using namespace llvm; static const ManagedStaticBase *StaticList = nullptr; +static sys::Mutex& getManagedStaticMutex() { + // We need to use a function local static here, since this can get called + // during a static constructor and we need to guarantee that it's initialized + // correctly. + static sys::Mutex ManagedStaticMutex; + return ManagedStaticMutex; +} + void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), void (*Deleter)(void*)) const { assert(Creator); if (llvm_is_multithreaded()) { - llvm_acquire_global_lock(); + MutexGuard Lock(getManagedStaticMutex()); if (!Ptr) { void* tmp = Creator(); @@ -43,8 +53,6 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), Next = StaticList; StaticList = this; } - - llvm_release_global_lock(); } else { assert(!Ptr && !DeleterFn && !Next && "Partially initialized ManagedStatic!?"); @@ -75,8 +83,8 @@ void ManagedStaticBase::destroy() const { /// llvm_shutdown - Deallocate and destroy all ManagedStatic variables. 
void llvm::llvm_shutdown() { + MutexGuard Lock(getManagedStaticMutex()); + while (StaticList) StaticList->destroy(); - - if (llvm_is_multithreaded()) llvm_stop_multithreaded(); } diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 629d885..5f4b7da 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -14,19 +14,20 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" +#include "llvm/Support/Errc.h" #include "llvm/Support/Errno.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/Path.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" -#include "llvm/Support/system_error.h" #include <cassert> #include <cerrno> #include <cstdio> #include <cstring> #include <new> #include <sys/types.h> +#include <system_error> #if !defined(_MSC_VER) && !defined(__MINGW32__) #include <unistd.h> #else @@ -151,17 +152,11 @@ MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size, StringRef BufferName) { return SB; } - -/// getFileOrSTDIN - Open the specified file as a MemoryBuffer, or open stdin -/// if the Filename is "-". If an error occurs, this returns null and fills -/// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN) -/// returns an empty buffer. -error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename, - std::unique_ptr<MemoryBuffer> &Result, - int64_t FileSize) { +ErrorOr<std::unique_ptr<MemoryBuffer>> +MemoryBuffer::getFileOrSTDIN(StringRef Filename, int64_t FileSize) { if (Filename == "-") - return getSTDIN(Result); - return getFile(Filename, Result, FileSize); + return getSTDIN(); + return getFile(Filename, FileSize); } @@ -190,7 +185,7 @@ class MemoryBufferMMapFile : public MemoryBuffer { public: MemoryBufferMMapFile(bool RequiresNullTerminator, int FD, uint64_t Len, - uint64_t Offset, error_code EC) + uint64_t Offset, std::error_code EC) : MFR(FD, false, sys::fs::mapped_file_region::readonly, getLegalMapSize(Len, Offset), getLegalMapOffset(Offset), EC) { if (!EC) { @@ -210,9 +205,8 @@ public: }; } -static error_code getMemoryBufferForStream(int FD, - StringRef BufferName, - std::unique_ptr<MemoryBuffer> &Result) { +static ErrorOr<std::unique_ptr<MemoryBuffer>> +getMemoryBufferForStream(int FD, StringRef BufferName) { const ssize_t ChunkSize = 4096*4; SmallString<ChunkSize> Buffer; ssize_t ReadBytes; @@ -222,52 +216,48 @@ static error_code getMemoryBufferForStream(int FD, ReadBytes = read(FD, Buffer.end(), ChunkSize); if (ReadBytes == -1) { if (errno == EINTR) continue; - return error_code(errno, posix_category()); + return std::error_code(errno, std::generic_category()); } Buffer.set_size(Buffer.size() + ReadBytes); } while (ReadBytes != 0); - Result.reset(MemoryBuffer::getMemBufferCopy(Buffer, BufferName)); - return error_code::success(); + std::unique_ptr<MemoryBuffer> Ret( + MemoryBuffer::getMemBufferCopy(Buffer, BufferName)); + return std::move(Ret); } -static error_code getFileAux(const char *Filename, - std::unique_ptr<MemoryBuffer> &Result, - int64_t FileSize, - bool RequiresNullTerminator, - bool IsVolatileSize); - -error_code MemoryBuffer::getFile(Twine Filename, - std::unique_ptr<MemoryBuffer> &Result, - int64_t FileSize, - bool RequiresNullTerminator, - bool IsVolatileSize) { +static ErrorOr<std::unique_ptr<MemoryBuffer>> +getFileAux(const char *Filename, int64_t FileSize, bool RequiresNullTerminator, + bool IsVolatileSize); + +ErrorOr<std::unique_ptr<MemoryBuffer>> 
+MemoryBuffer::getFile(Twine Filename, int64_t FileSize, + bool RequiresNullTerminator, bool IsVolatileSize) { // Ensure the path is null terminated. SmallString<256> PathBuf; StringRef NullTerminatedName = Filename.toNullTerminatedStringRef(PathBuf); - return getFileAux(NullTerminatedName.data(), Result, FileSize, - RequiresNullTerminator, IsVolatileSize); + return getFileAux(NullTerminatedName.data(), FileSize, RequiresNullTerminator, + IsVolatileSize); } -static error_code getOpenFileImpl(int FD, const char *Filename, - std::unique_ptr<MemoryBuffer> &Result, - uint64_t FileSize, uint64_t MapSize, - int64_t Offset, bool RequiresNullTerminator, - bool IsVolatileSize); +static ErrorOr<std::unique_ptr<MemoryBuffer>> +getOpenFileImpl(int FD, const char *Filename, uint64_t FileSize, + uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator, + bool IsVolatileSize); -static error_code getFileAux(const char *Filename, - std::unique_ptr<MemoryBuffer> &Result, int64_t FileSize, - bool RequiresNullTerminator, - bool IsVolatileSize) { +static ErrorOr<std::unique_ptr<MemoryBuffer>> +getFileAux(const char *Filename, int64_t FileSize, bool RequiresNullTerminator, + bool IsVolatileSize) { int FD; - error_code EC = sys::fs::openFileForRead(Filename, FD); + std::error_code EC = sys::fs::openFileForRead(Filename, FD); if (EC) return EC; - error_code ret = getOpenFileImpl(FD, Filename, Result, FileSize, FileSize, 0, - RequiresNullTerminator, IsVolatileSize); + ErrorOr<std::unique_ptr<MemoryBuffer>> Ret = + getOpenFileImpl(FD, Filename, FileSize, FileSize, 0, + RequiresNullTerminator, IsVolatileSize); close(FD); - return ret; + return Ret; } static bool shouldUseMmap(int FD, @@ -318,11 +308,10 @@ static bool shouldUseMmap(int FD, return true; } -static error_code getOpenFileImpl(int FD, const char *Filename, - std::unique_ptr<MemoryBuffer> &Result, - uint64_t FileSize, uint64_t MapSize, - int64_t Offset, bool RequiresNullTerminator, - bool IsVolatileSize) { +static ErrorOr<std::unique_ptr<MemoryBuffer>> +getOpenFileImpl(int FD, const char *Filename, uint64_t FileSize, + uint64_t MapSize, int64_t Offset, bool RequiresNullTerminator, + bool IsVolatileSize) { static int PageSize = sys::process::get_self()->page_size(); // Default is to map the full file. @@ -331,7 +320,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename, // file descriptor is cheaper than stat on a random path. 
if (FileSize == uint64_t(-1)) { sys::fs::file_status Status; - error_code EC = sys::fs::status(FD, Status); + std::error_code EC = sys::fs::status(FD, Status); if (EC) return EC; @@ -341,7 +330,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename, sys::fs::file_type Type = Status.type(); if (Type != sys::fs::file_type::regular_file && Type != sys::fs::file_type::block_file) - return getMemoryBufferForStream(FD, Filename, Result); + return getMemoryBufferForStream(FD, Filename); FileSize = Status.getSize(); } @@ -350,11 +339,12 @@ static error_code getOpenFileImpl(int FD, const char *Filename, if (shouldUseMmap(FD, FileSize, MapSize, Offset, RequiresNullTerminator, PageSize, IsVolatileSize)) { - error_code EC; - Result.reset(new (NamedBufferAlloc(Filename)) MemoryBufferMMapFile( - RequiresNullTerminator, FD, MapSize, Offset, EC)); + std::error_code EC; + std::unique_ptr<MemoryBuffer> Result( + new (NamedBufferAlloc(Filename)) + MemoryBufferMMapFile(RequiresNullTerminator, FD, MapSize, Offset, EC)); if (!EC) - return error_code::success(); + return std::move(Result); } MemoryBuffer *Buf = MemoryBuffer::getNewUninitMemBuffer(MapSize, Filename); @@ -370,7 +360,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename, size_t BytesLeft = MapSize; #ifndef HAVE_PREAD if (lseek(FD, Offset, SEEK_SET) == -1) - return error_code(errno, posix_category()); + return std::error_code(errno, std::generic_category()); #endif while (BytesLeft) { @@ -383,7 +373,7 @@ static error_code getOpenFileImpl(int FD, const char *Filename, if (errno == EINTR) continue; // Error while reading. - return error_code(errno, posix_category()); + return std::error_code(errno, std::generic_category()); } if (NumRead == 0) { memset(BufPtr, 0, BytesLeft); // zero-initialize rest of the buffer. @@ -393,37 +383,29 @@ static error_code getOpenFileImpl(int FD, const char *Filename, BufPtr += NumRead; } - Result.swap(SB); - return error_code::success(); + return std::move(SB); } -error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, - std::unique_ptr<MemoryBuffer> &Result, - uint64_t FileSize, - bool RequiresNullTerminator, - bool IsVolatileSize) { - return getOpenFileImpl(FD, Filename, Result, FileSize, FileSize, 0, +ErrorOr<std::unique_ptr<MemoryBuffer>> +MemoryBuffer::getOpenFile(int FD, const char *Filename, uint64_t FileSize, + bool RequiresNullTerminator, bool IsVolatileSize) { + return getOpenFileImpl(FD, Filename, FileSize, FileSize, 0, RequiresNullTerminator, IsVolatileSize); } -error_code MemoryBuffer::getOpenFileSlice(int FD, const char *Filename, - std::unique_ptr<MemoryBuffer> &Result, - uint64_t MapSize, int64_t Offset, - bool IsVolatileSize) { - return getOpenFileImpl(FD, Filename, Result, -1, MapSize, Offset, false, +ErrorOr<std::unique_ptr<MemoryBuffer>> +MemoryBuffer::getOpenFileSlice(int FD, const char *Filename, uint64_t MapSize, + int64_t Offset, bool IsVolatileSize) { + return getOpenFileImpl(FD, Filename, -1, MapSize, Offset, false, IsVolatileSize); } -//===----------------------------------------------------------------------===// -// MemoryBuffer::getSTDIN implementation. -//===----------------------------------------------------------------------===// - -error_code MemoryBuffer::getSTDIN(std::unique_ptr<MemoryBuffer> &Result) { +ErrorOr<std::unique_ptr<MemoryBuffer>> MemoryBuffer::getSTDIN() { // Read in all of the data from stdin, we cannot mmap stdin. // // FIXME: That isn't necessarily true, we should try to mmap stdin and // fallback if it fails. 
sys::ChangeStdinToBinary(); - return getMemoryBufferForStream(0, "<stdin>", Result); + return getMemoryBufferForStream(0, "<stdin>"); } diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index b8d676f..d5a0ec5 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -11,6 +11,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Errc.h" #include "llvm/Support/Path.h" #include "llvm/Support/Endian.h" #include "llvm/Support/ErrorHandling.h" @@ -164,12 +165,12 @@ enum FSEntity { }; // Implemented in Unix/Path.inc and Windows/Path.inc. -static error_code TempDir(SmallVectorImpl<char> &result); +static std::error_code TempDir(SmallVectorImpl<char> &result); -static error_code createUniqueEntity(const Twine &Model, int &ResultFD, - SmallVectorImpl<char> &ResultPath, - bool MakeAbsolute, unsigned Mode, - FSEntity Type) { +static std::error_code createUniqueEntity(const Twine &Model, int &ResultFD, + SmallVectorImpl<char> &ResultPath, + bool MakeAbsolute, unsigned Mode, + FSEntity Type) { SmallString<128> ModelStorage; Model.toVector(ModelStorage); @@ -177,7 +178,7 @@ static error_code createUniqueEntity(const Twine &Model, int &ResultFD, // Make model absolute by prepending a temp directory if it's not already. if (!sys::path::is_absolute(Twine(ModelStorage))) { SmallString<128> TDir; - if (error_code EC = TempDir(TDir)) + if (std::error_code EC = TempDir(TDir)) return EC; sys::path::append(TDir, Twine(ModelStorage)); ModelStorage.swap(TDir); @@ -201,7 +202,7 @@ retry_random_path: // Try to open + create the file. switch (Type) { case FS_File: { - if (error_code EC = + if (std::error_code EC = sys::fs::openFileForWrite(Twine(ResultPath.begin()), ResultFD, sys::fs::F_RW | sys::fs::F_Excl, Mode)) { if (EC == errc::file_exists) @@ -209,26 +210,27 @@ retry_random_path: return EC; } - return error_code::success(); + return std::error_code(); } case FS_Name: { bool Exists; - error_code EC = sys::fs::exists(ResultPath.begin(), Exists); + std::error_code EC = sys::fs::exists(ResultPath.begin(), Exists); if (EC) return EC; if (Exists) goto retry_random_path; - return error_code::success(); + return std::error_code(); } case FS_Dir: { - if (error_code EC = sys::fs::create_directory(ResultPath.begin(), false)) { + if (std::error_code EC = + sys::fs::create_directory(ResultPath.begin(), false)) { if (EC == errc::file_exists) goto retry_random_path; return EC; } - return error_code::success(); + return std::error_code(); } } llvm_unreachable("Invalid Type"); @@ -705,29 +707,30 @@ bool is_relative(const Twine &path) { namespace fs { -error_code getUniqueID(const Twine Path, UniqueID &Result) { +std::error_code getUniqueID(const Twine Path, UniqueID &Result) { file_status Status; - error_code EC = status(Path, Status); + std::error_code EC = status(Path, Status); if (EC) return EC; Result = Status.getUniqueID(); - return error_code::success(); + return std::error_code(); } -error_code createUniqueFile(const Twine &Model, int &ResultFd, - SmallVectorImpl<char> &ResultPath, unsigned Mode) { +std::error_code createUniqueFile(const Twine &Model, int &ResultFd, + SmallVectorImpl<char> &ResultPath, + unsigned Mode) { return createUniqueEntity(Model, ResultFd, ResultPath, false, Mode, FS_File); } -error_code createUniqueFile(const Twine &Model, - SmallVectorImpl<char> &ResultPath) { +std::error_code createUniqueFile(const Twine &Model, + SmallVectorImpl<char> &ResultPath) { int Dummy; return createUniqueEntity(Model, Dummy, ResultPath, false, 0, 
FS_Name); } -static error_code createTemporaryFile(const Twine &Model, int &ResultFD, - llvm::SmallVectorImpl<char> &ResultPath, - FSEntity Type) { +static std::error_code +createTemporaryFile(const Twine &Model, int &ResultFD, + llvm::SmallVectorImpl<char> &ResultPath, FSEntity Type) { SmallString<128> Storage; StringRef P = Model.toNullTerminatedStringRef(Storage); assert(P.find_first_of(separators) == StringRef::npos && @@ -737,24 +740,22 @@ static error_code createTemporaryFile(const Twine &Model, int &ResultFD, true, owner_read | owner_write, Type); } -static error_code +static std::error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix, int &ResultFD, - llvm::SmallVectorImpl<char> &ResultPath, - FSEntity Type) { + llvm::SmallVectorImpl<char> &ResultPath, FSEntity Type) { const char *Middle = Suffix.empty() ? "-%%%%%%" : "-%%%%%%."; return createTemporaryFile(Prefix + Middle + Suffix, ResultFD, ResultPath, Type); } - -error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix, - int &ResultFD, - SmallVectorImpl<char> &ResultPath) { +std::error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix, + int &ResultFD, + SmallVectorImpl<char> &ResultPath) { return createTemporaryFile(Prefix, Suffix, ResultFD, ResultPath, FS_File); } -error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix, - SmallVectorImpl<char> &ResultPath) { +std::error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix, + SmallVectorImpl<char> &ResultPath) { int Dummy; return createTemporaryFile(Prefix, Suffix, Dummy, ResultPath, FS_Name); } @@ -762,14 +763,14 @@ error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix, // This is a mkdtemp with a different pattern. We use createUniqueEntity mostly // for consistency. We should try using mkdtemp. -error_code createUniqueDirectory(const Twine &Prefix, - SmallVectorImpl<char> &ResultPath) { +std::error_code createUniqueDirectory(const Twine &Prefix, + SmallVectorImpl<char> &ResultPath) { int Dummy; return createUniqueEntity(Prefix + "-%%%%%%", Dummy, ResultPath, true, 0, FS_Dir); } -error_code make_absolute(SmallVectorImpl<char> &path) { +std::error_code make_absolute(SmallVectorImpl<char> &path) { StringRef p(path.data(), path.size()); bool rootDirectory = path::has_root_directory(p), @@ -781,11 +782,12 @@ error_code make_absolute(SmallVectorImpl<char> &path) { // Already absolute. if (rootName && rootDirectory) - return error_code::success(); + return std::error_code(); // All of the following conditions will need the current directory. SmallString<128> current_dir; - if (error_code ec = current_path(current_dir)) return ec; + if (std::error_code ec = current_path(current_dir)) + return ec; // Relative path. Prepend the current directory. if (!rootName && !rootDirectory) { @@ -793,7 +795,7 @@ error_code make_absolute(SmallVectorImpl<char> &path) { path::append(current_dir, p); // Set path to the result. path.swap(current_dir); - return error_code::success(); + return std::error_code(); } if (!rootName && rootDirectory) { @@ -802,7 +804,7 @@ error_code make_absolute(SmallVectorImpl<char> &path) { path::append(curDirRootName, p); // Set path to the result. 
path.swap(curDirRootName); - return error_code::success(); + return std::error_code(); } if (rootName && !rootDirectory) { @@ -814,19 +816,19 @@ error_code make_absolute(SmallVectorImpl<char> &path) { SmallString<128> res; path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath); path.swap(res); - return error_code::success(); + return std::error_code(); } llvm_unreachable("All rootName and rootDirectory combinations should have " "occurred above!"); } -error_code create_directories(const Twine &Path, bool IgnoreExisting) { +std::error_code create_directories(const Twine &Path, bool IgnoreExisting) { SmallString<128> PathStorage; StringRef P = Path.toStringRef(PathStorage); // Be optimistic and try to create the directory - error_code EC = create_directory(P, IgnoreExisting); + std::error_code EC = create_directory(P, IgnoreExisting); // If we succeeded, or had any error other than the parent not existing, just // return it. if (EC != errc::no_such_file_or_directory) @@ -844,6 +846,40 @@ error_code create_directories(const Twine &Path, bool IgnoreExisting) { return create_directory(P, IgnoreExisting); } +std::error_code copy_file(const Twine &From, const Twine &To) { + int ReadFD, WriteFD; + if (std::error_code EC = openFileForRead(From, ReadFD)) + return EC; + if (std::error_code EC = openFileForWrite(To, WriteFD, F_None)) { + close(ReadFD); + return EC; + } + + const size_t BufSize = 4096; + char *Buf = new char[BufSize]; + int BytesRead = 0, BytesWritten = 0; + for (;;) { + BytesRead = read(ReadFD, Buf, BufSize); + if (BytesRead <= 0) + break; + while (BytesRead) { + BytesWritten = write(WriteFD, Buf, BytesRead); + if (BytesWritten < 0) + break; + BytesRead -= BytesWritten; + } + if (BytesWritten < 0) + break; + } + close(ReadFD); + close(WriteFD); + delete[] Buf; + + if (BytesRead < 0 || BytesWritten < 0) + return std::error_code(errno, std::generic_category()); + return std::error_code(); +} + bool exists(file_status status) { return status_known(status) && status.type() != file_type::file_not_found; } @@ -856,24 +892,24 @@ bool is_directory(file_status status) { return status.type() == file_type::directory_file; } -error_code is_directory(const Twine &path, bool &result) { +std::error_code is_directory(const Twine &path, bool &result) { file_status st; - if (error_code ec = status(path, st)) + if (std::error_code ec = status(path, st)) return ec; result = is_directory(st); - return error_code::success(); + return std::error_code(); } bool is_regular_file(file_status status) { return status.type() == file_type::regular_file; } -error_code is_regular_file(const Twine &path, bool &result) { +std::error_code is_regular_file(const Twine &path, bool &result) { file_status st; - if (error_code ec = status(path, st)) + if (std::error_code ec = status(path, st)) return ec; result = is_regular_file(st); - return error_code::success(); + return std::error_code(); } bool is_other(file_status status) { @@ -890,26 +926,8 @@ void directory_entry::replace_filename(const Twine &filename, file_status st) { Status = st; } -error_code has_magic(const Twine &path, const Twine &magic, bool &result) { - SmallString<32> MagicStorage; - StringRef Magic = magic.toStringRef(MagicStorage); - SmallString<32> Buffer; - - if (error_code ec = get_magic(path, Magic.size(), Buffer)) { - if (ec == errc::value_too_large) { - // Magic.size() > file_size(Path). 
- result = false; - return error_code::success(); - } - return ec; - } - - result = Magic == Buffer; - return error_code::success(); -} - /// @brief Identify the magic in magic. - file_magic identify_magic(StringRef Magic) { +file_magic identify_magic(StringRef Magic) { if (Magic.size() < 4) return file_magic::unknown; switch ((unsigned char)Magic[0]) { @@ -1040,17 +1058,21 @@ error_code has_magic(const Twine &path, const Twine &magic, bool &result) { return file_magic::unknown; } -error_code identify_magic(const Twine &path, file_magic &result) { - SmallString<32> Magic; - error_code ec = get_magic(path, Magic.capacity(), Magic); - if (ec && ec != errc::value_too_large) - return ec; +std::error_code identify_magic(const Twine &Path, file_magic &Result) { + int FD; + if (std::error_code EC = openFileForRead(Path, FD)) + return EC; + + char Buffer[32]; + int Length = read(FD, Buffer, sizeof(Buffer)); + if (close(FD) != 0 || Length < 0) + return std::error_code(errno, std::generic_category()); - result = identify_magic(Magic); - return error_code::success(); + Result = identify_magic(StringRef(Buffer, Length)); + return std::error_code(); } -error_code directory_entry::status(file_status &result) const { +std::error_code directory_entry::status(file_status &result) const { return fs::status(Path, result); } diff --git a/lib/Support/Process.cpp b/lib/Support/Process.cpp index 0380ed9..0d42e0e 100644 --- a/lib/Support/Process.cpp +++ b/lib/Support/Process.cpp @@ -7,13 +7,16 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Process concept. +// This file implements the operating system Process concept. // //===----------------------------------------------------------------------===// +#include "llvm/ADT/StringExtras.h" #include "llvm/Config/config.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Process.h" +#include "llvm/Support/Program.h" using namespace llvm; using namespace sys; @@ -66,6 +69,33 @@ TimeValue self_process::get_wall_time() const { return getElapsedWallTime(); } +Optional<std::string> Process::FindInEnvPath(const std::string& EnvName, + const std::string& FileName) +{ + Optional<std::string> FoundPath; + Optional<std::string> OptPath = Process::GetEnv(EnvName); + if (!OptPath.hasValue()) + return FoundPath; + + const char EnvPathSeparatorStr[] = {EnvPathSeparator, '\0'}; + SmallVector<StringRef, 8> Dirs; + SplitString(OptPath.getValue(), Dirs, EnvPathSeparatorStr); + + for (const auto &Dir : Dirs) { + if (Dir.empty()) + continue; + + SmallString<128> FilePath(Dir); + path::append(FilePath, FileName); + if (fs::exists(Twine(FilePath))) { + FoundPath = FilePath.str(); + break; + } + } + + return FoundPath; +} + #define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m" diff --git a/lib/Support/Program.cpp b/lib/Support/Program.cpp index 83f2ec4..b84b82b 100644 --- a/lib/Support/Program.cpp +++ b/lib/Support/Program.cpp @@ -7,13 +7,13 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Program concept. +// This file implements the operating system Program concept. 
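Process::FindInEnvPath above splits the named environment variable on the platform separator and returns the first directory that contains the file. A hedged standalone sketch of the same search using only POSIX and the standard library, assuming the Unix ':' separator:

#include <cstdlib>
#include <string>
#include <unistd.h>

// EnvName and FileName are caller-supplied; ':' is the Unix separator
// (the real code uses the platform EnvPathSeparator).
static bool findInEnvPath(const std::string &EnvName,
                          const std::string &FileName, std::string &Result) {
  const char *Raw = std::getenv(EnvName.c_str());
  if (!Raw)
    return false;
  const std::string Path(Raw);
  for (size_t Begin = 0; Begin <= Path.size();) {
    size_t End = Path.find(':', Begin);
    if (End == std::string::npos)
      End = Path.size();
    const std::string Dir = Path.substr(Begin, End - Begin);
    if (!Dir.empty()) {
      const std::string Candidate = Dir + "/" + FileName;
      if (access(Candidate.c_str(), F_OK) == 0) {
        Result = Candidate; // first hit wins, as in the LLVM version
        return true;
      }
    }
    Begin = End + 1;
  }
  return false;
}

int main() {
  std::string Where;
  return findInEnvPath("PATH", "ls", Where) ? 0 : 1;
}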
// //===----------------------------------------------------------------------===// #include "llvm/Support/Program.h" #include "llvm/Config/config.h" -#include "llvm/Support/system_error.h" +#include <system_error> using namespace llvm; using namespace sys; @@ -34,7 +34,8 @@ int sys::ExecuteAndWait(StringRef Program, const char **args, const char **envp, if (Execute(PI, Program, args, envp, redirects, memoryLimit, ErrMsg)) { if (ExecutionFailed) *ExecutionFailed = false; - ProcessInfo Result = Wait(PI, secondsToWait, true, ErrMsg); + ProcessInfo Result = Wait( + PI, secondsToWait, /*WaitUntilTerminates=*/secondsToWait == 0, ErrMsg); return Result.ReturnCode; } diff --git a/lib/Support/RandomNumberGenerator.cpp b/lib/Support/RandomNumberGenerator.cpp new file mode 100644 index 0000000..c50e7cb --- /dev/null +++ b/lib/Support/RandomNumberGenerator.cpp @@ -0,0 +1,61 @@ +//===-- RandomNumberGenerator.cpp - Implement RNG class -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements random number generation (RNG). +// The current implementation is NOT cryptographically secure as it uses +// the C++11 <random> facilities. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "rng" +#include "llvm/Support/RandomNumberGenerator.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +// Tracking BUG: 19665 +// http://llvm.org/bugs/show_bug.cgi?id=19665 +// +// Do not change to cl::opt<uint64_t> since this silently breaks argument parsing. +static cl::opt<unsigned long long> +Seed("rng-seed", cl::value_desc("seed"), + cl::desc("Seed for the random number generator"), cl::init(0)); + +RandomNumberGenerator::RandomNumberGenerator(StringRef Salt) { + DEBUG( + if (Seed == 0) + errs() << "Warning! Using unseeded random number generator.\n" + ); + + // Combine seed and salt using std::seed_seq. + // Entropy: Seed-low, Seed-high, Salt... + std::vector<uint32_t> Data; + Data.reserve(2 + Salt.size()/4 + 1); + Data.push_back(Seed); + Data.push_back(Seed >> 32); + + uint32_t Pack = 0; + for (size_t I = 0; I < Salt.size(); ++I) { + Pack <<= 8; + Pack += Salt[I]; + + if (I%4 == 3) + Data.push_back(Pack); + } + Data.push_back(Pack); + + std::seed_seq SeedSeq(Data.begin(), Data.end()); + Generator.seed(SeedSeq); +} + +uint64_t RandomNumberGenerator::next(uint64_t Max) { + std::uniform_int_distribution<uint64_t> distribution(0, Max - 1); + return distribution(Generator); +} diff --git a/lib/Support/ScaledNumber.cpp b/lib/Support/ScaledNumber.cpp new file mode 100644 index 0000000..3fe027b --- /dev/null +++ b/lib/Support/ScaledNumber.cpp @@ -0,0 +1,319 @@ +//==- lib/Support/ScaledNumber.cpp - Support for scaled numbers -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of some scaled number algorithms. 
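The RandomNumberGenerator constructor shown above folds the 64-bit -rng-seed value and a per-client salt string into a std::seed_seq. A compressed, self-contained sketch of the same mixing; the engine type is hidden in the header, so std::mt19937_64 is an assumption here:

#include <cstdint>
#include <iostream>
#include <random>
#include <string>
#include <vector>

int main() {
  const uint64_t Seed = 42;           // stand-in for the -rng-seed option
  const std::string Salt = "inliner"; // stand-in for the per-client salt

  // Entropy layout mirrors the constructor: seed-low, seed-high, then the
  // salt packed four bytes per 32-bit word.
  std::vector<uint32_t> Data;
  Data.push_back(static_cast<uint32_t>(Seed));
  Data.push_back(static_cast<uint32_t>(Seed >> 32));

  uint32_t Pack = 0;
  for (size_t I = 0; I < Salt.size(); ++I) {
    Pack <<= 8; // older bytes fall off the 32-bit word on their own
    Pack += static_cast<unsigned char>(Salt[I]);
    if (I % 4 == 3)
      Data.push_back(Pack);
  }
  Data.push_back(Pack); // trailing (possibly partial) word, as above

  std::seed_seq SeedSeq(Data.begin(), Data.end());
  std::mt19937_64 Generator(SeedSeq); // engine type assumed, see lead-in

  // next(Max) in the original draws uniformly from [0, Max).
  std::uniform_int_distribution<uint64_t> Dist(0, 99);
  std::cout << Dist(Generator) << "\n";
  return 0;
}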
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/ScaledNumber.h" + +#include "llvm/ADT/APFloat.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; +using namespace llvm::ScaledNumbers; + +std::pair<uint64_t, int16_t> ScaledNumbers::multiply64(uint64_t LHS, + uint64_t RHS) { + // Separate into two 32-bit digits (U.L). + auto getU = [](uint64_t N) { return N >> 32; }; + auto getL = [](uint64_t N) { return N & UINT32_MAX; }; + uint64_t UL = getU(LHS), LL = getL(LHS), UR = getU(RHS), LR = getL(RHS); + + // Compute cross products. + uint64_t P1 = UL * UR, P2 = UL * LR, P3 = LL * UR, P4 = LL * LR; + + // Sum into two 64-bit digits. + uint64_t Upper = P1, Lower = P4; + auto addWithCarry = [&](uint64_t N) { + uint64_t NewLower = Lower + (getL(N) << 32); + Upper += getU(N) + (NewLower < Lower); + Lower = NewLower; + }; + addWithCarry(P2); + addWithCarry(P3); + + // Check whether the upper digit is empty. + if (!Upper) + return std::make_pair(Lower, 0); + + // Shift as little as possible to maximize precision. + unsigned LeadingZeros = countLeadingZeros(Upper); + int Shift = 64 - LeadingZeros; + if (LeadingZeros) + Upper = Upper << LeadingZeros | Lower >> Shift; + return getRounded(Upper, Shift, + Shift && (Lower & UINT64_C(1) << (Shift - 1))); +} + +static uint64_t getHalf(uint64_t N) { return (N >> 1) + (N & 1); } + +std::pair<uint32_t, int16_t> ScaledNumbers::divide32(uint32_t Dividend, + uint32_t Divisor) { + assert(Dividend && "expected non-zero dividend"); + assert(Divisor && "expected non-zero divisor"); + + // Use 64-bit math and canonicalize the dividend to gain precision. + uint64_t Dividend64 = Dividend; + int Shift = 0; + if (int Zeros = countLeadingZeros(Dividend64)) { + Shift -= Zeros; + Dividend64 <<= Zeros; + } + uint64_t Quotient = Dividend64 / Divisor; + uint64_t Remainder = Dividend64 % Divisor; + + // If Quotient needs to be shifted, leave the rounding to getAdjusted(). + if (Quotient > UINT32_MAX) + return getAdjusted<uint32_t>(Quotient, Shift); + + // Round based on the value of the next bit. + return getRounded<uint32_t>(Quotient, Shift, Remainder >= getHalf(Divisor)); +} + +std::pair<uint64_t, int16_t> ScaledNumbers::divide64(uint64_t Dividend, + uint64_t Divisor) { + assert(Dividend && "expected non-zero dividend"); + assert(Divisor && "expected non-zero divisor"); + + // Minimize size of divisor. + int Shift = 0; + if (int Zeros = countTrailingZeros(Divisor)) { + Shift -= Zeros; + Divisor >>= Zeros; + } + + // Check for powers of two. + if (Divisor == 1) + return std::make_pair(Dividend, Shift); + + // Maximize size of dividend. + if (int Zeros = countLeadingZeros(Dividend)) { + Shift -= Zeros; + Dividend <<= Zeros; + } + + // Start with the result of a divide. + uint64_t Quotient = Dividend / Divisor; + Dividend %= Divisor; + + // Continue building the quotient with long division. + while (!(Quotient >> 63) && Dividend) { + // Shift Dividend and check for overflow. + bool IsOverflow = Dividend >> 63; + Dividend <<= 1; + --Shift; + + // Get the next bit of Quotient. 
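multiply64 above splits each operand into two 32-bit digits, forms the four cross products, and accumulates them into a 128-bit result held as two 64-bit digits. The same derivation can be cross-checked against unsigned __int128, a GCC/Clang extension that is not part of these sources:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t LHS = 0xDEADBEEFCAFEF00DULL;
  const uint64_t RHS = 0x0123456789ABCDEFULL;

  // Split into two 32-bit digits each, as multiply64 does.
  const uint64_t UL = LHS >> 32, LL = LHS & UINT32_MAX;
  const uint64_t UR = RHS >> 32, LR = RHS & UINT32_MAX;

  // Four cross products, summed into an upper and a lower 64-bit digit.
  uint64_t Upper = UL * UR, Lower = LL * LR;
  for (uint64_t Mid : {UL * LR, LL * UR}) {
    const uint64_t NewLower = Lower + ((Mid & UINT32_MAX) << 32);
    Upper += (Mid >> 32) + (NewLower < Lower); // carry out of the low digit
    Lower = NewLower;
  }

  // Cross-check against the compiler's native 128-bit multiply.
  const unsigned __int128 Wide = static_cast<unsigned __int128>(LHS) * RHS;
  assert(Upper == static_cast<uint64_t>(Wide >> 64));
  assert(Lower == static_cast<uint64_t>(Wide));
  return 0;
}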
+ Quotient <<= 1; + if (IsOverflow || Divisor <= Dividend) { + Quotient |= 1; + Dividend -= Divisor; + } + } + + return getRounded(Quotient, Shift, Dividend >= getHalf(Divisor)); +} + +int ScaledNumbers::compareImpl(uint64_t L, uint64_t R, int ScaleDiff) { + assert(ScaleDiff >= 0 && "wrong argument order"); + assert(ScaleDiff < 64 && "numbers too far apart"); + + uint64_t L_adjusted = L >> ScaleDiff; + if (L_adjusted < R) + return -1; + if (L_adjusted > R) + return 1; + + return L > L_adjusted << ScaleDiff ? 1 : 0; +} + +static void appendDigit(std::string &Str, unsigned D) { + assert(D < 10); + Str += '0' + D % 10; +} + +static void appendNumber(std::string &Str, uint64_t N) { + while (N) { + appendDigit(Str, N % 10); + N /= 10; + } +} + +static bool doesRoundUp(char Digit) { + switch (Digit) { + case '5': + case '6': + case '7': + case '8': + case '9': + return true; + default: + return false; + } +} + +static std::string toStringAPFloat(uint64_t D, int E, unsigned Precision) { + assert(E >= ScaledNumbers::MinScale); + assert(E <= ScaledNumbers::MaxScale); + + // Find a new E, but don't let it increase past MaxScale. + int LeadingZeros = ScaledNumberBase::countLeadingZeros64(D); + int NewE = std::min(ScaledNumbers::MaxScale, E + 63 - LeadingZeros); + int Shift = 63 - (NewE - E); + assert(Shift <= LeadingZeros); + assert(Shift == LeadingZeros || NewE == ScaledNumbers::MaxScale); + D <<= Shift; + E = NewE; + + // Check for a denormal. + unsigned AdjustedE = E + 16383; + if (!(D >> 63)) { + assert(E == ScaledNumbers::MaxScale); + AdjustedE = 0; + } + + // Build the float and print it. + uint64_t RawBits[2] = {D, AdjustedE}; + APFloat Float(APFloat::x87DoubleExtended, APInt(80, RawBits)); + SmallVector<char, 24> Chars; + Float.toString(Chars, Precision, 0); + return std::string(Chars.begin(), Chars.end()); +} + +static std::string stripTrailingZeros(const std::string &Float) { + size_t NonZero = Float.find_last_not_of('0'); + assert(NonZero != std::string::npos && "no . in floating point string"); + + if (Float[NonZero] == '.') + ++NonZero; + + return Float.substr(0, NonZero + 1); +} + +std::string ScaledNumberBase::toString(uint64_t D, int16_t E, int Width, + unsigned Precision) { + if (!D) + return "0.0"; + + // Canonicalize exponent and digits. + uint64_t Above0 = 0; + uint64_t Below0 = 0; + uint64_t Extra = 0; + int ExtraShift = 0; + if (E == 0) { + Above0 = D; + } else if (E > 0) { + if (int Shift = std::min(int16_t(countLeadingZeros64(D)), E)) { + D <<= Shift; + E -= Shift; + + if (!E) + Above0 = D; + } + } else if (E > -64) { + Above0 = D >> -E; + Below0 = D << (64 + E); + } else if (E > -120) { + Below0 = D >> (-E - 64); + Extra = D << (128 + E); + ExtraShift = -64 - E; + } + + // Fall back on APFloat for very small and very large numbers. + if (!Above0 && !Below0) + return toStringAPFloat(D, E, Precision); + + // Append the digits before the decimal. + std::string Str; + size_t DigitsOut = 0; + if (Above0) { + appendNumber(Str, Above0); + DigitsOut = Str.size(); + } else + appendDigit(Str, 0); + std::reverse(Str.begin(), Str.end()); + + // Return early if there's nothing after the decimal. + if (!Below0) + return Str + ".0"; + + // Append the decimal and beyond. + Str += '.'; + uint64_t Error = UINT64_C(1) << (64 - Width); + + // We need to shift Below0 to the right to make space for calculating + // digits. Save the precision we're losing in Extra. 
+ Extra = (Below0 & 0xf) << 56 | (Extra >> 8); + Below0 >>= 4; + size_t SinceDot = 0; + size_t AfterDot = Str.size(); + do { + if (ExtraShift) { + --ExtraShift; + Error *= 5; + } else + Error *= 10; + + Below0 *= 10; + Extra *= 10; + Below0 += (Extra >> 60); + Extra = Extra & (UINT64_MAX >> 4); + appendDigit(Str, Below0 >> 60); + Below0 = Below0 & (UINT64_MAX >> 4); + if (DigitsOut || Str.back() != '0') + ++DigitsOut; + ++SinceDot; + } while (Error && (Below0 << 4 | Extra >> 60) >= Error / 2 && + (!Precision || DigitsOut <= Precision || SinceDot < 2)); + + // Return early for maximum precision. + if (!Precision || DigitsOut <= Precision) + return stripTrailingZeros(Str); + + // Find where to truncate. + size_t Truncate = + std::max(Str.size() - (DigitsOut - Precision), AfterDot + 1); + + // Check if there's anything to truncate. + if (Truncate >= Str.size()) + return stripTrailingZeros(Str); + + bool Carry = doesRoundUp(Str[Truncate]); + if (!Carry) + return stripTrailingZeros(Str.substr(0, Truncate)); + + // Round with the first truncated digit. + for (std::string::reverse_iterator I(Str.begin() + Truncate), E = Str.rend(); + I != E; ++I) { + if (*I == '.') + continue; + if (*I == '9') { + *I = '0'; + continue; + } + + ++*I; + Carry = false; + break; + } + + // Add "1" in front if we still need to carry. + return stripTrailingZeros(std::string(Carry, '1') + Str.substr(0, Truncate)); +} + +raw_ostream &ScaledNumberBase::print(raw_ostream &OS, uint64_t D, int16_t E, + int Width, unsigned Precision) { + return OS << toString(D, E, Width, Precision); +} + +void ScaledNumberBase::dump(uint64_t D, int16_t E, int Width) { + print(dbgs(), D, E, Width, 0) << "[" << Width << ":" << D << "*2^" << E + << "]"; +} diff --git a/lib/Support/SourceMgr.cpp b/lib/Support/SourceMgr.cpp index acd75fb..003cb56 100644 --- a/lib/Support/SourceMgr.cpp +++ b/lib/Support/SourceMgr.cpp @@ -20,14 +20,14 @@ #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" +#include <system_error> using namespace llvm; static const size_t TabStop = 8; namespace { struct LineNoCacheTy { - int LastQueryBufferID; + unsigned LastQueryBufferID; const char *LastQuery; unsigned LineNoOfQuery; }; @@ -49,48 +49,44 @@ SourceMgr::~SourceMgr() { } } -/// AddIncludeFile - Search for a file with the specified name in the current -/// directory or in one of the IncludeDirs. If no file is found, this returns -/// ~0, otherwise it returns the buffer ID of the stacked file. -size_t SourceMgr::AddIncludeFile(const std::string &Filename, - SMLoc IncludeLoc, - std::string &IncludedFile) { - std::unique_ptr<MemoryBuffer> NewBuf; +unsigned SourceMgr::AddIncludeFile(const std::string &Filename, + SMLoc IncludeLoc, + std::string &IncludedFile) { IncludedFile = Filename; - MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf); + ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr = + MemoryBuffer::getFile(IncludedFile.c_str()); // If the file didn't exist directly, see if it's in an include path. 
- for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBuf; ++i) { - IncludedFile = IncludeDirectories[i] + sys::path::get_separator().data() + Filename; - MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf); + for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr; + ++i) { + IncludedFile = + IncludeDirectories[i] + sys::path::get_separator().data() + Filename; + NewBufOrErr = MemoryBuffer::getFile(IncludedFile.c_str()); } - if (!NewBuf) return ~0U; + if (!NewBufOrErr) + return 0; - return AddNewSourceBuffer(NewBuf.release(), IncludeLoc); + return AddNewSourceBuffer(NewBufOrErr.get().release(), IncludeLoc); } - -/// FindBufferContainingLoc - Return the ID of the buffer containing the -/// specified location, returning -1 if not found. -int SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { +unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const { for (unsigned i = 0, e = Buffers.size(); i != e; ++i) if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() && // Use <= here so that a pointer to the null at the end of the buffer // is included as part of the buffer. Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd()) - return i; - return -1; + return i + 1; + return 0; } -/// getLineAndColumn - Find the line and column number for the specified -/// location in the specified file. This is not a fast method. std::pair<unsigned, unsigned> -SourceMgr::getLineAndColumn(SMLoc Loc, int BufferID) const { - if (BufferID == -1) BufferID = FindBufferContainingLoc(Loc); - assert(BufferID != -1 && "Invalid Location!"); +SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const { + if (!BufferID) + BufferID = FindBufferContainingLoc(Loc); + assert(BufferID && "Invalid Location!"); - MemoryBuffer *Buff = getBufferInfo(BufferID).Buffer; + const MemoryBuffer *Buff = getMemoryBuffer(BufferID); // Count the number of \n's between the start of the file and the specified // location. @@ -132,8 +128,8 @@ SourceMgr::getLineAndColumn(SMLoc Loc, int BufferID) const { void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { if (IncludeLoc == SMLoc()) return; // Top of stack. - int CurBuf = FindBufferContainingLoc(IncludeLoc); - assert(CurBuf != -1 && "Invalid or unspecified location!"); + unsigned CurBuf = FindBufferContainingLoc(IncludeLoc); + assert(CurBuf && "Invalid or unspecified location!"); PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); @@ -143,11 +139,6 @@ void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const { } -/// GetMessage - Return an SMDiagnostic at the specified location with the -/// specified string. -/// -/// @param Type - If non-null, the kind of message (e.g., "error") which is -/// prefixed to the message. SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, ArrayRef<SMRange> Ranges, @@ -161,10 +152,10 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, std::string LineStr; if (Loc.isValid()) { - int CurBuf = FindBufferContainingLoc(Loc); - assert(CurBuf != -1 && "Invalid or unspecified location!"); + unsigned CurBuf = FindBufferContainingLoc(Loc); + assert(CurBuf && "Invalid or unspecified location!"); - MemoryBuffer *CurMB = getBufferInfo(CurBuf).Buffer; + const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf); BufferID = CurMB->getBufferIdentifier(); // Scan backward to find the start of the line. 
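SourceMgr's buffer identifiers switch here from signed indices with -1/~0U sentinels to unsigned 1-based IDs where 0 means invalid, as the rewritten FindBufferContainingLoc and AddIncludeFile show. The convention in isolation, with a toy buffer list standing in for SourceMgr:

#include <cassert>
#include <string>
#include <vector>

// 1-based handles over a vector, with 0 reserved as "not found" -- the
// convention SourceMgr adopts in this patch for buffer IDs.
class BufferList {
  std::vector<std::string> Buffers;

public:
  unsigned add(std::string Text) {
    Buffers.push_back(std::move(Text));
    return Buffers.size();            // ID of the new buffer, never 0
  }
  unsigned findContaining(char C) const {
    for (unsigned I = 0, E = Buffers.size(); I != E; ++I)
      if (Buffers[I].find(C) != std::string::npos)
        return I + 1;                 // shift the 0-based index up by one
    return 0;                         // 0 means "no buffer"
  }
  const std::string &get(unsigned ID) const {
    assert(ID && ID <= Buffers.size() && "invalid buffer ID");
    return Buffers[ID - 1];
  }
};

int main() {
  BufferList BL;
  unsigned ID = BL.add("hello");
  assert(BL.findContaining('e') == ID);
  assert(BL.findContaining('z') == 0);
  return 0;
}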
@@ -211,27 +202,30 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, LineStr, ColRanges, FixIts); } -void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc, - SourceMgr::DiagKind Kind, - const Twine &Msg, ArrayRef<SMRange> Ranges, - ArrayRef<SMFixIt> FixIts, bool ShowColors) const { - SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges, FixIts); - +void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic, + bool ShowColors) const { // Report the message with the diagnostic handler if present. if (DiagHandler) { DiagHandler(Diagnostic, DiagContext); return; } - if (Loc != SMLoc()) { - int CurBuf = FindBufferContainingLoc(Loc); - assert(CurBuf != -1 && "Invalid or unspecified location!"); + if (Diagnostic.getLoc().isValid()) { + unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc()); + assert(CurBuf && "Invalid or unspecified location!"); PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS); } Diagnostic.print(nullptr, OS, ShowColors); } +void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc, + SourceMgr::DiagKind Kind, + const Twine &Msg, ArrayRef<SMRange> Ranges, + ArrayRef<SMFixIt> FixIts, bool ShowColors) const { + PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors); +} + void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, ArrayRef<SMRange> Ranges, ArrayRef<SMFixIt> FixIts, bool ShowColors) const { diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Support/SpecialCaseList.cpp index 2c6fcd1..21e43c5 100644 --- a/lib/Transforms/Utils/SpecialCaseList.cpp +++ b/lib/Support/SpecialCaseList.cpp @@ -14,20 +14,16 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Utils/SpecialCaseList.h" +#include "llvm/Support/SpecialCaseList.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSet.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/GlobalVariable.h" -#include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" #include <string> +#include <system_error> #include <utility> namespace llvm { @@ -38,10 +34,12 @@ namespace llvm { /// reason for doing so is efficiency; StringSet is much faster at matching /// literal strings than Regex. 
struct SpecialCaseList::Entry { - StringSet<> Strings; - Regex *RegEx; + Entry() {} + Entry(Entry &&Other) + : Strings(std::move(Other.Strings)), RegEx(std::move(Other.RegEx)) {} - Entry() : RegEx(nullptr) {} + StringSet<> Strings; + std::unique_ptr<Regex> RegEx; bool match(StringRef Query) const { return Strings.count(Query) || (RegEx && RegEx->match(Query)); @@ -54,12 +52,13 @@ SpecialCaseList *SpecialCaseList::create( const StringRef Path, std::string &Error) { if (Path.empty()) return new SpecialCaseList(); - std::unique_ptr<MemoryBuffer> File; - if (error_code EC = MemoryBuffer::getFile(Path, File)) { + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFile(Path); + if (std::error_code EC = FileOrErr.getError()) { Error = (Twine("Can't open file '") + Path + "': " + EC.message()).str(); return nullptr; } - return create(File.get(), Error); + return create(FileOrErr.get().get(), Error); } SpecialCaseList *SpecialCaseList::create( @@ -150,66 +149,16 @@ bool SpecialCaseList::parse(const MemoryBuffer *MB, std::string &Error) { for (StringMap<std::string>::const_iterator II = I->second.begin(), IE = I->second.end(); II != IE; ++II) { - Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue()); + Entries[I->getKey()][II->getKey()].RegEx.reset(new Regex(II->getValue())); } } return true; } -SpecialCaseList::~SpecialCaseList() { - for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(), - E = Entries.end(); - I != E; ++I) { - for (StringMap<Entry>::const_iterator II = I->second.begin(), - IE = I->second.end(); - II != IE; ++II) { - delete II->second.RegEx; - } - } -} - -bool SpecialCaseList::isIn(const Function& F, const StringRef Category) const { - return isIn(*F.getParent(), Category) || - inSectionCategory("fun", F.getName(), Category); -} - -static StringRef GetGlobalTypeString(const GlobalValue &G) { - // Types of GlobalVariables are always pointer types. - Type *GType = G.getType()->getElementType(); - // For now we support blacklisting struct types only. 
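SpecialCaseList::Entry now owns its Regex through std::unique_ptr and picks up a move constructor, which is what lets the explicit delete loop in the old destructor disappear below. The ownership pattern in miniature, with a hypothetical Widget standing in for llvm::Regex:

#include <map>
#include <memory>
#include <string>
#include <utility>

// Stand-in for llvm::Regex: any heap-owned resource works.
struct Widget {
  explicit Widget(std::string P) : Pattern(std::move(P)) {}
  std::string Pattern;
};

struct Entry {
  Entry() {}
  Entry(Entry &&Other) : RegEx(std::move(Other.RegEx)) {}

  std::unique_ptr<Widget> RegEx; // owned; freed automatically, so no
                                 // explicit destructor loop is needed
};

int main() {
  std::map<std::string, Entry> Entries;
  Entries["fun"].RegEx.reset(new Widget("foo.*")); // mirrors parse()
  Entry Moved = std::move(Entries["fun"]);         // ownership transfers
  return Moved.RegEx ? 0 : 1;
}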
- if (StructType *SGType = dyn_cast<StructType>(GType)) { - if (!SGType->isLiteral()) - return SGType->getName(); - } - return "<unknown type>"; -} - -bool SpecialCaseList::isIn(const GlobalVariable &G, - const StringRef Category) const { - return isIn(*G.getParent(), Category) || - inSectionCategory("global", G.getName(), Category) || - inSectionCategory("type", GetGlobalTypeString(G), Category); -} - -bool SpecialCaseList::isIn(const GlobalAlias &GA, - const StringRef Category) const { - if (isIn(*GA.getParent(), Category)) - return true; - - if (isa<FunctionType>(GA.getType()->getElementType())) - return inSectionCategory("fun", GA.getName(), Category); - - return inSectionCategory("global", GA.getName(), Category) || - inSectionCategory("type", GetGlobalTypeString(GA), Category); -} - -bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const { - return inSectionCategory("src", M.getModuleIdentifier(), Category); -} +SpecialCaseList::~SpecialCaseList() {} -bool SpecialCaseList::inSectionCategory(const StringRef Section, - const StringRef Query, - const StringRef Category) const { +bool SpecialCaseList::inSection(const StringRef Section, const StringRef Query, + const StringRef Category) const { StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section); if (I == Entries.end()) return false; StringMap<Entry>::const_iterator II = I->second.find(Category); diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp index 72a6d82..ddb7349 100644 --- a/lib/Support/StringMap.cpp +++ b/lib/Support/StringMap.cpp @@ -181,7 +181,7 @@ StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) { /// RehashTable - Grow the table, redistributing values into the buckets with /// the appropriate mod-of-hashtable-size. -void StringMapImpl::RehashTable() { +unsigned StringMapImpl::RehashTable(unsigned BucketNo) { unsigned NewSize; unsigned *HashTable = (unsigned *)(TheTable + NumBuckets + 1); @@ -193,9 +193,10 @@ void StringMapImpl::RehashTable() { } else if (NumBuckets-(NumItems+NumTombstones) <= NumBuckets/8) { NewSize = NumBuckets; } else { - return; + return BucketNo; } + unsigned NewBucketNo = BucketNo; // Allocate one extra bucket which will always be non-empty. This allows the // iterators to stop at end. StringMapEntryBase **NewTableArray = @@ -215,6 +216,8 @@ void StringMapImpl::RehashTable() { if (!NewTableArray[NewBucket]) { NewTableArray[FullHash & (NewSize-1)] = Bucket; NewHashArray[FullHash & (NewSize-1)] = FullHash; + if (I == BucketNo) + NewBucketNo = NewBucket; continue; } @@ -227,6 +230,8 @@ void StringMapImpl::RehashTable() { // Finally found a slot. Fill it in. 
NewTableArray[NewBucket] = Bucket; NewHashArray[NewBucket] = FullHash; + if (I == BucketNo) + NewBucketNo = NewBucket; } } @@ -235,4 +240,5 @@ void StringMapImpl::RehashTable() { TheTable = NewTableArray; NumBuckets = NewSize; NumTombstones = 0; + return NewBucketNo; } diff --git a/lib/Support/StringPool.cpp b/lib/Support/StringPool.cpp index ff607cf..76faabc 100644 --- a/lib/Support/StringPool.cpp +++ b/lib/Support/StringPool.cpp @@ -27,7 +27,7 @@ PooledStringPtr StringPool::intern(StringRef Key) { if (I != InternTable.end()) return PooledStringPtr(&*I); - entry_t *S = entry_t::Create(Key.begin(), Key.end()); + entry_t *S = entry_t::Create(Key); S->getValue().Pool = this; InternTable.insert(S); diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp index a008831..f691883 100644 --- a/lib/Support/TargetRegistry.cpp +++ b/lib/Support/TargetRegistry.cpp @@ -116,17 +116,6 @@ void TargetRegistry::RegisterTarget(Target &T, T.HasJIT = HasJIT; } -const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) { - const Target *TheTarget = lookupTarget(sys::getDefaultTargetTriple(), Error); - - if (TheTarget && !TheTarget->hasJIT()) { - Error = "No JIT compatible target available for this host"; - return nullptr; - } - - return TheTarget; -} - static int TargetArraySortFn(const std::pair<StringRef, const Target *> *LHS, const std::pair<StringRef, const Target *> *RHS) { return LHS->first.compare(RHS->first); diff --git a/lib/Support/Threading.cpp b/lib/Support/Threading.cpp index 1acfa79..ca7f3f6 100644 --- a/lib/Support/Threading.cpp +++ b/lib/Support/Threading.cpp @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements llvm_start_multithreaded() and friends. +// This file defines helper functions for running LLVM in a multi-threaded +// environment. // //===----------------------------------------------------------------------===// @@ -19,50 +20,14 @@ using namespace llvm; -static bool multithreaded_mode = false; - -static sys::Mutex* global_lock = nullptr; - -bool llvm::llvm_start_multithreaded() { +bool llvm::llvm_is_multithreaded() { #if LLVM_ENABLE_THREADS != 0 - assert(!multithreaded_mode && "Already multithreaded!"); - multithreaded_mode = true; - global_lock = new sys::Mutex(true); - - // We fence here to ensure that all initialization is complete BEFORE we - // return from llvm_start_multithreaded(). - sys::MemoryFence(); return true; #else return false; #endif } -void llvm::llvm_stop_multithreaded() { -#if LLVM_ENABLE_THREADS != 0 - assert(multithreaded_mode && "Not currently multithreaded!"); - - // We fence here to insure that all threaded operations are complete BEFORE we - // return from llvm_stop_multithreaded(). 
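RehashTable now takes the caller's bucket index and returns where that bucket landed in the grown table, since growth invalidates the old index. A miniature of the same bookkeeping over a toy open-addressed table, with linear probing and std::hash standing in for StringMap's internals:

#include <cassert>
#include <functional>
#include <string>
#include <vector>

static unsigned rehash(std::vector<const std::string *> &Table,
                       unsigned BucketNo) {
  // Double the table (size stays a power of two so masking works).
  std::vector<const std::string *> NewTable(Table.size() * 2, nullptr);
  unsigned NewBucketNo = BucketNo;
  for (unsigned I = 0, E = Table.size(); I != E; ++I) {
    const std::string *S = Table[I];
    if (!S)
      continue;
    size_t Slot = std::hash<std::string>()(*S) & (NewTable.size() - 1);
    while (NewTable[Slot]) // linear probe to a free slot
      Slot = (Slot + 1) & (NewTable.size() - 1);
    NewTable[Slot] = S;
    if (I == BucketNo) // track where the caller's bucket moved
      NewBucketNo = static_cast<unsigned>(Slot);
  }
  Table.swap(NewTable);
  return NewBucketNo;
}

int main() {
  const std::string Key = "key";
  std::vector<const std::string *> Table(8, nullptr);
  const unsigned Old = std::hash<std::string>()(Key) & 7;
  Table[Old] = &Key;
  const unsigned New = rehash(Table, Old);
  assert(Table[New] == &Key); // the returned index stays valid after growth
  return 0;
}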
- sys::MemoryFence(); - - multithreaded_mode = false; - delete global_lock; -#endif -} - -bool llvm::llvm_is_multithreaded() { - return multithreaded_mode; -} - -void llvm::llvm_acquire_global_lock() { - if (multithreaded_mode) global_lock->acquire(); -} - -void llvm::llvm_release_global_lock() { - if (multithreaded_mode) global_lock->release(); -} - #if LLVM_ENABLE_THREADS != 0 && defined(HAVE_PTHREAD_H) #include <pthread.h> diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp index bd8af17..4a70797 100644 --- a/lib/Support/TimeValue.cpp +++ b/lib/Support/TimeValue.cpp @@ -53,7 +53,7 @@ TimeValue::normalize( void ) { } -/// Include the platform specific portion of TimeValue class +/// Include the platform-specific portion of TimeValue class #ifdef LLVM_ON_UNIX #include "Unix/TimeValue.inc" #endif diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 61465ae..210bda7 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Format.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" #include "llvm/Support/Process.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -84,14 +85,13 @@ static TimerGroup *getDefaultTimerGroup() { sys::MemoryFence(); if (tmp) return tmp; - llvm_acquire_global_lock(); + sys::SmartScopedLock<true> Lock(*TimerLock); tmp = DefaultTimerGroup; if (!tmp) { tmp = new TimerGroup("Miscellaneous Ungrouped Timers"); sys::MemoryFence(); DefaultTimerGroup = tmp; } - llvm_release_global_lock(); return tmp; } diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index b3d48fb..b74ee13 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -50,6 +50,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case amdil: return "amdil"; case spir: return "spir"; case spir64: return "spir64"; + case kalimba: return "kalimba"; } llvm_unreachable("Invalid ArchType!"); @@ -101,6 +102,7 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case amdil: return "amdil"; case spir: return "spir"; case spir64: return "spir"; + case kalimba: return "kalimba"; } } @@ -115,7 +117,9 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case BGQ: return "bgq"; case Freescale: return "fsl"; case IBM: return "ibm"; + case ImaginationTechnologies: return "img"; case NVIDIA: return "nvidia"; + case CSR: return "csr"; } llvm_unreachable("Invalid VendorType!"); @@ -207,6 +211,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("amdil", amdil) .Case("spir", spir) .Case("spir64", spir64) + .Case("kalimba", kalimba) .Default(UnknownArch); } @@ -280,6 +285,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("amdil", Triple::amdil) .Case("spir", Triple::spir) .Case("spir64", Triple::spir64) + .Case("kalimba", Triple::kalimba) .Default(Triple::UnknownArch); } @@ -292,7 +298,9 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("bgq", Triple::BGQ) .Case("fsl", Triple::Freescale) .Case("ibm", Triple::IBM) + .Case("img", Triple::ImaginationTechnologies) .Case("nvidia", Triple::NVIDIA) + .Case("csr", Triple::CSR) .Default(Triple::UnknownVendor); } @@ -737,9 +745,8 @@ void Triple::setObjectFormat(ObjectFormatType Kind) { if (Environment == UnknownEnvironment) return setEnvironmentName(getObjectFormatTypeName(Kind)); - Twine Env = getEnvironmentTypeName(Environment) + Twine("-") + - getObjectFormatTypeName(Kind); - setEnvironmentName(Env.str()); + 
setEnvironmentName((getEnvironmentTypeName(Environment) + Twine("-") + + getObjectFormatTypeName(Kind)).str()); } void Triple::setArchName(StringRef Str) { @@ -799,6 +806,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::x86: case llvm::Triple::xcore: case llvm::Triple::spir: + case llvm::Triple::kalimba: return 32; case llvm::Triple::arm64: @@ -850,6 +858,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::arm: case Triple::armeb: case Triple::hexagon: + case Triple::kalimba: case Triple::le32: case Triple::mips: case Triple::mipsel: @@ -884,6 +893,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::arm: case Triple::armeb: case Triple::hexagon: + case Triple::kalimba: case Triple::le32: case Triple::msp430: case Triple::r600: diff --git a/lib/Support/Unix/Memory.inc b/lib/Support/Unix/Memory.inc index 23b49b7..c9d89a8 100644 --- a/lib/Support/Unix/Memory.inc +++ b/lib/Support/Unix/Memory.inc @@ -83,8 +83,8 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, const MemoryBlock *const NearBlock, unsigned PFlags, - error_code &EC) { - EC = error_code::success(); + std::error_code &EC) { + EC = std::error_code(); if (NumBytes == 0) return MemoryBlock(); @@ -95,7 +95,7 @@ Memory::allocateMappedMemory(size_t NumBytes, #ifdef NEED_DEV_ZERO_FOR_MMAP static int zero_fd = open("/dev/zero", O_RDWR); if (zero_fd == -1) { - EC = error_code(errno, system_category()); + EC = std::error_code(errno, std::generic_category()); return MemoryBlock(); } fd = zero_fd; @@ -123,7 +123,7 @@ Memory::allocateMappedMemory(size_t NumBytes, if (NearBlock) //Try again without a near hint return allocateMappedMemory(NumBytes, nullptr, PFlags, EC); - EC = error_code(errno, system_category()); + EC = std::error_code(errno, std::generic_category()); return MemoryBlock(); } @@ -137,38 +137,38 @@ Memory::allocateMappedMemory(size_t NumBytes, return Result; } -error_code +std::error_code Memory::releaseMappedMemory(MemoryBlock &M) { if (M.Address == nullptr || M.Size == 0) - return error_code::success(); + return std::error_code(); if (0 != ::munmap(M.Address, M.Size)) - return error_code(errno, system_category()); + return std::error_code(errno, std::generic_category()); M.Address = nullptr; M.Size = 0; - return error_code::success(); + return std::error_code(); } -error_code +std::error_code Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) { if (M.Address == nullptr || M.Size == 0) - return error_code::success(); + return std::error_code(); if (!Flags) - return error_code(EINVAL, generic_category()); + return std::error_code(EINVAL, std::generic_category()); int Protect = getPosixProtectionFlags(Flags); int Result = ::mprotect(M.Address, M.Size, Protect); if (Result != 0) - return error_code(errno, system_category()); + return std::error_code(errno, std::generic_category()); if (Flags & MF_EXEC) Memory::InvalidateInstructionCache(M.Address, M.Size); - return error_code::success(); + return std::error_code(); } /// AllocateRWX - Allocate a slab of memory with read/write/execute diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 519a016..623547a 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -87,7 +87,7 @@ namespace { }; } -static error_code TempDir(SmallVectorImpl<char> &result) { +static std::error_code TempDir(SmallVectorImpl<char> &result) { // FIXME: Don't use TMPDIR if program is SUID or SGID enabled. 
const char *dir = nullptr; (dir = std::getenv("TMPDIR")) || (dir = std::getenv("TMP")) || @@ -100,7 +100,7 @@ static error_code TempDir(SmallVectorImpl<char> &result) { result.clear(); StringRef d(dir); result.append(d.begin(), d.end()); - return error_code::success(); + return std::error_code(); } namespace llvm { @@ -225,7 +225,7 @@ UniqueID file_status::getUniqueID() const { return UniqueID(fs_st_dev, fs_st_ino); } -error_code current_path(SmallVectorImpl<char> &result) { +std::error_code current_path(SmallVectorImpl<char> &result) { result.clear(); const char *pwd = ::getenv("PWD"); @@ -235,7 +235,7 @@ error_code current_path(SmallVectorImpl<char> &result) { !llvm::sys::fs::status(".", DotStatus) && PWDStatus.getUniqueID() == DotStatus.getUniqueID()) { result.append(pwd, pwd + strlen(pwd)); - return error_code::success(); + return std::error_code(); } #ifdef MAXPATHLEN @@ -248,8 +248,8 @@ error_code current_path(SmallVectorImpl<char> &result) { while (true) { if (::getcwd(result.data(), result.capacity()) == nullptr) { // See if there was a real error. - if (errno != errc::not_enough_memory) - return error_code(errno, system_category()); + if (errno != ENOMEM) + return std::error_code(errno, std::generic_category()); // Otherwise there just wasn't enough space. result.reserve(result.capacity() * 2); } else @@ -257,22 +257,22 @@ error_code current_path(SmallVectorImpl<char> &result) { } result.set_size(strlen(result.data())); - return error_code::success(); + return std::error_code(); } -error_code create_directory(const Twine &path, bool IgnoreExisting) { +std::error_code create_directory(const Twine &path, bool IgnoreExisting) { SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) { - if (errno != errc::file_exists || !IgnoreExisting) - return error_code(errno, system_category()); + if (errno != EEXIST || !IgnoreExisting) + return std::error_code(errno, std::generic_category()); } - return error_code::success(); + return std::error_code(); } -error_code normalize_separators(SmallVectorImpl<char> &Path) { +std::error_code normalize_separators(SmallVectorImpl<char> &Path) { for (auto PI = Path.begin(), PE = Path.end(); PI < PE; ++PI) { if (*PI == '\\') { auto PN = PI + 1; @@ -282,12 +282,12 @@ error_code normalize_separators(SmallVectorImpl<char> &Path) { *PI = '/'; } } - return error_code::success(); + return std::error_code(); } // Note that we are using symbolic link because hard links are not supported by // all filesystems (SMB doesn't). -error_code create_link(const Twine &to, const Twine &from) { +std::error_code create_link(const Twine &to, const Twine &from) { // Get arguments. 
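The Unix path code in these hunks stops comparing raw errno values against llvm::errc enumerators and tests the POSIX macros (EEXIST, ENOENT) directly; portable enum comparisons belong on a constructed std::error_code instead. A small illustration, which assumes /tmp already exists so mkdir fails with EEXIST:

#include <cerrno>
#include <sys/stat.h>
#include <system_error>

int main() {
  ::mkdir("/tmp", 0700); // assumed to fail with EEXIST since /tmp exists
  const int SavedErrno = errno;

  // Raw errno compares against the POSIX macro, as the patched code does...
  const bool RawHit = (SavedErrno == EEXIST);

  // ...while portable enum comparisons go through std::error_code.
  const std::error_code EC(SavedErrno, std::generic_category());
  const bool PortableHit = (EC == std::errc::file_exists);

  return (RawHit && PortableHit) ? 0 : 1;
}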
SmallString<128> from_storage; SmallString<128> to_storage; @@ -295,20 +295,20 @@ error_code create_link(const Twine &to, const Twine &from) { StringRef t = to.toNullTerminatedStringRef(to_storage); if (::symlink(t.begin(), f.begin()) == -1) - return error_code(errno, system_category()); + return std::error_code(errno, std::generic_category()); - return error_code::success(); + return std::error_code(); } -error_code remove(const Twine &path, bool IgnoreNonExisting) { +std::error_code remove(const Twine &path, bool IgnoreNonExisting) { SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); struct stat buf; if (lstat(p.begin(), &buf) != 0) { - if (errno != errc::no_such_file_or_directory || !IgnoreNonExisting) - return error_code(errno, system_category()); - return error_code::success(); + if (errno != ENOENT || !IgnoreNonExisting) + return std::error_code(errno, std::generic_category()); + return std::error_code(); } // Note: this check catches strange situations. In all cases, LLVM should @@ -320,14 +320,14 @@ error_code remove(const Twine &path, bool IgnoreNonExisting) { return make_error_code(errc::operation_not_permitted); if (::remove(p.begin()) == -1) { - if (errno != errc::no_such_file_or_directory || !IgnoreNonExisting) - return error_code(errno, system_category()); + if (errno != ENOENT || !IgnoreNonExisting) + return std::error_code(errno, std::generic_category()); } - return error_code::success(); + return std::error_code(); } -error_code rename(const Twine &from, const Twine &to) { +std::error_code rename(const Twine &from, const Twine &to) { // Get arguments. SmallString<128> from_storage; SmallString<128> to_storage; @@ -335,33 +335,33 @@ error_code rename(const Twine &from, const Twine &to) { StringRef t = to.toNullTerminatedStringRef(to_storage); if (::rename(f.begin(), t.begin()) == -1) - return error_code(errno, system_category()); + return std::error_code(errno, std::generic_category()); - return error_code::success(); + return std::error_code(); } -error_code resize_file(const Twine &path, uint64_t size) { +std::error_code resize_file(const Twine &path, uint64_t size) { SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); if (::truncate(p.begin(), size) == -1) - return error_code(errno, system_category()); + return std::error_code(errno, std::generic_category()); - return error_code::success(); + return std::error_code(); } -error_code exists(const Twine &path, bool &result) { +std::error_code exists(const Twine &path, bool &result) { SmallString<128> path_storage; StringRef p = path.toNullTerminatedStringRef(path_storage); if (::access(p.begin(), F_OK) == -1) { - if (errno != errc::no_such_file_or_directory) - return error_code(errno, system_category()); + if (errno != ENOENT) + return std::error_code(errno, std::generic_category()); result = false; } else result = true; - return error_code::success(); + return std::error_code(); } bool can_write(const Twine &Path) { @@ -390,18 +390,20 @@ bool equivalent(file_status A, file_status B) { A.fs_st_ino == B.fs_st_ino; } -error_code equivalent(const Twine &A, const Twine &B, bool &result) { +std::error_code equivalent(const Twine &A, const Twine &B, bool &result) { file_status fsA, fsB; - if (error_code ec = status(A, fsA)) return ec; - if (error_code ec = status(B, fsB)) return ec; + if (std::error_code ec = status(A, fsA)) + return ec; + if (std::error_code ec = status(B, fsB)) + return ec; result = equivalent(fsA, fsB); - return 
error_code::success(); + return std::error_code(); } -static error_code fillStatus(int StatRet, const struct stat &Status, +static std::error_code fillStatus(int StatRet, const struct stat &Status, file_status &Result) { if (StatRet != 0) { - error_code ec(errno, system_category()); + std::error_code ec(errno, std::generic_category()); if (ec == errc::no_such_file_or_directory) Result = file_status(file_type::file_not_found); else @@ -429,10 +431,10 @@ static error_code fillStatus(int StatRet, const struct stat &Status, file_status(Type, Perms, Status.st_dev, Status.st_ino, Status.st_mtime, Status.st_uid, Status.st_gid, Status.st_size); - return error_code::success(); + return std::error_code(); } -error_code status(const Twine &Path, file_status &Result) { +std::error_code status(const Twine &Path, file_status &Result) { SmallString<128> PathStorage; StringRef P = Path.toNullTerminatedStringRef(PathStorage); @@ -441,36 +443,36 @@ error_code status(const Twine &Path, file_status &Result) { return fillStatus(StatRet, Status, Result); } -error_code status(int FD, file_status &Result) { +std::error_code status(int FD, file_status &Result) { struct stat Status; int StatRet = ::fstat(FD, &Status); return fillStatus(StatRet, Status, Result); } -error_code setLastModificationAndAccessTime(int FD, TimeValue Time) { +std::error_code setLastModificationAndAccessTime(int FD, TimeValue Time) { #if defined(HAVE_FUTIMENS) timespec Times[2]; Times[0].tv_sec = Time.toEpochTime(); Times[0].tv_nsec = 0; Times[1] = Times[0]; if (::futimens(FD, Times)) - return error_code(errno, system_category()); - return error_code::success(); + return std::error_code(errno, std::generic_category()); + return std::error_code(); #elif defined(HAVE_FUTIMES) timeval Times[2]; Times[0].tv_sec = Time.toEpochTime(); Times[0].tv_usec = 0; Times[1] = Times[0]; if (::futimes(FD, Times)) - return error_code(errno, system_category()); - return error_code::success(); + return std::error_code(errno, std::generic_category()); + return std::error_code(); #else #warning Missing futimes() and futimens() - return make_error_code(errc::not_supported); + return make_error_code(errc::function_not_supported); #endif } -error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { +std::error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { AutoFD ScopedFD(FD); if (!CloseFD) ScopedFD.take(); @@ -478,7 +480,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { // Figure out how large the file is. struct stat FileInfo; if (fstat(FD, &FileInfo) == -1) - return error_code(errno, system_category()); + return std::error_code(errno, std::generic_category()); uint64_t FileSize = FileInfo.st_size; if (Size == 0) @@ -486,7 +488,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { else if (FileSize < Size) { // We need to grow the file. if (ftruncate(FD, Size) == -1) - return error_code(errno, system_category()); + return std::error_code(errno, std::generic_category()); } int flags = (Mode == readwrite) ? 
MAP_SHARED : MAP_PRIVATE; @@ -496,15 +498,15 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { #endif Mapping = ::mmap(nullptr, Size, prot, flags, FD, Offset); if (Mapping == MAP_FAILED) - return error_code(errno, system_category()); - return error_code::success(); + return std::error_code(errno, std::generic_category()); + return std::error_code(); } mapped_file_region::mapped_file_region(const Twine &path, mapmode mode, uint64_t length, uint64_t offset, - error_code &ec) + std::error_code &ec) : Mode(mode) , Size(length) , Mapping() { @@ -519,7 +521,7 @@ mapped_file_region::mapped_file_region(const Twine &path, int oflags = (mode == readonly) ? O_RDONLY : O_RDWR; int ofd = ::open(name.begin(), oflags); if (ofd == -1) { - ec = error_code(errno, system_category()); + ec = std::error_code(errno, std::generic_category()); return; } @@ -533,7 +535,7 @@ mapped_file_region::mapped_file_region(int fd, mapmode mode, uint64_t length, uint64_t offset, - error_code &ec) + std::error_code &ec) : Mode(mode) , Size(length) , Mapping() { @@ -583,12 +585,12 @@ int mapped_file_region::alignment() { return process::get_self()->page_size(); } -error_code detail::directory_iterator_construct(detail::DirIterState &it, +std::error_code detail::directory_iterator_construct(detail::DirIterState &it, StringRef path){ SmallString<128> path_null(path); DIR *directory = ::opendir(path_null.c_str()); if (!directory) - return error_code(errno, system_category()); + return std::error_code(errno, std::generic_category()); it.IterationHandle = reinterpret_cast<intptr_t>(directory); // Add something for replace_filename to replace. @@ -597,19 +599,19 @@ error_code detail::directory_iterator_construct(detail::DirIterState &it, return directory_iterator_increment(it); } -error_code detail::directory_iterator_destruct(detail::DirIterState &it) { +std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) { if (it.IterationHandle) ::closedir(reinterpret_cast<DIR *>(it.IterationHandle)); it.IterationHandle = 0; it.CurrentEntry = directory_entry(); - return error_code::success(); + return std::error_code(); } -error_code detail::directory_iterator_increment(detail::DirIterState &it) { +std::error_code detail::directory_iterator_increment(detail::DirIterState &it) { errno = 0; dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle)); if (cur_dir == nullptr && errno != 0) { - return error_code(errno, system_category()); + return std::error_code(errno, std::generic_category()); } else if (cur_dir != nullptr) { StringRef name(cur_dir->d_name, NAMLEN(cur_dir)); if ((name.size() == 1 && name[0] == '.') || @@ -619,80 +621,20 @@ error_code detail::directory_iterator_increment(detail::DirIterState &it) { } else return directory_iterator_destruct(it); - return error_code::success(); -} - -error_code get_magic(const Twine &path, uint32_t len, - SmallVectorImpl<char> &result) { - SmallString<128> PathStorage; - StringRef Path = path.toNullTerminatedStringRef(PathStorage); - result.set_size(0); - - // Open path. - std::FILE *file = std::fopen(Path.data(), "rb"); - if (!file) - return error_code(errno, system_category()); - - // Reserve storage. - result.reserve(len); - - // Read magic! 
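directory_iterator_increment above has to distinguish end-of-directory (readdir returns null with errno untouched) from a genuine failure, which is why it clears errno before each call. The same discipline in a standalone listing loop:

#include <cerrno>
#include <cstdio>
#include <dirent.h>
#include <system_error>

int main() {
  DIR *D = opendir(".");
  if (!D)
    return 1;

  for (;;) {
    errno = 0; // readdir reports errors only through errno
    dirent *Cur = readdir(D);
    if (!Cur) {
      // errno still 0 here means a clean end of the stream.
      const std::error_code EC(errno, std::generic_category());
      closedir(D);
      return EC ? 1 : 0;
    }
    std::printf("%s\n", Cur->d_name);
  }
}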
- size_t size = std::fread(result.data(), 1, len, file); - if (std::ferror(file) != 0) { - std::fclose(file); - return error_code(errno, system_category()); - } else if (size != len) { - if (std::feof(file) != 0) { - std::fclose(file); - result.set_size(size); - return make_error_code(errc::value_too_large); - } - } - std::fclose(file); - result.set_size(size); - return error_code::success(); -} - -error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, - bool map_writable, void *&result) { - SmallString<128> path_storage; - StringRef name = path.toNullTerminatedStringRef(path_storage); - int oflags = map_writable ? O_RDWR : O_RDONLY; - int ofd = ::open(name.begin(), oflags); - if ( ofd == -1 ) - return error_code(errno, system_category()); - AutoFD fd(ofd); - int flags = map_writable ? MAP_SHARED : MAP_PRIVATE; - int prot = map_writable ? (PROT_READ|PROT_WRITE) : PROT_READ; -#ifdef MAP_FILE - flags |= MAP_FILE; -#endif - result = ::mmap(nullptr, size, prot, flags, fd, file_offset); - if (result == MAP_FAILED) { - return error_code(errno, system_category()); - } - - return error_code::success(); -} - -error_code unmap_file_pages(void *base, size_t size) { - if ( ::munmap(base, size) == -1 ) - return error_code(errno, system_category()); - - return error_code::success(); + return std::error_code(); } -error_code openFileForRead(const Twine &Name, int &ResultFD) { +std::error_code openFileForRead(const Twine &Name, int &ResultFD) { SmallString<128> Storage; StringRef P = Name.toNullTerminatedStringRef(Storage); while ((ResultFD = open(P.begin(), O_RDONLY)) < 0) { if (errno != EINTR) - return error_code(errno, system_category()); + return std::error_code(errno, std::generic_category()); } - return error_code::success(); + return std::error_code(); } -error_code openFileForWrite(const Twine &Name, int &ResultFD, +std::error_code openFileForWrite(const Twine &Name, int &ResultFD, sys::fs::OpenFlags Flags, unsigned Mode) { // Verify that we don't have both "append" and "excl". 
assert((!(Flags & sys::fs::F_Excl) || !(Flags & sys::fs::F_Append)) && @@ -717,9 +659,9 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD, StringRef P = Name.toNullTerminatedStringRef(Storage); while ((ResultFD = open(P.begin(), OpenFlags, Mode)) < 0) { if (errno != EINTR) - return error_code(errno, system_category()); + return std::error_code(errno, std::generic_category()); } - return error_code::success(); + return std::error_code(); } } // end namespace fs diff --git a/lib/Support/Unix/Process.inc b/lib/Support/Unix/Process.inc index 8faa638..d2c5dbc 100644 --- a/lib/Support/Unix/Process.inc +++ b/lib/Support/Unix/Process.inc @@ -47,7 +47,6 @@ using namespace llvm; using namespace sys; - process::id_type self_process::get_id() { return getpid(); } @@ -190,12 +189,13 @@ Optional<std::string> Process::GetEnv(StringRef Name) { return std::string(Val); } -error_code Process::GetArgumentVector(SmallVectorImpl<const char *> &ArgsOut, - ArrayRef<const char *> ArgsIn, - SpecificBumpPtrAllocator<char> &) { +std::error_code +Process::GetArgumentVector(SmallVectorImpl<const char *> &ArgsOut, + ArrayRef<const char *> ArgsIn, + SpecificBumpPtrAllocator<char> &) { ArgsOut.append(ArgsIn.begin(), ArgsIn.end()); - return error_code::success(); + return std::error_code(); } bool Process::StandardInIsUserInput() { diff --git a/lib/Support/Unix/Program.inc b/lib/Support/Unix/Program.inc index 1225a9c..06a33cd 100644 --- a/lib/Support/Unix/Program.inc +++ b/lib/Support/Unix/Program.inc @@ -48,6 +48,7 @@ #endif namespace llvm { + using namespace sys; ProcessInfo::ProcessInfo() : Pid(0), ReturnCode(0) {} @@ -349,7 +350,11 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, // Parent process: Wait for the child process to terminate. int status; ProcessInfo WaitResult; - WaitResult.Pid = waitpid(ChildPid, &status, WaitPidOptions); + + do { + WaitResult.Pid = waitpid(ChildPid, &status, WaitPidOptions); + } while (WaitUntilTerminates && WaitResult.Pid == -1 && errno == EINTR); + if (WaitResult.Pid != PI.Pid) { if (WaitResult.Pid == 0) { // Non-blocking wait. @@ -425,14 +430,14 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, return WaitResult; } -error_code sys::ChangeStdinToBinary(){ + std::error_code sys::ChangeStdinToBinary(){ // Do nothing, as Unix doesn't differentiate between text and binary. - return make_error_code(errc::success); + return std::error_code(); } -error_code sys::ChangeStdoutToBinary(){ + std::error_code sys::ChangeStdoutToBinary(){ // Do nothing, as Unix doesn't differentiate between text and binary. - return make_error_code(errc::success); + return std::error_code(); } bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) { diff --git a/lib/Support/Unix/system_error.inc b/lib/Support/Unix/system_error.inc deleted file mode 100644 index 681e919..0000000 --- a/lib/Support/Unix/system_error.inc +++ /dev/null @@ -1,34 +0,0 @@ -//===- llvm/Support/Unix/system_error.inc - Unix error_code ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides the Unix specific implementation of the error_code -// and error_condition classes. 
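Further down, sys::Wait gains a retry loop so that a blocking waitpid interrupted by a signal (EINTR) is restarted rather than reported as a failure. The standard retry-on-EINTR pattern on its own:

#include <cerrno>
#include <sys/wait.h>
#include <unistd.h>

int main() {
  const pid_t Child = fork();
  if (Child < 0)
    return 1;
  if (Child == 0)
    _exit(7); // child: exit immediately with a known code

  int Status = 0;
  pid_t Res;
  do {
    Res = waitpid(Child, &Status, 0); // blocking wait
  } while (Res == -1 && errno == EINTR); // a signal interrupted us; retry

  return (Res == Child && WIFEXITED(Status) && WEXITSTATUS(Status) == 7) ? 0
                                                                         : 1;
}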
-// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic UNIX code that -//=== is guaranteed to work on *all* UNIX variants. -//===----------------------------------------------------------------------===// - -using namespace llvm; - -std::string -_system_error_category::message(int ev) const { - return _do_message::message(ev); -} - -error_condition -_system_error_category::default_error_condition(int ev) const { -#ifdef ELAST - if (ev > ELAST) - return error_condition(ev, system_category()); -#endif // ELAST - return error_condition(ev, generic_category()); -} diff --git a/lib/Support/Windows/DynamicLibrary.inc b/lib/Support/Windows/DynamicLibrary.inc index 5d0278f..5ed0b70 100644 --- a/lib/Support/Windows/DynamicLibrary.inc +++ b/lib/Support/Windows/DynamicLibrary.inc @@ -85,7 +85,7 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, } SmallVector<wchar_t, MAX_PATH> filenameUnicode; - if (error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) { + if (std::error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) { SetLastError(ec.value()); MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16: "); return DynamicLibrary(); diff --git a/lib/Support/Windows/Memory.inc b/lib/Support/Windows/Memory.inc index ebe7878..ae8371a 100644 --- a/lib/Support/Windows/Memory.inc +++ b/lib/Support/Windows/Memory.inc @@ -15,6 +15,7 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Process.h" +#include "llvm/Support/WindowsError.h" // The Windows.h header must be the last one included. 
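The Windows code paths move from error_code(::GetLastError(), system_category()) to mapWindowsError(), which translates the Win32 DWORD into a std::error_code. Below is a portable, illustrative stand-in for that translation; the two cases shown are assumptions for demonstration, and the real table in llvm/Support/WindowsError.h is far larger:

#include <system_error>

// Illustrative stand-in for llvm's mapWindowsError(). Only two Win32 codes
// are handled, and the numeric values are written out so this compiles on
// any host; the real mapping lives in llvm/Support/WindowsError.h.
static std::error_code mapWindowsErrorSketch(unsigned EV) {
  switch (EV) {
  case 2: // ERROR_FILE_NOT_FOUND
    return std::make_error_code(std::errc::no_such_file_or_directory);
  case 183: // ERROR_ALREADY_EXISTS
    return std::make_error_code(std::errc::file_exists);
  default: // fall back on the raw value in the system category
    return std::error_code(static_cast<int>(EV), std::system_category());
  }
}

int main() {
  return mapWindowsErrorSketch(2) == std::errc::no_such_file_or_directory ? 0
                                                                          : 1;
}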
#include "WindowsSupport.h" @@ -69,8 +70,8 @@ namespace sys { MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, const MemoryBlock *const NearBlock, unsigned Flags, - error_code &EC) { - EC = error_code::success(); + std::error_code &EC) { + EC = std::error_code(); if (NumBytes == 0) return MemoryBlock(); @@ -99,7 +100,7 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, // Try again without the NearBlock hint return allocateMappedMemory(NumBytes, NULL, Flags, EC); } - EC = error_code(::GetLastError(), system_category()); + EC = mapWindowsError(::GetLastError()); return MemoryBlock(); } @@ -113,34 +114,34 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, return Result; } -error_code Memory::releaseMappedMemory(MemoryBlock &M) { + std::error_code Memory::releaseMappedMemory(MemoryBlock &M) { if (M.Address == 0 || M.Size == 0) - return error_code::success(); + return std::error_code(); if (!VirtualFree(M.Address, 0, MEM_RELEASE)) - return error_code(::GetLastError(), system_category()); + return mapWindowsError(::GetLastError()); M.Address = 0; M.Size = 0; - return error_code::success(); + return std::error_code(); } -error_code Memory::protectMappedMemory(const MemoryBlock &M, + std::error_code Memory::protectMappedMemory(const MemoryBlock &M, unsigned Flags) { if (M.Address == 0 || M.Size == 0) - return error_code::success(); + return std::error_code(); DWORD Protect = getWindowsProtectionFlags(Flags); DWORD OldFlags; if (!VirtualProtect(M.Address, M.Size, Protect, &OldFlags)) - return error_code(::GetLastError(), system_category()); + return mapWindowsError(::GetLastError()); if (Flags & MF_EXEC) Memory::InvalidateInstructionCache(M.Address, M.Size); - return error_code::success(); + return std::error_code(); } /// InvalidateInstructionCache - Before the JIT can run a block of code @@ -156,18 +157,18 @@ MemoryBlock Memory::AllocateRWX(size_t NumBytes, const MemoryBlock *NearBlock, std::string *ErrMsg) { MemoryBlock MB; - error_code EC; + std::error_code EC; MB = allocateMappedMemory(NumBytes, NearBlock, MF_READ|MF_WRITE|MF_EXEC, EC); - if (EC != error_code::success() && ErrMsg) { + if (EC != std::error_code() && ErrMsg) { MakeErrMsg(ErrMsg, EC.message()); } return MB; } bool Memory::ReleaseRWX(MemoryBlock &M, std::string *ErrMsg) { - error_code EC = releaseMappedMemory(M); - if (EC == error_code::success()) + std::error_code EC = releaseMappedMemory(M); + if (EC == std::error_code()) return false; MakeErrMsg(ErrMsg, EC.message()); return true; diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index e59888e..7a1bc04 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -17,6 +17,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/STLExtras.h" +#include "llvm/Support/WindowsError.h" #include <fcntl.h> #include <io.h> #include <sys/stat.h> @@ -44,7 +45,11 @@ using namespace llvm; using llvm::sys::windows::UTF8ToUTF16; using llvm::sys::windows::UTF16ToUTF8; -static error_code TempDir(SmallVectorImpl<char> &Result) { +static std::error_code windows_error(DWORD E) { + return mapWindowsError(E); +} + +static std::error_code TempDir(SmallVectorImpl<char> &Result) { SmallVector<wchar_t, 64> Res; retry_temp_dir: DWORD Len = ::GetTempPathW(Res.capacity(), Res.begin()); @@ -119,7 +124,7 @@ TimeValue file_status::getLastModificationTime() const { return Ret; } -error_code current_path(SmallVectorImpl<char> &result) { +std::error_code current_path(SmallVectorImpl<char> 
&result) { SmallVector<wchar_t, MAX_PATH> cur_path; DWORD len = MAX_PATH; @@ -141,30 +146,30 @@ error_code current_path(SmallVectorImpl<char> &result) { return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result); } -error_code create_directory(const Twine &path, bool IgnoreExisting) { +std::error_code create_directory(const Twine &path, bool IgnoreExisting) { SmallString<128> path_storage; SmallVector<wchar_t, 128> path_utf16; - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) + if (std::error_code ec = + UTF8ToUTF16(path.toStringRef(path_storage), path_utf16)) return ec; if (!::CreateDirectoryW(path_utf16.begin(), NULL)) { - error_code ec = windows_error(::GetLastError()); - if (ec != windows_error::already_exists || !IgnoreExisting) - return ec; + DWORD LastError = ::GetLastError(); + if (LastError != ERROR_ALREADY_EXISTS || !IgnoreExisting) + return windows_error(LastError); } - return error_code::success(); + return std::error_code(); } -error_code normalize_separators(SmallVectorImpl<char> &Path) { +std::error_code normalize_separators(SmallVectorImpl<char> &Path) { (void) Path; - return error_code::success(); + return std::error_code(); } // We can't use symbolic links on Windows. -error_code create_link(const Twine &to, const Twine &from) { +std::error_code create_link(const Twine &to, const Twine &from) { // Get arguments. SmallString<128> from_storage; SmallString<128> to_storage; @@ -174,47 +179,49 @@ error_code create_link(const Twine &to, const Twine &from) { // Convert to utf-16. SmallVector<wchar_t, 128> wide_from; SmallVector<wchar_t, 128> wide_to; - if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; - if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; + if (std::error_code ec = UTF8ToUTF16(f, wide_from)) + return ec; + if (std::error_code ec = UTF8ToUTF16(t, wide_to)) + return ec; if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL)) return windows_error(::GetLastError()); - return error_code::success(); + return std::error_code(); } -error_code remove(const Twine &path, bool IgnoreNonExisting) { +std::error_code remove(const Twine &path, bool IgnoreNonExisting) { SmallString<128> path_storage; SmallVector<wchar_t, 128> path_utf16; file_status ST; - if (error_code EC = status(path, ST)) { + if (std::error_code EC = status(path, ST)) { if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting) return EC; - return error_code::success(); + return std::error_code(); } - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) + if (std::error_code ec = + UTF8ToUTF16(path.toStringRef(path_storage), path_utf16)) return ec; if (ST.type() == file_type::directory_file) { if (!::RemoveDirectoryW(c_str(path_utf16))) { - error_code EC = windows_error(::GetLastError()); + std::error_code EC = windows_error(::GetLastError()); if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting) return EC; } - return error_code::success(); + return std::error_code(); } if (!::DeleteFileW(c_str(path_utf16))) { - error_code EC = windows_error(::GetLastError()); + std::error_code EC = windows_error(::GetLastError()); if (EC != errc::no_such_file_or_directory || !IgnoreNonExisting) return EC; } - return error_code::success(); + return std::error_code(); } -error_code rename(const Twine &from, const Twine &to) { +std::error_code rename(const Twine &from, const Twine &to) { // Get arguments.
SmallString<128> from_storage; SmallString<128> to_storage; @@ -224,16 +231,18 @@ error_code rename(const Twine &from, const Twine &to) { // Convert to utf-16. SmallVector<wchar_t, 128> wide_from; SmallVector<wchar_t, 128> wide_to; - if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; - if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; + if (std::error_code ec = UTF8ToUTF16(f, wide_from)) + return ec; + if (std::error_code ec = UTF8ToUTF16(t, wide_to)) + return ec; - error_code ec = error_code::success(); + std::error_code ec = std::error_code(); for (int i = 0; i < 2000; i++) { if (::MoveFileExW(wide_from.begin(), wide_to.begin(), MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)) - return error_code::success(); - ec = windows_error(::GetLastError()); - if (ec != windows_error::access_denied) + return std::error_code(); + DWORD LastError = ::GetLastError(); + if (LastError != ERROR_ACCESS_DENIED) break; // Retry MoveFile() at ACCESS_DENIED. // System scanners (e.g. indexer) might open the source file when @@ -244,46 +253,46 @@ return ec; } -error_code resize_file(const Twine &path, uint64_t size) { +std::error_code resize_file(const Twine &path, uint64_t size) { SmallString<128> path_storage; SmallVector<wchar_t, 128> path_utf16; - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) + if (std::error_code ec = + UTF8ToUTF16(path.toStringRef(path_storage), path_utf16)) return ec; int fd = ::_wopen(path_utf16.begin(), O_BINARY | _O_RDWR, S_IWRITE); if (fd == -1) - return error_code(errno, generic_category()); + return std::error_code(errno, std::generic_category()); #ifdef HAVE__CHSIZE_S errno_t error = ::_chsize_s(fd, size); #else errno_t error = ::_chsize(fd, size); #endif ::close(fd); - return error_code(error, generic_category()); + return std::error_code(error, std::generic_category()); } -error_code exists(const Twine &path, bool &result) { +std::error_code exists(const Twine &path, bool &result) { SmallString<128> path_storage; SmallVector<wchar_t, 128> path_utf16; - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) + if (std::error_code ec = + UTF8ToUTF16(path.toStringRef(path_storage), path_utf16)) return ec; DWORD attributes = ::GetFileAttributesW(path_utf16.begin()); if (attributes == INVALID_FILE_ATTRIBUTES) { // See if the file didn't actually exist.
- error_code ec = make_error_code(windows_error(::GetLastError())); - if (ec != windows_error::file_not_found && - ec != windows_error::path_not_found) - return ec; + DWORD LastError = ::GetLastError(); + if (LastError != ERROR_FILE_NOT_FOUND && + LastError != ERROR_PATH_NOT_FOUND) + return windows_error(LastError); result = false; } else result = true; - return error_code::success(); + return std::error_code(); } bool can_write(const Twine &Path) { @@ -320,12 +329,14 @@ bool equivalent(file_status A, file_status B) { A.VolumeSerialNumber == B.VolumeSerialNumber; } -error_code equivalent(const Twine &A, const Twine &B, bool &result) { +std::error_code equivalent(const Twine &A, const Twine &B, bool &result) { file_status fsA, fsB; - if (error_code ec = status(A, fsA)) return ec; - if (error_code ec = status(B, fsB)) return ec; + if (std::error_code ec = status(A, fsA)) + return ec; + if (std::error_code ec = status(B, fsB)) + return ec; result = equivalent(fsA, fsB); - return error_code::success(); + return std::error_code(); } static bool isReservedName(StringRef path) { @@ -351,7 +362,7 @@ static bool isReservedName(StringRef path) { return false; } -static error_code getStatus(HANDLE FileHandle, file_status &Result) { +static std::error_code getStatus(HANDLE FileHandle, file_status &Result) { if (FileHandle == INVALID_HANDLE_VALUE) goto handle_status_error; @@ -363,16 +374,16 @@ static error_code getStatus(HANDLE FileHandle, file_status &Result) { if (Err != NO_ERROR) return windows_error(Err); Result = file_status(file_type::type_unknown); - return error_code::success(); + return std::error_code(); } case FILE_TYPE_DISK: break; case FILE_TYPE_CHAR: Result = file_status(file_type::character_file); - return error_code::success(); + return std::error_code(); case FILE_TYPE_PIPE: Result = file_status(file_type::fifo_file); - return error_code::success(); + return std::error_code(); } BY_HANDLE_FILE_INFORMATION Info; @@ -388,32 +399,32 @@ static error_code getStatus(HANDLE FileHandle, file_status &Result) { Info.ftLastWriteTime.dwLowDateTime, Info.dwVolumeSerialNumber, Info.nFileSizeHigh, Info.nFileSizeLow, Info.nFileIndexHigh, Info.nFileIndexLow); - return error_code::success(); + return std::error_code(); } handle_status_error: - error_code EC = windows_error(::GetLastError()); - if (EC == windows_error::file_not_found || - EC == windows_error::path_not_found) + DWORD LastError = ::GetLastError(); + if (LastError == ERROR_FILE_NOT_FOUND || + LastError == ERROR_PATH_NOT_FOUND) Result = file_status(file_type::file_not_found); - else if (EC == windows_error::sharing_violation) + else if (LastError == ERROR_SHARING_VIOLATION) Result = file_status(file_type::type_unknown); else Result = file_status(file_type::status_error); - return EC; + return windows_error(LastError); } -error_code status(const Twine &path, file_status &result) { +std::error_code status(const Twine &path, file_status &result) { SmallString<128> path_storage; SmallVector<wchar_t, 128> path_utf16; StringRef path8 = path.toStringRef(path_storage); if (isReservedName(path8)) { result = file_status(file_type::character_file); - return error_code::success(); + return std::error_code(); } - if (error_code ec = UTF8ToUTF16(path8, path_utf16)) + if (std::error_code ec = UTF8ToUTF16(path8, path_utf16)) return ec; DWORD attr = ::GetFileAttributesW(path_utf16.begin()); @@ -444,12 +455,12 @@ error_code status(const Twine &path, file_status &result) { return getStatus(h, result); } -error_code status(int FD, file_status &Result) { 
+std::error_code status(int FD, file_status &Result) { HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD)); return getStatus(FileHandle, Result); } -error_code setLastModificationAndAccessTime(int FD, TimeValue Time) { +std::error_code setLastModificationAndAccessTime(int FD, TimeValue Time) { ULARGE_INTEGER UI; UI.QuadPart = Time.toWin32Time(); FILETIME FT; @@ -458,52 +469,10 @@ error_code setLastModificationAndAccessTime(int FD, TimeValue Time) { HANDLE FileHandle = reinterpret_cast<HANDLE>(_get_osfhandle(FD)); if (!SetFileTime(FileHandle, NULL, &FT, &FT)) return windows_error(::GetLastError()); - return error_code::success(); -} - -error_code get_magic(const Twine &path, uint32_t len, - SmallVectorImpl<char> &result) { - SmallString<128> path_storage; - SmallVector<wchar_t, 128> path_utf16; - result.set_size(0); - - // Convert path to UTF-16. - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) - return ec; - - // Open file. - HANDLE file = ::CreateFileW(c_str(path_utf16), - GENERIC_READ, - FILE_SHARE_READ, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_READONLY, - NULL); - if (file == INVALID_HANDLE_VALUE) - return windows_error(::GetLastError()); - - // Allocate buffer. - result.reserve(len); - - // Get magic! - DWORD bytes_read = 0; - BOOL read_success = ::ReadFile(file, result.data(), len, &bytes_read, NULL); - error_code ec = windows_error(::GetLastError()); - ::CloseHandle(file); - if (!read_success || (bytes_read != len)) { - // Set result size to the number of bytes read if it's valid. - if (bytes_read <= len) - result.set_size(bytes_read); - // ERROR_HANDLE_EOF is mapped to errc::value_too_large. - return ec; - } - - result.set_size(len); - return error_code::success(); + return std::error_code(); } -error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { +std::error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { FileDescriptor = FD; // Make sure that the requested size fits within SIZE_T. if (Size > std::numeric_limits<SIZE_T>::max()) { @@ -528,7 +497,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { (Offset + Size) & 0xffffffff, 0); if (FileMappingHandle == NULL) { - error_code ec = windows_error(GetLastError()); + std::error_code ec = windows_error(GetLastError()); if (FileDescriptor) { if (CloseFD) _close(FileDescriptor); @@ -549,7 +518,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { Offset & 0xffffffff, Size); if (Mapping == NULL) { - error_code ec = windows_error(GetLastError()); + std::error_code ec = windows_error(GetLastError()); ::CloseHandle(FileMappingHandle); if (FileDescriptor) { if (CloseFD) @@ -563,7 +532,7 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { MEMORY_BASIC_INFORMATION mbi; SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi)); if (Result == 0) { - error_code ec = windows_error(GetLastError()); + std::error_code ec = windows_error(GetLastError()); ::UnmapViewOfFile(Mapping); ::CloseHandle(FileMappingHandle); if (FileDescriptor) { @@ -584,14 +553,14 @@ error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { _close(FileDescriptor); // Also closes FileHandle. 
} else ::CloseHandle(FileHandle); - return error_code::success(); + return std::error_code(); } mapped_file_region::mapped_file_region(const Twine &path, mapmode mode, uint64_t length, uint64_t offset, - error_code &ec) + std::error_code &ec) : Mode(mode) , Size(length) , Mapping() @@ -636,7 +605,7 @@ mapped_file_region::mapped_file_region(int fd, mapmode mode, uint64_t length, uint64_t offset, - error_code &ec) + std::error_code &ec) : Mode(mode) , Size(length) , Mapping() @@ -704,12 +673,11 @@ int mapped_file_region::alignment() { return SysInfo.dwAllocationGranularity; } -error_code detail::directory_iterator_construct(detail::DirIterState &it, +std::error_code detail::directory_iterator_construct(detail::DirIterState &it, StringRef path){ SmallVector<wchar_t, 128> path_utf16; - if (error_code ec = UTF8ToUTF16(path, - path_utf16)) + if (std::error_code ec = UTF8ToUTF16(path, path_utf16)) return ec; // Convert path to the format that Windows is happy with. @@ -733,19 +701,19 @@ error_code detail::directory_iterator_construct(detail::DirIterState &it, (FilenameLen == 2 && FirstFind.cFileName[0] == L'.' && FirstFind.cFileName[1] == L'.')) if (!::FindNextFileW(FindHandle, &FirstFind)) { - error_code ec = windows_error(::GetLastError()); + DWORD LastError = ::GetLastError(); // Check for end. - if (ec == windows_error::no_more_files) + if (LastError == ERROR_NO_MORE_FILES) return detail::directory_iterator_destruct(it); - return ec; + return windows_error(LastError); } else FilenameLen = ::wcslen(FirstFind.cFileName); // Construct the current directory entry. SmallString<128> directory_entry_name_utf8; - if (error_code ec = UTF16ToUTF8(FirstFind.cFileName, - ::wcslen(FirstFind.cFileName), - directory_entry_name_utf8)) + if (std::error_code ec = + UTF16ToUTF8(FirstFind.cFileName, ::wcslen(FirstFind.cFileName), + directory_entry_name_utf8)) return ec; it.IterationHandle = intptr_t(FindHandle.take()); @@ -753,26 +721,26 @@ error_code detail::directory_iterator_construct(detail::DirIterState &it, path::append(directory_entry_path, directory_entry_name_utf8.str()); it.CurrentEntry = directory_entry(directory_entry_path.str()); - return error_code::success(); + return std::error_code(); } -error_code detail::directory_iterator_destruct(detail::DirIterState &it) { +std::error_code detail::directory_iterator_destruct(detail::DirIterState &it) { if (it.IterationHandle != 0) // Closes the handle if it's valid. ScopedFindHandle close(HANDLE(it.IterationHandle)); it.IterationHandle = 0; it.CurrentEntry = directory_entry(); - return error_code::success(); + return std::error_code(); } -error_code detail::directory_iterator_increment(detail::DirIterState &it) { +std::error_code detail::directory_iterator_increment(detail::DirIterState &it) { WIN32_FIND_DATAW FindData; if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) { - error_code ec = windows_error(::GetLastError()); + DWORD LastError = ::GetLastError(); // Check for end. 
- if (ec == windows_error::no_more_files) + if (LastError == ERROR_NO_MORE_FILES) return detail::directory_iterator_destruct(it); - return ec; + return windows_error(LastError); } size_t FilenameLen = ::wcslen(FindData.cFileName); @@ -782,60 +750,50 @@ error_code detail::directory_iterator_increment(detail::DirIterState &it) { return directory_iterator_increment(it); SmallString<128> directory_entry_path_utf8; - if (error_code ec = UTF16ToUTF8(FindData.cFileName, - ::wcslen(FindData.cFileName), - directory_entry_path_utf8)) + if (std::error_code ec = + UTF16ToUTF8(FindData.cFileName, ::wcslen(FindData.cFileName), + directory_entry_path_utf8)) return ec; it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8)); - return error_code::success(); + return std::error_code(); } -error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, - bool map_writable, void *&result) { - assert(0 && "NOT IMPLEMENTED"); - return windows_error::invalid_function; -} - -error_code unmap_file_pages(void *base, size_t size) { - assert(0 && "NOT IMPLEMENTED"); - return windows_error::invalid_function; -} - -error_code openFileForRead(const Twine &Name, int &ResultFD) { +std::error_code openFileForRead(const Twine &Name, int &ResultFD) { SmallString<128> PathStorage; SmallVector<wchar_t, 128> PathUTF16; - if (error_code EC = UTF8ToUTF16(Name.toStringRef(PathStorage), - PathUTF16)) + if (std::error_code EC = + UTF8ToUTF16(Name.toStringRef(PathStorage), PathUTF16)) return EC; HANDLE H = ::CreateFileW(PathUTF16.begin(), GENERIC_READ, FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); if (H == INVALID_HANDLE_VALUE) { - error_code EC = windows_error(::GetLastError()); + DWORD LastError = ::GetLastError(); + std::error_code EC = windows_error(LastError); // Provide a better error message when trying to open directories. // This only runs if we failed to open the file, so there are probably // no performance issues. - if (EC != windows_error::access_denied) + if (LastError != ERROR_ACCESS_DENIED) return EC; if (is_directory(Name)) - return error_code(errc::is_a_directory, posix_category()); + return make_error_code(errc::is_a_directory); return EC; } int FD = ::_open_osfhandle(intptr_t(H), 0); if (FD == -1) { ::CloseHandle(H); - return windows_error::invalid_handle; + return windows_error(ERROR_INVALID_HANDLE); } ResultFD = FD; - return error_code::success(); + return std::error_code(); } -error_code openFileForWrite(const Twine &Name, int &ResultFD, +std::error_code openFileForWrite(const Twine &Name, int &ResultFD, sys::fs::OpenFlags Flags, unsigned Mode) { // Verify that we don't have both "append" and "excl". assert((!(Flags & sys::fs::F_Excl) || !(Flags & sys::fs::F_Append)) && @@ -844,8 +802,8 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD, SmallString<128> PathStorage; SmallVector<wchar_t, 128> PathUTF16; - if (error_code EC = UTF8ToUTF16(Name.toStringRef(PathStorage), - PathUTF16)) + if (std::error_code EC = + UTF8ToUTF16(Name.toStringRef(PathStorage), PathUTF16)) return EC; DWORD CreationDisposition; @@ -865,14 +823,15 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD, CreationDisposition, FILE_ATTRIBUTE_NORMAL, NULL); if (H == INVALID_HANDLE_VALUE) { - error_code EC = windows_error(::GetLastError()); + DWORD LastError = ::GetLastError(); + std::error_code EC = windows_error(LastError); // Provide a better error message when trying to open directories.
// This only runs if we failed to open the file, so there are probably // no performance issues. - if (EC != windows_error::access_denied) + if (LastError != ERROR_ACCESS_DENIED) return EC; if (is_directory(Name)) - return error_code(errc::is_a_directory, posix_category()); + return make_error_code(errc::is_a_directory); return EC; } @@ -886,11 +845,11 @@ error_code openFileForWrite(const Twine &Name, int &ResultFD, int FD = ::_open_osfhandle(intptr_t(H), OpenFlags); if (FD == -1) { ::CloseHandle(H); - return windows_error::invalid_handle; + return windows_error(ERROR_INVALID_HANDLE); } ResultFD = FD; - return error_code::success(); + return std::error_code(); } } // end namespace fs @@ -911,14 +870,14 @@ bool home_directory(SmallVectorImpl<char> &result) { } // end namespace path namespace windows { -llvm::error_code UTF8ToUTF16(llvm::StringRef utf8, - llvm::SmallVectorImpl<wchar_t> &utf16) { +std::error_code UTF8ToUTF16(llvm::StringRef utf8, + llvm::SmallVectorImpl<wchar_t> &utf16) { if (!utf8.empty()) { int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, utf8.begin(), utf8.size(), utf16.begin(), 0); if (len == 0) - return llvm::windows_error(::GetLastError()); + return windows_error(::GetLastError()); utf16.reserve(len + 1); utf16.set_size(len); @@ -927,25 +886,25 @@ llvm::error_code UTF8ToUTF16(llvm::StringRef utf8, utf8.size(), utf16.begin(), utf16.size()); if (len == 0) - return llvm::windows_error(::GetLastError()); + return windows_error(::GetLastError()); } // Make utf16 null terminated. utf16.push_back(0); utf16.pop_back(); - return llvm::error_code::success(); + return std::error_code(); } -llvm::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, - llvm::SmallVectorImpl<char> &utf8) { +std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, + llvm::SmallVectorImpl<char> &utf8) { if (utf16_len) { // Get length. int len = ::WideCharToMultiByte(CP_UTF8, 0, utf16, utf16_len, utf8.begin(), 0, NULL, NULL); if (len == 0) - return llvm::windows_error(::GetLastError()); + return windows_error(::GetLastError()); utf8.reserve(len); utf8.set_size(len); @@ -955,14 +914,14 @@ llvm::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, utf8.size(), NULL, NULL); if (len == 0) - return llvm::windows_error(::GetLastError()); + return windows_error(::GetLastError()); } // Make utf8 null terminated. utf8.push_back(0); utf8.pop_back(); - return llvm::error_code::success(); + return std::error_code(); } } // end namespace windows } // end namespace sys diff --git a/lib/Support/Windows/Process.inc b/lib/Support/Windows/Process.inc index c3df801..81aee0e 100644 --- a/lib/Support/Windows/Process.inc +++ b/lib/Support/Windows/Process.inc @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Allocator.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/WindowsError.h" #include <malloc.h> // The Windows.h header must be after LLVM and standard headers.
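For orientation, a minimal caller-side sketch of the re-typed conversion helpers shown above (UTF8ToUTF16, its std::error_code return, and its null-termination behavior are as in the patch; the printArgAsUTF16 wrapper is a hypothetical example, not part of the tree):

#include "WindowsSupport.h" // declares llvm::sys::windows::UTF8ToUTF16
#include <cwchar>
#include <system_error>

// Hypothetical helper: convert a UTF-8 argument and print it as a wide string.
static std::error_code printArgAsUTF16(llvm::StringRef Arg) {
  llvm::SmallVector<wchar_t, 128> Wide;
  // The helper now returns std::error_code, mapped from ::GetLastError().
  if (std::error_code EC = llvm::sys::windows::UTF8ToUTF16(Arg, Wide))
    return EC;
  // UTF8ToUTF16 null-terminates the buffer without counting the terminator.
  std::wprintf(L"%ls\n", Wide.data());
  return std::error_code(); // default-constructed == success
}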
@@ -47,7 +49,6 @@ using namespace llvm; using namespace sys; - process::id_type self_process::get_id() { return GetCurrentProcessId(); } @@ -178,12 +179,16 @@ Optional<std::string> Process::GetEnv(StringRef Name) { return std::string(Res.data()); } -error_code +static std::error_code windows_error(DWORD E) { + return mapWindowsError(E); +} + +std::error_code Process::GetArgumentVector(SmallVectorImpl<const char *> &Args, ArrayRef<const char *>, SpecificBumpPtrAllocator<char> &ArgAllocator) { int NewArgCount; - error_code ec; + std::error_code ec; wchar_t **UnicodeCommandLine = CommandLineToArgvW(GetCommandLineW(), &NewArgCount); @@ -208,7 +213,7 @@ Process::GetArgumentVector(SmallVectorImpl<const char *> &Args, if (ec) return ec; - return error_code::success(); + return std::error_code(); } bool Process::StandardInIsUserInput() { @@ -363,12 +368,12 @@ unsigned Process::GetRandomNumber() { HCRYPTPROV HCPC; if (!::CryptAcquireContextW(&HCPC, NULL, NULL, PROV_RSA_FULL, CRYPT_VERIFYCONTEXT)) - assert(false && "Could not acquire a cryptographic context"); + report_fatal_error("Could not acquire a cryptographic context"); ScopedCryptContext CryptoProvider(HCPC); unsigned Ret; if (!::CryptGenRandom(CryptoProvider, sizeof(Ret), reinterpret_cast<BYTE *>(&Ret))) - assert(false && "Could not generate a random number"); + report_fatal_error("Could not generate a random number"); return Ret; } diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc index 5827c10..b2f71ae 100644 --- a/lib/Support/Windows/Program.inc +++ b/lib/Support/Windows/Program.inc @@ -226,7 +226,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, // an environment block by concatenating them. for (unsigned i = 0; envp[i]; ++i) { SmallVector<wchar_t, MAX_PATH> EnvString; - if (error_code ec = windows::UTF8ToUTF16(envp[i], EnvString)) { + if (std::error_code ec = windows::UTF8ToUTF16(envp[i], EnvString)) { SetLastError(ec.value()); MakeErrMsg(ErrMsg, "Unable to convert environment variable to UTF-16"); return false; @@ -290,7 +290,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, fflush(stderr); SmallVector<wchar_t, MAX_PATH> ProgramUtf16; - if (error_code ec = windows::UTF8ToUTF16(Program, ProgramUtf16)) { + if (std::error_code ec = windows::UTF8ToUTF16(Program, ProgramUtf16)) { SetLastError(ec.value()); MakeErrMsg(ErrMsg, std::string("Unable to convert application name to UTF-16")); @@ -298,7 +298,7 @@ static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, } SmallVector<wchar_t, MAX_PATH> CommandUtf16; - if (error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) { + if (std::error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) { SetLastError(ec.value()); MakeErrMsg(ErrMsg, std::string("Unable to convert command-line to UTF-16")); @@ -422,18 +422,18 @@ ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, return WaitResult; } -error_code sys::ChangeStdinToBinary(){ + std::error_code sys::ChangeStdinToBinary(){ int result = _setmode( _fileno(stdin), _O_BINARY ); if (result == -1) - return error_code(errno, generic_category()); - return make_error_code(errc::success); + return std::error_code(errno, std::generic_category()); + return std::error_code(); } -error_code sys::ChangeStdoutToBinary(){ + std::error_code sys::ChangeStdoutToBinary(){ int result = _setmode( _fileno(stdout), _O_BINARY ); if (result == -1) - return error_code(errno, generic_category()); - return 
make_error_code(errc::success); + return std::error_code(errno, std::generic_category()); + return std::error_code(); } bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef<const char*> Args) { diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h index 6bef444..f68835b 100644 --- a/lib/Support/Windows/WindowsSupport.h +++ b/lib/Support/Windows/WindowsSupport.h @@ -32,7 +32,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" // Get build system configuration settings #include "llvm/Support/Compiler.h" -#include "llvm/Support/system_error.h" +#include <system_error> #include <windows.h> #include <wincrypt.h> #include <cassert> @@ -163,10 +163,9 @@ c_str(SmallVectorImpl<T> &str) { namespace sys { namespace windows { -error_code UTF8ToUTF16(StringRef utf8, - SmallVectorImpl<wchar_t> &utf16); -error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, - SmallVectorImpl<char> &utf8); +std::error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl<wchar_t> &utf16); +std::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, + SmallVectorImpl<char> &utf8); } // end namespace windows } // end namespace sys } // end namespace llvm. diff --git a/lib/Support/Windows/system_error.inc b/lib/Support/Windows/system_error.inc deleted file mode 100644 index 37ec81d..0000000 --- a/lib/Support/Windows/system_error.inc +++ /dev/null @@ -1,142 +0,0 @@ -//===- llvm/Support/Win32/system_error.inc - Windows error_code --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides the Windows specific implementation of the error_code -// and error_condition classes. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic Windows code that -//=== is guaranteed to work on *all* Windows variants. -//===----------------------------------------------------------------------===// - -#include <windows.h> -#include <winerror.h> - -using namespace llvm; - -std::string -_system_error_category::message(int ev) const { - LPVOID lpMsgBuf = 0; - DWORD retval = ::FormatMessageA( - FORMAT_MESSAGE_ALLOCATE_BUFFER | - FORMAT_MESSAGE_FROM_SYSTEM | - FORMAT_MESSAGE_IGNORE_INSERTS, - NULL, - ev, - MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language - (LPSTR) &lpMsgBuf, - 0, - NULL); - if (retval == 0) { - ::LocalFree(lpMsgBuf); - return std::string("Unknown error"); - } - - std::string str( static_cast<LPCSTR>(lpMsgBuf) ); - ::LocalFree(lpMsgBuf); - - while (str.size() - && (str[str.size()-1] == '\n' || str[str.size()-1] == '\r')) - str.erase( str.size()-1 ); - if (str.size() && str[str.size()-1] == '.') - str.erase( str.size()-1 ); - return str; -} - -// I'd rather not double the line count of the following. 
-#define MAP_ERR_TO_COND(x, y) case x: return make_error_condition(errc::y) - -error_condition -_system_error_category::default_error_condition(int ev) const { - switch (ev) { - MAP_ERR_TO_COND(0, success); - // Windows system -> posix_errno decode table ---------------------------// - // see WinError.h comments for descriptions of errors - MAP_ERR_TO_COND(ERROR_ACCESS_DENIED, permission_denied); - MAP_ERR_TO_COND(ERROR_ALREADY_EXISTS, file_exists); - MAP_ERR_TO_COND(ERROR_BAD_UNIT, no_such_device); - MAP_ERR_TO_COND(ERROR_BUFFER_OVERFLOW, filename_too_long); - MAP_ERR_TO_COND(ERROR_BUSY, device_or_resource_busy); - MAP_ERR_TO_COND(ERROR_BUSY_DRIVE, device_or_resource_busy); - MAP_ERR_TO_COND(ERROR_CANNOT_MAKE, permission_denied); - MAP_ERR_TO_COND(ERROR_CANTOPEN, io_error); - MAP_ERR_TO_COND(ERROR_CANTREAD, io_error); - MAP_ERR_TO_COND(ERROR_CANTWRITE, io_error); - MAP_ERR_TO_COND(ERROR_CURRENT_DIRECTORY, permission_denied); - MAP_ERR_TO_COND(ERROR_DEV_NOT_EXIST, no_such_device); - MAP_ERR_TO_COND(ERROR_DEVICE_IN_USE, device_or_resource_busy); - MAP_ERR_TO_COND(ERROR_DIR_NOT_EMPTY, directory_not_empty); - MAP_ERR_TO_COND(ERROR_DIRECTORY, invalid_argument); - MAP_ERR_TO_COND(ERROR_DISK_FULL, no_space_on_device); - MAP_ERR_TO_COND(ERROR_FILE_EXISTS, file_exists); - MAP_ERR_TO_COND(ERROR_FILE_NOT_FOUND, no_such_file_or_directory); - MAP_ERR_TO_COND(ERROR_HANDLE_DISK_FULL, no_space_on_device); - MAP_ERR_TO_COND(ERROR_HANDLE_EOF, value_too_large); - MAP_ERR_TO_COND(ERROR_INVALID_ACCESS, permission_denied); - MAP_ERR_TO_COND(ERROR_INVALID_DRIVE, no_such_device); - MAP_ERR_TO_COND(ERROR_INVALID_FUNCTION, function_not_supported); - MAP_ERR_TO_COND(ERROR_INVALID_HANDLE, invalid_argument); - MAP_ERR_TO_COND(ERROR_INVALID_NAME, invalid_argument); - MAP_ERR_TO_COND(ERROR_LOCK_VIOLATION, no_lock_available); - MAP_ERR_TO_COND(ERROR_LOCKED, no_lock_available); - MAP_ERR_TO_COND(ERROR_NEGATIVE_SEEK, invalid_argument); - MAP_ERR_TO_COND(ERROR_NOACCESS, permission_denied); - MAP_ERR_TO_COND(ERROR_NOT_ENOUGH_MEMORY, not_enough_memory); - MAP_ERR_TO_COND(ERROR_NOT_READY, resource_unavailable_try_again); - MAP_ERR_TO_COND(ERROR_NOT_SAME_DEVICE, cross_device_link); - MAP_ERR_TO_COND(ERROR_OPEN_FAILED, io_error); - MAP_ERR_TO_COND(ERROR_OPEN_FILES, device_or_resource_busy); - MAP_ERR_TO_COND(ERROR_OPERATION_ABORTED, operation_canceled); - MAP_ERR_TO_COND(ERROR_OUTOFMEMORY, not_enough_memory); - MAP_ERR_TO_COND(ERROR_PATH_NOT_FOUND, no_such_file_or_directory); - MAP_ERR_TO_COND(ERROR_BAD_NETPATH, no_such_file_or_directory); - MAP_ERR_TO_COND(ERROR_READ_FAULT, io_error); - MAP_ERR_TO_COND(ERROR_RETRY, resource_unavailable_try_again); - MAP_ERR_TO_COND(ERROR_SEEK, io_error); - MAP_ERR_TO_COND(ERROR_SHARING_VIOLATION, permission_denied); - MAP_ERR_TO_COND(ERROR_TOO_MANY_OPEN_FILES, too_many_files_open); - MAP_ERR_TO_COND(ERROR_WRITE_FAULT, io_error); - MAP_ERR_TO_COND(ERROR_WRITE_PROTECT, permission_denied); - MAP_ERR_TO_COND(ERROR_SEM_TIMEOUT, timed_out); - MAP_ERR_TO_COND(WSAEACCES, permission_denied); - MAP_ERR_TO_COND(WSAEADDRINUSE, address_in_use); - MAP_ERR_TO_COND(WSAEADDRNOTAVAIL, address_not_available); - MAP_ERR_TO_COND(WSAEAFNOSUPPORT, address_family_not_supported); - MAP_ERR_TO_COND(WSAEALREADY, connection_already_in_progress); - MAP_ERR_TO_COND(WSAEBADF, bad_file_descriptor); - MAP_ERR_TO_COND(WSAECONNABORTED, connection_aborted); - MAP_ERR_TO_COND(WSAECONNREFUSED, connection_refused); - MAP_ERR_TO_COND(WSAECONNRESET, connection_reset); - MAP_ERR_TO_COND(WSAEDESTADDRREQ, destination_address_required); 
- MAP_ERR_TO_COND(WSAEFAULT, bad_address); - MAP_ERR_TO_COND(WSAEHOSTUNREACH, host_unreachable); - MAP_ERR_TO_COND(WSAEINPROGRESS, operation_in_progress); - MAP_ERR_TO_COND(WSAEINTR, interrupted); - MAP_ERR_TO_COND(WSAEINVAL, invalid_argument); - MAP_ERR_TO_COND(WSAEISCONN, already_connected); - MAP_ERR_TO_COND(WSAEMFILE, too_many_files_open); - MAP_ERR_TO_COND(WSAEMSGSIZE, message_size); - MAP_ERR_TO_COND(WSAENAMETOOLONG, filename_too_long); - MAP_ERR_TO_COND(WSAENETDOWN, network_down); - MAP_ERR_TO_COND(WSAENETRESET, network_reset); - MAP_ERR_TO_COND(WSAENETUNREACH, network_unreachable); - MAP_ERR_TO_COND(WSAENOBUFS, no_buffer_space); - MAP_ERR_TO_COND(WSAENOPROTOOPT, no_protocol_option); - MAP_ERR_TO_COND(WSAENOTCONN, not_connected); - MAP_ERR_TO_COND(WSAENOTSOCK, not_a_socket); - MAP_ERR_TO_COND(WSAEOPNOTSUPP, operation_not_supported); - MAP_ERR_TO_COND(WSAEPROTONOSUPPORT, protocol_not_supported); - MAP_ERR_TO_COND(WSAEPROTOTYPE, wrong_protocol_type); - MAP_ERR_TO_COND(WSAETIMEDOUT, timed_out); - MAP_ERR_TO_COND(WSAEWOULDBLOCK, operation_would_block); - default: return error_condition(ev, system_category()); - } -} diff --git a/lib/Support/YAMLTraits.cpp b/lib/Support/YAMLTraits.cpp index e5f9494..5212624 100644 --- a/lib/Support/YAMLTraits.cpp +++ b/lib/Support/YAMLTraits.cpp @@ -7,6 +7,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Errc.h" #include "llvm/Support/YAMLTraits.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/Casting.h" @@ -56,9 +57,7 @@ Input::Input(StringRef InputContent, Input::~Input() { } -error_code Input::error() { - return EC; -} +std::error_code Input::error() { return EC; } // Pin the vtables to this file. void Input::HNode::anchor() {} @@ -90,8 +89,8 @@ bool Input::setCurrentDocument() { return false; } -void Input::nextDocument() { - ++DocIterator; +bool Input::nextDocument() { + return ++DocIterator != Strm->end(); } bool Input::mapTag(StringRef Tag, bool Default) { diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index f55838e..f7c213a 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -22,10 +22,10 @@ #include "llvm/Support/Format.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" -#include "llvm/Support/system_error.h" #include <cctype> #include <cerrno> #include <sys/stat.h> +#include <system_error> // <fcntl.h> may provide O_BINARY. 
#if defined(HAVE_FCNTL_H) @@ -450,7 +450,7 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, return; } - error_code EC = sys::fs::openFileForWrite(Filename, FD, Flags); + std::error_code EC = sys::fs::openFileForWrite(Filename, FD, Flags); if (EC) { ErrorInfo = "Error opening output file '" + std::string(Filename) + "': " + diff --git a/lib/Support/regcclass.h b/lib/Support/regcclass.h index 2cea3e4..7fd6604 100644 --- a/lib/Support/regcclass.h +++ b/lib/Support/regcclass.h @@ -37,6 +37,9 @@ * @(#)cclass.h 8.3 (Berkeley) 3/20/94 */ +#ifndef LLVM_SUPPORT_REGCCLASS_H +#define LLVM_SUPPORT_REGCCLASS_H + /* character-class table */ static struct cclass { const char *name; @@ -68,3 +71,5 @@ static struct cclass { ""} , { NULL, 0, "" } }; + +#endif diff --git a/lib/Support/regcname.h b/lib/Support/regcname.h index 3c0bb24..891d255 100644 --- a/lib/Support/regcname.h +++ b/lib/Support/regcname.h @@ -35,6 +35,9 @@ * @(#)cname.h 8.3 (Berkeley) 3/20/94 */ +#ifndef LLVM_SUPPORT_REGCNAME_H +#define LLVM_SUPPORT_REGCNAME_H + /* character-name table */ static struct cname { const char *name; @@ -137,3 +140,5 @@ static struct cname { { "DEL", '\177' }, { NULL, 0 } }; + +#endif diff --git a/lib/Support/regex2.h b/lib/Support/regex2.h index 21659c3..d81bfbc 100644 --- a/lib/Support/regex2.h +++ b/lib/Support/regex2.h @@ -35,6 +35,9 @@ * @(#)regex2.h 8.4 (Berkeley) 3/20/94 */ +#ifndef LLVM_SUPPORT_REGEX2_H +#define LLVM_SUPPORT_REGEX2_H + /* * internals of regex_t */ @@ -155,3 +158,5 @@ struct re_guts { /* misc utilities */ #define OUT (CHAR_MAX+1) /* a non-character value */ #define ISWORD(c) (isalnum(c&0xff) || (c) == '_') + +#endif diff --git a/lib/Support/regutils.h b/lib/Support/regutils.h index d0ee100..49a975c 100644 --- a/lib/Support/regutils.h +++ b/lib/Support/regutils.h @@ -35,6 +35,9 @@ * @(#)utils.h 8.3 (Berkeley) 3/20/94 */ +#ifndef LLVM_SUPPORT_REGUTILS_H +#define LLVM_SUPPORT_REGUTILS_H + /* utility definitions */ #define NC (CHAR_MAX - CHAR_MIN + 1) typedef unsigned char uch; @@ -51,3 +54,5 @@ typedef unsigned char uch; #ifdef USEBCOPY #define memmove(d, s, c) bcopy(s, d, c) #endif + +#endif diff --git a/lib/Support/system_error.cpp b/lib/Support/system_error.cpp deleted file mode 100644 index 299f54a..0000000 --- a/lib/Support/system_error.cpp +++ /dev/null @@ -1,130 +0,0 @@ -//===---------------------- system_error.cpp ------------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This was lifted from libc++ and modified for C++03. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/system_error.h" -#include "llvm/Support/Errno.h" -#include <cstring> -#include <string> - -namespace llvm { - -// class error_category - -error_category::error_category() { -} - -error_category::~error_category() { -} - -error_condition -error_category::default_error_condition(int ev) const { - return error_condition(ev, *this); -} - -bool -error_category::equivalent(int code, const error_condition& condition) const { - return default_error_condition(code) == condition; -} - -bool -error_category::equivalent(const error_code& code, int condition) const { - return *this == code.category() && code.value() == condition; -} - -std::string -_do_message::message(int ev) const { - return std::string(sys::StrError(ev)); -} - -class _generic_error_category : public _do_message { -public: - const char* name() const override; - std::string message(int ev) const override; -}; - -const char* -_generic_error_category::name() const { - return "generic"; -} - -std::string -_generic_error_category::message(int ev) const { -#ifdef ELAST - if (ev > ELAST) - return std::string("unspecified generic_category error"); -#endif // ELAST - return _do_message::message(ev); -} - -const error_category& -generic_category() { - static _generic_error_category s; - return s; -} - -class _system_error_category : public _do_message { -public: - const char* name() const override; - std::string message(int ev) const override; - error_condition default_error_condition(int ev) const override; -}; - -const char* -_system_error_category::name() const { - return "system"; -} - -// std::string _system_error_category::message(int ev) const { -// Is in Platform/system_error.inc - -// error_condition _system_error_category::default_error_condition(int ev) const -// Is in Platform/system_error.inc - -const error_category& -system_category() { - static _system_error_category s; - return s; -} - -const error_category& -posix_category() { -#ifdef LLVM_ON_WIN32 - return generic_category(); -#else - return system_category(); -#endif -} - -// error_condition - -std::string -error_condition::message() const { - return _cat_->message(_val_); -} - -// error_code - -std::string -error_code::message() const { - return _cat_->message(_val_); -} - -} // end namespace llvm - -// Include the truly platform-specific parts of this class. 
-#if defined(LLVM_ON_UNIX) -#include "Unix/system_error.inc" -#endif -#if defined(LLVM_ON_WIN32) -#include "Windows/system_error.inc" -#endif diff --git a/lib/TableGen/Android.mk b/lib/TableGen/Android.mk index 1f01ef7..0fd94bb 100644 --- a/lib/TableGen/Android.mk +++ b/lib/TableGen/Android.mk @@ -4,6 +4,7 @@ libtablegen_SRC_FILES := \ Error.cpp \ Main.cpp \ Record.cpp \ + SetTheory.cpp \ StringMatcher.cpp \ TableGenBackend.cpp \ TGLexer.cpp \ diff --git a/lib/TableGen/CMakeLists.txt b/lib/TableGen/CMakeLists.txt index 935d674..fb70218 100644 --- a/lib/TableGen/CMakeLists.txt +++ b/lib/TableGen/CMakeLists.txt @@ -2,6 +2,7 @@ add_llvm_library(LLVMTableGen Error.cpp Main.cpp Record.cpp + SetTheory.cpp StringMatcher.cpp TableGenBackend.cpp TGLexer.cpp diff --git a/lib/TableGen/Main.cpp b/lib/TableGen/Main.cpp index 476026d..e317fbf 100644 --- a/lib/TableGen/Main.cpp +++ b/lib/TableGen/Main.cpp @@ -20,12 +20,12 @@ #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/ToolOutputFile.h" -#include "llvm/Support/system_error.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Main.h" #include "llvm/TableGen/Record.h" #include <algorithm> #include <cstdio> +#include <system_error> using namespace llvm; namespace { @@ -81,14 +81,14 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) { RecordKeeper Records; // Parse the input file. - std::unique_ptr<MemoryBuffer> File; - if (error_code ec = - MemoryBuffer::getFileOrSTDIN(InputFilename, File)) { - errs() << "Could not open input file '" << InputFilename << "': " - << ec.message() <<"\n"; + ErrorOr<std::unique_ptr<MemoryBuffer>> FileOrErr = + MemoryBuffer::getFileOrSTDIN(InputFilename); + if (std::error_code EC = FileOrErr.getError()) { + errs() << "Could not open input file '" << InputFilename + << "': " << EC.message() << "\n"; return 1; } - MemoryBuffer *F = File.release(); + MemoryBuffer *F = FileOrErr.get().release(); // Tell SrcMgr about this buffer, which is what TGParser will pick up. SrcMgr.AddNewSourceBuffer(F, SMLoc()); diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index c553a21..f7843dc 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -811,20 +811,14 @@ Init *UnOpInit::Fold(Record *CurRec, MultiClass *CurMultiClass) const { } case HEAD: { if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) { - if (LHSl->getSize() == 0) { - assert(0 && "Empty list in car"); - return nullptr; - } + assert(LHSl->getSize() != 0 && "Empty list in car"); return LHSl->getElement(0); } break; } case TAIL: { if (ListInit *LHSl = dyn_cast<ListInit>(LHS)) { - if (LHSl->getSize() == 0) { - assert(0 && "Empty list in cdr"); - return nullptr; - } + assert(LHSl->getSize() != 0 && "Empty list in cdr"); // Note the +1. We can't just pass the result of getValues() // directly. ArrayRef<Init *>::iterator begin = LHSl->getValues().begin()+1; diff --git a/lib/TableGen/SetTheory.cpp b/lib/TableGen/SetTheory.cpp new file mode 100644 index 0000000..c99c2ba --- /dev/null +++ b/lib/TableGen/SetTheory.cpp @@ -0,0 +1,323 @@ +//===- SetTheory.cpp - Generate ordered sets from DAG expressions ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the SetTheory class that computes ordered sets of +// Records from DAG expressions. 
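+// For example, assuming defs R0-R3 exist, (add R0, (sequence "R%u", 1, 3)) +// evaluates to the ordered set [R0, R1, R2, R3].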
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Format.h" +#include "llvm/TableGen/Error.h" +#include "llvm/TableGen/Record.h" +#include "llvm/TableGen/SetTheory.h" + +using namespace llvm; + +// Define the standard operators. +namespace { + +typedef SetTheory::RecSet RecSet; +typedef SetTheory::RecVec RecVec; + +// (add a, b, ...) Evaluate and union all arguments. +struct AddOp : public SetTheory::Operator { + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, + ArrayRef<SMLoc> Loc) override { + ST.evaluate(Expr->arg_begin(), Expr->arg_end(), Elts, Loc); + } +}; + +// (sub Add, Sub, ...) Set difference. +struct SubOp : public SetTheory::Operator { + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, + ArrayRef<SMLoc> Loc) override { + if (Expr->arg_size() < 2) + PrintFatalError(Loc, "Set difference needs at least two arguments: " + + Expr->getAsString()); + RecSet Add, Sub; + ST.evaluate(*Expr->arg_begin(), Add, Loc); + ST.evaluate(Expr->arg_begin() + 1, Expr->arg_end(), Sub, Loc); + for (RecSet::iterator I = Add.begin(), E = Add.end(); I != E; ++I) + if (!Sub.count(*I)) + Elts.insert(*I); + } +}; + +// (and S1, S2) Set intersection. +struct AndOp : public SetTheory::Operator { + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, + ArrayRef<SMLoc> Loc) override { + if (Expr->arg_size() != 2) + PrintFatalError(Loc, "Set intersection requires two arguments: " + + Expr->getAsString()); + RecSet S1, S2; + ST.evaluate(Expr->arg_begin()[0], S1, Loc); + ST.evaluate(Expr->arg_begin()[1], S2, Loc); + for (RecSet::iterator I = S1.begin(), E = S1.end(); I != E; ++I) + if (S2.count(*I)) + Elts.insert(*I); + } +}; + +// SetIntBinOp - Abstract base class for (Op S, N) operators. +struct SetIntBinOp : public SetTheory::Operator { + virtual void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N, + RecSet &Elts, ArrayRef<SMLoc> Loc) = 0; + + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, + ArrayRef<SMLoc> Loc) override { + if (Expr->arg_size() != 2) + PrintFatalError(Loc, "Operator requires (Op Set, Int) arguments: " + + Expr->getAsString()); + RecSet Set; + ST.evaluate(Expr->arg_begin()[0], Set, Loc); + IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1]); + if (!II) + PrintFatalError(Loc, "Second argument must be an integer: " + + Expr->getAsString()); + apply2(ST, Expr, Set, II->getValue(), Elts, Loc); + } +}; + +// (shl S, N) Shift left, remove the first N elements. +struct ShlOp : public SetIntBinOp { + void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N, + RecSet &Elts, ArrayRef<SMLoc> Loc) override { + if (N < 0) + PrintFatalError(Loc, "Positive shift required: " + + Expr->getAsString()); + if (unsigned(N) < Set.size()) + Elts.insert(Set.begin() + N, Set.end()); + } +}; + +// (trunc S, N) Truncate after the first N elements. +struct TruncOp : public SetIntBinOp { + void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N, + RecSet &Elts, ArrayRef<SMLoc> Loc) override { + if (N < 0) + PrintFatalError(Loc, "Positive length required: " + + Expr->getAsString()); + if (unsigned(N) > Set.size()) + N = Set.size(); + Elts.insert(Set.begin(), Set.begin() + N); + } +}; + +// Left/right rotation. +struct RotOp : public SetIntBinOp { + const bool Reverse; + + RotOp(bool Rev) : Reverse(Rev) {} + + void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N, + RecSet &Elts, ArrayRef<SMLoc> Loc) override { + if (Reverse) + N = -N; + // N > 0 -> rotate left, N < 0 -> rotate right. 
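+ // For example, rotating [a, b, c, d] left by one yields [b, c, d, a], and + // rotating it right by one yields [d, a, b, c].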
+ if (Set.empty()) + return; + if (N < 0) + N = Set.size() - (-N % Set.size()); + else + N %= Set.size(); + Elts.insert(Set.begin() + N, Set.end()); + Elts.insert(Set.begin(), Set.begin() + N); + } +}; + +// (decimate S, N) Pick every N'th element of S. +struct DecimateOp : public SetIntBinOp { + void apply2(SetTheory &ST, DagInit *Expr, RecSet &Set, int64_t N, + RecSet &Elts, ArrayRef<SMLoc> Loc) override { + if (N <= 0) + PrintFatalError(Loc, "Positive stride required: " + + Expr->getAsString()); + for (unsigned I = 0; I < Set.size(); I += N) + Elts.insert(Set[I]); + } +}; + +// (interleave S1, S2, ...) Interleave elements of the arguments. +struct InterleaveOp : public SetTheory::Operator { + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, + ArrayRef<SMLoc> Loc) override { + // Evaluate the arguments individually. + SmallVector<RecSet, 4> Args(Expr->getNumArgs()); + unsigned MaxSize = 0; + for (unsigned i = 0, e = Expr->getNumArgs(); i != e; ++i) { + ST.evaluate(Expr->getArg(i), Args[i], Loc); + MaxSize = std::max(MaxSize, unsigned(Args[i].size())); + } + // Interleave arguments into Elts. + for (unsigned n = 0; n != MaxSize; ++n) + for (unsigned i = 0, e = Expr->getNumArgs(); i != e; ++i) + if (n < Args[i].size()) + Elts.insert(Args[i][n]); + } +}; + +// (sequence "Format", From, To) Generate a sequence of records by name. +struct SequenceOp : public SetTheory::Operator { + void apply(SetTheory &ST, DagInit *Expr, RecSet &Elts, + ArrayRef<SMLoc> Loc) override { + int Step = 1; + if (Expr->arg_size() > 4) + PrintFatalError(Loc, "Bad args to (sequence \"Format\", From, To): " + + Expr->getAsString()); + else if (Expr->arg_size() == 4) { + if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[3])) { + Step = II->getValue(); + } else + PrintFatalError(Loc, "Stride must be an integer: " + + Expr->getAsString()); + } + + std::string Format; + if (StringInit *SI = dyn_cast<StringInit>(Expr->arg_begin()[0])) + Format = SI->getValue(); + else + PrintFatalError(Loc, "Format must be a string: " + Expr->getAsString()); + + int64_t From, To; + if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[1])) + From = II->getValue(); + else + PrintFatalError(Loc, "From must be an integer: " + Expr->getAsString()); + if (From < 0 || From >= (1 << 30)) + PrintFatalError(Loc, "From out of range"); + + if (IntInit *II = dyn_cast<IntInit>(Expr->arg_begin()[2])) + To = II->getValue(); + else + PrintFatalError(Loc, "To must be an integer: " + Expr->getAsString()); + if (To < 0 || To >= (1 << 30)) + PrintFatalError(Loc, "To out of range"); + + RecordKeeper &Records = + cast<DefInit>(Expr->getOperator())->getDef()->getRecords(); + + Step *= From <= To ? 1 : -1; + while (true) { + if (Step > 0 && From > To) + break; + else if (Step < 0 && From < To) + break; + std::string Name; + raw_string_ostream OS(Name); + OS << format(Format.c_str(), unsigned(From)); + Record *Rec = Records.getDef(OS.str()); + if (!Rec) + PrintFatalError(Loc, "No def named '" + Name + "': " + + Expr->getAsString()); + // Try to reevaluate Rec in case it is a set. + if (const RecVec *Result = ST.expand(Rec)) + Elts.insert(Result->begin(), Result->end()); + else + Elts.insert(Rec); + + From += Step; + } + } +}; + +// Expand a Def into a set by evaluating one of its fields.
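+// For example, addFieldExpander("RegisterClass", "MemberList") would make +// every RegisterClass def expand to the records in its MemberList field +// (illustrative class and field names).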
+struct FieldExpander : public SetTheory::Expander { + StringRef FieldName; + + FieldExpander(StringRef fn) : FieldName(fn) {} + + void expand(SetTheory &ST, Record *Def, RecSet &Elts) override { + ST.evaluate(Def->getValueInit(FieldName), Elts, Def->getLoc()); + } +}; +} // end anonymous namespace + +// Pin the vtables to this file. +void SetTheory::Operator::anchor() {} +void SetTheory::Expander::anchor() {} + + +SetTheory::SetTheory() { + addOperator("add", new AddOp); + addOperator("sub", new SubOp); + addOperator("and", new AndOp); + addOperator("shl", new ShlOp); + addOperator("trunc", new TruncOp); + addOperator("rotl", new RotOp(false)); + addOperator("rotr", new RotOp(true)); + addOperator("decimate", new DecimateOp); + addOperator("interleave", new InterleaveOp); + addOperator("sequence", new SequenceOp); +} + +void SetTheory::addOperator(StringRef Name, Operator *Op) { + Operators[Name] = Op; +} + +void SetTheory::addExpander(StringRef ClassName, Expander *E) { + Expanders[ClassName] = E; +} + +void SetTheory::addFieldExpander(StringRef ClassName, StringRef FieldName) { + addExpander(ClassName, new FieldExpander(FieldName)); +} + +void SetTheory::evaluate(Init *Expr, RecSet &Elts, ArrayRef<SMLoc> Loc) { + // A def in a list can be just an element, or it may expand. + if (DefInit *Def = dyn_cast<DefInit>(Expr)) { + if (const RecVec *Result = expand(Def->getDef())) + return Elts.insert(Result->begin(), Result->end()); + Elts.insert(Def->getDef()); + return; + } + + // Lists simply expand. + if (ListInit *LI = dyn_cast<ListInit>(Expr)) + return evaluate(LI->begin(), LI->end(), Elts, Loc); + + // Anything else must be a DAG. + DagInit *DagExpr = dyn_cast<DagInit>(Expr); + if (!DagExpr) + PrintFatalError(Loc, "Invalid set element: " + Expr->getAsString()); + DefInit *OpInit = dyn_cast<DefInit>(DagExpr->getOperator()); + if (!OpInit) + PrintFatalError(Loc, "Bad set expression: " + Expr->getAsString()); + Operator *Op = Operators.lookup(OpInit->getDef()->getName()); + if (!Op) + PrintFatalError(Loc, "Unknown set operator: " + Expr->getAsString()); + Op->apply(*this, DagExpr, Elts, Loc); +} + +const RecVec *SetTheory::expand(Record *Set) { + // Check existing entries for Set and return early. + ExpandMap::iterator I = Expansions.find(Set); + if (I != Expansions.end()) + return &I->second; + + // This is the first time we see Set. Find a suitable expander. + const std::vector<Record*> &SC = Set->getSuperClasses(); + for (unsigned i = 0, e = SC.size(); i != e; ++i) { + // Skip unnamed superclasses. + if (!dyn_cast<StringInit>(SC[i]->getNameInit())) + continue; + if (Expander *Exp = Expanders.lookup(SC[i]->getName())) { + // This breaks recursive definitions. + RecVec &EltVec = Expansions[Set]; + RecSet Elts; + Exp->expand(*this, Set, Elts); + EltVec.assign(Elts.begin(), Elts.end()); + return &EltVec; + } + } + + // Set is not expandable.
+ return nullptr; +} + diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index 1ec2eea..fc1d3ca 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -27,9 +27,9 @@ using namespace llvm; TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) { - CurBuffer = 0; - CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); - CurPtr = CurBuf->getBufferStart(); + CurBuffer = SrcMgr.getMainFileID(); + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); + CurPtr = CurBuf.begin(); TokStart = nullptr; } @@ -52,7 +52,7 @@ int TGLexer::getNextChar() { case 0: { // A nul character in the stream is either the end of the current buffer or // a random nul in the file. Disambiguate that here. - if (CurPtr-1 != CurBuf->getBufferEnd()) + if (CurPtr-1 != CurBuf.end()) return 0; // Just whitespace. // If this is the end of an included file, pop the parent file off the @@ -60,7 +60,7 @@ int TGLexer::getNextChar() { SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); if (ParentIncludeLoc != SMLoc()) { CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); - CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); CurPtr = ParentIncludeLoc.getPointer(); return getNextChar(); } @@ -187,7 +187,7 @@ tgtok::TokKind TGLexer::LexString() { while (*CurPtr != '"') { // If we hit the end of the buffer, report an error. - if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd()) + if (*CurPtr == 0 && CurPtr == CurBuf.end()) return ReturnError(StrStart, "End of file in string literal"); if (*CurPtr == '\n' || *CurPtr == '\r') @@ -220,7 +220,7 @@ tgtok::TokKind TGLexer::LexString() { // If we hit the end of the buffer, report an error. case '\0': - if (CurPtr == CurBuf->getBufferEnd()) + if (CurPtr == CurBuf.end()) return ReturnError(StrStart, "End of file in string literal"); // FALL THROUGH default: @@ -304,7 +304,7 @@ bool TGLexer::LexInclude() { CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr), IncludedFile); - if (CurBuffer == -1) { + if (!CurBuffer) { PrintError(getLoc(), "Could not find include file '" + Filename + "'"); return true; } @@ -319,8 +319,8 @@ bool TGLexer::LexInclude() { } Dependencies.insert(std::make_pair(IncludedFile, getLoc())); // Save the line number and lex buffer of the includer. - CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); - CurPtr = CurBuf->getBufferStart(); + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer)->getBuffer(); + CurPtr = CurBuf.begin(); return false; } @@ -333,7 +333,7 @@ void TGLexer::SkipBCPLComment() { return; // Newline is end of comment. case 0: // If this is the end of the buffer, end the comment. - if (CurPtr == CurBuf->getBufferEnd()) + if (CurPtr == CurBuf.end()) return; break; } diff --git a/lib/TableGen/TGLexer.h b/lib/TableGen/TGLexer.h index 1e599f8..a2c95ca 100644 --- a/lib/TableGen/TGLexer.h +++ b/lib/TableGen/TGLexer.h @@ -14,6 +14,7 @@ #ifndef TGLEXER_H #define TGLEXER_H +#include "llvm/ADT/StringRef.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/SMLoc.h" #include <cassert> @@ -21,7 +22,6 @@ #include <string> namespace llvm { -class MemoryBuffer; class SourceMgr; class SMLoc; class Twine; @@ -63,7 +63,7 @@ class TGLexer { SourceMgr &SrcMgr; const char *CurPtr; - const MemoryBuffer *CurBuf; + StringRef CurBuf; // Information about the current token. const char *TokStart; @@ -73,7 +73,7 @@ class TGLexer { /// CurBuffer - This is the current buffer index we're lexing from as managed /// by the SourceMgr object. 
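/// Buffer IDs are now unsigned; SrcMgr.AddIncludeFile() returns 0 on failure, /// which is why LexInclude() above checks !CurBuffer instead of -1.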
- int CurBuffer; + unsigned CurBuffer; public: typedef std::map<std::string, SMLoc> DependenciesMapTy; diff --git a/lib/TableGen/TGParser.cpp b/lib/TableGen/TGParser.cpp index 038e018..0550692 100644 --- a/lib/TableGen/TGParser.cpp +++ b/lib/TableGen/TGParser.cpp @@ -360,8 +360,13 @@ bool TGParser::ProcessForeachDefs(Record *CurRec, SMLoc Loc, IterSet &IterVals){ } if (Records.getDef(IterRec->getNameInitAsString())) { - Error(Loc, "def already exists: " + IterRec->getNameInitAsString()); - return true; + // If this record is anonymous, it's no problem, just generate a new name + if (IterRec->isAnonymous()) + IterRec->setName(GetNewAnonymousName()); + else { + Error(Loc, "def already exists: " + IterRec->getNameInitAsString()); + return true; + } } Records.addDef(IterRec); @@ -782,7 +787,7 @@ Init *TGParser::ParseIDValue(Record *CurRec, /// /// Operation ::= XOperator ['<' Type '>'] '(' Args ')' /// -Init *TGParser::ParseOperation(Record *CurRec) { +Init *TGParser::ParseOperation(Record *CurRec, RecTy *ItemType) { switch (Lex.getCode()) { default: TokError("unknown operation"); @@ -845,7 +850,7 @@ Init *TGParser::ParseOperation(Record *CurRec) { ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType()); StringRecTy *SType = dyn_cast<StringRecTy>(LHSt->getType()); if (!LType && !SType) { - TokError("expected list or string type argumnet in unary operator"); + TokError("expected list or string type argument in unary operator"); return nullptr; } } @@ -853,7 +858,7 @@ Init *TGParser::ParseOperation(Record *CurRec) { if (Code == UnOpInit::HEAD || Code == UnOpInit::TAIL) { if (!LHSl && !LHSt) { - TokError("expected list type argumnet in unary operator"); + TokError("expected list type argument in unary operator"); return nullptr; } @@ -877,7 +882,7 @@ Init *TGParser::ParseOperation(Record *CurRec) { assert(LHSt && "expected list type argument in unary operator"); ListRecTy *LType = dyn_cast<ListRecTy>(LHSt->getType()); if (!LType) { - TokError("expected list type argumnet in unary operator"); + TokError("expected list type argument in unary operator"); return nullptr; } if (Code == UnOpInit::HEAD) { @@ -1021,8 +1026,9 @@ Init *TGParser::ParseOperation(Record *CurRec) { } Lex.Lex(); // eat the ',' - Init *MHS = ParseValue(CurRec); - if (!MHS) return nullptr; + Init *MHS = ParseValue(CurRec, ItemType); + if (!MHS) + return nullptr; if (Lex.getCode() != tgtok::comma) { TokError("expected ',' in ternary operator"); @@ -1030,8 +1036,9 @@ Init *TGParser::ParseOperation(Record *CurRec) { } Lex.Lex(); // eat the ',' - Init *RHS = ParseValue(CurRec); - if (!RHS) return nullptr; + Init *RHS = ParseValue(CurRec, ItemType); + if (!RHS) + return nullptr; if (Lex.getCode() != tgtok::r_paren) { TokError("expected ')' in binary operator"); @@ -1441,7 +1448,7 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, case tgtok::XIf: case tgtok::XForEach: case tgtok::XSubst: { // Value ::= !ternop '(' Value ',' Value ',' Value ')' - return ParseOperation(CurRec); + return ParseOperation(CurRec, ItemType); } } diff --git a/lib/TableGen/TGParser.h b/lib/TableGen/TGParser.h index 6fd442a..9f4b7e9 100644 --- a/lib/TableGen/TGParser.h +++ b/lib/TableGen/TGParser.h @@ -181,7 +181,7 @@ private: // Parser methods. 
std::vector<unsigned> ParseRangeList(); bool ParseRangePiece(std::vector<unsigned> &Ranges); RecTy *ParseType(); - Init *ParseOperation(Record *CurRec); + Init *ParseOperation(Record *CurRec, RecTy *ItemType); RecTy *ParseOperatorType(); Init *ParseObjectName(MultiClass *CurMultiClass); Record *ParseClassID(); diff --git a/lib/Target/AArch64/AArch64.td b/lib/Target/AArch64/AArch64.td index 1ad5ac8..e6a27c3 100644 --- a/lib/Target/AArch64/AArch64.td +++ b/lib/Target/AArch64/AArch64.td @@ -60,6 +60,7 @@ def AArch64InstrInfo : InstrInfo; // AArch64 Processors supported. // include "AArch64SchedA53.td" +include "AArch64SchedA57.td" include "AArch64SchedCyclone.td" def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", @@ -89,7 +90,7 @@ def : ProcessorModel<"generic", NoSchedModel, [FeatureFPARMv8, FeatureCRC]>; def : ProcessorModel<"cortex-a53", CortexA53Model, [ProcA53]>; -def : ProcessorModel<"cortex-a57", NoSchedModel, [ProcA57]>; +def : ProcessorModel<"cortex-a57", CortexA57Model, [ProcA57]>; def : ProcessorModel<"cyclone", CycloneModel, [ProcCyclone]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp index 04906f6..ab2c4b7 100644 --- a/lib/Target/AArch64/AArch64AddressTypePromotion.cpp +++ b/lib/Target/AArch64/AArch64AddressTypePromotion.cpp @@ -214,8 +214,8 @@ AArch64AddressTypePromotion::shouldConsiderSExt(const Instruction *SExt) const { if (SExt->getType() != ConsideredSExtType) return false; - for (const Use &U : SExt->uses()) { - if (isa<GetElementPtrInst>(*U)) + for (const User *U : SExt->users()) { + if (isa<GetElementPtrInst>(U)) return true; } @@ -267,8 +267,7 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { } // Now try to get through the chain of definitions. - while (isa<Instruction>(SExt->getOperand(0))) { - Instruction *Inst = dyn_cast<Instruction>(SExt->getOperand(0)); + while (auto *Inst = dyn_cast<Instruction>(SExt->getOperand(0))) { DEBUG(dbgs() << "Try to get through:\n" << *Inst << '\n'); if (!canGetThrough(Inst) || !shouldGetThrough(Inst)) { // We cannot get through something that is not an Instruction @@ -285,10 +284,10 @@ AArch64AddressTypePromotion::propagateSignExtension(Instructions &SExtInsts) { // assertion on the type as all involved sext operation may have not // been moved yet. while (!Inst->use_empty()) { - Value::use_iterator UseIt = Inst->use_begin(); - Instruction *UseInst = dyn_cast<Instruction>(*UseIt); - assert(UseInst && "Use of sext is not an Instruction!"); - UseInst->setOperand(UseIt->getOperandNo(), SExt); + Use &U = *Inst->use_begin(); + Instruction *User = dyn_cast<Instruction>(U.getUser()); + assert(User && "User of sext is not an Instruction!"); + User->setOperand(U.getOperandNo(), SExt); } ToRemove.insert(Inst); SExt->setOperand(0, Inst->getOperand(0)); @@ -385,11 +384,11 @@ void AArch64AddressTypePromotion::mergeSExts(ValueToInsts &ValToSExtendedUses, if (ToRemove.count(Inst)) continue; bool inserted = false; - for (auto Pt : CurPts) { + for (auto &Pt : CurPts) { if (DT.dominates(Inst, Pt)) { DEBUG(dbgs() << "Replace all uses of:\n" << *Pt << "\nwith:\n" << *Inst << '\n'); - (Pt)->replaceAllUsesWith(Inst); + Pt->replaceAllUsesWith(Inst); ToRemove.insert(Pt); Pt = Inst; inserted = true; @@ -436,7 +435,7 @@ void AArch64AddressTypePromotion::analyzeSExtension(Instructions &SExtInsts) { bool insert = false; // #1. 
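The AArch64AddressTypePromotion hunks that follow switch from iterating Use objects to iterating users directly; when only the consuming instruction matters, users() avoids the extra dereference. A small sketch against the LLVM IR API in this tree:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Same test the pass performs: does any user of V index through a GEP?
static bool hasGEPUser(const Value *V) {
  for (const User *U : V->users())
    if (isa<GetElementPtrInst>(U))
      return true;
  return false;
}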
- for (const Use &U : SExt->uses()) { + for (const User *U : SExt->users()) { const Instruction *Inst = dyn_cast<GetElementPtrInst>(U); if (Inst && Inst->getNumOperands() > 2) { DEBUG(dbgs() << "Interesting use in GetElementPtrInst\n" << *Inst diff --git a/lib/Target/AArch64/AArch64AsmPrinter.cpp b/lib/Target/AArch64/AArch64AsmPrinter.cpp index c3ee9bb..cd94e24 100644 --- a/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -211,7 +211,7 @@ void AArch64AsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, const MachineOperand &MO = MI->getOperand(OpNum); switch (MO.getType()) { default: - assert(0 && "<unknown operand type>"); + llvm_unreachable("<unknown operand type>"); case MachineOperand::MO_Register: { unsigned Reg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(Reg)); diff --git a/lib/Target/AArch64/AArch64BranchRelaxation.cpp b/lib/Target/AArch64/AArch64BranchRelaxation.cpp index 5209452..484e7e8 100644 --- a/lib/Target/AArch64/AArch64BranchRelaxation.cpp +++ b/lib/Target/AArch64/AArch64BranchRelaxation.cpp @@ -291,7 +291,7 @@ static bool isConditionalBranch(unsigned Opc) { static MachineBasicBlock *getDestBlock(MachineInstr *MI) { switch (MI->getOpcode()) { default: - assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBZW: case AArch64::TBNZW: case AArch64::TBZX: @@ -309,7 +309,7 @@ static MachineBasicBlock *getDestBlock(MachineInstr *MI) { static unsigned getOppositeConditionOpcode(unsigned Opc) { switch (Opc) { default: - assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBNZW: return AArch64::TBZW; case AArch64::TBNZX: return AArch64::TBZX; case AArch64::TBZW: return AArch64::TBNZW; @@ -325,7 +325,7 @@ static unsigned getOppositeConditionOpcode(unsigned Opc) { static unsigned getBranchDisplacementBits(unsigned Opc) { switch (Opc) { default: - assert(0 && "unexpected opcode!"); + llvm_unreachable("unexpected opcode!"); case AArch64::TBNZW: case AArch64::TBZW: case AArch64::TBNZX: diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index ded2e17..8e8bd3d 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -18,9 +18,6 @@ class CCIfAlign<string Align, CCAction A> : class CCIfBigEndian<CCAction A> : CCIf<"State.getTarget().getDataLayout()->isBigEndian()", A>; -class CCIfUnallocated<string Reg, CCAction A> : - CCIf<"!State.isAllocated(AArch64::" # Reg # ")", A>; - //===----------------------------------------------------------------------===// // ARM AAPCS64 Calling Convention //===----------------------------------------------------------------------===// @@ -45,7 +42,7 @@ def CC_AArch64_AAPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. - CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>, + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -120,7 +117,7 @@ def CC_AArch64_DarwinPCS : CallingConv<[ // Handle i1, i8, i16, i32, i64, f32, f64 and v2f64 by passing in registers, // up to eight each of GPR and FPR. 
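With CCIfUnallocated removed, i1/i8/i16 are now promoted to i32 unconditionally before any register is assigned, rather than only while X7 is free. The rule in scalar form (illustrative only):

#include <algorithm>
static unsigned promotedBits(unsigned ValBits) {
  return std::max(ValBits, 32u); // i1/i8/i16 -> i32; i32/i64 unchanged
}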
- CCIfType<[i1, i8, i16], CCIfUnallocated<"X7", CCPromoteToType<i32>>>, + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, CCIfType<[i32], CCAssignToRegWithShadow<[W0, W1, W2, W3, W4, W5, W6, W7], [X0, X1, X2, X3, X4, X5, X6, X7]>>, // i128 is split to two i64s, we can't fit half to register X7. @@ -143,8 +140,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[ CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, // If more than will fit in registers, pass them on the stack instead. - CCIfType<[i1, i8], CCAssignToStack<1, 1>>, - CCIfType<[i16], CCAssignToStack<2, 2>>, + CCIf<"ValVT == MVT::i1 || ValVT == MVT::i8", CCAssignToStack<1, 1>>, + CCIf<"ValVT == MVT::i16", CCAssignToStack<2, 2>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64, v1f64, v2f32, v1i64, v2i32, v4i16, v8i8], CCAssignToStack<8, 8>>, @@ -172,12 +169,11 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[ // 32bit quantity as undef. def CC_AArch64_WebKit_JS : CallingConv<[ // Handle i1, i8, i16, i32, and i64 passing in register X0 (W0). - CCIfType<[i1, i8, i16], CCIfUnallocated<"X0", CCPromoteToType<i32>>>, + CCIfType<[i1, i8, i16], CCPromoteToType<i32>>, CCIfType<[i32], CCAssignToRegWithShadow<[W0], [X0]>>, CCIfType<[i64], CCAssignToRegWithShadow<[X0], [W0]>>, // Pass the remaining arguments on the stack instead. - CCIfType<[i1, i8, i16], CCAssignToStack<4, 4>>, CCIfType<[i32, f32], CCAssignToStack<4, 4>>, CCIfType<[i64, f64], CCAssignToStack<8, 8>> ]>; diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index c3b5369..2164d77 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -240,21 +240,15 @@ unsigned AArch64FastISel::AArch64MaterializeFP(const ConstantFP *CFP, MVT VT) { } unsigned AArch64FastISel::AArch64MaterializeGV(const GlobalValue *GV) { - // We can't handle thread-local variables quickly yet. Unfortunately we have - // to peer through any aliases to find out if that rule applies. - const GlobalValue *TLSGV = GV; - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - TLSGV = GA->getAliasee(); + // We can't handle thread-local variables quickly yet. + if (GV->isThreadLocal()) + return 0; // MachO still uses GOT for large code-model accesses, but ELF requires // movz/movk sequences, which FastISel doesn't handle yet. if (TM.getCodeModel() != CodeModel::Small && !Subtarget->isTargetMachO()) return 0; - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(TLSGV)) - if (GVar->isThreadLocal()) - return 0; - unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM); EVT DestEVT = TLI.getValueType(GV->getType(), true); @@ -469,11 +463,18 @@ bool AArch64FastISel::SimplifyAddress(Address &Addr, MVT VT, break; } - // FIXME: If this is a stack pointer and the offset needs to be simplified - // then put the alloca address into a register, set the base type back to - // register and continue. This should almost never happen. + //If this is a stack pointer and the offset needs to be simplified then put + // the alloca address into a register, set the base type back to register and + // continue. This should almost never happen. 
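The FastISel fallback described by that comment, in scalar terms: when the immediate offset cannot be encoded against the frame slot, materialize the slot address into a scratch register first (the ADDXri in the hunk below) and address relative to that. A hedged sketch of the two-step addressing:

#include <cstdint>
#include <cstring>
static uint64_t loadViaMaterializedBase(const uint8_t *FrameBase,
                                        int64_t SlotOff, int64_t BigOff) {
  const uint8_t *Tmp = FrameBase + SlotOff; // ADDXri: alloca address -> reg
  uint64_t V;
  std::memcpy(&V, Tmp + BigOff, sizeof V);  // load with register base
  return V;
}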
if (needsLowering && Addr.getKind() == Address::FrameIndexBase) { - return false; + unsigned ResultReg = createResultReg(&AArch64::GPR64RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::ADDXri), + ResultReg) + .addFrameIndex(Addr.getFI()) + .addImm(0) + .addImm(0); + Addr.setKind(Address::RegBase); + Addr.setReg(ResultReg); } // Since the offset is too large for the load/store instruction get the @@ -1224,7 +1225,6 @@ bool AArch64FastISel::ProcessCallArgs( Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ false); if (Arg == 0) return false; - ArgVT = DestVT; break; } case CCValAssign::AExt: @@ -1235,7 +1235,6 @@ bool AArch64FastISel::ProcessCallArgs( Arg = EmitIntExt(SrcVT, Arg, DestVT, /*isZExt*/ true); if (Arg == 0) return false; - ArgVT = DestVT; break; } default: @@ -1254,7 +1253,7 @@ bool AArch64FastISel::ProcessCallArgs( assert(VA.isMemLoc() && "Assuming store on stack."); // Need to store on the stack. - unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8; + unsigned ArgSize = (ArgVT.getSizeInBits() + 7) / 8; unsigned BEAlign = 0; if (ArgSize < 8 && !Subtarget->isLittleEndian()) @@ -1468,10 +1467,12 @@ bool AArch64FastISel::TryEmitSmallMemCpy(Address Dest, Address Src, bool RV; unsigned ResultReg; RV = EmitLoad(VT, ResultReg, Src); - assert(RV == true && "Should be able to handle this load."); + if (!RV) + return false; + RV = EmitStore(VT, ResultReg, Dest); - assert(RV == true && "Should be able to handle this store."); - (void)RV; + if (!RV) + return false; int64_t Size = VT.getSizeInBits() / 8; Len -= Size; @@ -1749,6 +1750,17 @@ unsigned AArch64FastISel::Emiti1Ext(unsigned SrcReg, MVT DestVT, bool isZExt) { unsigned AArch64FastISel::EmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt) { assert(DestVT != MVT::i1 && "ZeroExt/SignExt an i1?"); + + // FastISel does not have plumbing to deal with extensions where the SrcVT or + // DestVT are odd things, so test to make sure that they are both types we can + // handle (i1/i8/i16/i32 for SrcVT and i8/i16/i32/i64 for DestVT), otherwise + // bail out to SelectionDAG. + if (((DestVT != MVT::i8) && (DestVT != MVT::i16) && + (DestVT != MVT::i32) && (DestVT != MVT::i64)) || + ((SrcVT != MVT::i1) && (SrcVT != MVT::i8) && + (SrcVT != MVT::i16) && (SrcVT != MVT::i32))) + return 0; + unsigned Opc; unsigned Imm = 0; @@ -1895,6 +1907,7 @@ bool AArch64FastISel::SelectMul(const Instruction *I) { case MVT::i32: ZReg = AArch64::WZR; Opc = AArch64::MADDWrrr; + SrcVT = MVT::i32; break; case MVT::i64: ZReg = AArch64::XZR; diff --git a/lib/Target/AArch64/AArch64FrameLowering.cpp b/lib/Target/AArch64/AArch64FrameLowering.cpp index deb306a..9c33717 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -158,7 +158,7 @@ void AArch64FrameLowering::emitCalleeSavedFrameMoves( MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const AArch64InstrInfo *TII = TM.getInstrInfo(); + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); DebugLoc DL = MBB.findDebugLoc(MBBI); // Add callee saved registers to move list. 
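ProcessCallArgs above now sizes the outgoing stack slot from the value type rather than the possibly promoted location type, rounding bits up to whole bytes. The rounding idiom, with worked cases:

static unsigned bytesForBits(unsigned Bits) {
  return (Bits + 7) / 8; // i1 -> 1 byte, i16 -> 2 bytes, i33 -> 5 bytes
}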
@@ -204,8 +204,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock::iterator MBBI = MBB.begin(); const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); - const AArch64RegisterInfo *RegInfo = TM.getRegisterInfo(); - const AArch64InstrInfo *TII = TM.getInstrInfo(); + const AArch64RegisterInfo *RegInfo = static_cast<const AArch64RegisterInfo *>( + MF.getTarget().getRegisterInfo()); + const TargetInstrInfo *TII = MF.getTarget().getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>(); bool needsFrameMoves = MMI.hasDebugInfo() || Fn->needsUnwindTableEntry(); diff --git a/lib/Target/AArch64/AArch64FrameLowering.h b/lib/Target/AArch64/AArch64FrameLowering.h index 0e00d16..7686e6f 100644 --- a/lib/Target/AArch64/AArch64FrameLowering.h +++ b/lib/Target/AArch64/AArch64FrameLowering.h @@ -18,18 +18,11 @@ namespace llvm { -class AArch64Subtarget; -class AArch64TargetMachine; - class AArch64FrameLowering : public TargetFrameLowering { - const AArch64TargetMachine &TM; - public: - explicit AArch64FrameLowering(const AArch64TargetMachine &TM, - const AArch64Subtarget &STI) + explicit AArch64FrameLowering() : TargetFrameLowering(StackGrowsDown, 16, 0, 16, - false /*StackRealignable*/), - TM(TM) {} + false /*StackRealignable*/) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, diff --git a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7007ffc..3f49fab 100644 --- a/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -153,9 +153,6 @@ public: SDNode *SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); SDNode *SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); - SDNode *SelectSIMDAddSubNarrowing(unsigned IntNo, SDNode *Node); - SDNode *SelectSIMDXtnNarrowing(unsigned IntNo, SDNode *Node); - SDNode *SelectBitfieldExtractOp(SDNode *N); SDNode *SelectBitfieldInsertOp(SDNode *N); @@ -596,8 +593,9 @@ bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, const GlobalValue *GV = GAN->getGlobal(); unsigned Alignment = GV->getAlignment(); const DataLayout *DL = TLI->getDataLayout(); - if (Alignment == 0 && !Subtarget->isTargetDarwin()) - Alignment = DL->getABITypeAlignment(GV->getType()->getElementType()); + Type *Ty = GV->getType()->getElementType(); + if (Alignment == 0 && Ty->isSized() && !Subtarget->isTargetDarwin()) + Alignment = DL->getABITypeAlignment(Ty); if (Alignment >= Size) return true; @@ -2111,7 +2109,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { .getVectorElementType() .getSizeInBits()) { default: - assert(0 && "Unexpected vector element type!"); + llvm_unreachable("Unexpected vector element type!"); case 64: SubReg = AArch64::dsub; break; diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index 80d6669..28d0035 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -67,15 +67,15 @@ EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, //===----------------------------------------------------------------------===// // AArch64 Lowering public interface. 
//===----------------------------------------------------------------------===// -static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget<AArch64Subtarget>().isTargetDarwin()) +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) return new AArch64_MachoTargetObjectFile(); return new AArch64_ELFTargetObjectFile(); } -AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { +AArch64TargetLowering::AArch64TargetLowering(TargetMachine &TM) + : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { Subtarget = &TM.getSubtarget<AArch64Subtarget>(); // AArch64 doesn't have comparisons which set GPRs or setcc instructions, so @@ -627,7 +627,7 @@ MVT AArch64TargetLowering::getScalarShiftAmountTy(EVT LHSTy) const { unsigned AArch64TargetLowering::getMaximalGlobalOffset() const { // FIXME: On AArch64, this depends on the type. - // Basically, the addressable offsets are o to 4095 * Ty.getSizeInBytes(). + // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). // and the offset has to be a multiple of the related size in bytes. return 4095; } @@ -823,8 +823,7 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, #ifndef NDEBUG MI->dump(); #endif - assert(0 && "Unexpected instruction for custom inserter!"); - break; + llvm_unreachable("Unexpected instruction for custom inserter!"); case AArch64::F128CSEL: return EmitF128CSEL(MI, BB); @@ -833,7 +832,6 @@ AArch64TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case TargetOpcode::PATCHPOINT: return emitPatchPoint(MI, BB); } - llvm_unreachable("Unexpected instruction for custom inserter!"); } //===----------------------------------------------------------------------===// @@ -1273,7 +1271,7 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { bool ExtraOp = false; switch (Op.getOpcode()) { default: - assert(0 && "Invalid code"); + llvm_unreachable("Invalid code"); case ISD::ADDC: Opc = AArch64ISD::ADDS; break; @@ -1387,24 +1385,22 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { EVT InVT = Op.getOperand(0).getValueType(); EVT VT = Op.getValueType(); - // FP_TO_XINT conversion from the same type are legal. - if (VT.getSizeInBits() == InVT.getSizeInBits()) - return Op; - - if (InVT == MVT::v2f64 || InVT == MVT::v4f32) { + if (VT.getSizeInBits() < InVT.getSizeInBits()) { SDLoc dl(Op); SDValue Cv = DAG.getNode(Op.getOpcode(), dl, InVT.changeVectorElementTypeToInteger(), Op.getOperand(0)); return DAG.getNode(ISD::TRUNCATE, dl, VT, Cv); - } else if (InVT == MVT::v2f32) { + } + + if (VT.getSizeInBits() > InVT.getSizeInBits()) { SDLoc dl(Op); SDValue Ext = DAG.getNode(ISD::FP_EXTEND, dl, MVT::v2f64, Op.getOperand(0)); return DAG.getNode(Op.getOpcode(), dl, VT, Ext); } // Type changing conversions are illegal. - return SDValue(); + return Op; } SDValue AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, @@ -1440,32 +1436,23 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { SDValue In = Op.getOperand(0); EVT InVT = In.getValueType(); - // v2i32 to v2f32 is legal. - if (VT == MVT::v2f32 && InVT == MVT::v2i32) - return Op; - - // This function only handles v2f64 outputs. - if (VT == MVT::v2f64) { - // Extend the input argument to a v2i64 that we can feed into the - // floating point conversion. Zero or sign extend based on whether - // we're doing a signed or unsigned float conversion. 
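The LowerVectorFP_TO_INT rewrite above replaces the per-type special cases with a single size comparison: convert in the wider domain, then truncate or extend to reach the requested type. A sketch of the three-way decision it encodes:

enum class FpToIntPlan { TruncateAfter, ExtendFirst, AlreadyLegal };
static FpToIntPlan planVectorFpToInt(unsigned DstBits, unsigned SrcBits) {
  if (DstBits < SrcBits)
    return FpToIntPlan::TruncateAfter; // convert at SrcBits, then ISD::TRUNCATE
  if (DstBits > SrcBits)
    return FpToIntPlan::ExtendFirst;   // ISD::FP_EXTEND first, then convert
  return FpToIntPlan::AlreadyLegal;    // same width: return Op unchanged
}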
- unsigned Opc = - Op.getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND; - assert(Op.getNumOperands() == 1 && "FP conversions take one argument"); - SDValue Promoted = DAG.getNode(Opc, dl, MVT::v2i64, Op.getOperand(0)); - return DAG.getNode(Op.getOpcode(), dl, Op.getValueType(), Promoted); + if (VT.getSizeInBits() < InVT.getSizeInBits()) { + MVT CastVT = + MVT::getVectorVT(MVT::getFloatingPointVT(InVT.getScalarSizeInBits()), + InVT.getVectorNumElements()); + In = DAG.getNode(Op.getOpcode(), dl, CastVT, In); + return DAG.getNode(ISD::FP_ROUND, dl, VT, In, DAG.getIntPtrConstant(0)); } - // Scalarize v2i64 to v2f32 conversions. - std::vector<SDValue> BuildVectorOps; - for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) { - SDValue Sclr = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, In, - DAG.getConstant(i, MVT::i64)); - Sclr = DAG.getNode(Op->getOpcode(), dl, MVT::f32, Sclr); - BuildVectorOps.push_back(Sclr); + if (VT.getSizeInBits() > InVT.getSizeInBits()) { + unsigned CastOpc = + Op.getOpcode() == ISD::SINT_TO_FP ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + EVT CastVT = VT.changeVectorElementTypeToInteger(); + In = DAG.getNode(CastOpc, dl, CastVT, In); + return DAG.getNode(Op.getOpcode(), dl, VT, In); } - return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, BuildVectorOps); + return Op; } SDValue AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, @@ -1516,7 +1503,7 @@ SDValue AArch64TargetLowering::LowerFSINCOS(SDValue Op, StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(CallingConv::Fast, RetTy, Callee, &Args, 0); + .setCallee(CallingConv::Fast, RetTy, Callee, std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.first; @@ -1711,7 +1698,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments( InVals.push_back(FrameIdxN); continue; - } if (VA.isRegLoc()) { + } + + if (VA.isRegLoc()) { // Arguments stored in registers. EVT RegVT = VA.getLocVT(); @@ -1772,10 +1761,16 @@ SDValue AArch64TargetLowering::LowerFormalArguments( SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); SDValue ArgValue; + // For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT) ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; + MVT MemVT = VA.getValVT(); + switch (VA.getLocInfo()) { default: break; + case CCValAssign::BCvt: + MemVT = VA.getLocVT(); + break; case CCValAssign::SExt: ExtType = ISD::SEXTLOAD; break; @@ -1787,10 +1782,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments( break; } - ArgValue = DAG.getExtLoad(ExtType, DL, VA.getValVT(), Chain, FIN, + ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN, MachinePointerInfo::getFixedStack(FI), - VA.getLocVT(), - false, false, false, 0); + MemVT, false, false, false, nullptr); InVals.push_back(ArgValue); } @@ -2339,11 +2333,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // Since we pass i1/i8/i16 as i1/i8/i16 on stack and Arg is already // promoted to a legal register type i32, we should truncate Arg back to // i1/i8/i16. 
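The truncate-before-store rule in the LowerCall hunk below has a simple scalar picture: an i8 argument promoted to i32 in a register must be stored back as its low 8 bits only. Minimal sketch:

#include <cstdint>
static void storePromotedI8(uint8_t *Slot, uint32_t Promoted) {
  *Slot = static_cast<uint8_t>(Promoted); // ISD::TRUNCATE, then the store
}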
- if (Arg.getValueType().isSimple() && - Arg.getValueType().getSimpleVT() == MVT::i32 && - (VA.getLocVT() == MVT::i1 || VA.getLocVT() == MVT::i8 || - VA.getLocVT() == MVT::i16)) - Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getLocVT(), Arg); + if (VA.getValVT() == MVT::i1 || VA.getValVT() == MVT::i8 || + VA.getValVT() == MVT::i16) + Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg); SDValue Store = DAG.getStore(Chain, DL, Arg, DstAddr, DstInfo, false, false, 0); @@ -4116,6 +4108,7 @@ static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { // shuffle in combination with VEXTs. SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { + assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); @@ -4164,35 +4157,47 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, SDValue ShuffleSrcs[2] = { DAG.getUNDEF(VT), DAG.getUNDEF(VT) }; int VEXTOffsets[2] = { 0, 0 }; + int OffsetMultipliers[2] = { 1, 1 }; // This loop extracts the usage patterns of the source vectors // and prepares appropriate SDValues for a shuffle if possible. for (unsigned i = 0; i < SourceVecs.size(); ++i) { - if (SourceVecs[i].getValueType() == VT) { + unsigned NumSrcElts = SourceVecs[i].getValueType().getVectorNumElements(); + SDValue CurSource = SourceVecs[i]; + if (SourceVecs[i].getValueType().getVectorElementType() != + VT.getVectorElementType()) { + // It may hit this case if SourceVecs[i] is AssertSext/AssertZext. + // Then bitcast it to the vector which holds asserted element type, + // and record the multiplier of element width between SourceVecs and + // Build_vector which is needed to extract the correct lanes later. + EVT CastVT = + EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), + SourceVecs[i].getValueSizeInBits() / + VT.getVectorElementType().getSizeInBits()); + + CurSource = DAG.getNode(ISD::BITCAST, dl, CastVT, SourceVecs[i]); + OffsetMultipliers[i] = CastVT.getVectorNumElements() / NumSrcElts; + NumSrcElts *= OffsetMultipliers[i]; + MaxElts[i] *= OffsetMultipliers[i]; + MinElts[i] *= OffsetMultipliers[i]; + } + + if (CurSource.getValueType() == VT) { // No VEXT necessary - ShuffleSrcs[i] = SourceVecs[i]; + ShuffleSrcs[i] = CurSource; VEXTOffsets[i] = 0; continue; - } else if (SourceVecs[i].getValueType().getVectorNumElements() < NumElts) { + } else if (NumSrcElts < NumElts) { // We can pad out the smaller vector for free, so if it's part of a // shuffle... - ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, SourceVecs[i], - DAG.getUNDEF(SourceVecs[i].getValueType())); + ShuffleSrcs[i] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, CurSource, + DAG.getUNDEF(CurSource.getValueType())); continue; } - // Don't attempt to extract subvectors from BUILD_VECTOR sources - // that expand or trunc the original value. - // TODO: We can try to bitcast and ANY_EXTEND the result but - // we need to consider the cost of vector ANY_EXTEND, and the - // legality of all the types. - if (SourceVecs[i].getValueType().getVectorElementType() != - VT.getVectorElementType()) - return SDValue(); - // Since only 64-bit and 128-bit vectors are legal on ARM and // we've eliminated the other cases... 
- assert(SourceVecs[i].getValueType().getVectorNumElements() == 2 * NumElts && + assert(NumSrcElts == 2 * NumElts && "unexpected vector sizes in ReconstructShuffle"); if (MaxElts[i] - MinElts[i] >= NumElts) { @@ -4203,22 +4208,20 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, if (MinElts[i] >= NumElts) { // The extraction can just take the second half VEXTOffsets[i] = NumElts; - ShuffleSrcs[i] = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(NumElts)); } else if (MaxElts[i] < NumElts) { // The extraction can just take the first half VEXTOffsets[i] = 0; - ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); + ShuffleSrcs[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(0)); } else { // An actual VEXT is needed VEXTOffsets[i] = MinElts[i]; - SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, - SourceVecs[i], DAG.getIntPtrConstant(0)); - SDValue VEXTSrc2 = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, SourceVecs[i], - DAG.getIntPtrConstant(NumElts)); + SDValue VEXTSrc1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(0)); + SDValue VEXTSrc2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, CurSource, + DAG.getIntPtrConstant(NumElts)); unsigned Imm = VEXTOffsets[i] * getExtFactor(VEXTSrc1); ShuffleSrcs[i] = DAG.getNode(AArch64ISD::EXT, dl, VT, VEXTSrc1, VEXTSrc2, DAG.getConstant(Imm, MVT::i32)); @@ -4238,9 +4241,10 @@ SDValue AArch64TargetLowering::ReconstructShuffle(SDValue Op, int ExtractElt = cast<ConstantSDNode>(Op.getOperand(i).getOperand(1))->getSExtValue(); if (ExtractVec == SourceVecs[0]) { - Mask.push_back(ExtractElt - VEXTOffsets[0]); + Mask.push_back(ExtractElt * OffsetMultipliers[0] - VEXTOffsets[0]); } else { - Mask.push_back(ExtractElt + NumElts - VEXTOffsets[1]); + Mask.push_back(ExtractElt * OffsetMultipliers[1] + NumElts - + VEXTOffsets[1]); } } @@ -5177,11 +5181,37 @@ FailedModImm: return Op; } +// Normalize the operands of BUILD_VECTOR. The value of constant operands will +// be truncated to fit element width. 
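NormalizeBuildVector's contract, per the comment above: constant lanes are masked down to the element width before matching. A worked case for an i8 lane, using the same APInt truncation trick as the new code:

#include "llvm/ADT/APInt.h"
#include <cstdint>
static uint64_t truncToLane(uint64_t C, unsigned EltBits) {
  llvm::APInt LowBits(EltBits, C); // keeps only the low EltBits bits
  return LowBits.getZExtValue();   // truncToLane(511, 8) == 255
}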
+static SDValue NormalizeBuildVector(SDValue Op, + SelectionDAG &DAG) { + assert(Op.getOpcode() == ISD::BUILD_VECTOR && "Unknown opcode!"); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + EVT EltTy= VT.getVectorElementType(); + + if (EltTy.isFloatingPoint() || EltTy.getSizeInBits() > 16) + return Op; + + SmallVector<SDValue, 16> Ops; + for (unsigned I = 0, E = VT.getVectorNumElements(); I != E; ++I) { + SDValue Lane = Op.getOperand(I); + if (Lane.getOpcode() == ISD::Constant) { + APInt LowBits(EltTy.getSizeInBits(), + cast<ConstantSDNode>(Lane)->getZExtValue()); + Lane = DAG.getConstant(LowBits.getZExtValue(), MVT::i32); + } + Ops.push_back(Lane); + } + return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops); +} + SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { - BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); SDLoc dl(Op); EVT VT = Op.getValueType(); + Op = NormalizeBuildVector(Op, DAG); + BuildVectorSDNode *BVN = cast<BuildVectorSDNode>(Op.getNode()); APInt CnstBits(VT.getSizeInBits(), 0); APInt UndefBits(VT.getSizeInBits(), 0); @@ -6047,18 +6077,14 @@ bool AArch64TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; + return NumBits1 > NumBits2; } bool AArch64TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) + if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 <= NumBits2) - return false; - return true; + return NumBits1 > NumBits2; } // All 32-bit GPR operations implicitly zero the high-half of the corresponding @@ -6068,18 +6094,14 @@ bool AArch64TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { return false; unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; + return NumBits1 == 32 && NumBits2 == 64; } bool AArch64TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { - if (!VT1.isInteger() || !VT2.isInteger()) + if (VT1.isVector() || VT2.isVector() || !VT1.isInteger() || !VT2.isInteger()) return false; unsigned NumBits1 = VT1.getSizeInBits(); unsigned NumBits2 = VT2.getSizeInBits(); - if (NumBits1 == 32 && NumBits2 == 64) - return true; - return false; + return NumBits1 == 32 && NumBits2 == 64; } bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { @@ -6092,8 +6114,9 @@ bool AArch64TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return false; // 8-, 16-, and 32-bit integer loads all implicitly zero-extend. - return (VT1.isSimple() && VT1.isInteger() && VT2.isSimple() && - VT2.isInteger() && VT1.getSizeInBits() <= 32); + return (VT1.isSimple() && !VT1.isVector() && VT1.isInteger() && + VT2.isSimple() && !VT2.isVector() && VT2.isInteger() && + VT1.getSizeInBits() <= 32); } bool AArch64TargetLowering::hasPairedLoad(Type *LoadedType, @@ -6346,23 +6369,45 @@ static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG, if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { APInt Value = C->getAPIntValue(); EVT VT = N->getValueType(0); - APInt VP1 = Value + 1; - if (VP1.isPowerOf2()) { - // Multiplying by one less than a power of two, replace with a shift - // and a subtract. 
- SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VP1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); - } - APInt VM1 = Value - 1; - if (VM1.isPowerOf2()) { - // Multiplying by one more than a power of two, replace with a shift - // and an add. - SDValue ShiftedVal = - DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), - DAG.getConstant(VM1.logBase2(), MVT::i64)); - return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); + if (Value.isNonNegative()) { + // (mul x, 2^N + 1) => (add (shl x, N), x) + APInt VM1 = Value - 1; + if (VM1.isPowerOf2()) { + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), + DAG.getConstant(VM1.logBase2(), MVT::i64)); + return DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, + N->getOperand(0)); + } + // (mul x, 2^N - 1) => (sub (shl x, N), x) + APInt VP1 = Value + 1; + if (VP1.isPowerOf2()) { + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), + DAG.getConstant(VP1.logBase2(), MVT::i64)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, ShiftedVal, + N->getOperand(0)); + } + } else { + // (mul x, -(2^N + 1)) => - (add (shl x, N), x) + APInt VNM1 = -Value - 1; + if (VNM1.isPowerOf2()) { + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), + DAG.getConstant(VNM1.logBase2(), MVT::i64)); + SDValue Add = + DAG.getNode(ISD::ADD, SDLoc(N), VT, ShiftedVal, N->getOperand(0)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), Add); + } + // (mul x, -(2^N - 1)) => (sub x, (shl x, N)) + APInt VNP1 = -Value + 1; + if (VNP1.isPowerOf2()) { + SDValue ShiftedVal = + DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), + DAG.getConstant(VNP1.logBase2(), MVT::i64)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N->getOperand(0), + ShiftedVal); + } } } return SDValue(); @@ -6687,7 +6732,7 @@ static SDValue tryCombineFixedPointConvert(SDNode *N, else if (Vec.getValueType() == MVT::v2i64) VecResTy = MVT::v2f64; else - assert(0 && "unexpected vector type!"); + llvm_unreachable("unexpected vector type!"); SDValue Convert = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VecResTy, IID, Vec, Shift); @@ -7020,7 +7065,7 @@ static SDValue tryCombineShiftImm(unsigned IID, SDNode *N, SelectionDAG &DAG) { if (IsRightShift && ShiftAmount <= -1 && ShiftAmount >= -(int)ElemBits) return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), DAG.getConstant(-ShiftAmount, MVT::i32)); - else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount <= ElemBits) + else if (!IsRightShift && ShiftAmount >= 0 && ShiftAmount < ElemBits) return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), N->getOperand(1), DAG.getConstant(ShiftAmount, MVT::i32)); @@ -7867,6 +7912,18 @@ bool AArch64TargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { return Inst->getType()->getPrimitiveSizeInBits() <= 128; } +TargetLoweringBase::LegalizeTypeAction +AArch64TargetLowering::getPreferredVectorAction(EVT VT) const { + MVT SVT = VT.getSimpleVT(); + // During type legalization, we prefer to widen v1i8, v1i16, v1i32 to v8i8, + // v4i16, v2i32 instead of to promote. 
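The rewritten mul combine now handles negative constants too. All four strength-reduction identities, written out as scalar C++ so they can be checked by eye (unsigned arithmetic keeps the shifts well-defined while matching the modular machine semantics):

#include <cstdint>
static uint32_t mul7(uint32_t x)    { return (x << 3) - x; }          // x * (2^3 - 1)
static uint32_t mul9(uint32_t x)    { return (x << 3) + x; }          // x * (2^3 + 1)
static uint32_t mulNeg9(uint32_t x) { return 0u - ((x << 3) + x); }   // x * -(2^3 + 1)
static uint32_t mulNeg7(uint32_t x) { return x - (x << 3); }          // x * -(2^3 - 1)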
+ if (SVT == MVT::v1i8 || SVT == MVT::v1i16 || SVT == MVT::v1i32 + || SVT == MVT::v1f32) + return TypeWidenVector; + + return TargetLoweringBase::getPreferredVectorAction(VT); +} + Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, AtomicOrdering Ord) const { Module *M = Builder.GetInsertBlock()->getParent()->getParent(); diff --git a/lib/Target/AArch64/AArch64ISelLowering.h b/lib/Target/AArch64/AArch64ISelLowering.h index de16c4d..cb0b9ef 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.h +++ b/lib/Target/AArch64/AArch64ISelLowering.h @@ -197,7 +197,7 @@ class AArch64TargetLowering : public TargetLowering { bool RequireStrictAlign; public: - explicit AArch64TargetLowering(AArch64TargetMachine &TM); + explicit AArch64TargetLowering(TargetMachine &TM); /// Selects the correct CCAssignFn for a the given CallingConvention /// value. @@ -324,6 +324,9 @@ public: bool shouldExpandAtomicInIR(Instruction *Inst) const override; + TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const override; + private: /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index d455d7e..5007172 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -448,13 +448,19 @@ def logical_imm64_XFORM : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(enc, MVT::i32); }]>; -def LogicalImm32Operand : AsmOperandClass { - let Name = "LogicalImm32"; - let DiagnosticType = "LogicalSecondSource"; -} -def LogicalImm64Operand : AsmOperandClass { - let Name = "LogicalImm64"; - let DiagnosticType = "LogicalSecondSource"; +let DiagnosticType = "LogicalSecondSource" in { + def LogicalImm32Operand : AsmOperandClass { + let Name = "LogicalImm32"; + } + def LogicalImm64Operand : AsmOperandClass { + let Name = "LogicalImm64"; + } + def LogicalImm32NotOperand : AsmOperandClass { + let Name = "LogicalImm32Not"; + } + def LogicalImm64NotOperand : AsmOperandClass { + let Name = "LogicalImm64Not"; + } } def logical_imm32 : Operand<i32>, PatLeaf<(imm), [{ return AArch64_AM::isLogicalImmediate(N->getZExtValue(), 32); @@ -468,6 +474,12 @@ def logical_imm64 : Operand<i64>, PatLeaf<(imm), [{ let PrintMethod = "printLogicalImm64"; let ParserMatchClass = LogicalImm64Operand; } +def logical_imm32_not : Operand<i32> { + let ParserMatchClass = LogicalImm32NotOperand; +} +def logical_imm64_not : Operand<i64> { + let ParserMatchClass = LogicalImm64NotOperand; +} // imm0_65535 predicate - True if the immediate is in the range [0,65535]. def Imm0_65535Operand : AsmImmRange<0, 65535>; @@ -963,8 +975,14 @@ def ccode : Operand<i32> { let ParserMatchClass = CondCode; } def inv_ccode : Operand<i32> { + // AL and NV are invalid in the aliases which use inv_ccode let PrintMethod = "printInverseCondCode"; let ParserMatchClass = CondCode; + let MCOperandPredicate = [{ + return MCOp.isImm() && + MCOp.getImm() != AArch64CC::AL && + MCOp.getImm() != AArch64CC::NV; + }]; } // Conditional branch target. 19-bit immediate. 
The low two bits of the target @@ -1323,13 +1341,13 @@ class BaseMulAccum<bit isSub, bits<3> opc, RegisterClass multype, multiclass MulAccum<bit isSub, string asm, SDNode AccNode> { def Wrrr : BaseMulAccum<isSub, 0b000, GPR32, GPR32, asm, [(set GPR32:$Rd, (AccNode GPR32:$Ra, (mul GPR32:$Rn, GPR32:$Rm)))]>, - Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> { + Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> { let Inst{31} = 0; } def Xrrr : BaseMulAccum<isSub, 0b000, GPR64, GPR64, asm, [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul GPR64:$Rn, GPR64:$Rm)))]>, - Sched<[WriteIM64, ReadIMA, ReadIM, ReadIM]> { + Sched<[WriteIM64, ReadIM, ReadIM, ReadIMA]> { let Inst{31} = 1; } } @@ -1339,7 +1357,7 @@ class WideMulAccum<bit isSub, bits<3> opc, string asm, : BaseMulAccum<isSub, opc, GPR32, GPR64, asm, [(set GPR64:$Rd, (AccNode GPR64:$Ra, (mul (ExtNode GPR32:$Rn), (ExtNode GPR32:$Rm))))]>, - Sched<[WriteIM32, ReadIMA, ReadIM, ReadIM]> { + Sched<[WriteIM32, ReadIM, ReadIM, ReadIMA]> { let Inst{31} = 1; } @@ -1738,6 +1756,10 @@ multiclass AddSubS<bit isSub, string mnemonic, SDNode OpNode, string cmp> { WZR, GPR32:$src1, GPR32:$src2, 0), 5>; def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrs") XZR, GPR64:$src1, GPR64:$src2, 0), 5>; + def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Wrx") + WZR, GPR32sponly:$src1, GPR32:$src2, 16), 5>; + def : InstAlias<cmp#" $src1, $src2", (!cast<Instruction>(NAME#"Xrx64") + XZR, GPR64sponly:$src1, GPR64:$src2, 24), 5>; // Register/register aliases with no shift when SP is not used. def : AddSubRegAlias<mnemonic, !cast<Instruction>(NAME#"Wrs"), @@ -1925,22 +1947,32 @@ class LogicalRegAlias<string asm, Instruction inst, RegisterClass regtype> : InstAlias<asm#" $dst, $src1, $src2", (inst regtype:$dst, regtype:$src1, regtype:$src2, 0)>; -let AddedComplexity = 6 in -multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode> { +multiclass LogicalImm<bits<2> opc, string mnemonic, SDNode OpNode, + string Alias> { + let AddedComplexity = 6 in def Wri : BaseLogicalImm<opc, GPR32sp, GPR32, logical_imm32, mnemonic, [(set GPR32sp:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> { let Inst{31} = 0; let Inst{22} = 0; // 64-bit version has an additional bit of immediate. 
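The Alias parameter threaded through LogicalImm/LogicalImmS below lets the assembler accept the BIC/ORN/EON spellings with the complemented immediate when the immediate itself is not encodable. The scalar meaning of the three aliases, for reference:

#include <cstdint>
static uint32_t bic(uint32_t Rn, uint32_t Imm) { return Rn & ~Imm; } // and with ~Imm
static uint32_t orn(uint32_t Rn, uint32_t Imm) { return Rn | ~Imm; } // orr with ~Imm
static uint32_t eon(uint32_t Rn, uint32_t Imm) { return Rn ^ ~Imm; } // eor with ~Imm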
} + let AddedComplexity = 6 in def Xri : BaseLogicalImm<opc, GPR64sp, GPR64, logical_imm64, mnemonic, [(set GPR64sp:$Rd, (OpNode GPR64:$Rn, logical_imm64:$imm))]> { let Inst{31} = 1; } + + def : InstAlias<Alias # " $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Wri") GPR32sp:$Rd, GPR32:$Rn, + logical_imm32_not:$imm), 0>; + def : InstAlias<Alias # " $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Xri") GPR64sp:$Rd, GPR64:$Rn, + logical_imm64_not:$imm), 0>; } -multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> { +multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode, + string Alias> { let isCompare = 1, Defs = [NZCV] in { def Wri : BaseLogicalImm<opc, GPR32, GPR32, logical_imm32, mnemonic, [(set GPR32:$Rd, (OpNode GPR32:$Rn, logical_imm32:$imm))]> { @@ -1952,6 +1984,13 @@ multiclass LogicalImmS<bits<2> opc, string mnemonic, SDNode OpNode> { let Inst{31} = 1; } } // end Defs = [NZCV] + + def : InstAlias<Alias # " $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Wri") GPR32:$Rd, GPR32:$Rn, + logical_imm32_not:$imm), 0>; + def : InstAlias<Alias # " $Rd, $Rn, $imm", + (!cast<Instruction>(NAME # "Xri") GPR64:$Rd, GPR64:$Rn, + logical_imm64_not:$imm), 0>; } class BaseLogicalRegPseudo<RegisterClass regtype, SDPatternOperator OpNode> diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp index ff115c0..ce85b2c 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -35,8 +35,14 @@ AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) /// GetInstSize - Return the number of bytes of code the specified /// instruction may be. This returns the maximum number of bytes. unsigned AArch64InstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { - const MCInstrDesc &Desc = MI->getDesc(); + const MachineBasicBlock &MBB = *MI->getParent(); + const MachineFunction *MF = MBB.getParent(); + const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo(); + + if (MI->getOpcode() == AArch64::INLINEASM) + return getInlineAsmLength(MI->getOperand(0).getSymbolName(), *MAI); + const MCInstrDesc &Desc = MI->getDesc(); switch (Desc.getOpcode()) { default: // Anything not explicitly designated otherwise is a nomal 4-byte insn. 
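GetInstSizeInBytes above now special-cases INLINEASM by measuring the asm string rather than trusting the MCInstrDesc. A conservative sketch of that kind of estimate, assuming the fixed 4-byte AArch64 encoding and newline- or semicolon-separated statements (the real code defers to getInlineAsmLength and the MCAsmInfo separator strings):

#include "llvm/ADT/StringRef.h"
static unsigned estimateInlineAsmBytes(llvm::StringRef Asm) {
  unsigned Stmts = 1;
  for (char C : Asm)
    if (C == '\n' || C == ';')
      ++Stmts;        // upper bound: every separator may start a statement
  return Stmts * 4;   // at most one 4-byte instruction per statement
}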
@@ -1224,7 +1230,7 @@ void AArch64InstrInfo::copyPhysRegTuple( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef<unsigned> Indices) const { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register copy without NEON"); const TargetRegisterInfo *TRI = &getRegisterInfo(); uint16_t DestEncoding = TRI->getEncodingValue(DestReg); @@ -1385,7 +1391,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR128RegClass.contains(DestReg) && AArch64::FPR128RegClass.contains(SrcReg)) { - if(getSubTarget().hasNEON()) { + if(Subtarget.hasNEON()) { BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg) .addReg(SrcReg) .addReg(SrcReg, getKillRegState(KillSrc)); @@ -1406,7 +1412,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR64RegClass.contains(DestReg) && AArch64::FPR64RegClass.contains(SrcReg)) { - if(getSubTarget().hasNEON()) { + if(Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub, @@ -1423,7 +1429,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR32RegClass.contains(DestReg) && AArch64::FPR32RegClass.contains(SrcReg)) { - if(getSubTarget().hasNEON()) { + if(Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub, @@ -1440,7 +1446,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR16RegClass.contains(DestReg) && AArch64::FPR16RegClass.contains(SrcReg)) { - if(getSubTarget().hasNEON()) { + if(Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub, @@ -1461,7 +1467,7 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, if (AArch64::FPR8RegClass.contains(DestReg) && AArch64::FPR8RegClass.contains(SrcReg)) { - if(getSubTarget().hasNEON()) { + if(Subtarget.hasNEON()) { DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR128RegClass); SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub, @@ -1577,39 +1583,39 @@ void AArch64InstrInfo::storeRegToStackSlot( if (AArch64::FPR128RegClass.hasSubClassEq(RC)) Opc = AArch64::STRQui; else if (AArch64::DDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov1d, Offset = false; } break; case 24: if (AArch64::DDDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Threev1d, Offset = false; } break; case 32: if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv1d, Offset = false; } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Twov2d, Offset = false; } break; case 48: if (AArch64::QQQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Threev2d, Offset = false; } break; case 64: if 
(AArch64::QQQQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register store without NEON"); Opc = AArch64::ST1Fourv2d, Offset = false; } @@ -1675,39 +1681,39 @@ void AArch64InstrInfo::loadRegFromStackSlot( if (AArch64::FPR128RegClass.hasSubClassEq(RC)) Opc = AArch64::LDRQui; else if (AArch64::DDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Twov1d, Offset = false; } break; case 24: if (AArch64::DDDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Threev1d, Offset = false; } break; case 32: if (AArch64::DDDDRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv1d, Offset = false; } else if (AArch64::QQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Twov2d, Offset = false; } break; case 48: if (AArch64::QQQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Threev2d, Offset = false; } break; case 64: if (AArch64::QQQQRegClass.hasSubClassEq(RC)) { - assert(getSubTarget().hasNEON() && + assert(Subtarget.hasNEON() && "Unexpected register load without NEON"); Opc = AArch64::LD1Fourv2d, Offset = false; } @@ -1726,7 +1732,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( void llvm::emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset, - const AArch64InstrInfo *TII, + const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool SetNZCV) { if (DestReg == SrcReg && Offset == 0) return; @@ -1835,7 +1841,7 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset, *OutUnscaledOp = 0; switch (MI.getOpcode()) { default: - assert(0 && "unhandled opcode in rewriteAArch64FrameIndex"); + llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex"); // Vector spills/fills can't take an immediate offset. case AArch64::LD1Twov2d: case AArch64::LD1Threev2d: diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h index 90ce75f..f70b82b 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.h +++ b/lib/Target/AArch64/AArch64InstrInfo.h @@ -44,8 +44,6 @@ public: /// always be able to get register info as well (through this method). const AArch64RegisterInfo &getRegisterInfo() const { return RI; } - const AArch64Subtarget &getSubTarget() const { return Subtarget; } - unsigned GetInstSizeInBytes(const MachineInstr *MI) const; bool isCoalescableExtInstr(const MachineInstr &MI, unsigned &SrcReg, @@ -168,7 +166,7 @@ private: /// if necessary, to be replaced by the scavenger at the end of PEI. 
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, unsigned DestReg, unsigned SrcReg, int Offset, - const AArch64InstrInfo *TII, + const TargetInstrInfo *TII, MachineInstr::MIFlag = MachineInstr::NoFlags, bool SetNZCV = false); diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 9ad36e8..1211fba 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -323,7 +323,7 @@ def : Pat<(AArch64LOADgot tconstpool:$addr), // System instructions. //===----------------------------------------------------------------------===// -def HINT : HintI<"hint">; +def HINT : HintI<"hint">; def : InstAlias<"nop", (HINT 0b000)>; def : InstAlias<"yield",(HINT 0b001)>; def : InstAlias<"wfe", (HINT 0b010)>; @@ -671,10 +671,10 @@ def CRC32CXrr : BaseCRC32<1, 0b11, 1, GPR64, int_aarch64_crc32cx, "crc32cx">; //===----------------------------------------------------------------------===// // (immediate) -defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag>; -defm AND : LogicalImm<0b00, "and", and>; -defm EOR : LogicalImm<0b10, "eor", xor>; -defm ORR : LogicalImm<0b01, "orr", or>; +defm ANDS : LogicalImmS<0b11, "ands", AArch64and_flag, "bics">; +defm AND : LogicalImm<0b00, "and", and, "bic">; +defm EOR : LogicalImm<0b10, "eor", xor, "eon">; +defm ORR : LogicalImm<0b01, "orr", or, "orn">; // FIXME: these aliases *are* canonical sometimes (when movz can't be // used). Actually, it seems to be working right now, but putting logical_immXX @@ -737,6 +737,10 @@ def : Pat<(not GPR64:$Xm), (ORNXrr XZR, GPR64:$Xm)>; defm CLS : OneOperandData<0b101, "cls">; defm CLZ : OneOperandData<0b100, "clz", ctlz>; defm RBIT : OneOperandData<0b000, "rbit">; + +def : Pat<(int_aarch64_rbit GPR32:$Rn), (RBITWr $Rn)>; +def : Pat<(int_aarch64_rbit GPR64:$Rn), (RBITXr $Rn)>; + def REV16Wr : OneWRegData<0b001, "rev16", UnOpFrag<(rotr (bswap node:$LHS), (i64 16))>>; def REV16Xr : OneXRegData<0b001, "rev16", null_frag>; @@ -2238,6 +2242,81 @@ def : Pat<(f32_to_f16 FPR32:$Rn), def FCVTSHpseudo : Pseudo<(outs FPR32:$Rd), (ins FPR32:$Rn), [(set (f32 FPR32:$Rd), (f16_to_f32 i32:$Rn))]>; +// When converting from f16 coming directly from a load, make sure we +// load into the FPR16 registers rather than going through the GPRs. 
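What the f16 patterns below buy, in scalar terms: the half-precision value stays in the FP register file from load through convert (and from convert through store), with no round-trip through a GPR. A sketch assuming a toolchain with the ARM __fp16 storage type; the comments name the intended selection:

static float loadHalfAsFloat(const __fp16 *P) {
  return static_cast<float>(*P);   // ldr h0, [x0] ; fcvt s0, h0
}
static void storeFloatAsHalf(__fp16 *P, float F) {
  *P = static_cast<__fp16>(F);     // fcvt h0, s0 ; str h0, [x0]
}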
+// f16->f32 +def : Pat<(f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend))))), + (FCVTSHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>; +def : Pat<(f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend))))), + (FCVTSHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>; +def : Pat <(f32 (f16_to_f32 (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))), + (FCVTSHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>; +def : Pat <(f32 (f16_to_f32 (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))), + (FCVTSHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; + +// f16->f64 +def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend))))))), + (FCVTDHr (LDRHroW GPR64sp:$Rn, GPR32:$Rm, ro_Wextend16:$extend))>; +def : Pat<(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend))))))), + (FCVTDHr (LDRHroX GPR64sp:$Rn, GPR64:$Rm, ro_Xextend16:$extend))>; +def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset))))))), + (FCVTDHr (LDRHui GPR64sp:$Rn, uimm12s2:$offset))>; +def : Pat <(f64 (fextend (f32 (f16_to_f32 (i32 + (zextloadi16 (am_unscaled16 GPR64sp:$Rn, simm9:$offset))))))), + (FCVTDHr (LDURHi GPR64sp:$Rn, simm9:$offset))>; + +// When converting to f16 going directly to a store, make sure we use the +// appropriate direct conversion instructions and store via the FPR16 +// registers rather than going through the GPRs. +let AddedComplexity = 10 in { +// f32->f16 +def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)), + (STRHroW (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)>; +def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)), + (STRHroX (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHui (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 FPR32:$Rt))), + (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (STURHi (FCVTHSr FPR32:$Rt), GPR64sp:$Rn, simm9:$offset)>; +// f64->f16 +def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (ro_Windexed16 GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)), + (STRHroW (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR32:$Rm, + ro_Wextend16:$extend)>; +def : Pat< (truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (ro_Xindexed16 GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)), + (STRHroX (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, GPR64:$Rm, + ro_Xextend16:$extend)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (am_indexed16 GPR64sp:$Rn, uimm12s2:$offset)), + (STRHui (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, uimm12s2:$offset)>; +def : Pat <(truncstorei16 (assertzext (i32 (f32_to_f16 (f32 (fround FPR64:$Rt))))), + (am_unscaled16 GPR64sp:$Rn, simm9:$offset)), + (STURHi (FCVTHDr FPR64:$Rt), GPR64sp:$Rn, simm9:$offset)>; +} + + //===----------------------------------------------------------------------===// // Floating point single operand instructions. 
//===----------------------------------------------------------------------===// diff --git a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp index e7454be..3df9c4f 100644 --- a/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp +++ b/lib/Target/AArch64/AArch64LoadStoreOptimizer.cpp @@ -40,14 +40,13 @@ STATISTIC(NumPreFolded, "Number of pre-index updates folded"); STATISTIC(NumUnscaledPairCreated, "Number of load/store from unscaled generated"); -static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", cl::init(20), - cl::Hidden); +static cl::opt<unsigned> ScanLimit("aarch64-load-store-scan-limit", + cl::init(20), cl::Hidden); // Place holder while testing unscaled load/store combining -static cl::opt<bool> -EnableAArch64UnscaledMemOp("aarch64-unscaled-mem-op", cl::Hidden, - cl::desc("Allow AArch64 unscaled load/store combining"), - cl::init(true)); +static cl::opt<bool> EnableAArch64UnscaledMemOp( + "aarch64-unscaled-mem-op", cl::Hidden, + cl::desc("Allow AArch64 unscaled load/store combining"), cl::init(true)); namespace { struct AArch64LoadStoreOpt : public MachineFunctionPass { @@ -60,19 +59,19 @@ struct AArch64LoadStoreOpt : public MachineFunctionPass { // Scan the instructions looking for a load/store that can be combined // with the current instruction into a load/store pair. // Return the matching instruction if one is found, else MBB->end(). - // If a matching instruction is found, mergeForward is set to true if the + // If a matching instruction is found, MergeForward is set to true if the // merge is to remove the first instruction and replace the second with // a pair-wise insn, and false if the reverse is true. MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, - bool &mergeForward, + bool &MergeForward, unsigned Limit); // Merge the two instructions indicated into a single pair-wise instruction. - // If mergeForward is true, erase the first instruction and fold its + // If MergeForward is true, erase the first instruction and fold its // operation into the second. If false, the reverse. Return the instruction // following the first instruction (which may change during processing). 
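In the load/store optimizer below, the MergeForward flag picks both the surviving instruction and where the paired instruction is inserted; the selection boils down to one conditional, sketched here (the same instruction also donates the base-register operand flags):

template <typename Iter>
static Iter pairedInsertionPoint(Iter I, Iter Paired, bool MergeForward) {
  return MergeForward ? Paired : I; // true: fold I into Paired; false: reverse
}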
MachineBasicBlock::iterator mergePairedInsns(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator Paired, bool mergeForward); + MachineBasicBlock::iterator Paired, bool MergeForward); // Scan the instruction list to find a base register update that can // be combined with the current instruction (a load or store) using @@ -142,7 +141,7 @@ static bool isUnscaledLdst(unsigned Opc) { int AArch64LoadStoreOpt::getMemSize(MachineInstr *MemMI) { switch (MemMI->getOpcode()) { default: - llvm_unreachable("Opcode has has unknown size!"); + llvm_unreachable("Opcode has unknown size!"); case AArch64::STRSui: case AArch64::STURSi: return 4; @@ -217,16 +216,26 @@ static unsigned getPreIndexedOpcode(unsigned Opc) { switch (Opc) { default: llvm_unreachable("Opcode has no pre-indexed equivalent!"); - case AArch64::STRSui: return AArch64::STRSpre; - case AArch64::STRDui: return AArch64::STRDpre; - case AArch64::STRQui: return AArch64::STRQpre; - case AArch64::STRWui: return AArch64::STRWpre; - case AArch64::STRXui: return AArch64::STRXpre; - case AArch64::LDRSui: return AArch64::LDRSpre; - case AArch64::LDRDui: return AArch64::LDRDpre; - case AArch64::LDRQui: return AArch64::LDRQpre; - case AArch64::LDRWui: return AArch64::LDRWpre; - case AArch64::LDRXui: return AArch64::LDRXpre; + case AArch64::STRSui: + return AArch64::STRSpre; + case AArch64::STRDui: + return AArch64::STRDpre; + case AArch64::STRQui: + return AArch64::STRQpre; + case AArch64::STRWui: + return AArch64::STRWpre; + case AArch64::STRXui: + return AArch64::STRXpre; + case AArch64::LDRSui: + return AArch64::LDRSpre; + case AArch64::LDRDui: + return AArch64::LDRDpre; + case AArch64::LDRQui: + return AArch64::LDRQpre; + case AArch64::LDRWui: + return AArch64::LDRWpre; + case AArch64::LDRXui: + return AArch64::LDRXpre; } } @@ -260,7 +269,7 @@ static unsigned getPostIndexedOpcode(unsigned Opc) { MachineBasicBlock::iterator AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Paired, - bool mergeForward) { + bool MergeForward) { MachineBasicBlock::iterator NextI = I; ++NextI; // If NextI is the second of the two instructions to be merged, we need @@ -276,12 +285,12 @@ AArch64LoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, unsigned NewOpc = getMatchingPairOpcode(I->getOpcode()); // Insert our new paired instruction after whichever of the paired - // instructions mergeForward indicates. - MachineBasicBlock::iterator InsertionPoint = mergeForward ? Paired : I; - // Also based on mergeForward is from where we copy the base register operand + // instructions MergeForward indicates. + MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; + // Also based on MergeForward is from where we copy the base register operand // so we get the flags compatible with the input code. MachineOperand &BaseRegOp = - mergeForward ? Paired->getOperand(1) : I->getOperand(1); + MergeForward ? Paired->getOperand(1) : I->getOperand(1); // Which register is Rt and which is Rt2 depends on the offset order. MachineInstr *RtMI, *Rt2MI; @@ -355,8 +364,8 @@ static bool inBoundsForPair(bool IsUnscaled, int Offset, int OffsetStride) { if (IsUnscaled) { // Convert the byte-offset used by unscaled into an "element" offset used // by the scaled pair load/store instructions. 
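For context on the check that follows: LDP/STP take a signed 7-bit immediate scaled by the access size, so an unscaled byte offset is only pairable once divided down into that [-64, 63] element window. A minimal sketch of the constraint (an assumed helper, not the exact LLVM code, which handles the stride alignment elsewhere):

// Sketch: can an unscaled byte offset be encoded in a paired load/store?
// The pair immediate is a signed 7-bit element count, scaled by the access
// size (OffsetStride here).
bool fitsPairedImmediate(int ByteOffset, int OffsetStride) {
  if (ByteOffset % OffsetStride != 0)
    return false;                      // not representable once scaled
  int ElemOffset = ByteOffset / OffsetStride;
  return ElemOffset >= -64 && ElemOffset <= 63;
}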
- int elemOffset = Offset / OffsetStride; - if (elemOffset > 63 || elemOffset < -64) + int ElemOffset = Offset / OffsetStride; + if (ElemOffset > 63 || ElemOffset < -64) return false; } return true; @@ -374,14 +383,14 @@ static int alignTo(int Num, int PowOf2) { /// be combined with the current instruction into a load/store pair. MachineBasicBlock::iterator AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, - bool &mergeForward, unsigned Limit) { + bool &MergeForward, unsigned Limit) { MachineBasicBlock::iterator E = I->getParent()->end(); MachineBasicBlock::iterator MBBI = I; MachineInstr *FirstMI = I; ++MBBI; int Opc = FirstMI->getOpcode(); - bool mayLoad = FirstMI->mayLoad(); + bool MayLoad = FirstMI->mayLoad(); bool IsUnscaled = isUnscaledLdst(Opc); unsigned Reg = FirstMI->getOperand(0).getReg(); unsigned BaseReg = FirstMI->getOperand(1).getReg(); @@ -453,7 +462,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // If the destination register of the loads is the same register, bail // and keep looking. A load-pair instruction with both destination // registers the same is UNPREDICTABLE and will result in an exception. - if (mayLoad && Reg == MI->getOperand(0).getReg()) { + if (MayLoad && Reg == MI->getOperand(0).getReg()) { trackRegDefsUses(MI, ModifiedRegs, UsedRegs, TRI); continue; } @@ -462,7 +471,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // the two instructions, we can combine the second into the first. if (!ModifiedRegs[MI->getOperand(0).getReg()] && !UsedRegs[MI->getOperand(0).getReg()]) { - mergeForward = false; + MergeForward = false; return MBBI; } @@ -471,7 +480,7 @@ AArch64LoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, // second. if (!ModifiedRegs[FirstMI->getOperand(0).getReg()] && !UsedRegs[FirstMI->getOperand(0).getReg()]) { - mergeForward = true; + MergeForward = true; return MBBI; } // Unable to combine these instructions due to interference in between. @@ -798,14 +807,14 @@ bool AArch64LoadStoreOpt::optimizeBlock(MachineBasicBlock &MBB) { break; } // Look ahead up to ScanLimit instructions for a pairable instruction. - bool mergeForward = false; + bool MergeForward = false; MachineBasicBlock::iterator Paired = - findMatchingInsn(MBBI, mergeForward, ScanLimit); + findMatchingInsn(MBBI, MergeForward, ScanLimit); if (Paired != E) { // Merge the loads into a pair. Keeping the iterator straight is a // pain, so we let the merge routine tell us what the next instruction // is after it's done mucking about. 
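The "keeping the iterator straight" comment is the usual erase-while-iterating problem: mergePairedInsns deletes both original instructions, so the caller's iterator would dangle unless the merge routine hands back the next valid position, exactly as container erase does. The same pattern in miniature:

#include <list>

// Let the mutating routine return the iterator *after* what it removed,
// instead of the caller incrementing an iterator that may now dangle.
std::list<int>::iterator removeAndAdvance(std::list<int> &L,
                                          std::list<int>::iterator I) {
  return L.erase(I); // erase() hands back the next valid element
}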
- MBBI = mergePairedInsns(MBBI, Paired, mergeForward); + MBBI = mergePairedInsns(MBBI, Paired, MergeForward); Modified = true; ++NumPairCreated; diff --git a/lib/Target/AArch64/AArch64MCInstLower.cpp b/lib/Target/AArch64/AArch64MCInstLower.cpp index ab6d375..75a17b9 100644 --- a/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -51,7 +51,7 @@ MCOperand AArch64MCInstLower::lowerSymbolOperandDarwin(const MachineOperand &MO, AArch64II::MO_PAGEOFF) RefKind = MCSymbolRefExpr::VK_GOTPAGEOFF; else - assert(0 && "Unexpected target flags with MO_GOT on GV operand"); + llvm_unreachable("Unexpected target flags with MO_GOT on GV operand"); } else if ((MO.getTargetFlags() & AArch64II::MO_TLS) != 0) { if ((MO.getTargetFlags() & AArch64II::MO_FRAGMENT) == AArch64II::MO_PAGE) RefKind = MCSymbolRefExpr::VK_TLVPPAGE; @@ -154,7 +154,7 @@ bool AArch64MCInstLower::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) const { switch (MO.getType()) { default: - assert(0 && "unknown operand type"); + llvm_unreachable("unknown operand type"); case MachineOperand::MO_Register: // Ignore all implicit register operands. if (MO.isImplicit()) diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index 21c927f..a30e4ad 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -175,7 +175,7 @@ def GPR64all : RegisterClass<"AArch64", [i64], 64, (add GPR64common, XZR, SP)>; // This is for indirect tail calls to store the address of the destination. def tcGPR64 : RegisterClass<"AArch64", [i64], 64, (sub GPR64common, X19, X20, X21, X22, X23, X24, X25, X26, - X27, X28)>; + X27, X28, FP, LR)>; // GPR register classes for post increment amount of vector load/store that // has alternate printing when Rm=31 and prints a constant immediate value diff --git a/lib/Target/AArch64/AArch64SchedA53.td b/lib/Target/AArch64/AArch64SchedA53.td index 0c3949e..d709bee 100644 --- a/lib/Target/AArch64/AArch64SchedA53.td +++ b/lib/Target/AArch64/AArch64SchedA53.td @@ -148,9 +148,9 @@ def : ReadAdvance<ReadVLD, 0>; // ALU - Most operands in the ALU pipes are not needed for two cycles. Shiftable // operands are needed one cycle later if and only if they are to be -// shifted. Otherwise, they too are needed two cycle later. This same +// shifted. Otherwise, they too are needed two cycles later. This same // ReadAdvance applies to Extended registers as well, even though there is -// a seperate SchedPredicate for them. +// a separate SchedPredicate for them. def : ReadAdvance<ReadI, 2, [WriteImm,WriteI, WriteISReg, WriteIEReg,WriteIS, WriteID32,WriteID64, diff --git a/lib/Target/AArch64/AArch64SchedA57.td b/lib/Target/AArch64/AArch64SchedA57.td new file mode 100644 index 0000000..8209f96 --- /dev/null +++ b/lib/Target/AArch64/AArch64SchedA57.td @@ -0,0 +1,304 @@ +//=- AArch64SchedA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for ARM Cortex-A57 to support +// instruction scheduling and other instruction cost heuristics. 
+// +//===----------------------------------------------------------------------===// + +def CortexA57Model : SchedMachineModel { + let IssueWidth = 8; // 3-way decode and 8-way issue + let MicroOpBufferSize = 128; // 128 micro-op re-order buffer + let LoadLatency = 4; // Optimistic load latency + let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch +} + +//===----------------------------------------------------------------------===// +// Define each kind of processor resource and number available on Cortex-A57. +// Cortex-A57 has 8 pipelines, each with its own 8-entry queue where +// micro-ops wait for their operands and then issue out-of-order. + +def A57UnitB : ProcResource<1> { let BufferSize = 8; } // Type B micro-ops +def A57UnitI : ProcResource<2> { let BufferSize = 8; } // Type I micro-ops +def A57UnitM : ProcResource<1> { let BufferSize = 8; } // Type M micro-ops +def A57UnitL : ProcResource<1> { let BufferSize = 8; } // Type L micro-ops +def A57UnitS : ProcResource<1> { let BufferSize = 8; } // Type S micro-ops +def A57UnitX : ProcResource<1> { let BufferSize = 8; } // Type X micro-ops +def A57UnitW : ProcResource<1> { let BufferSize = 8; } // Type W micro-ops +let SchedModel = CortexA57Model in { + def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops +} + + +let SchedModel = CortexA57Model in { + +//===----------------------------------------------------------------------===// +// Define customized scheduler read/write types specific to the Cortex-A57. + +include "AArch64SchedA57WriteRes.td" + +//===----------------------------------------------------------------------===// +// Map the target-defined scheduler read/write resources and latency for +// Cortex-A57. The Cortex-A57 types are directly associated with resources, so +// defining the aliases precludes the need for mapping them using WriteRes. The +// aliases are sufficient for creating a coarse, working model. As the model +// evolves, InstRWs will be used to override these SchedAliases.
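One detail worth calling out in the resource definitions above: A57UnitV is a ProcResGroup, so a Type V micro-op may issue to either the X or the W queue, whichever has room. A toy illustration of that union semantics (not LLVM API, just the idea):

// Toy model of ProcResGroup<[A57UnitX, A57UnitW]>: a V micro-op consumes a
// slot in whichever of the two 8-entry issue queues has one free.
struct IssueQueue {
  int FreeSlots; // starts at BufferSize (8 on Cortex-A57)
};

bool issueTypeV(IssueQueue &X, IssueQueue &W) {
  if (X.FreeSlots > 0) { --X.FreeSlots; return true; }
  if (W.FreeSlots > 0) { --W.FreeSlots; return true; }
  return false; // both queues full: the micro-op stalls in dispatch
}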
+ +def : SchedAlias<WriteImm, A57Write_1cyc_1I>; +def : SchedAlias<WriteI, A57Write_1cyc_1I>; +def : SchedAlias<WriteISReg, A57Write_2cyc_1M>; +def : SchedAlias<WriteIEReg, A57Write_2cyc_1M>; +def : SchedAlias<WriteExtr, A57Write_1cyc_1I>; +def : SchedAlias<WriteIS, A57Write_1cyc_1I>; +def : SchedAlias<WriteID32, A57Write_19cyc_1M>; +def : SchedAlias<WriteID64, A57Write_35cyc_1M>; +def : SchedAlias<WriteIM32, A57Write_3cyc_1M>; +def : SchedAlias<WriteIM64, A57Write_5cyc_1M>; +def : SchedAlias<WriteBr, A57Write_1cyc_1B>; +def : SchedAlias<WriteBrReg, A57Write_1cyc_1B>; +def : SchedAlias<WriteLD, A57Write_4cyc_1L>; +def : SchedAlias<WriteST, A57Write_1cyc_1S>; +def : SchedAlias<WriteSTP, A57Write_1cyc_1S>; +def : SchedAlias<WriteAdr, A57Write_1cyc_1I>; +def : SchedAlias<WriteLDIdx, A57Write_4cyc_1I_1L>; +def : SchedAlias<WriteSTIdx, A57Write_1cyc_1I_1S>; +def : SchedAlias<WriteF, A57Write_3cyc_1V>; +def : SchedAlias<WriteFCmp, A57Write_3cyc_1V>; +def : SchedAlias<WriteFCvt, A57Write_5cyc_1V>; +def : SchedAlias<WriteFCopy, A57Write_3cyc_1V>; +def : SchedAlias<WriteFImm, A57Write_3cyc_1V>; +def : SchedAlias<WriteFMul, A57Write_5cyc_1V>; +def : SchedAlias<WriteFDiv, A57Write_18cyc_1X>; +def : SchedAlias<WriteV, A57Write_3cyc_1V>; +def : SchedAlias<WriteVLD, A57Write_5cyc_1L>; +def : SchedAlias<WriteVST, A57Write_1cyc_1S>; + +def : WriteRes<WriteSys, []> { let Latency = 1; } +def : WriteRes<WriteBarrier, []> { let Latency = 1; } +def : WriteRes<WriteHint, []> { let Latency = 1; } + +def : WriteRes<WriteLDHi, []> { let Latency = 4; } + +// Forwarding logic is not [yet] explicitly modeled beyond what is captured +// in the latencies of the A57 Generic SchedWriteRes's. +def : ReadAdvance<ReadI, 0>; +def : ReadAdvance<ReadISReg, 0>; +def : ReadAdvance<ReadIEReg, 0>; +def : ReadAdvance<ReadIM, 0>; +def : ReadAdvance<ReadIMA, 0>; +def : ReadAdvance<ReadID, 0>; +def : ReadAdvance<ReadExtrHi, 0>; +def : ReadAdvance<ReadAdrBase, 0>; +def : ReadAdvance<ReadVLD, 0>; + + +//===----------------------------------------------------------------------===// +// Specialize the coarse model by associating instruction groups with the +// subtarget-defined types. As the model is refined, this will override most +// of the above SchedAlias mappings.
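The mapping strategy, in other words: SchedAlias supplies a coarse per-category default, and the InstRW entries that follow override it instruction by instruction (usually via instregex). Conceptually the resolution behaves like this lookup (a sketch only; the real resolution happens in TableGen at build time, not at runtime):

#include <map>
#include <string>

struct SchedInfo {
  int LatencyCycles;
  int NumMicroOps;
};

// Per-instruction InstRW overrides win; the SchedAlias default is only the
// fallback for opcodes with no specific entry.
SchedInfo lookupSched(const std::string &Opcode,
                      const std::map<std::string, SchedInfo> &InstRWOverrides,
                      const SchedInfo &AliasDefault) {
  auto It = InstRWOverrides.find(Opcode);
  return It != InstRWOverrides.end() ? It->second : AliasDefault;
}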
+ +// Miscellaneous +// ----------------------------------------------------------------------------- + +def : InstRW<[WriteI], (instrs COPY)>; + + +// Branch Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1B_1I], (instrs BL)>; +def : InstRW<[A57Write_2cyc_1B_1I], (instrs BLR)>; + + +// Divide and Multiply Instructions +// ----------------------------------------------------------------------------- + +// Multiply high +def : InstRW<[A57Write_6cyc_1M], (instrs SMULHrr, UMULHrr)>; + + +// Miscellaneous Data-Processing Instructions +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1I], (instrs EXTRWrri)>; +def : InstRW<[A57Write_3cyc_1I_1M], (instrs EXTRXrri)>; +def : InstRW<[A57Write_2cyc_1M], (instregex "BFM")>; + + +// Cryptography Extensions +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_3cyc_1W], (instregex "CRC32")>; + + +// Vector Load +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1i(8|16|32)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1i(8|16|32)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1i(64)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Rv(1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Rv(1d)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD1Rv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD1Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_7cyc_3L], (instregex "LD1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_7cyc_3L, WriteAdr], (instregex "LD1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2i(8|16)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2i(8|16)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD2i(32)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2i(32)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2i(64)$")>; +def : 
InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_5cyc_1L], (instregex "LD2Rv(1d)$")>; +def : InstRW<[A57Write_5cyc_1L, WriteAdr], (instregex "LD2Rv(1d)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD2Rv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD2Rv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_1V], (instregex "LD2Twov(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_1V, WriteAdr], (instregex "LD2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD2Twov(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD2Twov(2d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD2Twov(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3i(8|16)$")>; +def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3i(8|16)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3i(32)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3i(32)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD3i(64)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD3Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD3Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD3Rv(1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD3Rv(1d)_POST$")>; +def : InstRW<[A57Write_9cyc_1L_3V], (instregex "LD3Rv(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_1L_3V, WriteAdr], (instregex "LD3Rv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD3Rv(2d)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD3Rv(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_2V], (instregex "LD3Threev(8b|4h|2s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_10cyc_3L_4V], (instregex "LD3Threev(16b|8h|4s)$")>; +def : InstRW<[A57Write_10cyc_3L_4V, WriteAdr], (instregex "LD3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD3Threev(2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD3Threev(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(8|16)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(8|16)_POST$")>; +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4i(32)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4i(32)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4i(64)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4i(64)_POST$")>; + +def : InstRW<[A57Write_8cyc_1L_2V], (instregex "LD4Rv(8b|4h|2s)$")>; +def : InstRW<[A57Write_8cyc_1L_2V, WriteAdr], (instregex "LD4Rv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_2L], (instregex "LD4Rv(1d)$")>; +def : InstRW<[A57Write_6cyc_2L, WriteAdr], (instregex "LD4Rv(1d)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_3V], (instregex "LD4Rv(16b|8h|4s)$")>; +def : InstRW<[A57Write_9cyc_2L_3V, WriteAdr], (instregex "LD4Rv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_9cyc_2L_4V], (instregex "LD4Rv(2d)$")>; +def : InstRW<[A57Write_9cyc_2L_4V, WriteAdr], (instregex "LD4Rv(2d)_POST$")>; + +def : InstRW<[A57Write_9cyc_2L_2V], 
(instregex "LD4Fourv(8b|4h|2s)$")>; +def : InstRW<[A57Write_9cyc_2L_2V, WriteAdr], (instregex "LD4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_11cyc_4L_4V], (instregex "LD4Fourv(16b|8h|4s)$")>; +def : InstRW<[A57Write_11cyc_4L_4V, WriteAdr], (instregex "LD4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_4L], (instregex "LD4Fourv(2d)$")>; +def : InstRW<[A57Write_8cyc_4L, WriteAdr], (instregex "LD4Fourv(2d)_POST$")>; + +// Vector Store +// ----------------------------------------------------------------------------- + +def : InstRW<[A57Write_1cyc_1S], (instregex "ST1i(8|16|32)$")>; +def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1i(8|16|32)_POST$")>; +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST1i(64)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST1i(64)_POST$")>; + +def : InstRW<[A57Write_1cyc_1S], (instregex "ST1Onev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_1cyc_1S, WriteAdr], (instregex "ST1Onev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Onev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Onev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST1Twov(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST1Twov(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Twov(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Twov(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_3cyc_3S], (instregex "ST1Threev(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST1Threev(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_6cyc_6S], (instregex "ST1Threev(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST1Threev(16b|8h|4s|2d)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST1Fourv(8b|4h|2s|1d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST1Fourv(8b|4h|2s|1d)_POST$")>; +def : InstRW<[A57Write_8cyc_8S], (instregex "ST1Fourv(16b|8h|4s|2d)$")>; +def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST1Fourv(16b|8h|4s|2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST2i(8|16|32)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST2i(8|16|32)_POST$")>; +def : InstRW<[A57Write_2cyc_2S], (instregex "ST2i(64)$")>; +def : InstRW<[A57Write_2cyc_2S, WriteAdr], (instregex "ST2i(64)_POST$")>; + +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST2Twov(8b|4h|2s)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST2Twov(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST2Twov(16b|8h|4s)$")>; +def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST2Twov(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST2Twov(2d)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST2Twov(2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST3i(8|16)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST3i(8|16)_POST$")>; +def : InstRW<[A57Write_3cyc_3S], (instregex "ST3i(32)$")>; +def : InstRW<[A57Write_3cyc_3S, WriteAdr], (instregex "ST3i(32)_POST$")>; +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST3i(64)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST3i(64)_POST$")>; + +def : InstRW<[A57Write_3cyc_3S_2V], (instregex "ST3Threev(8b|4h|2s)$")>; +def : InstRW<[A57Write_3cyc_3S_2V, WriteAdr], (instregex "ST3Threev(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_6cyc_6S_4V], (instregex "ST3Threev(16b|8h|4s)$")>; +def : 
InstRW<[A57Write_6cyc_6S_4V, WriteAdr], (instregex "ST3Threev(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_6cyc_6S], (instregex "ST3Threev(2d)$")>; +def : InstRW<[A57Write_6cyc_6S, WriteAdr], (instregex "ST3Threev(2d)_POST$")>; + +def : InstRW<[A57Write_3cyc_1S_1V], (instregex "ST4i(8|16)$")>; +def : InstRW<[A57Write_3cyc_1S_1V, WriteAdr], (instregex "ST4i(8|16)_POST$")>; +def : InstRW<[A57Write_4cyc_4S], (instregex "ST4i(32)$")>; +def : InstRW<[A57Write_4cyc_4S, WriteAdr], (instregex "ST4i(32)_POST$")>; +def : InstRW<[A57Write_3cyc_2S_1V], (instregex "ST4i(64)$")>; +def : InstRW<[A57Write_3cyc_2S_1V, WriteAdr], (instregex "ST4i(64)_POST$")>; + +def : InstRW<[A57Write_4cyc_4S_2V], (instregex "ST4Fourv(8b|4h|2s)$")>; +def : InstRW<[A57Write_4cyc_4S_2V, WriteAdr], (instregex "ST4Fourv(8b|4h|2s)_POST$")>; +def : InstRW<[A57Write_8cyc_8S_4V], (instregex "ST4Fourv(16b|8h|4s)$")>; +def : InstRW<[A57Write_8cyc_8S_4V, WriteAdr], (instregex "ST4Fourv(16b|8h|4s)_POST$")>; +def : InstRW<[A57Write_8cyc_8S], (instregex "ST4Fourv(2d)$")>; +def : InstRW<[A57Write_8cyc_8S, WriteAdr], (instregex "ST4Fourv(2d)_POST$")>; + +} // SchedModel = CortexA57Model diff --git a/lib/Target/AArch64/AArch64SchedA57WriteRes.td b/lib/Target/AArch64/AArch64SchedA57WriteRes.td new file mode 100644 index 0000000..a8f421b --- /dev/null +++ b/lib/Target/AArch64/AArch64SchedA57WriteRes.td @@ -0,0 +1,512 @@ +//=- AArch64SchedA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach +// below is to define a generic SchedWriteRes for every combination of +// latency and microOps. The naming convention is to use a prefix, one field +// for latency, and one or more microOp count/type designators. +// Prefix: A57Write +// Latency: #cyc +// MicroOp Count/Types: #(B|I|M|L|S|X|W|V) +// +// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are +// 11 micro-ops to be issued down one I pipe, six S pipes and four V pipes.
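Decoding one of these names mechanically, per the convention just stated: the example A57Write_6cyc_1I_6S_4V carries a 6-cycle latency and 1 + 6 + 4 = 11 micro-ops spread across the I, S, and V pipes. The same arithmetic as a compile-time check (illustrative only; it mirrors the Latency/NumMicroOps fields these defs set):

// The name encodes everything: <prefix>_<latency>cyc_<count><unit>...
struct A57WriteShape {
  int LatencyCycles;
  int MicroOps;
};

constexpr A57WriteShape A57Write_6cyc_1I_6S_4V_Shape{6, 1 + 6 + 4};
static_assert(A57Write_6cyc_1I_6S_4V_Shape.MicroOps == 11,
              "1 I + 6 S + 4 V micro-ops");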
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// Define Generic 1 micro-op types + +def A57Write_5cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 5; } +def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; } +def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; } +def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; } +def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; } +def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18; } +def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19; } +def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; } +def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; } +def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; } +def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; } +def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32; } +def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35; } +def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; } +def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; } +def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; } +def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; } +def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; } +def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; } +def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; } +def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; } +def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; } + + +//===----------------------------------------------------------------------===// +// Define Generic 2 micro-op types + +def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 64; + let NumMicroOps = 2; +} +def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV, + A57UnitX]> { + let Latency = 7; + let NumMicroOps = 2; +} +def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 2; +} +def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 8; + let NumMicroOps = 2; +} +def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> { + let Latency = 6; + let NumMicroOps = 2; +} +def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 5; + let NumMicroOps = 2; +} +def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL, + A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 2; +} +def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 1; + let NumMicroOps = 2; +} +def 
A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 1; + let NumMicroOps = 2; +} +def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB, + A57UnitI]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> { + let Latency = 2; + let NumMicroOps = 2; +} +def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 36; + let NumMicroOps = 2; +} +def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI, + A57UnitM]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI, + A57UnitS]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 2; +} +def A57Write_4cyc_1I_1L : SchedWriteRes<[A57UnitI, + A57UnitL]> { + let Latency = 4; + let NumMicroOps = 2; +} +def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> { + let Latency = 4; + let NumMicroOps = 2; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 3 micro-op types + +def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 3; +} +def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 3; +} +def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_1M_2S : SchedWriteRes<[A57UnitM, + A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_3S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_3cyc_2S_1V : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitV]> { + let Latency = 3; + let NumMicroOps = 3; +} +def A57Write_5cyc_1I_2L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 5; + let NumMicroOps = 3; +} +def A57Write_6cyc_1I_2L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 6; + let NumMicroOps = 3; +} +def A57Write_6cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 3; +} +def A57Write_7cyc_3L : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL]> { + let Latency = 7; + let NumMicroOps = 3; +} +def A57Write_8cyc_1I_1L_1V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_8cyc_1L_2V : SchedWriteRes<[A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_8cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 3; +} +def A57Write_9cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 3; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 4 micro-op types + +def A57Write_2cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 2; + let NumMicroOps = 4; +} +def A57Write_3cyc_2I_2S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 4; +} +def A57Write_3cyc_1I_3S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 3; + let NumMicroOps = 4; +} +def A57Write_3cyc_1I_2S_1V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitV]> { + let Latency = 3; + let 
NumMicroOps = 4; +} +def A57Write_4cyc_4S : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 4; +} +def A57Write_7cyc_1I_3L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, A57UnitL]> { + let Latency = 7; + let NumMicroOps = 4; +} +def A57Write_5cyc_2I_2L : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitL, A57UnitL]> { + let Latency = 5; + let NumMicroOps = 4; +} +def A57Write_8cyc_1I_1L_2V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 4; +} +def A57Write_8cyc_4L : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitL, A57UnitL]> { + let Latency = 8; + let NumMicroOps = 4; +} +def A57Write_9cyc_2L_2V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 4; +} +def A57Write_9cyc_1L_3V : SchedWriteRes<[A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 4; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 5 micro-op types + +def A57Write_3cyc_3S_2V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 5; +} +def A57Write_8cyc_1I_4L : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitL, A57UnitL]> { + let Latency = 8; + let NumMicroOps = 5; +} +def A57Write_4cyc_1I_4S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 5; +} +def A57Write_9cyc_1I_2L_2V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} +def A57Write_9cyc_1I_1L_3V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} +def A57Write_9cyc_2L_3V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 5; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 6 micro-op types + +def A57Write_3cyc_1I_3S_2V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 3; + let NumMicroOps = 6; +} +def A57Write_4cyc_2I_4S : SchedWriteRes<[A57UnitI, A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 4; + let NumMicroOps = 6; +} +def A57Write_4cyc_4S_2V : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { + let Latency = 4; + let NumMicroOps = 6; +} +def A57Write_6cyc_6S : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 6; + let NumMicroOps = 6; +} +def A57Write_9cyc_1I_2L_3V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} +def A57Write_9cyc_1I_1L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} +def A57Write_9cyc_2L_4V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 6; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 7 micro-op types + +def A57Write_10cyc_3L_4V : SchedWriteRes<[A57UnitL, A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 7; +} +def A57Write_4cyc_1I_4S_2V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV]> { 
+ let Latency = 4; + let NumMicroOps = 7; +} +def A57Write_6cyc_1I_6S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS]> { + let Latency = 6; + let NumMicroOps = 7; +} +def A57Write_9cyc_1I_2L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 9; + let NumMicroOps = 7; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 8 micro-op types + +def A57Write_10cyc_1I_3L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 10; + let NumMicroOps = 8; +} +def A57Write_11cyc_4L_4V : SchedWriteRes<[A57UnitL, A57UnitL, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 11; + let NumMicroOps = 8; +} +def A57Write_8cyc_8S : SchedWriteRes<[A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 8; + let NumMicroOps = 8; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 9 micro-op types + +def A57Write_8cyc_1I_8S : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitS, A57UnitS]> { + let Latency = 8; + let NumMicroOps = 9; +} +def A57Write_11cyc_1I_4L_4V : SchedWriteRes<[A57UnitI, + A57UnitL, A57UnitL, + A57UnitL, A57UnitL, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 11; + let NumMicroOps = 9; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 10 micro-op types + +def A57Write_6cyc_6S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 10; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 11 micro-op types + +def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 6; + let NumMicroOps = 11; +} + + +//===----------------------------------------------------------------------===// +// Define Generic 12 micro-op types + +def A57Write_8cyc_8S_4V : SchedWriteRes<[A57UnitS, A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 12; +} + +//===----------------------------------------------------------------------===// +// Define Generic 13 micro-op types + +def A57Write_8cyc_1I_8S_4V : SchedWriteRes<[A57UnitI, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, A57UnitS, + A57UnitS, A57UnitS, + A57UnitV, A57UnitV, + A57UnitV, A57UnitV]> { + let Latency = 8; + let NumMicroOps = 13; +} + diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp index 5c65b75..1bf64fc 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.cpp @@ -16,9 +16,8 @@ using namespace llvm; #define DEBUG_TYPE "aarch64-selectiondag-info" -AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const TargetMachine &TM) - : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget<AArch64Subtarget>()) {} +AArch64SelectionDAGInfo::AArch64SelectionDAGInfo(const DataLayout *DL) + : TargetSelectionDAGInfo(DL) {} AArch64SelectionDAGInfo::~AArch64SelectionDAGInfo() {} @@ 
-30,7 +29,9 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); ConstantSDNode *SizeValue = dyn_cast<ConstantSDNode>(Size); const char *bzeroEntry = - (V && V->isNullValue()) ? Subtarget->getBZeroEntry() : nullptr; + (V && V->isNullValue()) + ? DAG.getTarget().getSubtarget<AArch64Subtarget>().getBZeroEntry() + : nullptr; // For small size (< 256), it is not beneficial to use bzero // instead of memset. if (bzeroEntry && (!SizeValue || SizeValue->getZExtValue() > 256)) { @@ -50,7 +51,7 @@ SDValue AArch64SelectionDAGInfo::EmitTargetCodeForMemset( TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0) + DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args), 0) .setDiscardResult(); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); return CallResult.second; diff --git a/lib/Target/AArch64/AArch64SelectionDAGInfo.h b/lib/Target/AArch64/AArch64SelectionDAGInfo.h index 8381f99..1180eea 100644 --- a/lib/Target/AArch64/AArch64SelectionDAGInfo.h +++ b/lib/Target/AArch64/AArch64SelectionDAGInfo.h @@ -19,12 +19,8 @@ namespace llvm { class AArch64SelectionDAGInfo : public TargetSelectionDAGInfo { - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can - /// make the right decision when generating code for different targets. - const AArch64Subtarget *Subtarget; - public: - explicit AArch64SelectionDAGInfo(const TargetMachine &TM); + explicit AArch64SelectionDAGInfo(const DataLayout *DL); ~AArch64SelectionDAGInfo(); SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/lib/Target/AArch64/AArch64Subtarget.cpp b/lib/Target/AArch64/AArch64Subtarget.cpp index cd69994..bb0b72c 100644 --- a/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/lib/Target/AArch64/AArch64Subtarget.cpp @@ -30,21 +30,35 @@ static cl::opt<bool> EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " "converter pass"), cl::init(true), cl::Hidden); -AArch64Subtarget::AArch64Subtarget(const std::string &TT, - const std::string &CPU, - const std::string &FS, bool LittleEndian) - : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), - HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), - HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU), - TargetTriple(TT), IsLittleEndian(LittleEndian) { +AArch64Subtarget & +AArch64Subtarget::initializeSubtargetDependencies(StringRef FS) { // Determine default and user-specified characteristics if (CPUString.empty()) CPUString = "generic"; ParseSubtargetFeatures(CPUString, FS); + return *this; } +AArch64Subtarget::AArch64Subtarget(const std::string &TT, + const std::string &CPU, + const std::string &FS, TargetMachine &TM, + bool LittleEndian) + : AArch64GenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), + HasFPARMv8(false), HasNEON(false), HasCrypto(false), HasCRC(false), + HasZeroCycleRegMove(false), HasZeroCycleZeroing(false), CPUString(CPU), + TargetTriple(TT), + // This nested ternary is horrible, but DL needs to be properly + // initialized + // before TLInfo is constructed. + DL(isTargetMachO() + ? "e-m:o-i64:64-i128:128-n32:64-S128" + : (LittleEndian ? 
"e-m:e-i64:64-i128:128-n32:64-S128" + : "E-m:e-i64:64-i128:128-n32:64-S128")), + FrameLowering(), InstrInfo(initializeSubtargetDependencies(FS)), + TSInfo(&DL), TLInfo(TM) {} + /// ClassifyGlobalReference - Find the target operand flags that describe /// how a global value should be referenced for the current subtarget. unsigned char diff --git a/lib/Target/AArch64/AArch64Subtarget.h b/lib/Target/AArch64/AArch64Subtarget.h index 590ea05..52124f6 100644 --- a/lib/Target/AArch64/AArch64Subtarget.h +++ b/lib/Target/AArch64/AArch64Subtarget.h @@ -14,8 +14,13 @@ #ifndef AArch64SUBTARGET_H #define AArch64SUBTARGET_H -#include "llvm/Target/TargetSubtargetInfo.h" +#include "AArch64InstrInfo.h" +#include "AArch64FrameLowering.h" +#include "AArch64ISelLowering.h" #include "AArch64RegisterInfo.h" +#include "AArch64SelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include <string> #define GET_SUBTARGETINFO_HEADER @@ -49,15 +54,32 @@ protected: /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; - /// IsLittleEndian - Is the target little endian? - bool IsLittleEndian; + const DataLayout DL; + AArch64FrameLowering FrameLowering; + AArch64InstrInfo InstrInfo; + AArch64SelectionDAGInfo TSInfo; + AArch64TargetLowering TLInfo; +private: + /// initializeSubtargetDependencies - Initializes using CPUString and the + /// passed in feature string so that we can use initializer lists for + /// subtarget initialization. + AArch64Subtarget &initializeSubtargetDependencies(StringRef FS); public: /// This constructor initializes the data members to match that /// of the specified triple. AArch64Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool LittleEndian); - + const std::string &FS, TargetMachine &TM, bool LittleEndian); + + const AArch64SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + const AArch64FrameLowering *getFrameLowering() const { + return &FrameLowering; + } + const AArch64TargetLowering *getTargetLowering() const { + return &TLInfo; + } + const AArch64InstrInfo *getInstrInfo() const { return &InstrInfo; } + const DataLayout *getDataLayout() const { return &DL; } bool enableMachineScheduler() const override { return true; } bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; } @@ -69,7 +91,7 @@ public: bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } - bool isLittleEndian() const { return IsLittleEndian; } + bool isLittleEndian() const { return DL.isLittleEndian(); } bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); } diff --git a/lib/Target/AArch64/AArch64TargetMachine.cpp b/lib/Target/AArch64/AArch64TargetMachine.cpp index 0b5dd2f..f99b90b 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -53,6 +53,12 @@ static cl::opt<bool> EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair" " optimization pass"), cl::init(true), cl::Hidden); +static cl::opt<bool> +EnableAtomicTidy("aarch64-atomic-cfg-tidy", cl::Hidden, + cl::desc("Run SimplifyCFG after expanding atomic operations" + " to make use of cmpxchg flow-based information"), + cl::init(true)); + extern "C" void LLVMInitializeAArch64Target() { // Register the target. 
RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget); @@ -71,16 +77,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool LittleEndian) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, LittleEndian), - // This nested ternary is horrible, but DL needs to be properly - // initialized - // before TLInfo is constructed. - DL(Subtarget.isTargetMachO() - ? "e-m:o-i64:64-i128:128-n32:64-S128" - : (LittleEndian ? "e-m:e-i64:64-i128:128-n32:64-S128" - : "E-m:e-i64:64-i128:128-n32:64-S128")), - InstrInfo(Subtarget), TLInfo(*this), FrameLowering(*this, Subtarget), - TSInfo(*this) { + Subtarget(TT, CPU, FS, *this, LittleEndian) { initAsmInfo(); } @@ -113,6 +110,7 @@ public: return getTM<AArch64TargetMachine>(); } + void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; bool addILPOpts() override; @@ -135,6 +133,20 @@ TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { return new AArch64PassConfig(this, PM); } +void AArch64PassConfig::addIRPasses() { + // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg + // ourselves. + addPass(createAtomicExpandLoadLinkedPass(TM)); + + // Cmpxchg instructions are often used with a subsequent comparison to + // determine whether it succeeded. We can exploit existing control-flow in + // ldrex/strex loops to simplify this, but it needs tidying up. + if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) + addPass(createCFGSimplificationPass()); + + TargetPassConfig::addIRPasses(); +} + // Pass Pipeline Configuration bool AArch64PassConfig::addPreISel() { // Run promote constant before global merge, so that the promoted constants @@ -146,10 +158,6 @@ bool AArch64PassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None) addPass(createAArch64AddressTypePromotionPass()); - // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg - // ourselves. 
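The motivation for the new addIRPasses hook: after createAtomicExpandLoadLinkedPass turns a cmpxchg into an ldaxr/stlxr loop, the success test that almost always follows can be folded into the loop's own branches, which is what the extra SimplifyCFG run is for. A typical source-level shape of the idiom (standard C++, shown only to make the comment concrete):

#include <atomic>

// compare_exchange_strong lowers to an ldaxr/stlxr loop once atomics are
// expanded; the bool result below is a branch that SimplifyCFG can merge
// into the loop's existing control flow.
bool try_claim(std::atomic<int> &Flag) {
  int Expected = 0;
  return Flag.compare_exchange_strong(Expected, 1);
}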
- addPass(createAtomicExpandLoadLinkedPass(TM)); - return false; } diff --git a/lib/Target/AArch64/AArch64TargetMachine.h b/lib/Target/AArch64/AArch64TargetMachine.h index 079b19b..852cb3f 100644 --- a/lib/Target/AArch64/AArch64TargetMachine.h +++ b/lib/Target/AArch64/AArch64TargetMachine.h @@ -15,13 +15,9 @@ #define AArch64TARGETMACHINE_H #include "AArch64InstrInfo.h" -#include "AArch64ISelLowering.h" #include "AArch64Subtarget.h" -#include "AArch64FrameLowering.h" -#include "AArch64SelectionDAGInfo.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/MC/MCStreamer.h" namespace llvm { @@ -29,13 +25,6 @@ class AArch64TargetMachine : public LLVMTargetMachine { protected: AArch64Subtarget Subtarget; -private: - const DataLayout DL; - AArch64InstrInfo InstrInfo; - AArch64TargetLowering TLInfo; - AArch64FrameLowering FrameLowering; - AArch64SelectionDAGInfo TSInfo; - public: AArch64TargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, @@ -46,18 +35,22 @@ public: return &Subtarget; } const AArch64TargetLowering *getTargetLowering() const override { - return &TLInfo; + return getSubtargetImpl()->getTargetLowering(); + } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); } - const DataLayout *getDataLayout() const override { return &DL; } const AArch64FrameLowering *getFrameLowering() const override { - return &FrameLowering; + return getSubtargetImpl()->getFrameLowering(); + } + const AArch64InstrInfo *getInstrInfo() const override { + return getSubtargetImpl()->getInstrInfo(); } - const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; } const AArch64RegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); + return &getInstrInfo()->getRegisterInfo(); } const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; + return getSubtargetImpl()->getSelectionDAGInfo(); } // Pass Pipeline Configuration diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 33e482a..1dac14b 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -306,28 +306,64 @@ unsigned AArch64TTI::getCastInstrCost(unsigned Opcode, Type *Dst, static const TypeConversionCostTblEntry<MVT> ConversionTbl[] = { // LowerVectorINT_TO_FP: { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 1 }, - { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, + + // Complex: to v2f32 + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, + { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, + { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, + + // Complex: to v4f32 + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 }, + { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, 
+ { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, + + // Complex: to v2f64 + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, + { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, + { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, + + // LowerVectorFP_TO_INT + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 }, { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 }, { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 1 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 4 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 4 }, - { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 4 }, - { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 4 }, - { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 4 }, - { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 4 }, + + // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext). + { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 }, + { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 }, + { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 }, + { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 }, + { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 }, + { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 }, + + // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2 + { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, + { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 }, + + // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2. + { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 }, + { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 }, + { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 }, }; int Idx = ConvertCostTableLookup<MVT>( diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 65b77c5..c42d11e 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -38,14 +38,19 @@ namespace { class AArch64Operand; class AArch64AsmParser : public MCTargetAsmParser { -public: - typedef SmallVectorImpl<MCParsedAsmOperand *> OperandVector; - private: StringRef Mnemonic; ///< Instruction mnemonic. MCSubtargetInfo &STI; MCAsmParser &Parser; + // Map of register aliases registered via the .req directive.
+ StringMap<std::pair<bool, unsigned> > RegisterReqs; + + AArch64TargetStreamer &getTargetStreamer() { + MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); + return static_cast<AArch64TargetStreamer &>(TS); + } + MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } @@ -54,6 +59,7 @@ private: bool parseSysAlias(StringRef Name, SMLoc NameLoc, OperandVector &Operands); AArch64CC::CondCode parseCondCodeString(StringRef Cond); bool parseCondCode(OperandVector &Operands, bool invertCondCode); + unsigned matchRegisterNameAlias(StringRef Name, bool isVector); int tryParseRegister(); int tryMatchVectorRegister(StringRef &Kind, bool expected); bool parseRegister(OperandVector &Operands); @@ -70,6 +76,10 @@ private: bool parseDirectiveTLSDescCall(SMLoc L); bool parseDirectiveLOH(StringRef LOH, SMLoc L); + bool parseDirectiveLtorg(SMLoc L); + + bool parseDirectiveReq(StringRef Name, SMLoc L); + bool parseDirectiveUnreq(SMLoc L); bool validateInstruction(MCInst &Inst, SmallVectorImpl<SMLoc> &Loc); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, @@ -108,6 +118,8 @@ public: const MCTargetOptions &Options) : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { MCAsmParserExtension::Initialize(_Parser); + if (Parser.getStreamer().getTargetStreamer() == nullptr) + new AArch64TargetStreamer(Parser.getStreamer()); // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); @@ -117,7 +129,7 @@ public: SMLoc NameLoc, OperandVector &Operands) override; bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; bool ParseDirective(AsmToken DirectiveID) override; - unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override; static bool classifySymbolRef(const MCExpr *Expr, @@ -240,10 +252,10 @@ private: // the add<>Operands() calls. 
MCContext &Ctx; +public: AArch64Operand(KindTy K, MCContext &_Ctx) : MCParsedAsmOperand(), Kind(K), Ctx(_Ctx) {} -public: AArch64Operand(const AArch64Operand &o) : MCParsedAsmOperand(), Ctx(o.Ctx) { Kind = o.Kind; StartLoc = o.StartLoc; @@ -607,7 +619,11 @@ public: const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); if (!MCE) return false; - return AArch64_AM::isLogicalImmediate(MCE->getValue(), 32); + int64_t Val = MCE->getValue(); + if (Val >> 32 != 0 && Val >> 32 != ~0LL) + return false; + Val &= 0xFFFFFFFF; + return AArch64_AM::isLogicalImmediate(Val, 32); } bool isLogicalImm64() const { if (!isImm()) @@ -617,6 +633,23 @@ public: return false; return AArch64_AM::isLogicalImmediate(MCE->getValue(), 64); } + bool isLogicalImm32Not() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); + if (!MCE) + return false; + int64_t Val = ~MCE->getValue() & 0xFFFFFFFF; + return AArch64_AM::isLogicalImmediate(Val, 32); + } + bool isLogicalImm64Not() const { + if (!isImm()) + return false; + const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); + if (!MCE) + return false; + return AArch64_AM::isLogicalImmediate(~MCE->getValue(), 64); + } bool isShiftedImm() const { return Kind == k_ShiftedImm; } bool isAddSubImm() const { if (!isShiftedImm() && !isImm()) @@ -1348,7 +1381,8 @@ public: assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); assert(MCE && "Invalid logical immediate operand!"); - uint64_t encoding = AArch64_AM::encodeLogicalImmediate(MCE->getValue(), 32); + uint64_t encoding = + AArch64_AM::encodeLogicalImmediate(MCE->getValue() & 0xFFFFFFFF, 32); Inst.addOperand(MCOperand::CreateImm(encoding)); } @@ -1360,6 +1394,22 @@ public: Inst.addOperand(MCOperand::CreateImm(encoding)); } + void addLogicalImm32NotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); + int64_t Val = ~MCE->getValue() & 0xFFFFFFFF; + uint64_t encoding = AArch64_AM::encodeLogicalImmediate(Val, 32); + Inst.addOperand(MCOperand::CreateImm(encoding)); + } + + void addLogicalImm64NotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + const MCConstantExpr *MCE = cast<MCConstantExpr>(getImm()); + uint64_t encoding = + AArch64_AM::encodeLogicalImmediate(~MCE->getValue(), 64); + Inst.addOperand(MCOperand::CreateImm(encoding)); + } + void addSIMDImmType10Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *MCE = dyn_cast<MCConstantExpr>(getImm()); @@ -1523,9 +1573,9 @@ public: void print(raw_ostream &OS) const override; - static AArch64Operand *CreateToken(StringRef Str, bool IsSuffix, SMLoc S, - MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_Token, Ctx); + static std::unique_ptr<AArch64Operand> + CreateToken(StringRef Str, bool IsSuffix, SMLoc S, MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_Token, Ctx); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->Tok.IsSuffix = IsSuffix; @@ -1534,9 +1584,9 @@ public: return Op; } - static AArch64Operand *CreateReg(unsigned RegNum, bool isVector, SMLoc S, - SMLoc E, MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_Register, Ctx); + static std::unique_ptr<AArch64Operand> + CreateReg(unsigned RegNum, bool isVector, SMLoc S, SMLoc E, MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_Register, 
Ctx); Op->Reg.RegNum = RegNum; Op->Reg.isVector = isVector; Op->StartLoc = S; @@ -1544,10 +1594,10 @@ public: return Op; } - static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count, - unsigned NumElements, char ElementKind, - SMLoc S, SMLoc E, MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_VectorList, Ctx); + static std::unique_ptr<AArch64Operand> + CreateVectorList(unsigned RegNum, unsigned Count, unsigned NumElements, + char ElementKind, SMLoc S, SMLoc E, MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_VectorList, Ctx); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.NumElements = NumElements; @@ -1557,28 +1607,29 @@ public: return Op; } - static AArch64Operand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, - MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_VectorIndex, Ctx); + static std::unique_ptr<AArch64Operand> + CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_VectorIndex, Ctx); Op->VectorIndex.Val = Idx; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static AArch64Operand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, - MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_Immediate, Ctx); + static std::unique_ptr<AArch64Operand> CreateImm(const MCExpr *Val, SMLoc S, + SMLoc E, MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_Immediate, Ctx); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static AArch64Operand *CreateShiftedImm(const MCExpr *Val, - unsigned ShiftAmount, SMLoc S, - SMLoc E, MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_ShiftedImm, Ctx); + static std::unique_ptr<AArch64Operand> CreateShiftedImm(const MCExpr *Val, + unsigned ShiftAmount, + SMLoc S, SMLoc E, + MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_ShiftedImm, Ctx); Op->ShiftedImm .Val = Val; Op->ShiftedImm.ShiftAmount = ShiftAmount; Op->StartLoc = S; @@ -1586,34 +1637,36 @@ public: return Op; } - static AArch64Operand *CreateCondCode(AArch64CC::CondCode Code, SMLoc S, - SMLoc E, MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_CondCode, Ctx); + static std::unique_ptr<AArch64Operand> + CreateCondCode(AArch64CC::CondCode Code, SMLoc S, SMLoc E, MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_CondCode, Ctx); Op->CondCode.Code = Code; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static AArch64Operand *CreateFPImm(unsigned Val, SMLoc S, MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_FPImm, Ctx); + static std::unique_ptr<AArch64Operand> CreateFPImm(unsigned Val, SMLoc S, + MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_FPImm, Ctx); Op->FPImm.Val = Val; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static AArch64Operand *CreateBarrier(unsigned Val, SMLoc S, MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_Barrier, Ctx); + static std::unique_ptr<AArch64Operand> CreateBarrier(unsigned Val, SMLoc S, + MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_Barrier, Ctx); Op->Barrier.Val = Val; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static AArch64Operand *CreateSysReg(StringRef Str, SMLoc S, - uint64_t FeatureBits, MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_SysReg, Ctx); + static std::unique_ptr<AArch64Operand> + CreateSysReg(StringRef Str, SMLoc S, uint64_t FeatureBits, MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_SysReg, Ctx); Op->SysReg.Data = Str.data(); Op->SysReg.Length = 
Str.size(); Op->SysReg.FeatureBits = FeatureBits; @@ -1622,27 +1675,28 @@ public: return Op; } - static AArch64Operand *CreateSysCR(unsigned Val, SMLoc S, SMLoc E, - MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_SysCR, Ctx); + static std::unique_ptr<AArch64Operand> CreateSysCR(unsigned Val, SMLoc S, + SMLoc E, MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_SysCR, Ctx); Op->SysCRImm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static AArch64Operand *CreatePrefetch(unsigned Val, SMLoc S, MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_Prefetch, Ctx); + static std::unique_ptr<AArch64Operand> CreatePrefetch(unsigned Val, SMLoc S, + MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_Prefetch, Ctx); Op->Prefetch.Val = Val; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static AArch64Operand *CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, - unsigned Val, bool HasExplicitAmount, - SMLoc S, SMLoc E, MCContext &Ctx) { - AArch64Operand *Op = new AArch64Operand(k_ShiftExtend, Ctx); + static std::unique_ptr<AArch64Operand> + CreateShiftExtend(AArch64_AM::ShiftExtendType ShOp, unsigned Val, + bool HasExplicitAmount, SMLoc S, SMLoc E, MCContext &Ctx) { + auto Op = make_unique<AArch64Operand>(k_ShiftExtend, Ctx); Op->ShiftExtend.Type = ShOp; Op->ShiftExtend.Amount = Val; Op->ShiftExtend.HasExplicitAmount = HasExplicitAmount; @@ -1816,6 +1870,26 @@ bool AArch64AsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, return (RegNo == (unsigned)-1); } +// Matches a register name or register alias previously defined by '.req' +unsigned AArch64AsmParser::matchRegisterNameAlias(StringRef Name, + bool isVector) { + unsigned RegNum = isVector ? matchVectorRegName(Name) + : MatchRegisterName(Name); + + if (RegNum == 0) { + // Check for aliases registered via .req. Canonicalize to lower case. + // That's more consistent since register names are case insensitive, and + // it's how the original entry was passed in from MC/MCParser/AsmParser. + auto Entry = RegisterReqs.find(Name.lower()); + if (Entry == RegisterReqs.end()) + return 0; + // set RegNum if the match is the right kind of register + if (isVector == Entry->getValue().first) + RegNum = Entry->getValue().second; + } + return RegNum; +} + /// tryParseRegister - Try to parse a register name. The token must be an /// Identifier when called, and if it is a register name the token is eaten and /// the register is added to the operand list. @@ -1824,7 +1898,7 @@ int AArch64AsmParser::tryParseRegister() { assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); std::string lowerCase = Tok.getString().lower(); - unsigned RegNum = MatchRegisterName(lowerCase); + unsigned RegNum = matchRegisterNameAlias(lowerCase, false); // Also handle a few aliases of registers. if (RegNum == 0) RegNum = StringSwitch<unsigned>(lowerCase) @@ -1854,7 +1928,8 @@ int AArch64AsmParser::tryMatchVectorRegister(StringRef &Kind, bool expected) { // a '.'. size_t Start = 0, Next = Name.find('.'); StringRef Head = Name.slice(Start, Next); - unsigned RegNum = matchVectorRegName(Head); + unsigned RegNum = matchRegisterNameAlias(Head, true); + if (RegNum) { if (Next != StringRef::npos) { Kind = Name.slice(Next, StringRef::npos); @@ -2183,8 +2258,11 @@ bool AArch64AsmParser::parseCondCode(OperandVector &Operands, return TokError("invalid condition code"); Parser.Lex(); // Eat identifier token. 
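Stepping back to matchRegisterNameAlias above: the fallback is small enough to model standalone. Below is a minimal sketch of that lookup, with std::map standing in for llvm::StringMap and matchPrimary standing in for the TableGen'erated MatchRegisterName/matchVectorRegName (both stand-ins are inventions of this sketch, not LLVM API):

#include <cassert>
#include <cctype>
#include <map>
#include <string>
#include <utility>

static std::map<std::string, std::pair<bool, unsigned> > RegisterReqs;

static std::string lowered(std::string S) {
  for (char &C : S)
    C = static_cast<char>(std::tolower(static_cast<unsigned char>(C)));
  return S;
}

// Stands in for the TableGen'erated MatchRegisterName/matchVectorRegName.
static unsigned matchPrimary(const std::string &, bool) { return 0; }

static unsigned matchRegisterNameAlias(const std::string &Name,
                                       bool isVector) {
  unsigned RegNum = matchPrimary(Name, isVector);
  if (RegNum == 0) {
    // Aliases are stored lower-cased; register names are case insensitive,
    // so canonicalize before probing the table.
    auto Entry = RegisterReqs.find(lowered(Name));
    if (Entry == RegisterReqs.end())
      return 0;
    // Only accept the alias if it names the right kind of register.
    if (isVector == Entry->second.first)
      RegNum = Entry->second.second;
  }
  return RegNum;
}

int main() {
  RegisterReqs["foo"] = std::make_pair(false, 19u); // foo .req x19
  assert(matchRegisterNameAlias("FOO", false) == 19);
  assert(matchRegisterNameAlias("foo", true) == 0); // wrong register kind
  return 0;
}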
- if (invertCondCode) + if (invertCondCode) { + if (CC == AArch64CC::AL || CC == AArch64CC::NV) + return TokError("condition codes AL and NV are invalid for this instruction"); CC = AArch64CC::getInvertedCondCode(AArch64CC::CondCode(CC)); + } Operands.push_back( AArch64Operand::CreateCondCode(CC, S, getLoc(), getContext())); @@ -2849,7 +2927,7 @@ AArch64AsmParser::tryParseGPR64sp0Operand(OperandVector &Operands) { if (!Tok.is(AsmToken::Identifier)) return MatchOperand_NoMatch; - unsigned RegNum = MatchRegisterName(Tok.getString().lower()); + unsigned RegNum = matchRegisterNameAlias(Tok.getString().lower(), false); MCContext &Ctx = getContext(); const MCRegisterInfo *RI = Ctx.getRegisterInfo(); @@ -3000,6 +3078,43 @@ bool AArch64AsmParser::parseOperand(OperandVector &Operands, bool isCondCode, Operands.push_back(AArch64Operand::CreateImm(ImmVal, S, E, getContext())); return false; } + case AsmToken::Equal: { + SMLoc Loc = Parser.getTok().getLoc(); + if (Mnemonic != "ldr") // only parse for ldr pseudo (e.g. ldr r0, =val) + return Error(Loc, "unexpected token in operand"); + Parser.Lex(); // Eat '=' + const MCExpr *SubExprVal; + if (getParser().parseExpression(SubExprVal)) + return true; + + MCContext& Ctx = getContext(); + E = SMLoc::getFromPointer(Loc.getPointer() - 1); + // If the op is an imm and can be fit into a mov, then replace ldr with mov. + if (isa<MCConstantExpr>(SubExprVal) && Operands.size() >= 2 && + static_cast<AArch64Operand &>(*Operands[1]).isReg()) { + bool IsXReg = AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( + Operands[1]->getReg()); + uint64_t Imm = (cast<MCConstantExpr>(SubExprVal))->getValue(); + uint32_t ShiftAmt = 0, MaxShiftAmt = IsXReg ? 48 : 16; + while(Imm > 0xFFFF && countTrailingZeros(Imm) >= 16) { + ShiftAmt += 16; + Imm >>= 16; + } + if (ShiftAmt <= MaxShiftAmt && Imm <= 0xFFFF) { + Operands[0] = AArch64Operand::CreateToken("movz", false, Loc, Ctx); + Operands.push_back(AArch64Operand::CreateImm( + MCConstantExpr::Create(Imm, Ctx), S, E, Ctx)); + if (ShiftAmt) + Operands.push_back(AArch64Operand::CreateShiftExtend(AArch64_AM::LSL, + ShiftAmt, true, S, E, Ctx)); + return false; + } + } + // If it is a label or an imm that cannot fit in a movz, put it into CP. + const MCExpr *CPLoc = getTargetStreamer().addConstantPoolEntry(SubExprVal); + Operands.push_back(AArch64Operand::CreateImm(CPLoc, S, E, Ctx)); + return false; + } } } @@ -3029,6 +3144,15 @@ bool AArch64AsmParser::ParseInstruction(ParseInstructionInfo &Info, .Case("bnv", "b.nv") .Default(Name); + // First check for the AArch64-specific .req directive. + if (Parser.getTok().is(AsmToken::Identifier) && + Parser.getTok().getIdentifier() == ".req") { + parseDirectiveReq(Name, NameLoc); + // We always return 'error' for this, as we're done with this + // statement and don't need to match the 'instruction." + return true; + } + // Create the leading tokens for the mnemonic, split by '.' characters. 
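The '=' pseudo-operand handling in the hunk above prefers a single movz over a literal-pool load whenever the constant is one 16-bit chunk at a 16-bit-aligned position. A self-contained sketch of just that folding decision (countTrailingZeros64 and foldToMovz are names invented here; the real code uses llvm::countTrailingZeros):

#include <cstdint>
#include <cstdio>

// Count trailing zero bits; stands in for llvm::countTrailingZeros.
static unsigned countTrailingZeros64(uint64_t V) {
  if (V == 0)
    return 64;
  unsigned N = 0;
  while ((V & 1) == 0) {
    V >>= 1;
    ++N;
  }
  return N;
}

// Decide whether 'ldr Rd, =Imm' can be emitted as 'movz Rd, #Imm16, lsl #Sh'.
// Shift the constant right in 16-bit steps while its low 16 bits are zero;
// if what remains fits in 16 bits and the shift is legal for the register
// width (up to 48 for Xn, 16 for Wn), a movz suffices.
static bool foldToMovz(uint64_t Imm, bool IsXReg, uint64_t &Imm16,
                       unsigned &ShiftAmt) {
  const unsigned MaxShiftAmt = IsXReg ? 48 : 16;
  ShiftAmt = 0;
  while (Imm > 0xFFFF && countTrailingZeros64(Imm) >= 16) {
    ShiftAmt += 16;
    Imm >>= 16;
  }
  if (ShiftAmt <= MaxShiftAmt && Imm <= 0xFFFF) {
    Imm16 = Imm;
    return true;
  }
  return false; // Too wide: the value goes to the constant pool instead.
}

int main() {
  uint64_t Imm16;
  unsigned Sh;
  if (foldToMovz(0x12340000ULL, /*IsXReg=*/true, Imm16, Sh))
    printf("movz x0, #0x%llx, lsl #%u\n", (unsigned long long)Imm16, Sh);
  return 0;
}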
// Create the leading tokens for the mnemonic, split by '.' characters. size_t Start = 0, Next = Name.find('.'); StringRef Head = Name.slice(Start, Next); @@ -3443,8 +3567,7 @@ bool AArch64AsmParser::showMatchError(SMLoc Loc, unsigned ErrCode) { case Match_MnemonicFail: return Error(Loc, "unrecognized instruction mnemonic"); default: - assert(0 && "unexpected error code!"); - return Error(Loc, "invalid instruction format"); + llvm_unreachable("unexpected error code!"); } } @@ -3456,23 +3579,23 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, unsigned &ErrorInfo, bool MatchingInlineAsm) { assert(!Operands.empty() && "Unexpected empty operand list!"); - AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[0]); - assert(Op->isToken() && "Leading operand should always be a mnemonic!"); + AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[0]); + assert(Op.isToken() && "Leading operand should always be a mnemonic!"); - StringRef Tok = Op->getToken(); + StringRef Tok = Op.getToken(); unsigned NumOperands = Operands.size(); if (NumOperands == 4 && Tok == "lsl") { - AArch64Operand *Op2 = static_cast<AArch64Operand *>(Operands[2]); - AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]); - if (Op2->isReg() && Op3->isImm()) { - const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm()); + AArch64Operand &Op2 = static_cast<AArch64Operand &>(*Operands[2]); + AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]); + if (Op2.isReg() && Op3.isImm()) { + const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm()); if (Op3CE) { uint64_t Op3Val = Op3CE->getValue(); uint64_t NewOp3Val = 0; uint64_t NewOp4Val = 0; if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains( - Op2->getReg())) { + Op2.getReg())) { NewOp3Val = (32 - Op3Val) & 0x1f; NewOp4Val = 31 - Op3Val; } else { @@ -3484,26 +3607,24 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); Operands[0] = AArch64Operand::CreateToken( - "ubfm", false, Op->getStartLoc(), getContext()); - Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3->getStartLoc(), - Op3->getEndLoc(), getContext()); + "ubfm", false, Op.getStartLoc(), getContext()); Operands.push_back(AArch64Operand::CreateImm( - NewOp4, Op3->getStartLoc(), Op3->getEndLoc(), getContext())); - delete Op3; - delete Op; + NewOp4, Op3.getStartLoc(), Op3.getEndLoc(), getContext())); + Operands[3] = AArch64Operand::CreateImm(NewOp3, Op3.getStartLoc(), + Op3.getEndLoc(), getContext()); } } } else if (NumOperands == 5) { // FIXME: Horrible hack to handle the BFI -> BFM, SBFIZ->SBFM, and // UBFIZ -> UBFM aliases.
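These alias rewrites are pure immediate arithmetic on (lsb, width) pairs. A minimal runnable sketch of both directions, the insert forms handled next and the extract forms a little further down (BFMImms, insertToBFM and extractToBFM are names invented for the sketch):

#include <cassert>
#include <cstdint>

struct BFMImms { uint64_t immr, imms; };

// BFI/SBFIZ/UBFIZ (insert at lsb, width) map to BFM/SBFM/UBFM with
// immr = (RegWidth - lsb) mod RegWidth and imms = width - 1. The assert
// mirrors the "requested insert overflows register" rejection.
static BFMImms insertToBFM(uint64_t Lsb, uint64_t Width, unsigned RegWidth) {
  assert(Lsb < RegWidth && Width >= 1 && Width <= RegWidth - Lsb);
  return { (RegWidth - Lsb) & (RegWidth - 1), Width - 1 };
}

// BFXIL/SBFX/UBFX (extract at lsb, width) keep immr = lsb and use
// imms = lsb + width - 1; overflowing the register width is rejected.
static BFMImms extractToBFM(uint64_t Lsb, uint64_t Width, unsigned RegWidth) {
  assert(Lsb < RegWidth && Width >= 1 && Lsb + Width <= RegWidth);
  return { Lsb, Lsb + Width - 1 };
}

int main() {
  // ubfx w0, w1, #8, #4  ==  ubfm w0, w1, #8, #11
  BFMImms E = extractToBFM(8, 4, 32);
  assert(E.immr == 8 && E.imms == 11);
  // bfi x0, x1, #16, #8  ==  bfm x0, x1, #48, #7
  BFMImms I = insertToBFM(16, 8, 64);
  assert(I.immr == 48 && I.imms == 7);
  return 0;
}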
if (Tok == "bfi" || Tok == "sbfiz" || Tok == "ubfiz") { - AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]); - AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]); - AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]); + AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]); + AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]); + AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]); - if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { - const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm()); - const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm()); + if (Op1.isReg() && Op3.isImm() && Op4.isImm()) { + const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm()); + const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm()); if (Op3CE && Op4CE) { uint64_t Op3Val = Op3CE->getValue(); @@ -3511,21 +3632,21 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, uint64_t RegWidth = 0; if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( - Op1->getReg())) + Op1.getReg())) RegWidth = 64; else RegWidth = 32; if (Op3Val >= RegWidth) - return Error(Op3->getStartLoc(), + return Error(Op3.getStartLoc(), "expected integer in range [0, 31]"); if (Op4Val < 1 || Op4Val > RegWidth) - return Error(Op4->getStartLoc(), + return Error(Op4.getStartLoc(), "expected integer in range [1, 32]"); uint64_t NewOp3Val = 0; if (AArch64MCRegisterClasses[AArch64::GPR32allRegClassID].contains( - Op1->getReg())) + Op1.getReg())) NewOp3Val = (32 - Op3Val) & 0x1f; else NewOp3Val = (64 - Op3Val) & 0x3f; @@ -3533,7 +3654,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, uint64_t NewOp4Val = Op4Val - 1; if (NewOp3Val != 0 && NewOp4Val >= NewOp3Val) - return Error(Op4->getStartLoc(), + return Error(Op4.getStartLoc(), "requested insert overflows register"); const MCExpr *NewOp3 = @@ -3541,24 +3662,20 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); Operands[3] = AArch64Operand::CreateImm( - NewOp3, Op3->getStartLoc(), Op3->getEndLoc(), getContext()); + NewOp3, Op3.getStartLoc(), Op3.getEndLoc(), getContext()); Operands[4] = AArch64Operand::CreateImm( - NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext()); + NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext()); if (Tok == "bfi") Operands[0] = AArch64Operand::CreateToken( - "bfm", false, Op->getStartLoc(), getContext()); + "bfm", false, Op.getStartLoc(), getContext()); else if (Tok == "sbfiz") Operands[0] = AArch64Operand::CreateToken( - "sbfm", false, Op->getStartLoc(), getContext()); + "sbfm", false, Op.getStartLoc(), getContext()); else if (Tok == "ubfiz") Operands[0] = AArch64Operand::CreateToken( - "ubfm", false, Op->getStartLoc(), getContext()); + "ubfm", false, Op.getStartLoc(), getContext()); else llvm_unreachable("No valid mnemonic for alias?"); - - delete Op; - delete Op3; - delete Op4; } } @@ -3566,13 +3683,13 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // UBFX -> UBFM aliases. 
} else if (NumOperands == 5 && (Tok == "bfxil" || Tok == "sbfx" || Tok == "ubfx")) { - AArch64Operand *Op1 = static_cast<AArch64Operand *>(Operands[1]); - AArch64Operand *Op3 = static_cast<AArch64Operand *>(Operands[3]); - AArch64Operand *Op4 = static_cast<AArch64Operand *>(Operands[4]); + AArch64Operand &Op1 = static_cast<AArch64Operand &>(*Operands[1]); + AArch64Operand &Op3 = static_cast<AArch64Operand &>(*Operands[3]); + AArch64Operand &Op4 = static_cast<AArch64Operand &>(*Operands[4]); - if (Op1->isReg() && Op3->isImm() && Op4->isImm()) { - const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3->getImm()); - const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4->getImm()); + if (Op1.isReg() && Op3.isImm() && Op4.isImm()) { + const MCConstantExpr *Op3CE = dyn_cast<MCConstantExpr>(Op3.getImm()); + const MCConstantExpr *Op4CE = dyn_cast<MCConstantExpr>(Op4.getImm()); if (Op3CE && Op4CE) { uint64_t Op3Val = Op3CE->getValue(); @@ -3580,42 +3697,39 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, uint64_t RegWidth = 0; if (AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( - Op1->getReg())) + Op1.getReg())) RegWidth = 64; else RegWidth = 32; if (Op3Val >= RegWidth) - return Error(Op3->getStartLoc(), + return Error(Op3.getStartLoc(), "expected integer in range [0, 31]"); if (Op4Val < 1 || Op4Val > RegWidth) - return Error(Op4->getStartLoc(), + return Error(Op4.getStartLoc(), "expected integer in range [1, 32]"); uint64_t NewOp4Val = Op3Val + Op4Val - 1; if (NewOp4Val >= RegWidth || NewOp4Val < Op3Val) - return Error(Op4->getStartLoc(), + return Error(Op4.getStartLoc(), "requested extract overflows register"); const MCExpr *NewOp4 = MCConstantExpr::Create(NewOp4Val, getContext()); Operands[4] = AArch64Operand::CreateImm( - NewOp4, Op4->getStartLoc(), Op4->getEndLoc(), getContext()); + NewOp4, Op4.getStartLoc(), Op4.getEndLoc(), getContext()); if (Tok == "bfxil") Operands[0] = AArch64Operand::CreateToken( - "bfm", false, Op->getStartLoc(), getContext()); + "bfm", false, Op.getStartLoc(), getContext()); else if (Tok == "sbfx") Operands[0] = AArch64Operand::CreateToken( - "sbfm", false, Op->getStartLoc(), getContext()); + "sbfm", false, Op.getStartLoc(), getContext()); else if (Tok == "ubfx") Operands[0] = AArch64Operand::CreateToken( - "ubfm", false, Op->getStartLoc(), getContext()); + "ubfm", false, Op.getStartLoc(), getContext()); else llvm_unreachable("No valid mnemonic for alias?"); - - delete Op; - delete Op4; } } } @@ -3626,63 +3740,58 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (NumOperands == 3 && (Tok == "sxtw" || Tok == "uxtw")) { // The source register can be Wn here, but the matcher expects a // GPR64. Twiddle it here if necessary. 
- AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]); - if (Op->isReg()) { - unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete Op; + AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]); + if (Op.isReg()) { + unsigned Reg = getXRegFromWReg(Op.getReg()); + Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(), + Op.getEndLoc(), getContext()); } } // FIXME: Likewise for sxt[bh] with a Xd dst operand else if (NumOperands == 3 && (Tok == "sxtb" || Tok == "sxth")) { - AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]); - if (Op->isReg() && + AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]); + if (Op.isReg() && AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( - Op->getReg())) { + Op.getReg())) { // The source register can be Wn here, but the matcher expects a // GPR64. Twiddle it here if necessary. - AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[2]); - if (Op->isReg()) { - unsigned Reg = getXRegFromWReg(Op->getReg()); - Operands[2] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete Op; + AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[2]); + if (Op.isReg()) { + unsigned Reg = getXRegFromWReg(Op.getReg()); + Operands[2] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(), + Op.getEndLoc(), getContext()); } } } // FIXME: Likewise for uxt[bh] with a Xd dst operand else if (NumOperands == 3 && (Tok == "uxtb" || Tok == "uxth")) { - AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]); - if (Op->isReg() && + AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]); + if (Op.isReg() && AArch64MCRegisterClasses[AArch64::GPR64allRegClassID].contains( - Op->getReg())) { + Op.getReg())) { // The source register can be Wn here, but the matcher expects a // GPR32. Twiddle it here if necessary. - AArch64Operand *Op = static_cast<AArch64Operand *>(Operands[1]); - if (Op->isReg()) { - unsigned Reg = getWRegFromXReg(Op->getReg()); - Operands[1] = AArch64Operand::CreateReg(Reg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete Op; + AArch64Operand &Op = static_cast<AArch64Operand &>(*Operands[1]); + if (Op.isReg()) { + unsigned Reg = getWRegFromXReg(Op.getReg()); + Operands[1] = AArch64Operand::CreateReg(Reg, false, Op.getStartLoc(), + Op.getEndLoc(), getContext()); } } } // Yet another horrible hack to handle FMOV Rd, #0.0 using [WX]ZR. if (NumOperands == 3 && Tok == "fmov") { - AArch64Operand *RegOp = static_cast<AArch64Operand *>(Operands[1]); - AArch64Operand *ImmOp = static_cast<AArch64Operand *>(Operands[2]); - if (RegOp->isReg() && ImmOp->isFPImm() && - ImmOp->getFPImm() == (unsigned)-1) { + AArch64Operand &RegOp = static_cast<AArch64Operand &>(*Operands[1]); + AArch64Operand &ImmOp = static_cast<AArch64Operand &>(*Operands[2]); + if (RegOp.isReg() && ImmOp.isFPImm() && ImmOp.getFPImm() == (unsigned)-1) { unsigned zreg = AArch64MCRegisterClasses[AArch64::FPR32RegClassID].contains( - RegOp->getReg()) + RegOp.getReg()) ? 
AArch64::WZR : AArch64::XZR; - Operands[2] = AArch64Operand::CreateReg(zreg, false, Op->getStartLoc(), - Op->getEndLoc(), getContext()); - delete ImmOp; + Operands[2] = AArch64Operand::CreateReg(zreg, false, Op.getStartLoc(), + Op.getEndLoc(), getContext()); } } @@ -3735,14 +3844,14 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(); + ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } // If the match failed on a suffix token operand, tweak the diagnostic // accordingly. - if (((AArch64Operand *)Operands[ErrorInfo])->isToken() && - ((AArch64Operand *)Operands[ErrorInfo])->isTokenSuffix()) + if (((AArch64Operand &)*Operands[ErrorInfo]).isToken() && + ((AArch64Operand &)*Operands[ErrorInfo]).isTokenSuffix()) MatchResult = Match_InvalidSuffix; return showMatchError(ErrorLoc, MatchResult); @@ -3794,9 +3903,11 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_InvalidLabel: case Match_MSR: case Match_MRS: { + if (ErrorInfo >= Operands.size()) + return Error(IDLoc, "too few operands for instruction"); // Any time we get here, there's nothing fancy to do. Just get the // operand SMLoc and display the diagnostic. - SMLoc ErrorLoc = ((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(); + SMLoc ErrorLoc = ((AArch64Operand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; return showMatchError(ErrorLoc, MatchResult); @@ -3819,6 +3930,10 @@ bool AArch64AsmParser::ParseDirective(AsmToken DirectiveID) { return parseDirectiveWord(8, Loc); if (IDVal == ".tlsdesccall") return parseDirectiveTLSDescCall(Loc); + if (IDVal == ".ltorg" || IDVal == ".pool") + return parseDirectiveLtorg(Loc); + if (IDVal == ".unreq") + return parseDirectiveUnreq(DirectiveID.getLoc()); return parseDirectiveLOH(IDVal, Loc); } @@ -3920,6 +4035,66 @@ bool AArch64AsmParser::parseDirectiveLOH(StringRef IDVal, SMLoc Loc) { return false; } +/// parseDirectiveLtorg +/// ::= .ltorg | .pool +bool AArch64AsmParser::parseDirectiveLtorg(SMLoc L) { + getTargetStreamer().emitCurrentConstantPool(); + return false; +} + +/// parseDirectiveReq +/// ::= name .req registername +bool AArch64AsmParser::parseDirectiveReq(StringRef Name, SMLoc L) { + Parser.Lex(); // Eat the '.req' token. + SMLoc SRegLoc = getLoc(); + unsigned RegNum = tryParseRegister(); + bool IsVector = false; + + if (RegNum == static_cast<unsigned>(-1)) { + StringRef Kind; + RegNum = tryMatchVectorRegister(Kind, false); + if (!Kind.empty()) { + Error(SRegLoc, "vector register without type specifier expected"); + return false; + } + IsVector = true; + } + + if (RegNum == static_cast<unsigned>(-1)) { + Parser.eatToEndOfStatement(); + Error(SRegLoc, "register name or alias expected"); + return false; + } + + // Shouldn't be anything else. 
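The directive must be the last thing on its line, which the end-of-statement check below enforces. Taken in isolation, the .req/.unreq bookkeeping amounts to a define-once map, as in this sketch (std::map replaces llvm::StringMap; directiveReq and directiveUnreq are invented names):

#include <cstdio>
#include <map>
#include <string>
#include <utility>

typedef std::pair<bool, unsigned> RegAlias; // (is vector, register number)
static std::map<std::string, RegAlias> RegisterReqs;

// name .req register: the first definition wins; a conflicting
// redefinition is ignored with a warning (GetOrCreateValue semantics).
static void directiveReq(const std::string &Name, bool IsVector,
                         unsigned Reg) {
  RegAlias Val(IsVector, Reg);
  auto Ins = RegisterReqs.insert(std::make_pair(Name, Val));
  if (!Ins.second && Ins.first->second != Val)
    fprintf(stderr, "warning: ignoring redefinition of register alias '%s'\n",
            Name.c_str());
}

// .unreq name: drop the alias; removing an unknown alias is a no-op.
static void directiveUnreq(const std::string &Name) {
  RegisterReqs.erase(Name);
}

int main() {
  directiveReq("foo", false, 19); // foo .req x19
  directiveReq("foo", false, 20); // warns and keeps x19
  directiveUnreq("foo");          // .unreq foo
  return 0;
}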
+ if (Parser.getTok().isNot(AsmToken::EndOfStatement)) { + Error(Parser.getTok().getLoc(), "unexpected input in .req directive"); + Parser.eatToEndOfStatement(); + return false; + } + + Parser.Lex(); // Consume the EndOfStatement + + auto pair = std::make_pair(IsVector, RegNum); + if (RegisterReqs.GetOrCreateValue(Name, pair).getValue() != pair) + Warning(L, "ignoring redefinition of register alias '" + Name + "'"); + + return true; +} + +/// parseDirectiveUnreq +/// ::= .unreq registername +bool AArch64AsmParser::parseDirectiveUnreq(SMLoc L) { + if (Parser.getTok().isNot(AsmToken::Identifier)) { + Error(Parser.getTok().getLoc(), "unexpected input in .unreq directive."); + Parser.eatToEndOfStatement(); + return false; + } + RegisterReqs.erase(Parser.getTok().getIdentifier().lower()); + Parser.Lex(); // Eat the identifier. + return false; +} + bool AArch64AsmParser::classifySymbolRef(const MCExpr *Expr, AArch64MCExpr::VariantKind &ELFRefKind, @@ -3986,9 +4161,9 @@ extern "C" void LLVMInitializeAArch64AsmParser() { // Define this matcher function after the auto-generated include so we // have the match class enum definitions. -unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, +unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, unsigned Kind) { - AArch64Operand *Op = static_cast<AArch64Operand *>(AsmOp); + AArch64Operand &Op = static_cast<AArch64Operand &>(AsmOp); // If the kind is a token for a literal immediate, check if our asm // operand matches. This is for InstAliases which have a fixed-value // immediate in the syntax. @@ -4036,9 +4211,9 @@ unsigned AArch64AsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, ExpectedVal = 8; break; } - if (!Op->isImm()) + if (!Op.isImm()) return Match_InvalidOperand; - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()); if (!CE) return Match_InvalidOperand; if (CE->getValue() == ExpectedVal) diff --git a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp index 2466368..2057c51 100644 --- a/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp +++ b/lib/Target/AArch64/Disassembler/AArch64ExternalSymbolizer.cpp @@ -37,8 +37,7 @@ getVariant(uint64_t LLVMDisassembler_VariantKind) { case LLVMDisassembler_VariantKind_ARM64_TLVP: case LLVMDisassembler_VariantKind_ARM64_TLVOFF: default: - assert(0 && "bad LLVMDisassembler_VariantKind"); - return MCSymbolRefExpr::VK_None; + llvm_unreachable("bad LLVMDisassembler_VariantKind"); } } diff --git a/lib/Target/AArch64/Disassembler/CMakeLists.txt b/lib/Target/AArch64/Disassembler/CMakeLists.txt index be4ccad..d64c05b 100644 --- a/lib/Target/AArch64/Disassembler/CMakeLists.txt +++ b/lib/Target/AArch64/Disassembler/CMakeLists.txt @@ -4,11 +4,5 @@ add_llvm_library(LLVMAArch64Disassembler AArch64Disassembler.cpp AArch64ExternalSymbolizer.cpp ) -# workaround for hanging compilation on MSVC8, 9 and 10 -#if( MSVC_VERSION EQUAL 1400 OR MSVC_VERSION EQUAL 1500 OR MSVC_VERSION EQUAL 1600 ) -#set_property( -# SOURCE ARMDisassembler.cpp -# PROPERTY COMPILE_FLAGS "/Od" -# ) -#endif() + add_dependencies(LLVMAArch64Disassembler AArch64CommonTableGen) diff --git a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp index f484a5b..8a21f06 100644 --- a/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp +++
b/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp @@ -918,7 +918,7 @@ void AArch64InstPrinter::printPostIncOperand(const MCInst *MI, unsigned OpNo, else O << getRegisterName(Reg); } else - assert(0 && "unknown operand kind in printPostIncOperand64"); + llvm_unreachable("unknown operand kind in printPostIncOperand64"); } void AArch64InstPrinter::printVRegOperand(const MCInst *MI, unsigned OpNo, @@ -1109,7 +1109,7 @@ static unsigned getNextVectorRegister(unsigned Reg, unsigned Stride = 1) { while (Stride--) { switch (Reg) { default: - assert(0 && "Vector register expected!"); + llvm_unreachable("Vector register expected!"); case AArch64::Q0: Reg = AArch64::Q1; break; case AArch64::Q1: Reg = AArch64::Q2; break; case AArch64::Q2: Reg = AArch64::Q3; break; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index d8900d4..a917616 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -86,7 +86,7 @@ public: static unsigned getFixupKindNumBytes(unsigned Kind) { switch (Kind) { default: - assert(0 && "Unknown fixup kind!"); + llvm_unreachable("Unknown fixup kind!"); case AArch64::fixup_aarch64_tlsdesc_call: return 0; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index dc4a8bf..1763b40 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -96,4 +96,6 @@ AArch64MCAsmInfoELF::AArch64MCAsmInfoELF(StringRef TT) { ExceptionsType = ExceptionHandling::DwarfCFI; UseIntegratedAssembler = true; + + HasIdentDirective = true; } diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index 464a18c..f051357 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -218,13 +218,9 @@ AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI, const MCOperand &MO, const MCSubtargetInfo &STI) const { if (MO.isReg()) return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg()); - else { - assert(MO.isImm() && "did not expect relocated expression"); - return static_cast<unsigned>(MO.getImm()); - } - assert(0 && "Unable to encode MCOperand!"); - return 0; + assert(MO.isImm() && "did not expect relocated expression"); + return static_cast<unsigned>(MO.getImm()); } template<unsigned FixupKind> uint32_t diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp index 85c3ec7..42a6787 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp @@ -81,37 +81,8 @@ void AArch64MCExpr::PrintImpl(raw_ostream &OS) const { OS << *Expr; } -// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps -// that method should be made public? -// FIXME: really do above: now that two backends are using it. 
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - break; - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbolsImpl(BE->getLHS(), Asm); - AddValueSymbolsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); - break; - } -} - -void AArch64MCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbolsImpl(getSubExpr(), Asm); +void AArch64MCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); } const MCSection *AArch64MCExpr::FindAssociatedSection() const { diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h index e869ed0..5422f9d 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h @@ -147,7 +147,7 @@ public: void PrintImpl(raw_ostream &OS) const override; - void AddValueSymbols(MCAssembler *) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; const MCSection *FindAssociatedSection() const override; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp index 5c86189..ba95366 100644 --- a/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp +++ b/lib/Target/AArch64/MCTargetDesc/AArch64MachObjectWriter.cpp @@ -75,7 +75,7 @@ bool AArch64MachObjectWriter::getAArch64FixupKindMachOInfo( Log2Size = llvm::Log2_32(4); switch (Sym->getKind()) { default: - assert(0 && "Unexpected symbol reference variant kind!"); + llvm_unreachable("Unexpected symbol reference variant kind!"); case MCSymbolRefExpr::VK_PAGEOFF: RelocType = unsigned(MachO::ARM64_RELOC_PAGEOFF12); return true; diff --git a/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp new file mode 100644 index 0000000..f9aeb35 --- /dev/null +++ b/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp @@ -0,0 +1,40 @@ +//===- AArch64TargetStreamer.cpp - AArch64TargetStreamer class -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the AArch64TargetStreamer class. +// +//===----------------------------------------------------------------------===// +#include "llvm/ADT/MapVector.h" +#include "llvm/MC/ConstantPools.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" + +using namespace llvm; + +// +// AArch64TargetStreamer Implementation +// +AArch64TargetStreamer::AArch64TargetStreamer(MCStreamer &S) + : MCTargetStreamer(S), ConstantPools(new AssemblerConstantPools()) {} + +AArch64TargetStreamer::~AArch64TargetStreamer() {} + +// The constant pool handling is shared by all AArch64TargetStreamer +// implementations.
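Conceptually, that shared handling is a list of pending (label, value) entries flushed on demand. A deliberately simplified standalone model (the real AssemblerConstantPools keys one pool per MCSection and stores MCExpr nodes; ConstantPool below is an invention of the sketch):

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// One pool of pending literals. addEntry hands back the label the ldr
// pseudo-instruction should reference; emitForCurrentSection flushes
// everything accumulated since the last .ltorg/.pool.
struct ConstantPool {
  std::vector<std::pair<std::string, long long> > Entries;
  unsigned NextID = 0;

  std::string addEntry(long long Value) {
    std::string Label = ".Lcp" + std::to_string(NextID++);
    Entries.push_back(std::make_pair(Label, Value));
    return Label;
  }

  // Called for .ltorg/.pool, and once more at the end of assembly so that
  // no non-empty pool is lost (mirroring finish() below).
  void emitForCurrentSection() {
    for (const auto &E : Entries)
      printf("%s:\n  .xword %lld\n", E.first.c_str(), E.second);
    Entries.clear();
  }
};

int main() {
  ConstantPool CP;
  std::string L = CP.addEntry(0x1234567890abcdefLL);
  printf("  ldr x0, %s\n", L.c_str());
  CP.emitForCurrentSection(); // .ltorg
  return 0;
}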
+const MCExpr *AArch64TargetStreamer::addConstantPoolEntry(const MCExpr *Expr) { + return ConstantPools->addEntry(Streamer, Expr); +} + +void AArch64TargetStreamer::emitCurrentConstantPool() { + ConstantPools->emitForCurrentSection(Streamer); +} + +// finish() - write out any non-empty assembler constant pools. +void AArch64TargetStreamer::finish() { ConstantPools->emitAll(Streamer); } diff --git a/lib/Target/AArch64/MCTargetDesc/Android.mk b/lib/Target/AArch64/MCTargetDesc/Android.mk index e9d2323..a23c0e5 100644 --- a/lib/Target/AArch64/MCTargetDesc/Android.mk +++ b/lib/Target/AArch64/MCTargetDesc/Android.mk @@ -14,7 +14,8 @@ aarch64_mc_desc_SRC_FILES := \ AArch64MCAsmInfo.cpp \ AArch64MCCodeEmitter.cpp \ AArch64MCExpr.cpp \ - AArch64MCTargetDesc.cpp + AArch64MCTargetDesc.cpp \ + AArch64TargetStreamer.cpp # For the host # ===================================================== diff --git a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt index 7d5bced..6d8be5e 100644 --- a/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/AArch64/MCTargetDesc/CMakeLists.txt @@ -7,6 +7,7 @@ add_llvm_library(LLVMAArch64Desc AArch64MCExpr.cpp AArch64MCTargetDesc.cpp AArch64MachObjectWriter.cpp + AArch64TargetStreamer.cpp ) add_dependencies(LLVMAArch64Desc AArch64CommonTableGen) diff --git a/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 9e4c389..9d2ce21 100644 --- a/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -233,23 +233,9 @@ inline static const char *getCondCodeName(CondCode Code) { } inline static CondCode getInvertedCondCode(CondCode Code) { - switch (Code) { - default: llvm_unreachable("Unknown condition code"); - case EQ: return NE; - case NE: return EQ; - case HS: return LO; - case LO: return HS; - case MI: return PL; - case PL: return MI; - case VS: return VC; - case VC: return VS; - case HI: return LS; - case LS: return HI; - case GE: return LT; - case LT: return GE; - case GT: return LE; - case LE: return GT; - } + // To invert a condition, flip the low bit of its encoding: the codes are + // laid out in pairs (EQ/NE, HS/LO, MI/PL, VS/VC, HI/LS, GE/LT, GT/LE) that + // differ only in bit 0. + + return static_cast<CondCode>(static_cast<unsigned>(Code) ^ 0x1); } /// Given a condition code, return NZCV flags that would satisfy that condition. diff --git a/lib/Target/ARM/A15SDOptimizer.cpp b/lib/Target/ARM/A15SDOptimizer.cpp index 94faf6f..92eaf9e 100644 --- a/lib/Target/ARM/A15SDOptimizer.cpp +++ b/lib/Target/ARM/A15SDOptimizer.cpp @@ -321,8 +321,7 @@ unsigned A15SDOptimizer::optimizeSDPattern(MachineInstr *MI) { return optimizeAllLanesPattern(MI, MI->getOperand(0).getReg()); } - assert(0 && "Unhandled update pattern!"); - return 0; + llvm_unreachable("Unhandled update pattern!"); } // Return true if this MachineInstr inserts a scalar (SPR) value into diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 55e9fe5..28d2610 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -82,7 +82,8 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { const GlobalValue *GV = dyn_cast<GlobalValue>(CV->stripPointerCasts()); assert(GV && "C++ constructor pointer was not a GlobalValue!"); - const MCExpr *E = MCSymbolRefExpr::Create(getSymbol(GV), + const MCExpr *E = MCSymbolRefExpr::Create(GetARMGVSymbol(GV, + ARMII::MO_NO_FLAG), (Subtarget->isTargetELF() ?
MCSymbolRefExpr::VK_ARM_TARGET1 : MCSymbolRefExpr::VK_None), @@ -164,7 +165,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, else if ((Modifier && strcmp(Modifier, "hi16") == 0) || (TF & ARMII::MO_HI16)) O << ":upper16:"; - O << *getSymbol(GV); + O << *GetARMGVSymbol(GV, TF); printOffset(MO.getOffset(), O); if (TF == ARMII::MO_PLT) @@ -730,6 +731,32 @@ void ARMAsmPrinter::emitAttributes() { if (Subtarget->hasDivideInARMMode() && !Subtarget->hasV8Ops()) ATS.emitAttribute(ARMBuildAttrs::DIV_use, ARMBuildAttrs::AllowDIVExt); + if (MMI) { + if (const Module *SourceModule = MMI->getModule()) { + // ABI_PCS_wchar_t to indicate wchar_t width + // FIXME: There is no way to emit value 0 (wchar_t prohibited). + if (auto WCharWidthValue = cast_or_null<ConstantInt>( + SourceModule->getModuleFlag("wchar_size"))) { + int WCharWidth = WCharWidthValue->getZExtValue(); + assert((WCharWidth == 2 || WCharWidth == 4) && + "wchar_t width must be 2 or 4 bytes"); + ATS.emitAttribute(ARMBuildAttrs::ABI_PCS_wchar_t, WCharWidth); + } + + // ABI_enum_size to indicate enum width + // FIXME: There is no way to emit value 0 (enums prohibited) or value 3 + // (all enums contain a value needing 32 bits to encode). + if (auto EnumWidthValue = cast_or_null<ConstantInt>( + SourceModule->getModuleFlag("min_enum_size"))) { + int EnumWidth = EnumWidthValue->getZExtValue(); + assert((EnumWidth == 1 || EnumWidth == 4) && + "Minimum enum width must be 1 or 4 bytes"); + int EnumBuildAttr = EnumWidth == 1 ? 1 : 2; + ATS.emitAttribute(ARMBuildAttrs::ABI_enum_size, EnumBuildAttr); + } + } + } + if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization()) ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowTZVirtualization); @@ -768,23 +795,41 @@ getModifierVariantKind(ARMCP::ARMCPModifier Modifier) { MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV, unsigned char TargetFlags) { - bool isIndirect = Subtarget->isTargetMachO() && - (TargetFlags & ARMII::MO_NONLAZY) && - Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); - if (!isIndirect) - return getSymbol(GV); + if (Subtarget->isTargetMachO()) { + bool IsIndirect = (TargetFlags & ARMII::MO_NONLAZY) && + Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); + + if (!IsIndirect) + return getSymbol(GV); - // FIXME: Remove this when Darwin transition to @GOT like syntax. - MCSymbol *MCSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); - MachineModuleInfoMachO &MMIMachO = - MMI->getObjFileInfo<MachineModuleInfoMachO>(); - MachineModuleInfoImpl::StubValueTy &StubSym = - GV->hasHiddenVisibility() ? MMIMachO.getHiddenGVStubEntry(MCSym) : - MMIMachO.getGVStubEntry(MCSym); - if (!StubSym.getPointer()) - StubSym = MachineModuleInfoImpl:: - StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); - return MCSym; + // FIXME: Remove this when Darwin transition to @GOT like syntax. + MCSymbol *MCSym = getSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); + MachineModuleInfoMachO &MMIMachO = + MMI->getObjFileInfo<MachineModuleInfoMachO>(); + MachineModuleInfoImpl::StubValueTy &StubSym = + GV->hasHiddenVisibility() ? 
MMIMachO.getHiddenGVStubEntry(MCSym) + : MMIMachO.getGVStubEntry(MCSym); + if (!StubSym.getPointer()) + StubSym = MachineModuleInfoImpl::StubValueTy(getSymbol(GV), + !GV->hasInternalLinkage()); + return MCSym; + } else if (Subtarget->isTargetCOFF()) { + assert(Subtarget->isTargetWindows() && + "Windows is the only supported COFF target"); + + bool IsIndirect = (TargetFlags & ARMII::MO_DLLIMPORT); + if (!IsIndirect) + return getSymbol(GV); + + SmallString<128> Name; + Name = "__imp_"; + getNameWithPrefix(Name, GV); + + return OutContext.GetOrCreateSymbol(Name); + } else if (Subtarget->isTargetELF()) { + return getSymbol(GV); + } + llvm_unreachable("unexpected target"); } void ARMAsmPrinter:: @@ -928,7 +973,7 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) { MachineBasicBlock *MBB = JTBBs[i]; const MCExpr *MBBSymbolExpr = MCSymbolRefExpr::Create(MBB->getSymbol(), - OutContext); + OutContext); // If this isn't a TBB or TBH, the entries are direct branch instructions. if (OffsetWidth == 4) { EmitToStreamer(OutStreamer, MCInstBuilder(ARM::t2B) @@ -1225,8 +1270,10 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Add 's' bit operand (always reg0 for this) .addReg(0)); - const GlobalValue *GV = MI->getOperand(0).getGlobal(); - MCSymbol *GVSym = getSymbol(GV); + const MachineOperand &Op = MI->getOperand(0); + const GlobalValue *GV = Op.getGlobal(); + const unsigned TF = Op.getTargetFlags(); + MCSymbol *GVSym = GetARMGVSymbol(GV, TF); const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); EmitToStreamer(OutStreamer, MCInstBuilder(ARM::Bcc) .addExpr(GVSymExpr) diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index bc266e8..0288db9 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -32,6 +32,7 @@ #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/BranchProbability.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -102,14 +103,15 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) // Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl // currently defaults to no prepass hazard recognizer. -ScheduleHazardRecognizer *ARMBaseInstrInfo:: -CreateTargetHazardRecognizer(const TargetMachine *TM, - const ScheduleDAG *DAG) const { +ScheduleHazardRecognizer * +ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const { if (usePreRAHazardRecognizer()) { - const InstrItineraryData *II = TM->getInstrItineraryData(); + const InstrItineraryData *II = + &static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); } - return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); + return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); } ScheduleHazardRecognizer *ARMBaseInstrInfo:: @@ -1885,7 +1887,8 @@ bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, unsigned NumBytes) { // This optimisation potentially adds lots of load and store // micro-operations; it's really only a benefit to code size.
- if (!Subtarget.isMinSize()) + if (!MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::MinSize)) return false; // If only one register is pushed/popped, LLVM can use an LDR/STR @@ -4358,6 +4361,29 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, MI->addRegisterKilled(DReg, TRI, true); } +void ARMBaseInstrInfo::getUnconditionalBranch( + MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const { + if (Subtarget.isThumb()) + Branch.setOpcode(ARM::tB); + else if (Subtarget.isThumb2()) + Branch.setOpcode(ARM::t2B); + else + Branch.setOpcode(ARM::Bcc); + + Branch.addOperand(MCOperand::CreateExpr(BranchTarget)); + Branch.addOperand(MCOperand::CreateImm(ARMCC::AL)); + Branch.addOperand(MCOperand::CreateReg(0)); +} + +void ARMBaseInstrInfo::getTrap(MCInst &MI) const { + if (Subtarget.isThumb()) + MI.setOpcode(ARM::tTRAP); + else if (Subtarget.useNaClTrap()) + MI.setOpcode(ARM::TRAPNaCl); + else + MI.setOpcode(ARM::TRAP); +} + bool ARMBaseInstrInfo::hasNOP() const { return (Subtarget.getFeatureBits() & ARM::HasV6T2Ops) != 0; } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index 4b3e740..b8d6758 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -50,7 +50,7 @@ public: const ARMSubtarget &getSubtarget() const { return Subtarget; } ScheduleHazardRecognizer * - CreateTargetHazardRecognizer(const TargetMachine *TM, + CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override; ScheduleHazardRecognizer * @@ -229,6 +229,13 @@ public: const TargetRegisterInfo*) const override; void breakPartialRegDependency(MachineBasicBlock::iterator, unsigned, const TargetRegisterInfo *TRI) const override; + + void + getUnconditionalBranch(MCInst &Branch, + const MCSymbolRefExpr *BranchTarget) const override; + + void getTrap(MCInst &MI) const override; + /// Get the number of addresses by LDM or VLDM or zero for unknown. unsigned getNumLDMAddresses(const MachineInstr *MI) const; diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index a2eee9f..cdd91c7 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -45,9 +45,12 @@ using namespace llvm; ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), BasePtr(ARM::R6) { - if (STI.isTargetMachO()) - FramePtr = ARM::R7; - else if (STI.isTargetWindows()) + if (STI.isTargetMachO()) { + if (STI.isTargetDarwin() || STI.isThumb1Only()) + FramePtr = ARM::R7; + else + FramePtr = ARM::R11; + } else if (STI.isTargetWindows()) FramePtr = ARM::R11; else // ARM EABI FramePtr = STI.isThumb() ? 
ARM::R7 : ARM::R11; diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 2fd7edd..5fb6ebf 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -15,6 +15,7 @@ #include "ARM.h" #include "ARMBaseInstrInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" #include "ARMRelocations.h" #include "ARMSubtarget.h" #include "ARMTargetMachine.h" diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 6045738..51d3dbb 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -927,10 +927,16 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, } case ARM::tTPsoft: case ARM::TPsoft: { - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, MI.getDebugLoc(), - TII->get(Opcode == ARM::tTPsoft ? ARM::tBL : ARM::BL)) - .addExternalSymbol("__aeabi_read_tp", 0); + MachineInstrBuilder MIB; + if (Opcode == ARM::tTPsoft) + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get( ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__aeabi_read_tp", 0); + else + MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), + TII->get( ARM::BL)) + .addExternalSymbol("__aeabi_read_tp", 0); MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end()); TransferImpOps(MI, MIB, MIB); diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 6f8fb1a..e2d90cd 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -590,7 +590,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { // Use movw+movt when possible, it avoids constant pool entries. // Non-darwin targets only support static movt relocations in FastISel. - if (Subtarget->useMovt() && + if (Subtarget->useMovt(*FuncInfo.MF) && (Subtarget->isTargetMachO() || RelocM == Reloc::Static)) { unsigned Opc; unsigned char TF = 0; diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 0caf4bf..a67b360 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -39,6 +39,10 @@ static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs); +ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti) + : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), + STI(sti) {} + /// hasFP - Return true if the specified function should have a dedicated frame /// pointer register. This is true if the function has variable sized allocas /// or if frame pointer elimination is disabled. 
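Returning to the ARMBaseRegisterInfo change above: the new frame-pointer selection condenses to a few cases, sketched here (pickFramePointer is an invented name; the booleans mirror the subtarget predicates in the real constructor):

#include <cassert>

enum FPReg { R7, R11 };

// MachO targets now pick R7 only for Darwin or Thumb1-only cores; everything
// else (non-Darwin MachO, Windows, and ARM-mode EABI) uses R11.
static FPReg pickFramePointer(bool IsMachO, bool IsDarwin, bool IsThumb1Only,
                              bool IsWindows, bool IsThumb) {
  if (IsMachO)
    return (IsDarwin || IsThumb1Only) ? R7 : R11;
  if (IsWindows)
    return R11;
  return IsThumb ? R7 : R11; // ARM EABI
}

int main() {
  assert(pickFramePointer(true, true, false, false, false) == R7);   // Darwin
  assert(pickFramePointer(true, false, false, false, false) == R11); // other MachO
  assert(pickFramePointer(false, false, false, false, true) == R7);  // Thumb EABI
  return 0;
}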
@@ -220,7 +224,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetMachO()) { + if (STI.isTargetDarwin()) { GPRCS2Size += 4; break; } @@ -380,7 +384,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetMachO()) + if (STI.isTargetDarwin()) break; // fallthrough case ARM::R0: @@ -445,7 +449,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.isTargetMachO()) { + if (STI.isTargetDarwin()) { unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); unsigned Offset = MFI->getObjectOffset(FI); unsigned CFIIndex = MMI.addFrameInst( @@ -810,7 +814,7 @@ void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, unsigned LastReg = 0; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetMachO())) continue; + if (!(Func)(Reg, STI.isTargetDarwin())) continue; // D-registers in the aligned area DPRCS2 are NOT spilled here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -888,7 +892,7 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, bool DeleteRet = false; for (; i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); - if (!(Func)(Reg, STI.isTargetMachO())) continue; + if (!(Func)(Reg, STI.isTargetDarwin())) continue; // The aligned reloads from area DPRCS2 are not inserted here. if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) @@ -1438,7 +1442,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (Spilled) { NumGPRSpills++; - if (!STI.isTargetMachO()) { + if (!STI.isTargetDarwin()) { if (Reg == ARM::LR) LRSpilled = true; CS1Spilled = true; @@ -1460,7 +1464,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, break; } } else { - if (!STI.isTargetMachO()) { + if (!STI.isTargetDarwin()) { UnspilledCS1GPRs.push_back(Reg); continue; } diff --git a/lib/Target/ARM/ARMFrameLowering.h b/lib/Target/ARM/ARMFrameLowering.h index 981d320..709afbc 100644 --- a/lib/Target/ARM/ARMFrameLowering.h +++ b/lib/Target/ARM/ARMFrameLowering.h @@ -14,7 +14,6 @@ #ifndef ARM_FRAMEINFO_H #define ARM_FRAMEINFO_H -#include "ARMSubtarget.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -25,10 +24,7 @@ protected: const ARMSubtarget &STI; public: - explicit ARMFrameLowering(const ARMSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4), - STI(sti) { - } + explicit ARMFrameLowering(const ARMSubtarget &sti); /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 08d598d..38547cf 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -60,22 +60,17 @@ enum AddrMode2Type { }; class ARMDAGToDAGISel : public SelectionDAGISel { - ARMBaseTargetMachine &TM; - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. 
const ARMSubtarget *Subtarget; public: - explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, - CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), TM(tm), - Subtarget(&TM.getSubtarget<ARMSubtarget>()) { - } + explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) + : SelectionDAGISel(tm, OptLevel) {} bool runOnMachineFunction(MachineFunction &MF) override { // Reset the subtarget each time through. - Subtarget = &TM.getSubtarget<ARMSubtarget>(); + Subtarget = &MF.getTarget().getSubtarget<ARMSubtarget>(); SelectionDAGISel::runOnMachineFunction(MF); return true; } @@ -429,8 +424,8 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (Use->getOpcode() == ISD::CopyToReg) return true; if (Use->isMachineOpcode()) { - const ARMBaseInstrInfo *TII = - static_cast<const ARMBaseInstrInfo*>(TM.getInstrInfo()); + const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( + CurDAG->getTarget().getInstrInfo()); const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); if (MCID.mayStore()) @@ -2444,7 +2439,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::Constant: { unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); bool UseCP = true; - if (Subtarget->useMovt()) + if (Subtarget->useMovt(*MF)) // Thumb2-aware targets have the MOVT instruction, so all immediates can // be done with MOV + MOVT, at worst. UseCP = false; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 00d07e8..4bfa5a8 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -155,16 +155,16 @@ void ARMTargetLowering::addQRTypeForNEON(MVT VT) { addTypeForNEON(VT, MVT::v2f64, MVT::v4i32); } -static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) { - if (TM.getSubtarget<ARMSubtarget>().isTargetMachO()) +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) return new TargetLoweringObjectFileMachO(); - if (TM.getSubtarget<ARMSubtarget>().isTargetWindows()) + if (TT.isOSWindows()) return new TargetLoweringObjectFileCOFF(); return new ARMElfTargetObjectFile(); } ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { + : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { Subtarget = &TM.getSubtarget<ARMSubtarget>(); RegInfo = TM.getRegisterInfo(); Itins = TM.getInstrItineraryData(); @@ -710,7 +710,11 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setExceptionSelectorRegister(ARM::R1); } - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. 
if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { @@ -983,6 +987,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::PRELOAD: return "ARMISD::PRELOAD"; + case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; case ARMISD::VCGE: return "ARMISD::VCGE"; @@ -1199,7 +1205,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, case CallingConv::C: if (!Subtarget->isAAPCS_ABI()) return CallingConv::ARM_APCS; - else if (Subtarget->hasVFP2() && + else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && getTargetMachine().Options.FloatABIType == FloatABI::Hard && !isVarArg) return CallingConv::ARM_AAPCS_VFP; @@ -1207,10 +1213,10 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC, return CallingConv::ARM_AAPCS; case CallingConv::Fast: if (!Subtarget->isAAPCS_ABI()) { - if (Subtarget->hasVFP2() && !isVarArg) + if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) return CallingConv::Fast; return CallingConv::ARM_APCS; - } else if (Subtarget->hasVFP2() && !isVarArg) + } else if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg) return CallingConv::ARM_AAPCS_VFP; else return CallingConv::ARM_AAPCS; @@ -1598,8 +1604,9 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); if (EnableARMLongCalls) { - assert (getTargetMachine().getRelocationModel() == Reloc::Static - && "long-calls with non-static relocation model!"); + assert((Subtarget->isTargetWindows() || + getTargetMachine().getRelocationModel() == Reloc::Static) && + "long-calls with non-static relocation model!"); // Handle a global address or an external symbol. If it's not one of // those, the target's already in a register, so we don't need to do // anything extra. @@ -1647,6 +1654,19 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, assert(Subtarget->isTargetMachO() && "WrapperPIC use on non-MachO?"); Callee = DAG.getNode(ARMISD::WrapperPIC, dl, getPointerTy(), DAG.getTargetGlobalAddress(GV, dl, getPointerTy())); + } else if (Subtarget->isTargetCOFF()) { + assert(Subtarget->isTargetWindows() && + "Windows is the only supported COFF target"); + unsigned TargetFlags = GV->hasDLLImportStorageClass() + ? ARMII::MO_DLLIMPORT + : ARMII::MO_NO_FLAG; + Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), /*Offset=*/0, + TargetFlags); + if (GV->hasDLLImportStorageClass()) + Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + DAG.getNode(ARMISD::Wrapper, dl, getPointerTy(), + Callee), MachinePointerInfo::getGOT(), + false, false, false, 0); } else { // On ELF targets for PIC code, direct calls should go through the PLT unsigned OpFlags = 0; @@ -1688,7 +1708,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // FIXME: handle tail calls differently.
unsigned CallOpc; - bool HasMinSizeAttr = Subtarget->isMinSize(); + bool HasMinSizeAttr = MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::MinSize); if (Subtarget->isThumb()) { if ((!isDirect || isARMFunc) && !Subtarget->hasV5TOps()) CallOpc = ARMISD::CALL_NOLINK; @@ -2326,7 +2347,8 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getInt32Ty(*DAG.getContext()), - DAG.getExternalSymbol("__tls_get_addr", PtrVT), &Args, 0); + DAG.getExternalSymbol("__tls_get_addr", PtrVT), std::move(Args), + 0); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.first; @@ -2434,7 +2456,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, // If we have T2 ops, we can materialize the address directly via movt/movw // pair. This is always cheaper. - if (Subtarget->useMovt()) { + if (Subtarget->useMovt(DAG.getMachineFunction())) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. @@ -2456,7 +2478,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); - if (Subtarget->useMovt()) + if (Subtarget->useMovt(DAG.getMachineFunction())) ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register @@ -2476,18 +2498,27 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SDValue ARMTargetLowering::LowerGlobalAddressWindows(SDValue Op, SelectionDAG &DAG) const { assert(Subtarget->isTargetWindows() && "non-Windows COFF is not supported"); - assert(Subtarget->useMovt() && "Windows on ARM expects to use movw/movt"); + assert(Subtarget->useMovt(DAG.getMachineFunction()) && + "Windows on ARM expects to use movw/movt"); const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal(); + const ARMII::TOF TargetFlags = + (GV->hasDLLImportStorageClass() ? ARMII::MO_DLLIMPORT : ARMII::MO_NO_FLAG); EVT PtrVT = getPointerTy(); + SDValue Result; SDLoc DL(Op); ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. - return DAG.getNode(ARMISD::Wrapper, DL, PtrVT, - DAG.getTargetGlobalAddress(GV, DL, PtrVT)); + Result = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, + DAG.getTargetGlobalAddress(GV, DL, PtrVT, /*Offset=*/0, + TargetFlags)); + if (GV->hasDLLImportStorageClass()) + Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Result, + MachinePointerInfo::getGOT(), false, false, false, 0); + return Result; } SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, @@ -2535,6 +2566,11 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. 
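+ // (Illustration: llvm.arm.rbit is a plain 32-bit bit reversal, e.g.
+ // rbit(0x00000001) == 0x80000000, so the case below simply forwards the
+ // operand to a single ARMISD::RBIT node.)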
+ case Intrinsic::arm_rbit: { + assert(Op.getOperand(0).getValueType() == MVT::i32 && + "RBIT intrinsic must have i32 type!"); + return DAG.getNode(ARMISD::RBIT, dl, MVT::i32, Op.getOperand(0)); + } case Intrinsic::arm_thread_pointer: { EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); return DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT); @@ -4492,6 +4528,11 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, BitMask <<= 8; ImmMask <<= 1; } + + if (DAG.getTargetLoweringInfo().isBigEndian()) + // Swap the higher and lower 32-bit words. + Imm = ((Imm & 0xf) << 4) | ((Imm & 0xf0) >> 4); + // Op=1, Cmode=1110. OpCmode = 0x1e; VT = is128Bits ? MVT::v2i64 : MVT::v1i64; @@ -6078,7 +6119,7 @@ SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), Callee, - &Args, 0) + std::move(Args), 0) .setDiscardResult(); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); @@ -6213,6 +6254,10 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); case ISD::SDIVREM: case ISD::UDIVREM: return LowerDivRem(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: + if (Subtarget->getTargetTriple().isWindowsItaniumEnvironment()) + return LowerDYNAMIC_STACKALLOC(Op, DAG); + llvm_unreachable("Don't know how to custom lower this!"); } } @@ -7112,6 +7157,73 @@ ARMTargetLowering::EmitStructByval(MachineInstr *MI, } MachineBasicBlock * +ARMTargetLowering::EmitLowered__chkstk(MachineInstr *MI, + MachineBasicBlock *MBB) const { + const TargetMachine &TM = getTargetMachine(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + assert(Subtarget->isTargetWindows() && + "__chkstk is only supported on Windows"); + assert(Subtarget->isThumb2() && "Windows on ARM requires Thumb-2 mode"); + + // __chkstk takes the number of words to allocate on the stack in R4, and + // returns the stack adjustment in number of bytes in R4. This will not + // clobber any other registers (other than the obvious lr). + // + // Although, technically, IP should be considered a register which may be + // clobbered, the call itself will not touch it. Windows on ARM is a pure + // Thumb-2 environment, so there is no interworking required. As a result, we + // do not expect a veneer to be emitted by the linker, clobbering IP. + // + // Each module receives its own copy of __chkstk, so no import thunk is + // required, again, ensuring that IP is not clobbered. + // + // Finally, although some linkers may theoretically provide a trampoline for + // out of range calls (which is quite common due to a 32M range limitation of + // branches for Thumb), we can generate the long-call version via + // -mcmodel=large, alleviating the need for the trampoline which may clobber + // IP.
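+ // As an illustrative sketch (assuming the default code model), the code
+ // below expands the pseudo roughly to:
+ //   bl __chkstk        @ in: r4 = #words to probe, out: r4 = #bytes
+ //   sub.w sp, sp, r4   @ perform the actual allocation
+ // For the large and JIT code models the address of __chkstk is first
+ // materialised with t2MOVi32imm and the call goes through blx instead.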
+ + switch (TM.getCodeModel()) { + case CodeModel::Small: + case CodeModel::Medium: + case CodeModel::Default: + case CodeModel::Kernel: + BuildMI(*MBB, MI, DL, TII.get(ARM::tBL)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addExternalSymbol("__chkstk") + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + case CodeModel::Large: + case CodeModel::JITDefault: { + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + unsigned Reg = MRI.createVirtualRegister(&ARM::rGPRRegClass); + + BuildMI(*MBB, MI, DL, TII.get(ARM::t2MOVi32imm), Reg) + .addExternalSymbol("__chkstk"); + BuildMI(*MBB, MI, DL, TII.get(ARM::tBLXr)) + .addImm((unsigned)ARMCC::AL).addReg(0) + .addReg(Reg, RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Kill) + .addReg(ARM::R4, RegState::Implicit | RegState::Define) + .addReg(ARM::R12, RegState::Implicit | RegState::Define | RegState::Dead); + break; + } + } + + AddDefaultCC(AddDefaultPred(BuildMI(*MBB, MI, DL, TII.get(ARM::t2SUBrr), + ARM::SP) + .addReg(ARM::SP, RegState::Define) + .addReg(ARM::R4, RegState::Kill))); + + MI->eraseFromParent(); + return MBB; +} + +MachineBasicBlock * ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB) const { const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); @@ -7360,6 +7472,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::COPY_STRUCT_BYVAL_I32: ++NumLoopByVals; return EmitStructByval(MI, BB); + case ARM::WIN__CHKSTK: + return EmitLowered__chkstk(MI, BB); } } @@ -8315,6 +8429,8 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, std::min(4U, LD->getAlignment() / 2)); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); + if (DCI.DAG.getTargetLoweringInfo().isBigEndian()) + std::swap (NewLD1, NewLD2); SDValue Result = DCI.CombineTo(N, NewLD1, NewLD2); DCI.RemoveFromWorklist(LD); DAG.DeleteNode(LD); @@ -8382,7 +8498,8 @@ static SDValue PerformSTORECombine(SDNode *N, SDLoc DL(St); SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector<int, 8> ShuffleVec(NumElems * SizeRatio, -1); - for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] = i * SizeRatio; + for (unsigned i = 0; i < NumElems; ++i) + ShuffleVec[i] = TLI.isBigEndian() ? (i+1) * SizeRatio - 1 : i * SizeRatio; // Can't shuffle using an illegal type. if (!TLI.isTypeLegal(WideVecVT)) return SDValue(); @@ -10471,13 +10588,39 @@ SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(InChain) - .setCallee(getLibcallCallingConv(LC), RetTy, Callee, &Args, 0) + .setCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args), 0) .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); return CallInfo.first; } +SDValue +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetWindows() && "unsupported target platform"); + SDLoc DL(Op); + + // Get the inputs. 
+ SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + + SDValue Words = DAG.getNode(ISD::SRL, DL, MVT::i32, Size, + DAG.getConstant(2, MVT::i32)); + + SDValue Flag; + Chain = DAG.getCopyToReg(Chain, DL, ARM::R4, Words, Flag); + Flag = Chain.getValue(1); + + SDVTList NodeTys = DAG.getVTList(MVT::i32, MVT::Glue); + Chain = DAG.getNode(ARMISD::WIN__CHKSTK, DL, NodeTys, Chain, Flag); + + SDValue NewSP = DAG.getCopyFromReg(Chain, DL, ARM::SP, MVT::i32); + Chain = NewSP.getValue(1); + + SDValue Ops[2] = { NewSP, Chain }; + return DAG.getMergeValues(Ops, DL); +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. @@ -10635,14 +10778,20 @@ bool ARMTargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, bool ARMTargetLowering::shouldExpandAtomicInIR(Instruction *Inst) const { // Loads and stores less than 64-bits are already atomic; ones above that // are doomed anyway, so defer to the default libcall and blame the OS when - // things go wrong: - if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) - return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() == 64; - else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) - return LI->getType()->getPrimitiveSizeInBits() == 64; - - // For the real atomic operations, we have ldrex/strex up to 64 bits. - return Inst->getType()->getPrimitiveSizeInBits() <= 64; + // things go wrong. Cortex M doesn't have ldrexd/strexd though, so don't emit + // anything for those. + bool IsMClass = Subtarget->isMClass(); + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + unsigned Size = SI->getValueOperand()->getType()->getPrimitiveSizeInBits(); + return Size == 64 && !IsMClass; + } else if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + return LI->getType()->getPrimitiveSizeInBits() == 64 && !IsMClass; + } + + // For the real atomic operations, we have ldrex/strex up to 32 bits, + // and up to 64 bits on the non-M profiles + unsigned AtomicLimit = IsMClass ? 32 : 64; + return Inst->getType()->getPrimitiveSizeInBits() <= AtomicLimit; } Value *ARMTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr, diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index c15305c..1ace0f3 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -95,6 +95,8 @@ namespace llvm { PRELOAD, // Preload + WIN__CHKSTK, // Windows' __chkstk call to do stack probing. + VCEQ, // Vector compare equal. VCEQZ, // Vector compare equal to zero. VCGE, // Vector compare greater than or equal. @@ -470,6 +472,7 @@ namespace llvm { const ARMSubtarget *ST) const; SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; unsigned getRegisterByName(const char* RegName, EVT VT) const override; @@ -578,6 +581,9 @@ namespace llvm { MachineBasicBlock *EmitStructByval(MachineInstr *MI, MachineBasicBlock *MBB) const; + + MachineBasicBlock *EmitLowered__chkstk(MachineInstr *MI, + MachineBasicBlock *MBB) const; }; enum NEONModImmType { diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 718d5da..2bb8976 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -270,8 +270,8 @@ def UseNaClTrap : Predicate<"Subtarget->useNaClTrap()">, def DontUseNaClTrap : Predicate<"!Subtarget->useNaClTrap()">; // FIXME: Eventually this will be just "hasV6T2Ops". 
-def UseMovt : Predicate<"Subtarget->useMovt()">; -def DontUseMovt : Predicate<"!Subtarget->useMovt()">; +def UseMovt : Predicate<"Subtarget->useMovt(*MF)">; +def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; def UseFPVMLx : Predicate<"Subtarget->useFPVMLx()">; def UseMulOps : Predicate<"Subtarget->useMulOps()">; @@ -493,7 +493,7 @@ def neon_vcvt_imm32 : Operand<i32> { // rot_imm: An integer that encodes a rotate amount. Must be 8, 16, or 24. def rot_imm_XFORM: SDNodeXForm<imm, [{ switch (N->getZExtValue()){ - default: assert(0); + default: llvm_unreachable(nullptr); case 0: return CurDAG->getTargetConstant(0, MVT::i32); case 8: return CurDAG->getTargetConstant(1, MVT::i32); case 16: return CurDAG->getTargetConstant(2, MVT::i32); @@ -594,7 +594,7 @@ def so_imm2part : PatLeaf<(imm), [{ /// arm_i32imm - True for +V6T2, or true only if so_imm2part is true. /// def arm_i32imm : PatLeaf<(imm), [{ - if (Subtarget->useMovt()) + if (Subtarget->useMovt(*MF)) return true; return ARM_AM::isSOImmTwoPartVal((unsigned)N->getZExtValue()); }]>; @@ -3334,8 +3334,8 @@ def SBFX : I<(outs GPRnopc:$Rd), let Inst{3-0} = Rn; } -def UBFX : I<(outs GPR:$Rd), - (ins GPR:$Rn, imm0_31:$lsb, imm1_32:$width), +def UBFX : I<(outs GPRnopc:$Rd), + (ins GPRnopc:$Rn, imm0_31:$lsb, imm1_32:$width), AddrMode1, 4, IndexModeNone, DPFrm, IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $width", "", []>, Requires<[IsARM, HasV6T2]> { @@ -4443,7 +4443,7 @@ def instsyncb_opt : Operand<i32> { let DecoderMethod = "DecodeInstSyncBarrierOption"; } -// memory barriers protect the atomic sequences +// Memory barriers protect the atomic sequences let hasSideEffects = 1 in { def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>, @@ -4452,7 +4452,6 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{31-4} = 0xf57ff05; let Inst{3-0} = opt; } -} def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>, @@ -4464,12 +4463,13 @@ def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, // ISB has only full system option def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary, - "isb", "\t$opt", []>, + "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>, Requires<[IsARM, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf57ff06; let Inst{3-0} = opt; } +} let usesCustomInserter = 1, Defs = [CPSR] in { @@ -5093,6 +5093,19 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, let Inst{11-0} = a; } +// Dynamic stack allocation yields a __chkstk call for Windows targets. These +// calls are needed to probe the stack when allocating more than 4K bytes in +// one go. Touching the stack at 4K increments is necessary to ensure that the +// guard pages used by the OS virtual memory manager are allocated in the +// correct sequence. +// The main point of having a separate instruction is the extra unmodelled +// effects (compared to ordinary calls), such as the change to the stack +// pointer. + +def win__chkstk : SDNode<"ARMISD::WIN__CHKSTK", SDTNone, + [SDNPHasChain, SDNPSideEffect]>; +let usesCustomInserter = 1, Uses = [R4], Defs = [R4, SP] in + def WIN__CHKSTK : PseudoInst<(outs), (ins), NoItinerary, [(win__chkstk)]>; + //===----------------------------------------------------------------------===// // TLS Instructions // @@ -5100,9 +5113,11 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, // __aeabi_read_tp preserves the registers r1-r3.
// This is a pseudo inst so that we can get the encoding right, // complete with fixup for the aeabi_read_tp function. +// TPsoft is valid for ARM mode only, in case of Thumb mode a tTPsoft pattern +// is defined in "ARMInstrThumb.td". let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in { - def TPsoft : PseudoInst<(outs), (ins), IIC_Br, + def TPsoft : ARMPseudoInst<(outs), (ins), 4, IIC_Br, [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index b32b5d2..c02bb3b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -6372,6 +6372,32 @@ multiclass Lengthen_HalfSingle<string DestLanes, string DestTy, string SrcTy, dsub_0)>; } +// The following class definition is basically a copy of the +// Lengthen_HalfSingle definition above, however with an additional parameter +// "RevLanes" to select the correct VREV32dXX instruction. This is to convert +// data loaded by VLD1LN into proper vector format in big endian mode. +multiclass Lengthen_HalfSingle_Big_Endian<string DestLanes, string DestTy, string SrcTy, + string InsnLanes, string InsnTy, string RevLanes> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # InsnLanes # InsnTy) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # InsnLanes # InsnTy) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)>; +} + // extload, zextload and sextload for a lengthening load followed by another // lengthening load, to quadruple the initial length. // @@ -6406,6 +6432,36 @@ multiclass Lengthen_Double<string DestLanes, string DestTy, string SrcTy, dsub_0))>; } +// The following class definition is basically a copy of the +// Lengthen_Double definition above, however with an additional parameter +// "RevLanes" to select the correct VREV32dXX instruction. This is to convert +// data loaded by VLD1LN into proper vector format in big endian mode. 
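+// For illustration: instantiated for a v2i16 extending load, the patterns
+// below emit VLD1LNd32, then VREV32d16 to restore lane order on big-endian,
+// and only then the two VMOVL lengthening steps.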
+multiclass Lengthen_Double_Big_Endian<string DestLanes, string DestTy, string SrcTy, + string Insn1Lanes, string Insn1Ty, string Insn2Lanes, + string Insn2Ty, string RevLanes> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6oneL32:$addr)), + (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV32d" # RevLanes) + (VLD1LNd32 addrmode6oneL32:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0))>; +} + // extload, zextload and sextload for a lengthening load followed by another // lengthening load, to quadruple the initial length, but which ends up only // requiring half the available lanes (a 64-bit outcome instead of a 128-bit). @@ -6443,33 +6499,102 @@ multiclass Lengthen_HalfDouble<string DestLanes, string DestTy, string SrcTy, dsub_0)>; } +// The following class definition is basically a copy of the +// Lengthen_HalfDouble definition above, however with an additional VREV16d8 +// instruction to convert data loaded by VLD1LN into proper vector format +// in big endian mode. 
+multiclass Lengthen_HalfDouble_Big_Endian<string DestLanes, string DestTy, string SrcTy, + string Insn1Lanes, string Insn1Ty, string Insn2Lanes, + string Insn2Ty> { + def _Any : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("extloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; + def _Z : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("zextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLuv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; + def _S : Pat<(!cast<ValueType>("v" # DestLanes # DestTy) + (!cast<PatFrag>("sextloadv" # SrcTy) addrmode6:$addr)), + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn2Lanes # Insn2Ty) + (EXTRACT_SUBREG (!cast<Instruction>("VMOVLsv" # Insn1Lanes # Insn1Ty) + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, (f64 (IMPLICIT_DEF)), (i32 0)))), + dsub_0)), + dsub_0)>; +} + defm : Lengthen_Single<"8", "i16", "8">; // v8i8 -> v8i16 defm : Lengthen_Single<"4", "i32", "16">; // v4i16 -> v4i32 defm : Lengthen_Single<"2", "i64", "32">; // v2i32 -> v2i64 -defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 -defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 +let Predicates = [IsLE] in { + defm : Lengthen_HalfSingle<"4", "i16", "i8", "8", "i16">; // v4i8 -> v4i16 + defm : Lengthen_HalfSingle<"2", "i32", "i16", "4", "i32">; // v2i16 -> v2i32 -// Double lengthening - v4i8 -> v4i16 -> v4i32 -defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; -// v2i8 -> v2i16 -> v2i32 -defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; -// v2i16 -> v2i32 -> v2i64 -defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; + // Double lengthening - v4i8 -> v4i16 -> v4i32 + defm : Lengthen_Double<"4", "i32", "i8", "8", "i16", "4", "i32">; + // v2i8 -> v2i16 -> v2i32 + defm : Lengthen_HalfDouble<"2", "i32", "i8", "8", "i16", "4", "i32">; + // v2i16 -> v2i32 -> v2i64 + defm : Lengthen_Double<"2", "i64", "i16", "4", "i32", "2", "i64">; +} + +let Predicates = [IsBE] in { + defm : Lengthen_HalfSingle_Big_Endian<"4", "i16", "i8", "8", "i16", "8">; // v4i8 -> v4i16 + defm : Lengthen_HalfSingle_Big_Endian<"2", "i32", "i16", "4", "i32", "16">; // v2i16 -> v2i32 + + // Double lengthening - v4i8 -> v4i16 -> v4i32 + defm : Lengthen_Double_Big_Endian<"4", "i32", "i8", "8", "i16", "4", "i32", "8">; + // v2i8 -> v2i16 -> v2i32 + defm : Lengthen_HalfDouble_Big_Endian<"2", "i32", "i8", "8", "i16", "4", "i32">; + // v2i16 -> v2i32 -> v2i64 + defm : Lengthen_Double_Big_Endian<"2", "i64", "i16", "4", "i32", "2", "i64", "16">; +} // Triple lengthening - v2i8 -> v2i16 -> v2i32 -> v2i64 -def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), - (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; -def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), - (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; -def : Pat<(v2i64 (sextloadvi8 
addrmode6:$addr)), - (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 - (VLD1LNd16 addrmode6:$addr, - (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +let Predicates = [IsLE] in { + def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), + (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>; +} +// The following patterns are basically a copy of the patterns above, +// however with an additional VREV16d instruction to convert data +// loaded by VLD1LN into proper vector format in big endian mode. +let Predicates = [IsBE] in { + def : Pat<(v2i64 (extloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (zextloadvi8 addrmode6:$addr)), + (VMOVLuv2i64 (EXTRACT_SUBREG (VMOVLuv4i32 (EXTRACT_SUBREG (VMOVLuv8i16 + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; + def : Pat<(v2i64 (sextloadvi8 addrmode6:$addr)), + (VMOVLsv2i64 (EXTRACT_SUBREG (VMOVLsv4i32 (EXTRACT_SUBREG (VMOVLsv8i16 + (!cast<Instruction>("VREV16d8") + (VLD1LNd16 addrmode6:$addr, + (f64 (IMPLICIT_DEF)), (i32 0)))), dsub_0)), dsub_0))>; +} //===----------------------------------------------------------------------===// // Assembler aliases diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index c30d6ab..85e9351 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3209,27 +3209,28 @@ def t2MOVCCi32imm let hasSideEffects = 1 in { def t2DMB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>, - Requires<[HasDB]> { + Requires<[IsThumb, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f5; let Inst{3-0} = opt; } -} def t2DSB : T2I<(outs), (ins memb_opt:$opt), NoItinerary, "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>, - Requires<[HasDB]> { + Requires<[IsThumb, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f4; let Inst{3-0} = opt; } def t2ISB : T2I<(outs), (ins instsyncb_opt:$opt), NoItinerary, - "isb", "\t$opt", []>, Requires<[HasDB]> { + "isb", "\t$opt", [(int_arm_isb (i32 imm0_15:$opt))]>, + Requires<[IsThumb, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf3bf8f6; let Inst{3-0} = opt; } +} class T2I_ldrex<bits<4> opcod, dag oops, dag iops, AddrMode am, int sz, InstrItinClass itin, string opc, string asm, string cstr, diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp index 8821c2d..6d1114d 100644 --- a/lib/Target/ARM/ARMJITInfo.cpp +++ b/lib/Target/ARM/ARMJITInfo.cpp @@ -13,6 +13,7 @@ #include "ARMJITInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMMachineFunctionInfo.h" #include "ARMRelocations.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/CodeGen/JITCodeEmitter.h" @@ -334,3 +335,10 @@ void ARMJITInfo::relocate(void *Function, MachineRelocation *MR, } } } + +void ARMJITInfo::Initialize(const 
MachineFunction &MF, bool isPIC) { + const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); + ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); + JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); + IsPIC = isPIC; +} diff --git a/lib/Target/ARM/ARMJITInfo.h b/lib/Target/ARM/ARMJITInfo.h index ee4c863..27e2a20 100644 --- a/lib/Target/ARM/ARMJITInfo.h +++ b/lib/Target/ARM/ARMJITInfo.h @@ -14,7 +14,6 @@ #ifndef ARMJITINFO_H #define ARMJITINFO_H -#include "ARMMachineFunctionInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineConstantPool.h" @@ -103,12 +102,7 @@ namespace llvm { /// Resize constant pool ids to CONSTPOOL_ENTRY addresses map; resize /// jump table ids to jump table bases map; remember if codegen relocation /// model is PIC. - void Initialize(const MachineFunction &MF, bool isPIC) { - const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); - ConstPoolId2AddrMap.resize(AFI->getNumPICLabels()); - JumpTableId2AddrMap.resize(AFI->getNumJumpTables()); - IsPIC = isPIC; - } + void Initialize(const MachineFunction &MF, bool isPIC); /// getConstantPoolEntryAddr - The ARM target puts all constant /// pool entries into constant islands. This returns the address of the diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index ee7df54..a03bcdb 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -505,7 +505,7 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // Exception: If the base register is in the input reglist, Thumb1 LDM is // non-writeback. Check for this. - if (Opcode == ARM::tLDRi && isThumb1) + if (Opcode == ARM::tLDMIA && isThumb1) for (unsigned I = 0; I < NumRegs; ++I) if (Base == Regs[I].first) { Writeback = false; @@ -519,17 +519,17 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, // Update tLDMIA with writeback if necessary. Opcode = ARM::tLDMIA_UPD; - // The base isn't dead after a merged instruction with writeback. Update - // future uses of the base with the added offset (if possible), or reset - // the base register as necessary. - if (!BaseKill) - UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); - MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); // Thumb1: we might need to set base writeback when building the MI. MIB.addReg(Base, getDefRegState(true)) .addReg(Base, getKillRegState(BaseKill)); + + // The base isn't dead after a merged instruction with writeback. Update + // future uses of the base with the added offset (if possible), or reset + // the base register as necessary. + if (!BaseKill) + UpdateBaseRegUses(MBB, MBBI, dl, Base, NumRegs, Pred, PredReg); } else { // No writeback, simply build the MachineInstr. 
MIB = BuildMI(MBB, MBBI, dl, TII->get(Opcode)); @@ -1734,6 +1734,12 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { isThumb2 = AFI->isThumb2Function(); isThumb1 = AFI->isThumbFunction() && !isThumb2; + // FIXME: Temporarily disabling for Thumb-1 due to miscompiles + if (isThumb1) { + delete RS; + return false; + } + bool Modified = false; for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; ++MFI) { diff --git a/lib/Target/ARM/ARMMCInstLower.cpp b/lib/Target/ARM/ARMMCInstLower.cpp index 48141b1..023f5f8 100644 --- a/lib/Target/ARM/ARMMCInstLower.cpp +++ b/lib/Target/ARM/ARMMCInstLower.cpp @@ -34,7 +34,7 @@ MCOperand ARMAsmPrinter::GetSymbolRef(const MachineOperand &MO, OutContext); switch (Option) { default: llvm_unreachable("Unknown target flag on symbol operand"); - case 0: + case ARMII::MO_NO_FLAG: break; case ARMII::MO_LO16: Expr = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp index af445e2..892b269 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp +++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp @@ -12,3 +12,13 @@ using namespace llvm; void ARMFunctionInfo::anchor() { } + +ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF) + : isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), + hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), + StByValParamsPadding(0), ArgRegsSaveSize(0), HasStackFrame(false), + RestoreSPFromFP(false), LRSpilledForFarJump(false), + FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), + GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), JumpTableUId(0), + PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), + GlobalBaseReg(0) {} diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h index d7ec6eb..44a9e34 100644 --- a/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -130,16 +130,7 @@ public: JumpTableUId(0), PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} - explicit ARMFunctionInfo(MachineFunction &MF) : - isThumb(MF.getTarget().getSubtarget<ARMSubtarget>().isThumb()), - hasThumb2(MF.getTarget().getSubtarget<ARMSubtarget>().hasThumb2()), - StByValParamsPadding(0), - ArgRegsSaveSize(0), HasStackFrame(false), RestoreSPFromFP(false), - LRSpilledForFarJump(false), - FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0), - GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0), - JumpTableUId(0), PICLabelUId(0), - VarArgsFrameIndex(0), HasITBlocks(false), GlobalBaseReg(0) {} + explicit ARMFunctionInfo(MachineFunction &MF); bool isThumbFunction() const { return isThumb; } bool isThumb1OnlyFunction() const { return isThumb && !hasThumb2; } @@ -220,7 +211,7 @@ public: void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) { if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second) - assert(0 && "Duplicate entries!"); + llvm_unreachable("Duplicate entries!"); } unsigned getOriginalCPIdx(unsigned CloneIdx) const { diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index 008ad64..3dcc0df 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -18,10 +18,8 @@ using namespace llvm; #define DEBUG_TYPE "arm-selectiondag-info" -ARMSelectionDAGInfo::ARMSelectionDAGInfo(const TargetMachine &TM) - : TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget<ARMSubtarget>()) 
{ -} +ARMSelectionDAGInfo::ARMSelectionDAGInfo(const DataLayout &DL) + : TargetSelectionDAGInfo(&DL) {} ARMSelectionDAGInfo::~ARMSelectionDAGInfo() { } @@ -34,6 +32,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, bool isVolatile, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>(); // Do repeated 4-byte loads and stores. To be improved. // This requires 4-byte alignment. if ((Align & 3) != 0) @@ -44,7 +43,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, if (!ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) + if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) return SDValue(); unsigned BytesLeft = SizeVal & 3; @@ -54,7 +53,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, unsigned VTSize = 4; unsigned i = 0; // Emit a maximum of 4 loads in Thumb1 since we have fewer registers - const unsigned MAX_LOADS_IN_LDM = Subtarget->isThumb1Only() ? 4 : 6; + const unsigned MAX_LOADS_IN_LDM = Subtarget.isThumb1Only() ? 4 : 6; SDValue TFOps[6]; SDValue Loads[6]; uint64_t SrcOff = 0, DstOff = 0; @@ -151,9 +150,10 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Src, SDValue Size, unsigned Align, bool isVolatile, MachinePointerInfo DstPtrInfo) const { + const ARMSubtarget &Subtarget = DAG.getTarget().getSubtarget<ARMSubtarget>(); // Use default for non-AAPCS (or MachO) subtargets - if (!Subtarget->isAAPCS_ABI() || Subtarget->isTargetMachO() || - Subtarget->isTargetWindows()) + if (!Subtarget.isAAPCS_ABI() || Subtarget.isTargetMachO() || + Subtarget.isTargetWindows()) return SDValue(); const ARMTargetLowering &TLI = @@ -191,7 +191,7 @@ EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMSET), Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), - TLI.getPointerTy()), &Args, 0) + TLI.getPointerTy()), std::move(Args), 0) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.h b/lib/Target/ARM/ARMSelectionDAGInfo.h index 8c2397b..13769dc 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.h +++ b/lib/Target/ARM/ARMSelectionDAGInfo.h @@ -36,12 +36,8 @@ namespace ARM_AM { } // end namespace ARM_AM class ARMSelectionDAGInfo : public TargetSelectionDAGInfo { - /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can - /// make the right decision when generating code for different targets. 
- const ARMSubtarget *Subtarget; - public: - explicit ARMSelectionDAGInfo(const TargetMachine &TM); + explicit ARMSelectionDAGInfo(const DataLayout &DL); ~ARMSelectionDAGInfo(); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index 5b204f6..0eb24ef 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -12,8 +12,15 @@ //===----------------------------------------------------------------------===// #include "ARMSubtarget.h" -#include "ARMBaseInstrInfo.h" -#include "ARMBaseRegisterInfo.h" +#include "ARMFrameLowering.h" +#include "ARMISelLowering.h" +#include "ARMInstrInfo.h" +#include "ARMJITInfo.h" +#include "ARMSelectionDAGInfo.h" +#include "ARMSubtarget.h" +#include "Thumb1FrameLowering.h" +#include "Thumb1InstrInfo.h" +#include "Thumb2InstrInfo.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -76,22 +83,89 @@ IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), "Allow IT blocks based on ARMv7"), clEnumValEnd)); -ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool IsLittle, - const TargetOptions &Options) - : ARMGenSubtargetInfo(TT, CPU, FS) , ARMProcFamily(Others) , ARMProcClass(None) , stackAlignment(4) , CPUString(CPU) , IsLittle(IsLittle) , TargetTriple(TT) , Options(Options) , TargetABI(ARM_ABI_UNKNOWN) { +static std::string computeDataLayout(ARMSubtarget &ST) { + std::string Ret = ""; + + if (ST.isLittle()) + // Little endian. + Ret += "e"; + else + // Big endian. + Ret += "E"; + + Ret += DataLayout::getManglingComponent(ST.getTargetTriple()); + + // Pointers are 32 bits and aligned to 32 bits. + Ret += "-p:32:32"; + + // On thumb, i16, i8 and i1 have natural alignment requirements, but we try + // to align to 32. + if (ST.isThumb()) + Ret += "-i1:8:32-i8:8:32-i16:16:32"; + + // ABIs other than APCS have 64 bit integers with natural alignment. + if (!ST.isAPCS_ABI()) + Ret += "-i64:64"; + + // We have 64-bit floats. The APCS ABI requires them to be aligned to 32 + // bits, others to 64 bits. We always try to align to 64 bits. + if (ST.isAPCS_ABI()) + Ret += "-f64:32:64"; + + // We have 128- and 64-bit vectors. The APCS ABI aligns them to 32 bits, + // others to 64. We always try to give them natural alignment. + if (ST.isAPCS_ABI()) + Ret += "-v64:32:64-v128:32:128"; + else + Ret += "-v128:64:128"; + + // On thumb and APCS, only try to align aggregates to 32 bits (the default is + // 64 bits). + if (ST.isThumb() || ST.isAPCS_ABI()) + Ret += "-a:0:32"; + + // Integer registers are 32 bits. + Ret += "-n32"; + + // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit + // aligned everywhere else. + if (ST.isTargetNaCl()) + Ret += "-S128"; + else if (ST.isAAPCS_ABI()) + Ret += "-S64"; + else + Ret += "-S32"; + + return Ret; +} + +/// initializeSubtargetDependencies - Initializes using a CPU and feature string +/// so that we can use initializer lists for subtarget initialization.
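+// As a hand-derived illustration of computeDataLayout() above (not a string
+// taken from this change): a little-endian Thumb AAPCS/ELF subtarget produces
+//   e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:64-v128:64:128-a:0:32-n32-S64
+// i.e. little endian, ELF name mangling, 32-bit pointers, over-aligned narrow
+// integers, naturally aligned i64, and a 64-bit aligned stack.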
+ARMSubtarget &ARMSubtarget::initializeSubtargetDependencies(StringRef CPU, + StringRef FS) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + return *this; } +ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, TargetMachine &TM, + bool IsLittle, const TargetOptions &Options) + : ARMGenSubtargetInfo(TT, CPU, FS), ARMProcFamily(Others), + ARMProcClass(None), stackAlignment(4), CPUString(CPU), IsLittle(IsLittle), + TargetTriple(TT), Options(Options), TargetABI(ARM_ABI_UNKNOWN), + DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), + TSInfo(DL), JITInfo(), + InstrInfo(isThumb1Only() + ? (ARMBaseInstrInfo *)new Thumb1InstrInfo(*this) + : !isThumb() + ? (ARMBaseInstrInfo *)new ARMInstrInfo(*this) + : (ARMBaseInstrInfo *)new Thumb2InstrInfo(*this)), + TLInfo(TM), + FrameLowering(!isThumb1Only() + ? new ARMFrameLowering(*this) + : (ARMFrameLowering *)new Thumb1FrameLowering(*this)) {} + void ARMSubtarget::initializeEnvironment() { HasV4TOps = false; HasV5TOps = false; @@ -106,7 +180,6 @@ void ARMSubtarget::initializeEnvironment() { HasVFPv4 = false; HasFPARMv8 = false; HasNEON = false; - MinSize = false; UseNEONForSinglePrecisionFP = false; UseMulOps = UseFusedMulOps; SlowFPVMLx = false; @@ -158,9 +231,6 @@ void ARMSubtarget::resetSubtargetFeatures(const MachineFunction *MF) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); } - - MinSize = - FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); } void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { @@ -353,6 +423,17 @@ bool ARMSubtarget::hasSinCos() const { !getTargetTriple().isOSVersionLT(7, 0); } +// Enable the PostMachineScheduler if the target selects it instead of +// PostRAScheduler. Currently only available on the command line via +// -misched-postra. +bool ARMSubtarget::enablePostMachineScheduler() const { + return PostRAScheduler; +} + +bool ARMSubtarget::enableAtomicExpandLoadLinked() const { + return hasAnyDataBarrier() && !isThumb1Only(); +} + bool ARMSubtarget::enablePostRAScheduler( CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, @@ -360,3 +441,12 @@ bool ARMSubtarget::enablePostRAScheduler( Mode = TargetSubtargetInfo::ANTIDEP_NONE; return PostRAScheduler && OptLevel >= CodeGenOpt::Default; } + +bool ARMSubtarget::useMovt(const MachineFunction &MF) const { + // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit + // immediates as it is inherently position independent, and may be out of + // range otherwise. 
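+ // (Illustration: an ELF function carrying the minsize attribute therefore
+ // falls back to constant-pool loads for 32-bit immediates, while the same
+ // function compiled for a Windows-on-ARM triple still uses movw/movt.)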
+ return UseMovt && (isTargetWindows() || + !MF.getFunction()->getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::MinSize)); +} diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 38536b2..8f6c165 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -14,8 +14,20 @@ #ifndef ARMSUBTARGET_H #define ARMSUBTARGET_H + +#include "ARMFrameLowering.h" +#include "ARMISelLowering.h" +#include "ARMInstrInfo.h" +#include "ARMJITInfo.h" +#include "ARMSelectionDAGInfo.h" +#include "ARMSubtarget.h" +#include "Thumb1FrameLowering.h" +#include "Thumb1InstrInfo.h" +#include "Thumb2InstrInfo.h" +#include "ARMJITInfo.h" #include "MCTargetDesc/ARMMCTargetDesc.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -64,10 +76,6 @@ protected: bool HasFPARMv8; bool HasNEON; - /// MinSize - True if the function being compiled has the "minsize" attribute - /// and should be optimised for size at the expense of speed. - bool MinSize; - /// UseNEONForSinglePrecisionFP - if the NEONFP attribute has been /// specified. Use the method useNEONForSinglePrecisionFP() to /// determine if NEON should actually be used. @@ -236,7 +244,7 @@ protected: /// of the specified triple. /// ARMSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool IsLittle, + const std::string &FS, TargetMachine &TM, bool IsLittle, const TargetOptions &Options); /// getMaxInlineSizeThreshold - Returns the maximum memset / memcpy size @@ -250,7 +258,31 @@ protected: /// \brief Reset the features for the ARM target. void resetSubtargetFeatures(const MachineFunction *MF) override; + + /// initializeSubtargetDependencies - Initializes using a CPU and feature string + /// so that we can use initializer lists for subtarget initialization. + ARMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); + + const DataLayout *getDataLayout() const { return &DL; } + const ARMSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + ARMJITInfo *getJITInfo() { return &JITInfo; } + const ARMBaseInstrInfo *getInstrInfo() const { return InstrInfo.get(); } + const ARMTargetLowering *getTargetLowering() const { return &TLInfo; } + const ARMFrameLowering *getFrameLowering() const { return FrameLowering.get(); } + const ARMBaseRegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); + } + private: + const DataLayout DL; + ARMSelectionDAGInfo TSInfo; + ARMJITInfo JITInfo; + // Either Thumb1InstrInfo or Thumb2InstrInfo. + std::unique_ptr<ARMBaseInstrInfo> InstrInfo; + ARMTargetLowering TLInfo; + // Either Thumb1FrameLowering or ARMFrameLowering. 
+ std::unique_ptr<ARMFrameLowering> FrameLowering; + void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); public: @@ -286,7 +318,6 @@ public: bool hasCrypto() const { return HasCrypto; } bool hasCRC() const { return HasCRC; } bool hasVirtualization() const { return HasVirtualization; } - bool isMinSize() const { return MinSize; } bool useNEONForSinglePrecisionFP() const { return hasNEON() && UseNEONForSinglePrecisionFP; } @@ -382,7 +413,8 @@ public: bool isR9Reserved() const { return IsR9Reserved; } - bool useMovt() const { return UseMovt && !isMinSize(); } + bool useMovt(const MachineFunction &MF) const; + bool supportsTailCall() const { return SupportsTailCall; } bool allowsUnalignedMem() const { return AllowsUnalignedMem; } @@ -399,11 +431,17 @@ public: /// compiler runtime or math libraries. bool hasSinCos() const; + /// True for some subtargets at > -O0. + bool enablePostMachineScheduler() const; + /// enablePostRAScheduler - True at 'More' optimization. bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, TargetSubtargetInfo::AntiDepBreakMode& Mode, RegClassVector& CriticalPathRCs) const override; + // enableAtomicExpandLoadLinked - True if we need to expand our atomics. + bool enableAtomicExpandLoadLinked() const override; + /// getInstrItins - Return the instruction itineraies based on subtarget /// selection. const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 8876227..d85194b 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -28,6 +28,12 @@ DisableA15SDOptimization("disable-a15-sd-optimization", cl::Hidden, cl::desc("Inhibit optimization of S->D register accesses on A15"), cl::init(false)); +static cl::opt<bool> +EnableAtomicTidy("arm-atomic-cfg-tidy", cl::Hidden, + cl::desc("Run SimplifyCFG after expanding atomic operations" + " to make use of cmpxchg flow-based information"), + cl::init(true)); + extern "C" void LLVMInitializeARMTarget() { // Register the target. RegisterTargetMachine<ARMLETargetMachine> X(TheARMLETarget); @@ -43,12 +49,9 @@ ARMBaseTargetMachine::ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool isLittle) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle, Options), - JITInfo(), - InstrItins(Subtarget.getInstrItineraryData()) { + CodeGenOpt::Level OL, bool isLittle) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, *this, isLittle, Options) { // Default to triple-appropriate float ABI if (Options.FloatABIType == FloatABI::Default) @@ -67,74 +70,11 @@ void ARMBaseTargetMachine::addAnalysisPasses(PassManagerBase &PM) { void ARMTargetMachine::anchor() { } -static std::string computeDataLayout(ARMSubtarget &ST) { - std::string Ret = ""; - - if (ST.isLittle()) - // Little endian. - Ret += "e"; - else - // Big endian. - Ret += "E"; - - Ret += DataLayout::getManglingComponent(ST.getTargetTriple()); - - // Pointers are 32 bits and aligned to 32 bits. - Ret += "-p:32:32"; - - // On thumb, i16,i18 and i1 have natural aligment requirements, but we try to - // align to 32. - if (ST.isThumb()) - Ret += "-i1:8:32-i8:8:32-i16:16:32"; - - // ABIs other than APCS have 64 bit integers with natural alignment. 
- if (!ST.isAPCS_ABI()) - Ret += "-i64:64"; - - // We have 64 bits floats. The APCS ABI requires them to be aligned to 32 - // bits, others to 64 bits. We always try to align to 64 bits. - if (ST.isAPCS_ABI()) - Ret += "-f64:32:64"; - - // We have 128 and 64 bit vectors. The APCS ABI aligns them to 32 bits, others - // to 64. We always ty to give them natural alignment. - if (ST.isAPCS_ABI()) - Ret += "-v64:32:64-v128:32:128"; - else - Ret += "-v128:64:128"; - - // On thumb and APCS, only try to align aggregates to 32 bits (the default is - // 64 bits). - if (ST.isThumb() || ST.isAPCS_ABI()) - Ret += "-a:0:32"; - - // Integer registers are 32 bits. - Ret += "-n32"; - - // The stack is 128 bit aligned on NaCl, 64 bit aligned on AAPCS and 32 bit - // aligned everywhere else. - if (ST.isTargetNaCl()) - Ret += "-S128"; - else if (ST.isAAPCS_ABI()) - Ret += "-S64"; - else - Ret += "-S32"; - - return Ret; -} - -ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, +ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool isLittle) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle), - InstrInfo(Subtarget), - DL(computeDataLayout(Subtarget)), - TLInfo(*this), - TSInfo(*this), - FrameLowering(Subtarget) { + CodeGenOpt::Level OL, bool isLittle) + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle) { initAsmInfo(); if (!Subtarget.hasARMOps()) report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not " @@ -143,21 +83,21 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT, void ARMLETargetMachine::anchor() { } -ARMLETargetMachine:: -ARMLETargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} +ARMLETargetMachine::ARMLETargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} void ARMBETargetMachine::anchor() { } -ARMBETargetMachine:: -ARMBETargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} +ARMBETargetMachine::ARMBETargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ARMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} void ThumbTargetMachine::anchor() { } @@ -165,38 +105,29 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool isLittle) - : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, isLittle), - InstrInfo(Subtarget.hasThumb2() - ? ((ARMBaseInstrInfo*)new Thumb2InstrInfo(Subtarget)) - : ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))), - DL(computeDataLayout(Subtarget)), - TLInfo(*this), - TSInfo(*this), - FrameLowering(Subtarget.hasThumb2() - ? 
new ARMFrameLowering(Subtarget) - : (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) { + CodeGenOpt::Level OL, bool isLittle) + : ARMBaseTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, + isLittle) { initAsmInfo(); } void ThumbLETargetMachine::anchor() { } -ThumbLETargetMachine:: -ThumbLETargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} +ThumbLETargetMachine::ThumbLETargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} void ThumbBETargetMachine::anchor() { } -ThumbBETargetMachine:: -ThumbBETargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL) - : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} +ThumbBETargetMachine::ThumbBETargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL) + : ThumbTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} namespace { /// ARM Code Generator Pass Configuration Options. @@ -213,6 +144,7 @@ public: return *getARMTargetMachine().getSubtargetImpl(); } + void addIRPasses() override; bool addPreISel() override; bool addInstSelector() override; bool addPreRegAlloc() override; @@ -225,11 +157,21 @@ TargetPassConfig *ARMBaseTargetMachine::createPassConfig(PassManagerBase &PM) { return new ARMPassConfig(this, PM); } -bool ARMPassConfig::addPreISel() { +void ARMPassConfig::addIRPasses() { + addPass(createAtomicExpandLoadLinkedPass(TM)); + + // Cmpxchg instructions are often used with a subsequent comparison to + // determine whether it succeeded. We can exploit existing control-flow in + // ldrex/strex loops to simplify this, but it needs tidying up. 
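+ // (Sketch: a cmpxchg whose result feeds an 'icmp eq' and a conditional
+ // branch expands into an ldrex/strex loop that already branches on success
+ // or failure, so SimplifyCFG can usually fold the extra compare-and-branch
+ // into that loop.)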
const ARMSubtarget *Subtarget = &getARMSubtarget(); if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) - addPass(createAtomicExpandLoadLinkedPass(TM)); + if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) + addPass(createCFGSimplificationPass()); + TargetPassConfig::addIRPasses(); +} + +bool ARMPassConfig::addPreISel() { if (TM->getOptLevel() != CodeGenOpt::None) addPass(createGlobalMergePass(TM)); diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 664c992..b72b1df 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -14,17 +14,9 @@ #ifndef ARMTARGETMACHINE_H #define ARMTARGETMACHINE_H -#include "ARMFrameLowering.h" -#include "ARMISelLowering.h" #include "ARMInstrInfo.h" -#include "ARMJITInfo.h" -#include "ARMSelectionDAGInfo.h" #include "ARMSubtarget.h" -#include "Thumb1FrameLowering.h" -#include "Thumb1InstrInfo.h" -#include "Thumb2InstrInfo.h" #include "llvm/IR/DataLayout.h" -#include "llvm/MC/MCStreamer.h" #include "llvm/Target/TargetMachine.h" namespace llvm { @@ -32,10 +24,6 @@ namespace llvm { class ARMBaseTargetMachine : public LLVMTargetMachine { protected: ARMSubtarget Subtarget; -private: - ARMJITInfo JITInfo; - InstrItineraryData InstrItins; - public: ARMBaseTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, @@ -44,15 +32,29 @@ public: CodeGenOpt::Level OL, bool isLittle); - ARMJITInfo *getJITInfo() override { return &JITInfo; } const ARMSubtarget *getSubtargetImpl() const override { return &Subtarget; } + const ARMBaseRegisterInfo *getRegisterInfo() const override { + return getSubtargetImpl()->getRegisterInfo(); + } const ARMTargetLowering *getTargetLowering() const override { - // Implemented by derived classes - llvm_unreachable("getTargetLowering not implemented"); + return getSubtargetImpl()->getTargetLowering(); + } + const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { + return getSubtargetImpl()->getSelectionDAGInfo(); + } + const ARMBaseInstrInfo *getInstrInfo() const override { + return getSubtargetImpl()->getInstrInfo(); + } + const ARMFrameLowering *getFrameLowering() const override { + return getSubtargetImpl()->getFrameLowering(); } const InstrItineraryData *getInstrItineraryData() const override { - return &InstrItins; + return &getSubtargetImpl()->getInstrItineraryData(); } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); + } + ARMJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } /// \brief Register ARM analysis passes with a pass manager. 
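// (Net effect of the accessor block above: ARMBaseTargetMachine answers every
// query by forwarding to its ARMSubtarget, so the old base-class stub that
// hit llvm_unreachable("getTargetLowering not implemented") is gone and the
// derived classes below lose their duplicate members. The data layout moves
// with them: the computeDataLayout lines deleted from ARMTargetMachine.cpp
// earlier in this diff assembled strings whose tails read
// "-i64:64-v128:64:128-n32-S64" for an ARM-mode AAPCS target and
// "-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" for APCS; getDataLayout()
// now fetches the equivalent from the subtarget.)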
void addAnalysisPasses(PassManagerBase &PM) override; @@ -67,35 +69,10 @@ public: /// class ARMTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); - ARMInstrInfo InstrInfo; - const DataLayout DL; // Calculates type size & alignment - ARMTargetLowering TLInfo; - ARMSelectionDAGInfo TSInfo; - ARMFrameLowering FrameLowering; public: - ARMTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool isLittle); - - const ARMRegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); - } - - const ARMTargetLowering *getTargetLowering() const override { - return &TLInfo; - } - - const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - const ARMFrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - const ARMInstrInfo *getInstrInfo() const override { return &InstrInfo; } - const DataLayout *getDataLayout() const override { return &DL; } + ARMTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); }; /// ARMLETargetMachine - ARM little endian target machine. @@ -114,10 +91,9 @@ public: class ARMBETargetMachine : public ARMTargetMachine { void anchor() override; public: - ARMBETargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); + ARMBETargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL); }; /// ThumbTargetMachine - Thumb target machine. @@ -126,43 +102,10 @@ public: /// class ThumbTargetMachine : public ARMBaseTargetMachine { virtual void anchor(); - // Either Thumb1InstrInfo or Thumb2InstrInfo. - std::unique_ptr<ARMBaseInstrInfo> InstrInfo; - const DataLayout DL; // Calculates type size & alignment - ARMTargetLowering TLInfo; - ARMSelectionDAGInfo TSInfo; - // Either Thumb1FrameLowering or ARMFrameLowering. - std::unique_ptr<ARMFrameLowering> FrameLowering; public: - ThumbTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool isLittle); - - /// returns either Thumb1RegisterInfo or Thumb2RegisterInfo - const ARMBaseRegisterInfo *getRegisterInfo() const override { - return &InstrInfo->getRegisterInfo(); - } - - const ARMTargetLowering *getTargetLowering() const override { - return &TLInfo; - } - - const ARMSelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; - } - - /// returns either Thumb1InstrInfo or Thumb2InstrInfo - const ARMBaseInstrInfo *getInstrInfo() const override { - return InstrInfo.get(); - } - /// returns either Thumb1FrameLowering or ARMFrameLowering - const ARMFrameLowering *getFrameLowering() const override { - return FrameLowering.get(); - } - const DataLayout *getDataLayout() const override { return &DL; } + ThumbTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); }; /// ThumbLETargetMachine - Thumb little endian target machine. 
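Every hunk in this header follows the same shape: the concrete members (InstrInfo, DataLayout, TargetLowering, SelectionDAGInfo, FrameLowering) are deleted from the TargetMachine subclasses, and the getters forward to the ARMSubtarget instead. A minimal standalone sketch of that ownership move, using hypothetical stand-in types rather than the real LLVM classes:

#include <iostream>

// Hypothetical stand-in for a per-subtarget object such as ARMFrameLowering.
struct FrameLowering { int StackAlign; };

// The subtarget owns the object...
struct Subtarget {
  FrameLowering FL{8};
  const FrameLowering *getFrameLowering() const { return &FL; }
};

// ...and the target machine keeps no duplicate copy: its accessor forwards,
// so Thumb1/Thumb2 and LE/BE variants need no per-class members of their own.
struct TargetMachine {
  Subtarget ST; // configured once from the CPU/FS strings
  const FrameLowering *getFrameLowering() const {
    return ST.getFrameLowering();
  }
};

int main() {
  TargetMachine TM;
  std::cout << TM.getFrameLowering()->StackAlign << "\n"; // prints 8
}

With that pattern in place, the subclasses here shrink to constructors that merely pick a configuration, which is exactly what the ARMTargetMachine.cpp hunks earlier in this diff show.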
@@ -170,10 +113,10 @@ public: class ThumbLETargetMachine : public ThumbTargetMachine { void anchor() override; public: - ThumbLETargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL); + ThumbLETargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL); }; /// ThumbBETargetMachine - Thumb big endian target machine. diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 57df7da..a2ace62 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -443,31 +443,58 @@ unsigned ARMTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { unsigned ARMTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { - // We only handle costs of reverse shuffles for now. - if (Kind != SK_Reverse) + // We only handle costs of reverse and alternate shuffles for now. + if (Kind != SK_Reverse && Kind != SK_Alternate) return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = { - // Reverse shuffle cost one instruction if we are shuffling within a double - // word (vrev) or two if we shuffle a quad word (vrev, vext). - { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 }, - { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 }, - - { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 }, - { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 } - }; + if (Kind == SK_Reverse) { + static const CostTblEntry<MVT::SimpleValueType> NEONShuffleTbl[] = { + // Reverse shuffle cost one instruction if we are shuffling within a + // double word (vrev) or two if we shuffle a quad word (vrev, vext). + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, - std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; - int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); - if (Idx == -1) - return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + + int Idx = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); - return LT.first * NEONShuffleTbl[Idx].Cost; + return LT.first * NEONShuffleTbl[Idx].Cost; + } + if (Kind == SK_Alternate) { + static const CostTblEntry<MVT::SimpleValueType> NEONAltShuffleTbl[] = { + // Alt shuffle cost table for ARM. Cost is the number of instructions + // required to create the shuffled vector. 
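// (Reading the lookup that follows this table: getShuffleCost legalizes Tp
// first and multiplies the table cost by LT.first, the number of legalized
// parts, so an SK_Alternate shuffle of v4i32 costs 1 * 2 = 2. The large
// v8i16 and v16i8 entries below price the shuffle at roughly an extract plus
// an insert per lane, there being no single NEON permute that implements it.)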
+ + {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, + {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, + + {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, + {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2}, + + {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16}, + + {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; + + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); + int Idx = + CostTableLookup(NEONAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second); + if (Idx == -1) + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); + return LT.first * NEONAltShuffleTbl[Idx].Cost; + } + return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); } unsigned ARMTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 5cdf394..b62706c 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -190,11 +190,11 @@ class ARMAsmParser : public MCTargetAsmParser { } int tryParseRegister(); - bool tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &); - int tryParseShiftRegister(SmallVectorImpl<MCParsedAsmOperand*> &); - bool parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &); - bool parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &); - bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &, StringRef Mnemonic); + bool tryParseRegisterWithWriteBack(OperandVector &); + int tryParseShiftRegister(OperandVector &); + bool parseRegisterList(OperandVector &); + bool parseMemory(OperandVector &); + bool parseOperand(OperandVector &, StringRef Mnemonic); bool parsePrefix(ARMMCExpr::VariantKind &RefKind); bool parseMemRegOffsetShift(ARM_AM::ShiftOpc &ShiftType, unsigned &ShiftAmount); @@ -282,54 +282,42 @@ class ARMAsmParser : public MCTargetAsmParser { /// } - OperandMatchResultTy parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseCoprocNumOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseCoprocRegOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseCoprocOptionOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseMemBarrierOptOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseInstSyncBarrierOptOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseProcIFlagsOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseMSRMaskOperand( - SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &O, - StringRef Op, int Low, int High); - OperandMatchResultTy parsePKHLSLImm(SmallVectorImpl<MCParsedAsmOperand*> &O) { + OperandMatchResultTy parseITCondCode(OperandVector &); + OperandMatchResultTy parseCoprocNumOperand(OperandVector &); + OperandMatchResultTy parseCoprocRegOperand(OperandVector &); + OperandMatchResultTy parseCoprocOptionOperand(OperandVector &); + OperandMatchResultTy parseMemBarrierOptOperand(OperandVector &); + OperandMatchResultTy parseInstSyncBarrierOptOperand(OperandVector &); + OperandMatchResultTy parseProcIFlagsOperand(OperandVector &); + OperandMatchResultTy parseMSRMaskOperand(OperandVector &); + OperandMatchResultTy parsePKHImm(OperandVector &O, StringRef Op, int Low, + int High); + OperandMatchResultTy parsePKHLSLImm(OperandVector &O) { return parsePKHImm(O, "lsl", 0, 31); } - OperandMatchResultTy 
parsePKHASRImm(SmallVectorImpl<MCParsedAsmOperand*> &O) { + OperandMatchResultTy parsePKHASRImm(OperandVector &O) { return parsePKHImm(O, "asr", 1, 32); } - OperandMatchResultTy parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseRotImm(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseBitfield(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseFPImm(SmallVectorImpl<MCParsedAsmOperand*>&); - OperandMatchResultTy parseVectorList(SmallVectorImpl<MCParsedAsmOperand*>&); + OperandMatchResultTy parseSetEndImm(OperandVector &); + OperandMatchResultTy parseShifterImm(OperandVector &); + OperandMatchResultTy parseRotImm(OperandVector &); + OperandMatchResultTy parseBitfield(OperandVector &); + OperandMatchResultTy parsePostIdxReg(OperandVector &); + OperandMatchResultTy parseAM3Offset(OperandVector &); + OperandMatchResultTy parseFPImm(OperandVector &); + OperandMatchResultTy parseVectorList(OperandVector &); OperandMatchResultTy parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc); // Asm Match Converter Methods - void cvtThumbMultiply(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - void cvtThumbBranches(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &); - - bool validateInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops); - bool processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops); - bool shouldOmitCCOutOperand(StringRef Mnemonic, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); - bool shouldOmitPredicateOperand(StringRef Mnemonic, - SmallVectorImpl<MCParsedAsmOperand*> &Operands); + void cvtThumbMultiply(MCInst &Inst, const OperandVector &); + void cvtThumbBranches(MCInst &Inst, const OperandVector &); + + bool validateInstruction(MCInst &Inst, const OperandVector &Ops); + bool processInstruction(MCInst &Inst, const OperandVector &Ops); + bool shouldOmitCCOutOperand(StringRef Mnemonic, OperandVector &Operands); + bool shouldOmitPredicateOperand(StringRef Mnemonic, OperandVector &Operands); + public: enum ARMMatchResultTy { Match_RequiresITBlock = FIRST_TARGET_MATCH_RESULT_TY, @@ -361,19 +349,17 @@ public: // Implementation of the MCTargetAsmParser interface: bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool - ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) override; + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; bool ParseDirective(AsmToken DirectiveID) override; - unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override; unsigned checkTargetMatchPredicate(MCInst &Inst) override; bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, + OperandVector &Operands, MCStreamer &Out, + unsigned &ErrorInfo, bool MatchingInlineAsm) override; void onLabelParsed(MCSymbol *Symbol) override; }; @@ -545,8 +531,8 @@ class ARMOperand : public MCParsedAsmOperand { struct BitfieldOp Bitfield; }; - ARMOperand(KindTy K) : MCParsedAsmOperand(), 
Kind(K) {} public: + ARMOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} ARMOperand(const ARMOperand &o) : MCParsedAsmOperand() { Kind = o.Kind; StartLoc = o.StartLoc; @@ -2481,56 +2467,58 @@ public: void print(raw_ostream &OS) const override; - static ARMOperand *CreateITMask(unsigned Mask, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_ITCondMask); + static std::unique_ptr<ARMOperand> CreateITMask(unsigned Mask, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_ITCondMask); Op->ITMask.Mask = Mask; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateCondCode(ARMCC::CondCodes CC, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_CondCode); + static std::unique_ptr<ARMOperand> CreateCondCode(ARMCC::CondCodes CC, + SMLoc S) { + auto Op = make_unique<ARMOperand>(k_CondCode); Op->CC.Val = CC; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateCoprocNum(unsigned CopVal, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_CoprocNum); + static std::unique_ptr<ARMOperand> CreateCoprocNum(unsigned CopVal, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_CoprocNum); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateCoprocReg(unsigned CopVal, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_CoprocReg); + static std::unique_ptr<ARMOperand> CreateCoprocReg(unsigned CopVal, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_CoprocReg); Op->Cop.Val = CopVal; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateCoprocOption(unsigned Val, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_CoprocOption); + static std::unique_ptr<ARMOperand> CreateCoprocOption(unsigned Val, SMLoc S, + SMLoc E) { + auto Op = make_unique<ARMOperand>(k_CoprocOption); Op->Cop.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateCCOut(unsigned RegNum, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_CCOut); + static std::unique_ptr<ARMOperand> CreateCCOut(unsigned RegNum, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_CCOut); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateToken(StringRef Str, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_Token); + static std::unique_ptr<ARMOperand> CreateToken(StringRef Str, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -2538,20 +2526,20 @@ public: return Op; } - static ARMOperand *CreateReg(unsigned RegNum, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_Register); + static std::unique_ptr<ARMOperand> CreateReg(unsigned RegNum, SMLoc S, + SMLoc E) { + auto Op = make_unique<ARMOperand>(k_Register); Op->Reg.RegNum = RegNum; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, - unsigned SrcReg, - unsigned ShiftReg, - unsigned ShiftImm, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_ShiftedRegister); + static std::unique_ptr<ARMOperand> + CreateShiftedRegister(ARM_AM::ShiftOpc ShTy, unsigned SrcReg, + unsigned ShiftReg, unsigned ShiftImm, SMLoc S, + SMLoc E) { + auto Op = make_unique<ARMOperand>(k_ShiftedRegister); Op->RegShiftedReg.ShiftTy = ShTy; Op->RegShiftedReg.SrcReg = SrcReg; Op->RegShiftedReg.ShiftReg = ShiftReg; @@ -2561,11 +2549,10 @@ public: return Op; } - static ARMOperand *CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, - unsigned SrcReg, - unsigned ShiftImm, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_ShiftedImmediate); 
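// (Every Create* factory in this file gets the same mechanical rewrite, as in
// the hunks above and below: `ARMOperand *Op = new ARMOperand(...)` becomes
// `auto Op = make_unique<ARMOperand>(...)` and the return type becomes
// std::unique_ptr<ARMOperand>. The OperandVector then owns its elements,
// which is what lets the explicit `delete Op` calls in ParseInstruction
// further down simply disappear.)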
+ static std::unique_ptr<ARMOperand> + CreateShiftedImmediate(ARM_AM::ShiftOpc ShTy, unsigned SrcReg, + unsigned ShiftImm, SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_ShiftedImmediate); Op->RegShiftedImm.ShiftTy = ShTy; Op->RegShiftedImm.SrcReg = SrcReg; Op->RegShiftedImm.ShiftImm = ShiftImm; @@ -2574,9 +2561,9 @@ public: return Op; } - static ARMOperand *CreateShifterImm(bool isASR, unsigned Imm, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_ShifterImmediate); + static std::unique_ptr<ARMOperand> CreateShifterImm(bool isASR, unsigned Imm, + SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_ShifterImmediate); Op->ShifterImm.isASR = isASR; Op->ShifterImm.Imm = Imm; Op->StartLoc = S; @@ -2584,17 +2571,18 @@ public: return Op; } - static ARMOperand *CreateRotImm(unsigned Imm, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_RotateImmediate); + static std::unique_ptr<ARMOperand> CreateRotImm(unsigned Imm, SMLoc S, + SMLoc E) { + auto Op = make_unique<ARMOperand>(k_RotateImmediate); Op->RotImm.Imm = Imm; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateBitfield(unsigned LSB, unsigned Width, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_BitfieldDescriptor); + static std::unique_ptr<ARMOperand> + CreateBitfield(unsigned LSB, unsigned Width, SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_BitfieldDescriptor); Op->Bitfield.LSB = LSB; Op->Bitfield.Width = Width; Op->StartLoc = S; @@ -2602,8 +2590,8 @@ public: return Op; } - static ARMOperand * - CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned> > &Regs, + static std::unique_ptr<ARMOperand> + CreateRegList(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs, SMLoc StartLoc, SMLoc EndLoc) { assert (Regs.size() > 0 && "RegList contains no registers?"); KindTy Kind = k_RegisterList; @@ -2617,7 +2605,7 @@ public: // Sort based on the register encoding values. 
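// (The pairs are (encoding, register), so the sort leaves the Registers
// vector populated just below in encoding order, the canonical order for
// LDM/STM-style register lists, which are ultimately emitted as a bitmask
// keyed by register encoding.)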
array_pod_sort(Regs.begin(), Regs.end()); - ARMOperand *Op = new ARMOperand(Kind); + auto Op = make_unique<ARMOperand>(Kind); for (SmallVectorImpl<std::pair<unsigned, unsigned> >::const_iterator I = Regs.begin(), E = Regs.end(); I != E; ++I) Op->Registers.push_back(I->second); @@ -2626,9 +2614,11 @@ public: return Op; } - static ARMOperand *CreateVectorList(unsigned RegNum, unsigned Count, - bool isDoubleSpaced, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_VectorList); + static std::unique_ptr<ARMOperand> CreateVectorList(unsigned RegNum, + unsigned Count, + bool isDoubleSpaced, + SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_VectorList); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.isDoubleSpaced = isDoubleSpaced; @@ -2637,10 +2627,10 @@ public: return Op; } - static ARMOperand *CreateVectorListAllLanes(unsigned RegNum, unsigned Count, - bool isDoubleSpaced, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_VectorListAllLanes); + static std::unique_ptr<ARMOperand> + CreateVectorListAllLanes(unsigned RegNum, unsigned Count, bool isDoubleSpaced, + SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_VectorListAllLanes); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.isDoubleSpaced = isDoubleSpaced; @@ -2649,11 +2639,10 @@ public: return Op; } - static ARMOperand *CreateVectorListIndexed(unsigned RegNum, unsigned Count, - unsigned Index, - bool isDoubleSpaced, - SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_VectorListIndexed); + static std::unique_ptr<ARMOperand> + CreateVectorListIndexed(unsigned RegNum, unsigned Count, unsigned Index, + bool isDoubleSpaced, SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_VectorListIndexed); Op->VectorList.RegNum = RegNum; Op->VectorList.Count = Count; Op->VectorList.LaneIndex = Index; @@ -2663,33 +2652,30 @@ public: return Op; } - static ARMOperand *CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, - MCContext &Ctx) { - ARMOperand *Op = new ARMOperand(k_VectorIndex); + static std::unique_ptr<ARMOperand> + CreateVectorIndex(unsigned Idx, SMLoc S, SMLoc E, MCContext &Ctx) { + auto Op = make_unique<ARMOperand>(k_VectorIndex); Op->VectorIndex.Val = Idx; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - ARMOperand *Op = new ARMOperand(k_Immediate); + static std::unique_ptr<ARMOperand> CreateImm(const MCExpr *Val, SMLoc S, + SMLoc E) { + auto Op = make_unique<ARMOperand>(k_Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static ARMOperand *CreateMem(unsigned BaseRegNum, - const MCConstantExpr *OffsetImm, - unsigned OffsetRegNum, - ARM_AM::ShiftOpc ShiftType, - unsigned ShiftImm, - unsigned Alignment, - bool isNegative, - SMLoc S, SMLoc E, - SMLoc AlignmentLoc = SMLoc()) { - ARMOperand *Op = new ARMOperand(k_Memory); + static std::unique_ptr<ARMOperand> + CreateMem(unsigned BaseRegNum, const MCConstantExpr *OffsetImm, + unsigned OffsetRegNum, ARM_AM::ShiftOpc ShiftType, + unsigned ShiftImm, unsigned Alignment, bool isNegative, SMLoc S, + SMLoc E, SMLoc AlignmentLoc = SMLoc()) { + auto Op = make_unique<ARMOperand>(k_Memory); Op->Memory.BaseRegNum = BaseRegNum; Op->Memory.OffsetImm = OffsetImm; Op->Memory.OffsetRegNum = OffsetRegNum; @@ -2703,11 +2689,10 @@ public: return Op; } - static ARMOperand *CreatePostIdxReg(unsigned RegNum, bool isAdd, - ARM_AM::ShiftOpc ShiftTy, - unsigned ShiftImm, - SMLoc S, SMLoc E) { - ARMOperand *Op = new 
ARMOperand(k_PostIndexRegister); + static std::unique_ptr<ARMOperand> + CreatePostIdxReg(unsigned RegNum, bool isAdd, ARM_AM::ShiftOpc ShiftTy, + unsigned ShiftImm, SMLoc S, SMLoc E) { + auto Op = make_unique<ARMOperand>(k_PostIndexRegister); Op->PostIdxReg.RegNum = RegNum; Op->PostIdxReg.isAdd = isAdd; Op->PostIdxReg.ShiftTy = ShiftTy; @@ -2717,33 +2702,35 @@ public: return Op; } - static ARMOperand *CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_MemBarrierOpt); + static std::unique_ptr<ARMOperand> CreateMemBarrierOpt(ARM_MB::MemBOpt Opt, + SMLoc S) { + auto Op = make_unique<ARMOperand>(k_MemBarrierOpt); Op->MBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, - SMLoc S) { - ARMOperand *Op = new ARMOperand(k_InstSyncBarrierOpt); + static std::unique_ptr<ARMOperand> + CreateInstSyncBarrierOpt(ARM_ISB::InstSyncBOpt Opt, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_InstSyncBarrierOpt); Op->ISBOpt.Val = Opt; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateProcIFlags(ARM_PROC::IFlags IFlags, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_ProcIFlags); + static std::unique_ptr<ARMOperand> CreateProcIFlags(ARM_PROC::IFlags IFlags, + SMLoc S) { + auto Op = make_unique<ARMOperand>(k_ProcIFlags); Op->IFlags.Val = IFlags; Op->StartLoc = S; Op->EndLoc = S; return Op; } - static ARMOperand *CreateMSRMask(unsigned MMask, SMLoc S) { - ARMOperand *Op = new ARMOperand(k_MSRMask); + static std::unique_ptr<ARMOperand> CreateMSRMask(unsigned MMask, SMLoc S) { + auto Op = make_unique<ARMOperand>(k_MSRMask); Op->MMask.Val = MMask; Op->StartLoc = S; Op->EndLoc = S; @@ -2947,8 +2934,7 @@ int ARMAsmParser::tryParseRegister() { // occurs, return -1. An irrecoverable error is one where tokens have been // consumed in the process of trying to parse the shifter (i.e., when it is // indeed a shifter operand, but malformed). -int ARMAsmParser::tryParseShiftRegister( - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +int ARMAsmParser::tryParseShiftRegister(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -2972,7 +2958,8 @@ int ARMAsmParser::tryParseShiftRegister( // The source register for the shift has already been added to the // operand list, so we need to pop it off and combine it into the shifted // register operand instead. - std::unique_ptr<ARMOperand> PrevOp((ARMOperand*)Operands.pop_back_val()); + std::unique_ptr<ARMOperand> PrevOp( + (ARMOperand *)Operands.pop_back_val().release()); if (!PrevOp->isReg()) return Error(PrevOp->getStartLoc(), "shift must be of a register"); int SrcReg = PrevOp->getReg(); @@ -3049,8 +3036,7 @@ int ARMAsmParser::tryParseShiftRegister( /// /// TODO this is likely to change to allow different register types and or to /// parse for a specific register type. -bool ARMAsmParser:: -tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool ARMAsmParser::tryParseRegisterWithWriteBack(OperandVector &Operands) { const AsmToken &RegTok = Parser.getTok(); int RegNo = tryParseRegister(); if (RegNo == -1) @@ -3096,17 +3082,25 @@ tryParseRegisterWithWriteBack(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } /// MatchCoprocessorOperandName - Try to parse an coprocessor related -/// instruction with a symbolic operand name. Example: "p1", "p7", "c3", -/// "c5", ... +/// instruction with a symbolic operand name. 
+/// We accept "crN" syntax for GAS compatibility. +/// <operand-name> ::= <prefix><number> +/// If CoprocOp is 'c', then: +/// <prefix> ::= c | cr +/// If CoprocOp is 'p', then : +/// <prefix> ::= p +/// <number> ::= integer in range [0, 15] static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { // Use the same layout as the tablegen'erated register name matcher. Ugly, // but efficient. + if (Name.size() < 2 || Name[0] != CoprocOp) + return -1; + Name = (Name[1] == 'r') ? Name.drop_front(2) : Name.drop_front(); + switch (Name.size()) { default: return -1; - case 2: - if (Name[0] != CoprocOp) - return -1; - switch (Name[1]) { + case 1: + switch (Name[0]) { default: return -1; case '0': return 0; case '1': return 1; @@ -3119,10 +3113,10 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { case '8': return 8; case '9': return 9; } - case 3: - if (Name[0] != CoprocOp || Name[1] != '1') + case 2: + if (Name[0] != '1') return -1; - switch (Name[2]) { + switch (Name[1]) { default: return -1; // p10 and p11 are invalid for coproc instructions (reserved for FP/NEON) case '0': return CoprocOp == 'p'? -1: 10; @@ -3136,8 +3130,8 @@ static int MatchCoprocessorOperandName(StringRef Name, char CoprocOp) { } /// parseITCondCode - Try to parse a condition code for an IT instruction. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseITCondCode(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3173,8 +3167,8 @@ parseITCondCode(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// parseCoprocNumOperand - Try to parse an coprocessor number operand. The /// token must be an Identifier when called, and if it is a coprocessor /// number, the token is eaten and the operand is added to the operand list. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseCoprocNumOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -3192,8 +3186,8 @@ parseCoprocNumOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// parseCoprocRegOperand - Try to parse an coprocessor register operand. The /// token must be an Identifier when called, and if it is a coprocessor /// number, the token is eaten and the operand is added to the operand list. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseCoprocRegOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) @@ -3210,8 +3204,8 @@ parseCoprocRegOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// parseCoprocOptionOperand - Try to parse an coprocessor option operand. /// coproc_option : '{' imm0_255 '}' -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseCoprocOptionOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseCoprocOptionOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); // If this isn't a '{', this isn't a coprocessor immediate operand. 
@@ -3288,8 +3282,7 @@ static unsigned getDRegFromQReg(unsigned QReg) { } /// Parse a register list. -bool ARMAsmParser:: -parseRegisterList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool ARMAsmParser::parseRegisterList(OperandVector &Operands) { assert(Parser.getTok().is(AsmToken::LCurly) && "Token is not a Left Curly Brace"); SMLoc S = Parser.getTok().getLoc(); @@ -3470,8 +3463,8 @@ parseVectorLane(VectorLaneTy &LaneKind, unsigned &Index, SMLoc &EndLoc) { } // parse a vector register list -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseVectorList(OperandVector &Operands) { VectorLaneTy LaneKind; unsigned LaneIndex; SMLoc S = Parser.getTok().getLoc(); @@ -3721,8 +3714,8 @@ parseVectorList(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } /// parseMemBarrierOptOperand - Try to parse DSB/DMB data barrier options. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseMemBarrierOptOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); unsigned Opt; @@ -3792,8 +3785,8 @@ parseMemBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } /// parseInstSyncBarrierOptOperand - Try to parse ISB inst sync barrier options. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseInstSyncBarrierOptOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); unsigned Opt; @@ -3843,8 +3836,8 @@ parseInstSyncBarrierOptOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// parseProcIFlagsOperand - Try to parse iflags from CPS instruction. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseProcIFlagsOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -3877,8 +3870,8 @@ parseProcIFlagsOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } /// parseMSRMaskOperand - Try to parse mask flags from MSR instruction. 
-ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseMSRMaskOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); const AsmToken &Tok = Parser.getTok(); if (!Tok.is(AsmToken::Identifier)) @@ -4005,9 +3998,9 @@ parseMSRMaskOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, - int Low, int High) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parsePKHImm(OperandVector &Operands, StringRef Op, int Low, + int High) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier)) { Error(Parser.getTok().getLoc(), Op + " operand expected."); @@ -4053,8 +4046,8 @@ parsePKHImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands, StringRef Op, return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseSetEndImm(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { @@ -4082,8 +4075,8 @@ parseSetEndImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// lsl #n 'n' in [0,31] /// asr #n 'n' in [1,32] /// n == 32 encoded as n == 0. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseShifterImm(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) { @@ -4152,8 +4145,8 @@ parseShifterImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// parseRotImm - Parse the shifter immediate operand for SXTB/UXTB family /// of instructions. Legal values are: /// ror #n 'n' in {0, 8, 16, 24} -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseRotImm(OperandVector &Operands) { const AsmToken &Tok = Parser.getTok(); SMLoc S = Tok.getLoc(); if (Tok.isNot(AsmToken::Identifier)) @@ -4198,8 +4191,8 @@ parseRotImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseBitfield(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); // The bitfield descriptor is really two operands, the LSB and the width. if (Parser.getTok().isNot(AsmToken::Hash) && @@ -4266,8 +4259,8 @@ parseBitfield(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parsePostIdxReg(OperandVector &Operands) { // Check for a post-index addressing register operand. 
Specifically: // postidx_reg := '+' register {, shift} // | '-' register {, shift} @@ -4315,8 +4308,8 @@ parsePostIdxReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseAM3Offset(OperandVector &Operands) { // Check for a post-index addressing register operand. Specifically: // am3offset := '+' register // | '-' register @@ -4388,26 +4381,24 @@ parseAM3Offset(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// Convert parsed operands to MCInst. Needed here because this instruction /// only has two register operands, but multiplication is commutative so /// assemblers should accept both "mul rD, rN, rD" and "mul rD, rD, rN". -void ARMAsmParser:: -cvtThumbMultiply(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { - ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); - ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1); +void ARMAsmParser::cvtThumbMultiply(MCInst &Inst, + const OperandVector &Operands) { + ((ARMOperand &)*Operands[3]).addRegOperands(Inst, 1); + ((ARMOperand &)*Operands[1]).addCCOutOperands(Inst, 1); // If we have a three-operand form, make sure to set Rn to be the operand // that isn't the same as Rd. unsigned RegOp = 4; if (Operands.size() == 6 && - ((ARMOperand*)Operands[4])->getReg() == - ((ARMOperand*)Operands[3])->getReg()) + ((ARMOperand &)*Operands[4]).getReg() == + ((ARMOperand &)*Operands[3]).getReg()) RegOp = 5; - ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1); + ((ARMOperand &)*Operands[RegOp]).addRegOperands(Inst, 1); Inst.addOperand(Inst.getOperand(0)); - ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); + ((ARMOperand &)*Operands[2]).addCondCodeOperands(Inst, 2); } -void ARMAsmParser:: -cvtThumbBranches(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +void ARMAsmParser::cvtThumbBranches(MCInst &Inst, + const OperandVector &Operands) { int CondOp = -1, ImmOp = -1; switch(Inst.getOpcode()) { case ARM::tB: @@ -4430,7 +4421,7 @@ cvtThumbBranches(MCInst &Inst, } else { // outside IT blocks we can only have unconditional branches with AL // condition code or conditional branches with non-AL condition code - unsigned Cond = static_cast<ARMOperand*>(Operands[CondOp])->getCondCode(); + unsigned Cond = static_cast<ARMOperand &>(*Operands[CondOp]).getCondCode(); switch(Inst.getOpcode()) { case ARM::tB: case ARM::tBcc: @@ -4447,27 +4438,26 @@ cvtThumbBranches(MCInst &Inst, switch(Inst.getOpcode()) { // classify tB as either t2B or t1B based on range of immediate operand case ARM::tB: { - ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]); - if(!op->isSignedOffset<11, 1>() && isThumbTwo()) + ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]); + if (!op.isSignedOffset<11, 1>() && isThumbTwo()) Inst.setOpcode(ARM::t2B); break; } // classify tBcc as either t2Bcc or t1Bcc based on range of immediate operand case ARM::tBcc: { - ARMOperand* op = static_cast<ARMOperand*>(Operands[ImmOp]); - if(!op->isSignedOffset<8, 1>() && isThumbTwo()) + ARMOperand &op = static_cast<ARMOperand &>(*Operands[ImmOp]); + if (!op.isSignedOffset<8, 1>() && isThumbTwo()) Inst.setOpcode(ARM::t2Bcc); break; } } - ((ARMOperand*)Operands[ImmOp])->addImmOperands(Inst, 1); - ((ARMOperand*)Operands[CondOp])->addCondCodeOperands(Inst, 2); + ((ARMOperand &)*Operands[ImmOp]).addImmOperands(Inst, 1); + ((ARMOperand 
&)*Operands[CondOp]).addCondCodeOperands(Inst, 2); } /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. -bool ARMAsmParser:: -parseMemory(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool ARMAsmParser::parseMemory(OperandVector &Operands) { SMLoc S, E; assert(Parser.getTok().is(AsmToken::LBrac) && "Token is not a Left Bracket"); @@ -4717,8 +4707,8 @@ bool ARMAsmParser::parseMemRegOffsetShift(ARM_AM::ShiftOpc &St, } /// parseFPImm - A floating point immediate expression operand. -ARMAsmParser::OperandMatchResultTy ARMAsmParser:: -parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +ARMAsmParser::OperandMatchResultTy +ARMAsmParser::parseFPImm(OperandVector &Operands) { // Anything that can accept a floating point constant as an operand // needs to go through here, as the regular parseExpression is // integer only. @@ -4744,12 +4734,12 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { // integer constant. Make sure we don't try to parse an FPImm // for these: // vmov.i{8|16|32|64} <dreg|qreg>, #imm - ARMOperand *TyOp = static_cast<ARMOperand*>(Operands[2]); - bool isVmovf = TyOp->isToken() && (TyOp->getToken() == ".f32" || - TyOp->getToken() == ".f64"); - ARMOperand *Mnemonic = static_cast<ARMOperand*>(Operands[0]); - bool isFconst = Mnemonic->isToken() && (Mnemonic->getToken() == "fconstd" || - Mnemonic->getToken() == "fconsts"); + ARMOperand &TyOp = static_cast<ARMOperand &>(*Operands[2]); + bool isVmovf = TyOp.isToken() && + (TyOp.getToken() == ".f32" || TyOp.getToken() == ".f64"); + ARMOperand &Mnemonic = static_cast<ARMOperand &>(*Operands[0]); + bool isFconst = Mnemonic.isToken() && (Mnemonic.getToken() == "fconstd" || + Mnemonic.getToken() == "fconsts"); if (!(isVmovf || isFconst)) return MatchOperand_NoMatch; @@ -4798,8 +4788,7 @@ parseFPImm(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { /// Parse a arm instruction operand. For now this parses the operand regardless /// of the mnemonic. -bool ARMAsmParser::parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Mnemonic) { +bool ARMAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { SMLoc S, E; // Check if the current operand has a custom associated parser, if so, try to @@ -5125,7 +5114,7 @@ getMnemonicAcceptInfo(StringRef Mnemonic, StringRef FullInst, } bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandVector &Operands) { // FIXME: This is all horribly hacky. We really need a better way to deal // with optional operands like this in the matcher table. @@ -5138,17 +5127,17 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // conditionally adding the cc_out in the first place because we need // to check the type of the parsed immediate operand. if (Mnemonic == "mov" && Operands.size() > 4 && !isThumb() && - !static_cast<ARMOperand*>(Operands[4])->isARMSOImm() && - static_cast<ARMOperand*>(Operands[4])->isImm0_65535Expr() && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + !static_cast<ARMOperand &>(*Operands[4]).isARMSOImm() && + static_cast<ARMOperand &>(*Operands[4]).isImm0_65535Expr() && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0) return true; // Register-register 'add' for thumb does not have a cc_out operand // when there are only two register operands. 
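// (For example, plain `add r4, r12` in Thumb mode: the two-operand
// register-register form is the high-register encoding, which does not set
// flags, so no cc_out operand should have been inserted for it in the first
// place.)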
if (isThumb() && Mnemonic == "add" && Operands.size() == 5 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0) + static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).isReg() && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0) return true; // Register-register 'add' for thumb does not have a cc_out operand // when it's an ADD Rdm, SP, {Rdm|#imm0_255} instruction. We do @@ -5156,13 +5145,12 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // that can handle a different range and has a cc_out operand. if (((isThumb() && Mnemonic == "add") || (isThumbTwo() && Mnemonic == "sub")) && - Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::SP && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - ((Mnemonic == "add" &&static_cast<ARMOperand*>(Operands[5])->isReg()) || - static_cast<ARMOperand*>(Operands[5])->isImm0_1020s4())) + Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::SP && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 && + ((Mnemonic == "add" && static_cast<ARMOperand &>(*Operands[5]).isReg()) || + static_cast<ARMOperand &>(*Operands[5]).isImm0_1020s4())) return true; // For Thumb2, add/sub immediate does not have a cc_out operand for the // imm0_4095 variant. That's the least-preferred variant when @@ -5170,23 +5158,22 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // should remove the cc_out operand, we have to explicitly check that // it's not one of the other variants. Ugh. if (isThumbTwo() && (Mnemonic == "add" || Mnemonic == "sub") && - Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[5])->isImm()) { + Operands.size() == 6 && static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).isReg() && + static_cast<ARMOperand &>(*Operands[5]).isImm()) { // Nest conditions rather than one big 'if' statement for readability. // // If both registers are low, we're in an IT block, and the immediate is // in range, we should use encoding T1 instead, which has a cc_out. if (inITBlock() && - isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) && - isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) && - static_cast<ARMOperand*>(Operands[5])->isImm0_7()) + isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) && + isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) && + static_cast<ARMOperand &>(*Operands[5]).isImm0_7()) return false; // Check against T3. If the second register is the PC, this is an // alternate form of ADR, which uses encoding T4, so check for that too. 
- if (static_cast<ARMOperand*>(Operands[4])->getReg() != ARM::PC && - static_cast<ARMOperand*>(Operands[5])->isT2SOImm()) + if (static_cast<ARMOperand &>(*Operands[4]).getReg() != ARM::PC && + static_cast<ARMOperand &>(*Operands[5]).isT2SOImm()) return false; // Otherwise, we use encoding T4, which does not have a cc_out @@ -5198,35 +5185,34 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // if we have a "mul" mnemonic in Thumb mode, check if we'll be able to // use the 16-bit encoding or not. if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[5])->isReg() && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 && + static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).isReg() && + static_cast<ARMOperand &>(*Operands[5]).isReg() && // If the registers aren't low regs, the destination reg isn't the // same as one of the source regs, or the cc_out operand is zero // outside of an IT block, we have to use the 32-bit encoding, so // remove the cc_out operand. - (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || - !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) || - !isARMLowRegister(static_cast<ARMOperand*>(Operands[5])->getReg()) || - !inITBlock() || - (static_cast<ARMOperand*>(Operands[3])->getReg() != - static_cast<ARMOperand*>(Operands[5])->getReg() && - static_cast<ARMOperand*>(Operands[3])->getReg() != - static_cast<ARMOperand*>(Operands[4])->getReg()))) + (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) || + !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) || + !isARMLowRegister(static_cast<ARMOperand &>(*Operands[5]).getReg()) || + !inITBlock() || (static_cast<ARMOperand &>(*Operands[3]).getReg() != + static_cast<ARMOperand &>(*Operands[5]).getReg() && + static_cast<ARMOperand &>(*Operands[3]).getReg() != + static_cast<ARMOperand &>(*Operands[4]).getReg()))) return true; // Also check the 'mul' syntax variant that doesn't specify an explicit // destination register. if (isThumbTwo() && Mnemonic == "mul" && Operands.size() == 5 && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[4])->isReg() && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 && + static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).isReg() && // If the registers aren't low regs or the cc_out operand is zero // outside of an IT block, we have to use the 32-bit encoding, so // remove the cc_out operand. - (!isARMLowRegister(static_cast<ARMOperand*>(Operands[3])->getReg()) || - !isARMLowRegister(static_cast<ARMOperand*>(Operands[4])->getReg()) || + (!isARMLowRegister(static_cast<ARMOperand &>(*Operands[3]).getReg()) || + !isARMLowRegister(static_cast<ARMOperand &>(*Operands[4]).getReg()) || !inITBlock())) return true; @@ -5239,32 +5225,32 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, // anyway. 
if (isThumb() && (Mnemonic == "add" || Mnemonic == "sub") && (Operands.size() == 5 || Operands.size() == 6) && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::SP && - static_cast<ARMOperand*>(Operands[1])->getReg() == 0 && - (static_cast<ARMOperand*>(Operands[4])->isImm() || + static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::SP && + static_cast<ARMOperand &>(*Operands[1]).getReg() == 0 && + (static_cast<ARMOperand &>(*Operands[4]).isImm() || (Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[5])->isImm()))) + static_cast<ARMOperand &>(*Operands[5]).isImm()))) return true; return false; } -bool ARMAsmParser::shouldOmitPredicateOperand( - StringRef Mnemonic, SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +bool ARMAsmParser::shouldOmitPredicateOperand(StringRef Mnemonic, + OperandVector &Operands) { // VRINT{Z, R, X} have a predicate operand in VFP, but not in NEON unsigned RegIdx = 3; if ((Mnemonic == "vrintz" || Mnemonic == "vrintx" || Mnemonic == "vrintr") && - static_cast<ARMOperand *>(Operands[2])->getToken() == ".f32") { - if (static_cast<ARMOperand *>(Operands[3])->isToken() && - static_cast<ARMOperand *>(Operands[3])->getToken() == ".f32") + static_cast<ARMOperand &>(*Operands[2]).getToken() == ".f32") { + if (static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == ".f32") RegIdx = 4; - if (static_cast<ARMOperand *>(Operands[RegIdx])->isReg() && - (ARMMCRegisterClasses[ARM::DPRRegClassID] - .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()) || - ARMMCRegisterClasses[ARM::QPRRegClassID] - .contains(static_cast<ARMOperand *>(Operands[RegIdx])->getReg()))) + if (static_cast<ARMOperand &>(*Operands[RegIdx]).isReg() && + (ARMMCRegisterClasses[ARM::DPRRegClassID].contains( + static_cast<ARMOperand &>(*Operands[RegIdx]).getReg()) || + ARMMCRegisterClasses[ARM::QPRRegClassID].contains( + static_cast<ARMOperand &>(*Operands[RegIdx]).getReg()))) return true; } return false; @@ -5309,8 +5295,7 @@ static bool RequiresVFPRegListValidation(StringRef Inst, /// Parse an arm instruction mnemonic followed by its operands. bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + SMLoc NameLoc, OperandVector &Operands) { // FIXME: Can this be done via tablegen in some fashion? bool RequireVFPRegisterListCheck; bool AcceptSinglePrecisionOnly; @@ -5489,12 +5474,12 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Parser.Lex(); // Consume the EndOfStatement if (RequireVFPRegisterListCheck) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands.back()); - if (AcceptSinglePrecisionOnly && !Op->isSPRRegList()) - return Error(Op->getStartLoc(), + ARMOperand &Op = static_cast<ARMOperand &>(*Operands.back()); + if (AcceptSinglePrecisionOnly && !Op.isSPRRegList()) + return Error(Op.getStartLoc(), "VFP/Neon single precision register expected"); - if (AcceptDoublePrecisionOnly && !Op->isDPRRegList()) - return Error(Op->getStartLoc(), + if (AcceptDoublePrecisionOnly && !Op.isDPRRegList()) + return Error(Op.getStartLoc(), "VFP/Neon double precision register expected"); } @@ -5505,20 +5490,14 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // try to remove a cc_out operand that was explicitly set on the the // mnemonic, of course (CarrySetting == true). 
Reason number #317 the // table driven matcher doesn't fit well with the ARM instruction set. - if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]); + if (!CarrySetting && shouldOmitCCOutOperand(Mnemonic, Operands)) Operands.erase(Operands.begin() + 1); - delete Op; - } // Some instructions have the same mnemonic, but don't always // have a predicate. Distinguish them here and delete the // predicate if needed. - if (shouldOmitPredicateOperand(Mnemonic, Operands)) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]); + if (shouldOmitPredicateOperand(Mnemonic, Operands)) Operands.erase(Operands.begin() + 1); - delete Op; - } // ARM mode 'blx' need special handling, as the register operand version // is predicable, but the label operand version is not. So, we can't rely @@ -5526,11 +5505,8 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // a k_CondCode operand in the list. If we're trying to match the label // version, remove the k_CondCode operand here. if (!isThumb() && Mnemonic == "blx" && Operands.size() == 3 && - static_cast<ARMOperand*>(Operands[2])->isImm()) { - ARMOperand *Op = static_cast<ARMOperand*>(Operands[1]); + static_cast<ARMOperand &>(*Operands[2]).isImm()) Operands.erase(Operands.begin() + 1); - delete Op; - } // Adjust operands of ldrexd/strexd to MCK_GPRPair. // ldrexd/strexd require even/odd GPR pair. To enforce this constraint, @@ -5543,53 +5519,50 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, Mnemonic == "stlexd")) { bool isLoad = (Mnemonic == "ldrexd" || Mnemonic == "ldaexd"); unsigned Idx = isLoad ? 2 : 3; - ARMOperand* Op1 = static_cast<ARMOperand*>(Operands[Idx]); - ARMOperand* Op2 = static_cast<ARMOperand*>(Operands[Idx+1]); + ARMOperand &Op1 = static_cast<ARMOperand &>(*Operands[Idx]); + ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[Idx + 1]); const MCRegisterClass& MRC = MRI->getRegClass(ARM::GPRRegClassID); // Adjust only if Op1 and Op2 are GPRs. - if (Op1->isReg() && Op2->isReg() && MRC.contains(Op1->getReg()) && - MRC.contains(Op2->getReg())) { - unsigned Reg1 = Op1->getReg(); - unsigned Reg2 = Op2->getReg(); + if (Op1.isReg() && Op2.isReg() && MRC.contains(Op1.getReg()) && + MRC.contains(Op2.getReg())) { + unsigned Reg1 = Op1.getReg(); + unsigned Reg2 = Op2.getReg(); unsigned Rt = MRI->getEncodingValue(Reg1); unsigned Rt2 = MRI->getEncodingValue(Reg2); // Rt2 must be Rt + 1 and Rt must be even. if (Rt + 1 != Rt2 || (Rt & 1)) { - Error(Op2->getStartLoc(), isLoad ? - "destination operands must be sequential" : - "source operands must be sequential"); + Error(Op2.getStartLoc(), isLoad + ? 
"destination operands must be sequential" + : "source operands must be sequential"); return true; } unsigned NewReg = MRI->getMatchingSuperReg(Reg1, ARM::gsub_0, &(MRI->getRegClass(ARM::GPRPairRegClassID))); - Operands.erase(Operands.begin() + Idx, Operands.begin() + Idx + 2); - Operands.insert(Operands.begin() + Idx, ARMOperand::CreateReg( - NewReg, Op1->getStartLoc(), Op2->getEndLoc())); - delete Op1; - delete Op2; + Operands[Idx] = + ARMOperand::CreateReg(NewReg, Op1.getStartLoc(), Op2.getEndLoc()); + Operands.erase(Operands.begin() + Idx + 1); } } // GNU Assembler extension (compatibility) if ((Mnemonic == "ldrd" || Mnemonic == "strd")) { - ARMOperand *Op2 = static_cast<ARMOperand *>(Operands[2]); - ARMOperand *Op3 = static_cast<ARMOperand *>(Operands[3]); - if (Op3->isMem()) { - assert(Op2->isReg() && "expected register argument"); + ARMOperand &Op2 = static_cast<ARMOperand &>(*Operands[2]); + ARMOperand &Op3 = static_cast<ARMOperand &>(*Operands[3]); + if (Op3.isMem()) { + assert(Op2.isReg() && "expected register argument"); unsigned SuperReg = MRI->getMatchingSuperReg( - Op2->getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID)); + Op2.getReg(), ARM::gsub_0, &MRI->getRegClass(ARM::GPRPairRegClassID)); assert(SuperReg && "expected register pair"); unsigned PairedReg = MRI->getSubReg(SuperReg, ARM::gsub_1); - Operands.insert(Operands.begin() + 3, - ARMOperand::CreateReg(PairedReg, - Op2->getStartLoc(), - Op2->getEndLoc())); + Operands.insert( + Operands.begin() + 3, + ARMOperand::CreateReg(PairedReg, Op2.getStartLoc(), Op2.getEndLoc())); } } @@ -5599,19 +5572,13 @@ bool ARMAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, // so the Mnemonic is the original name "subs" and delete the predicate // operand so it will match the table entry. if (isThumbTwo() && Mnemonic == "sub" && Operands.size() == 6 && - static_cast<ARMOperand*>(Operands[3])->isReg() && - static_cast<ARMOperand*>(Operands[3])->getReg() == ARM::PC && - static_cast<ARMOperand*>(Operands[4])->isReg() && - static_cast<ARMOperand*>(Operands[4])->getReg() == ARM::LR && - static_cast<ARMOperand*>(Operands[5])->isImm()) { - ARMOperand *Op0 = static_cast<ARMOperand*>(Operands[0]); - Operands.erase(Operands.begin()); - delete Op0; - Operands.insert(Operands.begin(), ARMOperand::CreateToken(Name, NameLoc)); - - ARMOperand *Op1 = static_cast<ARMOperand*>(Operands[1]); + static_cast<ARMOperand &>(*Operands[3]).isReg() && + static_cast<ARMOperand &>(*Operands[3]).getReg() == ARM::PC && + static_cast<ARMOperand &>(*Operands[4]).isReg() && + static_cast<ARMOperand &>(*Operands[4]).getReg() == ARM::LR && + static_cast<ARMOperand &>(*Operands[5]).isImm()) { + Operands.front() = ARMOperand::CreateToken(Name, NameLoc); Operands.erase(Operands.begin() + 1); - delete Op1; } return false; } @@ -5657,9 +5624,8 @@ static bool instIsBreakpoint(const MCInst &Inst) { } // FIXME: We would really like to be able to tablegen'erate this. -bool ARMAsmParser:: -validateInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool ARMAsmParser::validateInstruction(MCInst &Inst, + const OperandVector &Operands) { const MCInstrDesc &MCID = MII.get(Inst.getOpcode()); SMLoc Loc = Operands[0]->getStartLoc(); @@ -5682,7 +5648,7 @@ validateInstruction(MCInst &Inst, // Find the condition code Operand to get its SMLoc information. 
SMLoc CondLoc; for (unsigned I = 1; I < Operands.size(); ++I) - if (static_cast<ARMOperand*>(Operands[I])->isCondCode()) + if (static_cast<ARMOperand &>(*Operands[I]).isCondCode()) CondLoc = Operands[I]->getStartLoc(); return Error(CondLoc, "incorrect condition in IT block; got '" + StringRef(ARMCondCodeToString(ARMCC::CondCodes(Cond))) + @@ -5782,8 +5748,8 @@ validateInstruction(MCInst &Inst, // in the register list. unsigned Rn = Inst.getOperand(0).getReg(); bool HasWritebackToken = - (static_cast<ARMOperand*>(Operands[3])->isToken() && - static_cast<ARMOperand*>(Operands[3])->getToken() == "!"); + (static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == "!"); bool ListContainsBase; if (checkLowRegisterList(Inst, 3, Rn, 0, ListContainsBase) && !isThumbTwo()) return Error(Operands[3 + HasWritebackToken]->getStartLoc(), @@ -5843,11 +5809,10 @@ validateInstruction(MCInst &Inst, // this first statement is always true for the new Inst. Essentially, the // destination is unconditionally copied into the second source operand // without checking to see if it matches what we actually parsed. - if (Operands.size() == 6 && - (((ARMOperand*)Operands[3])->getReg() != - ((ARMOperand*)Operands[5])->getReg()) && - (((ARMOperand*)Operands[3])->getReg() != - ((ARMOperand*)Operands[4])->getReg())) { + if (Operands.size() == 6 && (((ARMOperand &)*Operands[3]).getReg() != + ((ARMOperand &)*Operands[5]).getReg()) && + (((ARMOperand &)*Operands[3]).getReg() != + ((ARMOperand &)*Operands[4]).getReg())) { return Error(Operands[3]->getStartLoc(), "destination register must match source register"); } @@ -5900,23 +5865,23 @@ validateInstruction(MCInst &Inst, } // Final range checking for Thumb unconditional branch instructions. case ARM::tB: - if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<11, 1>()) + if (!(static_cast<ARMOperand &>(*Operands[2])).isSignedOffset<11, 1>()) return Error(Operands[2]->getStartLoc(), "branch target out of range"); break; case ARM::t2B: { int op = (Operands[2]->isImm()) ? 2 : 3; - if (!(static_cast<ARMOperand*>(Operands[op]))->isSignedOffset<24, 1>()) + if (!static_cast<ARMOperand &>(*Operands[op]).isSignedOffset<24, 1>()) return Error(Operands[op]->getStartLoc(), "branch target out of range"); break; } // Final range checking for Thumb conditional branch instructions. case ARM::tBcc: - if (!(static_cast<ARMOperand*>(Operands[2]))->isSignedOffset<8, 1>()) + if (!static_cast<ARMOperand &>(*Operands[2]).isSignedOffset<8, 1>()) return Error(Operands[2]->getStartLoc(), "branch target out of range"); break; case ARM::t2Bcc: { int Op = (Operands[2]->isImm()) ? 2 : 3; - if (!(static_cast<ARMOperand*>(Operands[Op]))->isSignedOffset<20, 1>()) + if (!static_cast<ARMOperand &>(*Operands[Op]).isSignedOffset<20, 1>()) return Error(Operands[Op]->getStartLoc(), "branch target out of range"); break; } @@ -5931,19 +5896,19 @@ validateInstruction(MCInst &Inst, // lead to bugs that are difficult to find since this is an easy mistake // to make. int i = (Operands[3]->isImm()) ? 
3 : 4; - ARMOperand *Op = static_cast<ARMOperand*>(Operands[i]); - const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm()); + ARMOperand &Op = static_cast<ARMOperand &>(*Operands[i]); + const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm()); if (CE) break; - const MCExpr *E = dyn_cast<MCExpr>(Op->getImm()); + const MCExpr *E = dyn_cast<MCExpr>(Op.getImm()); if (!E) break; const ARMMCExpr *ARM16Expr = dyn_cast<ARMMCExpr>(E); if (!ARM16Expr || (ARM16Expr->getKind() != ARMMCExpr::VK_ARM_HI16 && - ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)) { - return Error(Op->getStartLoc(), - "immediate expression for mov requires :lower16: or :upper16"); - break; - } - } + ARM16Expr->getKind() != ARMMCExpr::VK_ARM_LO16)) + return Error( + Op.getStartLoc(), + "immediate expression for mov requires :lower16: or :upper16"); + break; + } } return false; @@ -6205,9 +6170,8 @@ static unsigned getRealVLDOpcode(unsigned Opc, unsigned &Spacing) { } } -bool ARMAsmParser:: -processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool ARMAsmParser::processInstruction(MCInst &Inst, + const OperandVector &Operands) { switch (Inst.getOpcode()) { // Alias for alternate form of 'ldr{,b}t Rt, [Rn], #imm' instruction. case ARM::LDRT_POST: @@ -6264,8 +6228,8 @@ processInstruction(MCInst &Inst, // Select the narrow version if the immediate will fit. if (Inst.getOperand(1).getImm() > 0 && Inst.getOperand(1).getImm() <= 0xff && - !(static_cast<ARMOperand*>(Operands[2])->isToken() && - static_cast<ARMOperand*>(Operands[2])->getToken() == ".w")) + !(static_cast<ARMOperand &>(*Operands[2]).isToken() && + static_cast<ARMOperand &>(*Operands[2]).getToken() == ".w")) Inst.setOpcode(ARM::tLDRpci); else Inst.setOpcode(ARM::t2LDRpci); @@ -7355,8 +7319,8 @@ processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && Inst.getOperand(5).getReg() == (inITBlock() ? 0 : ARM::CPSR) && - !(static_cast<ARMOperand*>(Operands[3])->isToken() && - static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) { + !(static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -7559,7 +7523,7 @@ processInstruction(MCInst &Inst, case ARM::LDMIA_UPD: // If this is a load of a single register via a 'pop', then we should use // a post-indexed LDR instruction instead, per the ARM ARM. - if (static_cast<ARMOperand*>(Operands[0])->getToken() == "pop" && + if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "pop" && Inst.getNumOperands() == 5) { MCInst TmpInst; TmpInst.setOpcode(ARM::LDR_POST_IMM); @@ -7577,7 +7541,7 @@ processInstruction(MCInst &Inst, case ARM::STMDB_UPD: // If this is a store of a single register via a 'push', then we should use // a pre-indexed STR instruction instead, per the ARM ARM. - if (static_cast<ARMOperand*>(Operands[0])->getToken() == "push" && + if (static_cast<ARMOperand &>(*Operands[0]).getToken() == "push" && Inst.getNumOperands() == 5) { MCInst TmpInst; TmpInst.setOpcode(ARM::STR_PRE_IMM); @@ -7593,7 +7557,7 @@ processInstruction(MCInst &Inst, case ARM::t2ADDri12: // If the immediate fits for encoding T3 (t2ADDri) and the generic "add" // mnemonic was used (not "addw"), encoding T3 is preferred. 
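// --- Illustrative sketch: Thumb2 "modified immediate" fit test ----------------
// The t2ADDri12/t2SUBri12 conversions just below hinge on
// ARM_AM::getT2SOImmVal(Imm) != -1, i.e. on whether Imm is representable as a
// Thumb2 modified immediate: 0x000000XY, 0x00XY00XY, 0xXY00XY00, 0xXYXYXYXY,
// or an 8-bit value with its top bit set rotated right by 8..31. A simplified
// standalone checker for that rule; this models the encoding and is not LLVM's
// implementation:
#include <cstdint>

static bool isT2ModifiedImm(uint32_t V) {
  uint32_t B = V & 0xFF;
  if (V <= 0xFF) return true;                                   // 0x000000XY
  if (V == (B | (B << 16))) return true;                        // 0x00XY00XY
  uint32_t B8 = (V >> 8) & 0xFF;
  if (V == ((B8 << 8) | (B8 << 24))) return true;               // 0xXY00XY00
  if (V == (B | (B << 8) | (B << 16) | (B << 24))) return true; // 0xXYXYXYXY
  for (unsigned R = 8; R <= 31; ++R) {
    uint32_t Unrot = (V << R) | (V >> (32 - R)); // undo a rotate-right by R
    if (Unrot <= 0xFF && (Unrot & 0x80)) return true;
  }
  return false;
}
// --- end sketch ----------------------------------------------------------------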
- if (static_cast<ARMOperand*>(Operands[0])->getToken() != "add" || + if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "add" || ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) break; Inst.setOpcode(ARM::t2ADDri); @@ -7602,7 +7566,7 @@ processInstruction(MCInst &Inst, case ARM::t2SUBri12: // If the immediate fits for encoding T3 (t2SUBri) and the generic "sub" // mnemonic was used (not "subw"), encoding T3 is preferred. - if (static_cast<ARMOperand*>(Operands[0])->getToken() != "sub" || + if (static_cast<ARMOperand &>(*Operands[0]).getToken() != "sub" || ARM_AM::getT2SOImmVal(Inst.getOperand(2).getImm()) == -1) break; Inst.setOpcode(ARM::t2SUBri); @@ -7638,9 +7602,9 @@ processInstruction(MCInst &Inst, !isARMLowRegister(Inst.getOperand(0).getReg()) || (unsigned)Inst.getOperand(2).getImm() > 255 || ((!inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != 0)) || - (static_cast<ARMOperand*>(Operands[3])->isToken() && - static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) + (inITBlock() && Inst.getOperand(5).getReg() != 0)) || + (static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) break; MCInst TmpInst; TmpInst.setOpcode(Inst.getOpcode() == ARM::t2ADDri ? @@ -7661,8 +7625,8 @@ processInstruction(MCInst &Inst, // 'as' behaviour. Make sure the wide encoding wasn't explicit. if (Inst.getOperand(0).getReg() != Inst.getOperand(1).getReg() || Inst.getOperand(5).getReg() != 0 || - (static_cast<ARMOperand*>(Operands[3])->isToken() && - static_cast<ARMOperand*>(Operands[3])->getToken() == ".w")) + (static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == ".w")) break; MCInst TmpInst; TmpInst.setOpcode(ARM::tADDhirr); @@ -7719,8 +7683,8 @@ processInstruction(MCInst &Inst, // an error in validateInstruction(). unsigned Rn = Inst.getOperand(0).getReg(); bool hasWritebackToken = - (static_cast<ARMOperand*>(Operands[3])->isToken() && - static_cast<ARMOperand*>(Operands[3])->getToken() == "!"); + (static_cast<ARMOperand &>(*Operands[3]).isToken() && + static_cast<ARMOperand &>(*Operands[3]).getToken() == "!"); bool listContainsBase; if (checkLowRegisterList(Inst, 3, Rn, 0, listContainsBase) || (!listContainsBase && !hasWritebackToken) || @@ -7782,10 +7746,10 @@ processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && (unsigned)Inst.getOperand(1).getImm() <= 255 && ((!inITBlock() && Inst.getOperand(2).getImm() == ARMCC::AL && - Inst.getOperand(4).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(4).getReg() == 0)) && - (!static_cast<ARMOperand*>(Operands[2])->isToken() || - static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + Inst.getOperand(4).getReg() == ARM::CPSR) || + (inITBlock() && Inst.getOperand(4).getReg() == 0)) && + (!static_cast<ARMOperand &>(*Operands[2]).isToken() || + static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { // The operands aren't in the same order for tMOVi8... 
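// --- Illustrative sketch: choosing the narrow Thumb encoding ------------------
// Several conversions in this hunk share one shape: prefer the 16-bit Thumb
// encoding when the destination is a low register (r0-r7), the immediate fits
// in 8 bits, and the user did not force the wide form with a ".w" suffix. A
// standalone predicate with that shape (illustrative names, not LLVM's API):
#include <cstdint>
#include <string>

static bool isLowRegister(unsigned RegEncoding) {
  return RegEncoding < 8; // r0..r7 are the Thumb "low" registers
}

static bool preferNarrowEncoding(unsigned Rd, int64_t Imm,
                                 const std::string &WidthSuffix) {
  return isLowRegister(Rd) && Imm >= 0 && Imm <= 0xFF && WidthSuffix != ".w";
}
// --- end sketch ----------------------------------------------------------------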
MCInst TmpInst; TmpInst.setOpcode(ARM::tMOVi8); @@ -7806,8 +7770,8 @@ processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == ARMCC::AL && Inst.getOperand(4).getReg() == ARM::CPSR && - (!static_cast<ARMOperand*>(Operands[2])->isToken() || - static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + (!static_cast<ARMOperand &>(*Operands[2]).isToken() || + static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { // The operands aren't the same for tMOV[S]r... (no cc_out) MCInst TmpInst; TmpInst.setOpcode(Inst.getOperand(4).getReg() ? ARM::tMOVSr : ARM::tMOVr); @@ -7829,8 +7793,8 @@ processInstruction(MCInst &Inst, if (isARMLowRegister(Inst.getOperand(0).getReg()) && isARMLowRegister(Inst.getOperand(1).getReg()) && Inst.getOperand(2).getImm() == 0 && - (!static_cast<ARMOperand*>(Operands[2])->isToken() || - static_cast<ARMOperand*>(Operands[2])->getToken() != ".w")) { + (!static_cast<ARMOperand &>(*Operands[2]).isToken() || + static_cast<ARMOperand &>(*Operands[2]).getToken() != ".w")) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("Illegal opcode!"); @@ -7942,9 +7906,10 @@ processInstruction(MCInst &Inst, isARMLowRegister(Inst.getOperand(2).getReg())) && Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() && ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast<ARMOperand*>(Operands[3])->isToken() || - !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) { + (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && + (!static_cast<ARMOperand &>(*Operands[3]).isToken() || + !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower( + ".w"))) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -7981,9 +7946,10 @@ processInstruction(MCInst &Inst, (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg() || Inst.getOperand(0).getReg() == Inst.getOperand(2).getReg()) && ((!inITBlock() && Inst.getOperand(5).getReg() == ARM::CPSR) || - (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && - (!static_cast<ARMOperand*>(Operands[3])->isToken() || - !static_cast<ARMOperand*>(Operands[3])->getToken().equals_lower(".w"))) { + (inITBlock() && Inst.getOperand(5).getReg() != ARM::CPSR)) && + (!static_cast<ARMOperand &>(*Operands[3]).isToken() || + !static_cast<ARMOperand &>(*Operands[3]).getToken().equals_lower( + ".w"))) { unsigned NewOpc; switch (Inst.getOpcode()) { default: llvm_unreachable("unexpected opcode"); @@ -8063,11 +8029,10 @@ template <> inline bool IsCPSRDead<MCInst>(MCInst *Instr) { } static const char *getSubtargetFeatureName(unsigned Val); -bool ARMAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) { +bool ARMAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; unsigned MatchResult; @@ -8136,7 +8101,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); + ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } @@ -8144,7 +8109,7 @@ MatchAndEmitInstruction(SMLoc 
IDLoc, unsigned &Opcode, } case Match_MnemonicFail: return Error(IDLoc, "invalid instruction", - ((ARMOperand*)Operands[0])->getLocRange()); + ((ARMOperand &)*Operands[0]).getLocRange()); case Match_RequiresNotITBlock: return Error(IDLoc, "flag setting instruction only valid outside IT block"); case Match_RequiresITBlock: @@ -8154,12 +8119,12 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_RequiresThumb2: return Error(IDLoc, "instruction variant requires Thumb2"); case Match_ImmRange0_15: { - SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); + SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; return Error(ErrorLoc, "immediate operand must be in the range [0,15]"); } case Match_ImmRange0_239: { - SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getStartLoc(); + SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; return Error(ErrorLoc, "immediate operand must be in the range [0,239]"); } @@ -8175,7 +8140,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_DupAlignedMemoryRequires64or128: case Match_AlignedMemoryRequires64or128or256: { - SMLoc ErrorLoc = ((ARMOperand*)Operands[ErrorInfo])->getAlignmentLoc(); + SMLoc ErrorLoc = ((ARMOperand &)*Operands[ErrorInfo]).getAlignmentLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; switch (MatchResult) { default: @@ -8923,28 +8888,22 @@ bool ARMAsmParser::parseDirectiveRegSave(SMLoc L, bool IsVector) { } // RAII object to make sure parsed operands are deleted. - struct CleanupObject { - SmallVector<MCParsedAsmOperand *, 1> Operands; - ~CleanupObject() { - for (unsigned I = 0, E = Operands.size(); I != E; ++I) - delete Operands[I]; - } - } CO; + SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands; // Parse the register list - if (parseRegisterList(CO.Operands)) + if (parseRegisterList(Operands)) return false; - ARMOperand *Op = (ARMOperand*)CO.Operands[0]; - if (!IsVector && !Op->isRegList()) { + ARMOperand &Op = (ARMOperand &)*Operands[0]; + if (!IsVector && !Op.isRegList()) { Error(L, ".save expects GPR registers"); return false; } - if (IsVector && !Op->isDPRRegList()) { + if (IsVector && !Op.isDPRRegList()) { Error(L, ".vsave expects DPR registers"); return false; } - getTargetStreamer().emitRegSave(Op->getRegList(), IsVector); + getTargetStreamer().emitRegSave(Op.getRegList(), IsVector); return false; } @@ -9468,23 +9427,23 @@ bool ARMAsmParser::parseDirectiveArchExtension(SMLoc L) { // Define this matcher function after the auto-generated include so we // have the match class enum definitions. -unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, +unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, unsigned Kind) { - ARMOperand *Op = static_cast<ARMOperand*>(AsmOp); + ARMOperand &Op = static_cast<ARMOperand &>(AsmOp); // If the kind is a token for a literal immediate, check if our asm // operand matches. This is for InstAliases which have a fixed-value // immediate in the syntax. 
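// --- Illustrative sketch: matching a fixed-value operand class ----------------
// In the switch that follows, MCK__35_0 is the auto-generated class name for
// the literal token "#0" ('#' is ASCII 35, which tablegen mangles to _35_).
// The check reduces to "is this an immediate whose constant value is 0". The
// same logic over a toy operand type (not LLVM's MCParsedAsmOperand):
#include <cstdint>

enum ToyMatchResult { Toy_Match_Success, Toy_Match_InvalidOperand };

struct ToyOperand {
  bool IsImm;
  int64_t Imm;
};

static ToyMatchResult matchLiteralZero(const ToyOperand &Op) {
  return (Op.IsImm && Op.Imm == 0) ? Toy_Match_Success
                                   : Toy_Match_InvalidOperand;
}
// --- end sketch ----------------------------------------------------------------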
switch (Kind) { default: break; case MCK__35_0: - if (Op->isImm()) - if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op->getImm())) + if (Op.isImm()) + if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Op.getImm())) if (CE->getValue() == 0) return Match_Success; break; case MCK_ARMSOImm: - if (Op->isImm()) { - const MCExpr *SOExpr = Op->getImm(); + if (Op.isImm()) { + const MCExpr *SOExpr = Op.getImm(); int64_t Value; if (!SOExpr->EvaluateAsAbsolute(Value)) return Match_Success; @@ -9493,8 +9452,8 @@ unsigned ARMAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, } break; case MCK_GPRPair: - if (Op->isReg() && - MRI->getRegClass(ARM::GPRRegClassID).contains(Op->getReg())) + if (Op.isReg() && + MRI->getRegClass(ARM::GPRRegClassID).contains(Op.getReg())) return Match_Success; break; } diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index e4b785d..228fb57 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1092,13 +1092,13 @@ void ARMInstPrinter::printAddrModeImm12Operand(const MCInst *MI, unsigned OpNum, if (isSub) { O << ", " << markup("<imm:") - << "#-" << -OffImm + << "#-" << formatImm(-OffImm) << markup(">"); } else if (AlwaysPrintImm0 || OffImm > 0) { O << ", " << markup("<imm:") - << "#" << OffImm + << "#" << formatImm(OffImm) << markup(">"); } O << "]" << markup(">"); diff --git a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h index 42a1cbb..1686d76 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h +++ b/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h @@ -295,7 +295,12 @@ namespace ARMII { /// MO_OPTION_MASK - Most flags are mutually exclusive; this mask selects /// just that part of the flag set. - MO_OPTION_MASK = 0x7f, + MO_OPTION_MASK = 0x3f, + + /// MO_DLLIMPORT - On a symbol operand, this represents that the reference + /// to the symbol is for an import stub. This is used for DLL import + /// storage class indication on Windows. 
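// --- Illustrative sketch: carving an independent bit out of the option mask ---
// The enum entry this doc comment introduces continues in the hunk below:
// MO_OPTION_MASK shrinks from 0x7f to 0x3f so that bit 6 is freed for the new
// independent MO_DLLIMPORT flag. The layout, modeled standalone (MO_NONLAZY's
// value here is illustrative, not taken from the source):
enum ToyTargetFlags : unsigned {
  TOY_MO_OPTION_MASK = 0x3f, // mutually exclusive options: low six bits
  TOY_MO_DLLIMPORT   = 0x40, // independent bit reclaimed from the old mask
  TOY_MO_NONLAZY     = 0x80  // another independent bit (illustrative value)
};

static unsigned getOption(unsigned Flags) { return Flags & TOY_MO_OPTION_MASK; }
static bool isDLLImport(unsigned Flags) { return (Flags & TOY_MO_DLLIMPORT) != 0; }
// --- end sketch ----------------------------------------------------------------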
+    MO_DLLIMPORT = 0x40,

     /// MO_NONLAZY - This is an independent flag, on a symbol operand "FOO" it
     /// represents a symbol which, if indirect, will get special Darwin mangling
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index a4d13ed..7b5d8b0 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -992,7 +992,8 @@ void ARMTargetELFStreamer::emitLabel(MCSymbol *Symbol) {
     return;
   const MCSymbolData &SD = Streamer.getOrCreateSymbolData(Symbol);
-  if (MCELF::GetType(SD) & (ELF::STT_FUNC << ELF_STT_Shift))
+  unsigned Type = MCELF::GetType(SD);
+  if (Type == ELF_STT_Func || Type == ELF_STT_GnuIFunc)
     Streamer.EmitThumbFunc(Symbol);
 }
@@ -1160,7 +1161,7 @@ void ARMELFStreamer::EmitPersonalityFixup(StringRef Name) {
   const MCSymbolRefExpr *PersonalityRef = MCSymbolRefExpr::Create(
       PersonalitySym, MCSymbolRefExpr::VK_ARM_NONE, getContext());
-  AddValueSymbols(PersonalityRef);
+  visitUsedExpr(*PersonalityRef);
   MCDataFragment *DF = getOrCreateDataFragment();
   DF->getFixups().push_back(MCFixup::Create(DF->getContents().size(),
                                             PersonalityRef,
@@ -1332,6 +1333,12 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
   return S;
 }
+MCStreamer *createARMNullStreamer(MCContext &Ctx) {
+  MCStreamer *S = llvm::createNullStreamer(Ctx);
+  new ARMTargetStreamer(*S);
+  return S;
+}
+
 MCELFStreamer* createARMELFStreamer(MCContext &Context, MCAsmBackend &TAB,
                                     raw_ostream &OS, MCCodeEmitter *Emitter,
                                     bool RelaxAll, bool NoExecStack,
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
index 5b51a52..b8ee555 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp
@@ -1047,8 +1047,7 @@ ARMMCCodeEmitter::getHiLo16ImmOpValue(const MCInst &MI, unsigned OpIdx,
   // we have a movt or a movw, but that led to misleading results.
   // This is now disallowed in the AsmParser in validateInstruction()
   // so this should never happen.
-  assert(0 && "expression without :upper16: or :lower16:");
-  return 0;
+  llvm_unreachable("expression without :upper16: or :lower16:");
 }
 uint32_t ARMMCCodeEmitter::
diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
index 87ea875..e545e3c 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp
@@ -41,33 +41,6 @@ ARMMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
   return false;
 }
-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
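// --- Illustrative sketch: the expression walk behind visitUsedExpr ------------
// The FIXME above observed that ARMMCExpr duplicated a generic tree walk; the
// hunk that follows deletes that duplicate now that visitUsedExpr centralizes
// it in the streamer. The walk's shape over a toy expression tree (a model,
// not LLVM's MCExpr):
#include <vector>

struct ToyExpr {
  enum Kind { Constant, SymbolRef, Unary, Binary } K;
  const char *Symbol;       // valid when K == SymbolRef
  const ToyExpr *LHS, *RHS; // children for Unary (LHS only) and Binary
};

// Record every symbol the expression references, recursing into subtrees.
static void visitUsedSymbols(const ToyExpr *E, std::vector<const char *> &Out) {
  switch (E->K) {
  case ToyExpr::Constant: break;
  case ToyExpr::SymbolRef: Out.push_back(E->Symbol); break;
  case ToyExpr::Unary: visitUsedSymbols(E->LHS, Out); break;
  case ToyExpr::Binary:
    visitUsedSymbols(E->LHS, Out);
    visitUsedSymbols(E->RHS, Out);
    break;
  }
}
// --- end sketch ----------------------------------------------------------------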
-static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbols_(BE->getLHS(), Asm); - AddValueSymbols_(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); - break; - } -} - -void ARMMCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbols_(getSubExpr(), Asm); +void ARMMCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h index d819139..c5c0b10 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCExpr.h @@ -59,7 +59,7 @@ public: void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const override; - void AddValueSymbols(MCAssembler *) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp index 04d63a7..2b3855d 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp @@ -427,6 +427,12 @@ extern "C" void LLVMInitializeARMTargetMC() { TargetRegistry::RegisterAsmStreamer(TheThumbLETarget, createMCAsmStreamer); TargetRegistry::RegisterAsmStreamer(TheThumbBETarget, createMCAsmStreamer); + // Register the null streamer. + TargetRegistry::RegisterNullStreamer(TheARMLETarget, createARMNullStreamer); + TargetRegistry::RegisterNullStreamer(TheARMBETarget, createARMNullStreamer); + TargetRegistry::RegisterNullStreamer(TheThumbLETarget, createARMNullStreamer); + TargetRegistry::RegisterNullStreamer(TheThumbBETarget, createARMNullStreamer); + // Register the MCInstPrinter. 
TargetRegistry::RegisterMCInstPrinter(TheARMLETarget, createARMMCInstPrinter); TargetRegistry::RegisterMCInstPrinter(TheARMBETarget, createARMMCInstPrinter); diff --git a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h index 8853a8c..5326e56 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h +++ b/lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h @@ -51,6 +51,8 @@ MCStreamer *createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, MCInstPrinter *InstPrint, MCCodeEmitter *CE, MCAsmBackend *TAB, bool ShowInst); +MCStreamer *createARMNullStreamer(MCContext &Ctx); + MCCodeEmitter *createARMLEMCCodeEmitter(const MCInstrInfo &MCII, const MCRegisterInfo &MRI, const MCSubtargetInfo &STI, diff --git a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp index ecfa4e5..186776a 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMMachObjectWriter.cpp @@ -32,6 +32,7 @@ class ARMMachObjectWriter : public MCMachObjectTargetWriter { const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, + unsigned Type, unsigned Log2Size, uint64_t &FixedValue); void RecordARMScatteredHalfRelocation(MachObjectWriter *Writer, @@ -251,11 +252,11 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, const MCFragment *Fragment, const MCFixup &Fixup, MCValue Target, + unsigned Type, unsigned Log2Size, uint64_t &FixedValue) { uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); unsigned IsPCRel = Writer->isFixupKindPCRel(Asm, Fixup.getKind()); - unsigned Type = MachO::ARM_RELOC_VANILLA; // See <reloc.h>. const MCSymbol *A = &Target.getSymA()->getSymbol(); @@ -272,6 +273,7 @@ void ARMMachObjectWriter::RecordARMScatteredRelocation(MachObjectWriter *Writer, uint32_t Value2 = 0; if (const MCSymbolRefExpr *B = Target.getSymB()) { + assert(Type == MachO::ARM_RELOC_VANILLA && "invalid reloc for 2 symbols"); const MCSymbolData *B_SD = &Asm.getSymbolData(B->getSymbol()); if (!B_SD->getFragment()) @@ -374,7 +376,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, return RecordARMScatteredHalfRelocation(Writer, Asm, Layout, Fragment, Fixup, Target, FixedValue); return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + Target, RelocType, Log2Size, + FixedValue); } // Get the symbol data, if any. @@ -392,7 +395,8 @@ void ARMMachObjectWriter::RecordRelocation(MachObjectWriter *Writer, Offset += 1 << Log2Size; if (Offset && SD && !Writer->doesSymbolRequireExternRelocation(SD)) return RecordARMScatteredRelocation(Writer, Asm, Layout, Fragment, Fixup, - Target, Log2Size, FixedValue); + Target, RelocType, Log2Size, + FixedValue); // See <reloc.h>. 
uint32_t FixupOffset = Layout.getFragmentOffset(Fragment)+Fixup.getOffset(); diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp index e3cfb05..ad3f1ca 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp @@ -11,147 +11,12 @@ // //===----------------------------------------------------------------------===// #include "llvm/ADT/MapVector.h" +#include "llvm/MC/ConstantPools.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" using namespace llvm; - -namespace { -// A class to keep track of assembler-generated constant pools that are use to -// implement the ldr-pseudo. -class ConstantPool { - typedef SmallVector<std::pair<MCSymbol *, const MCExpr *>, 4> EntryVecTy; - EntryVecTy Entries; - -public: - // Initialize a new empty constant pool - ConstantPool() {} - - // Add a new entry to the constant pool in the next slot. - // \param Value is the new entry to put in the constant pool. - // - // \returns a MCExpr that references the newly inserted value - const MCExpr *addEntry(const MCExpr *Value, MCContext &Context); - - // Emit the contents of the constant pool using the provided streamer. - void emitEntries(MCStreamer &Streamer); - - // Return true if the constant pool is empty - bool empty(); -}; -} - -namespace llvm { -class AssemblerConstantPools { - // Map type used to keep track of per-Section constant pools used by the - // ldr-pseudo opcode. The map associates a section to its constant pool. The - // constant pool is a vector of (label, value) pairs. When the ldr - // pseudo is parsed we insert a new (label, value) pair into the constant pool - // for the current section and add MCSymbolRefExpr to the new label as - // an opcode to the ldr. After we have parsed all the user input we - // output the (label, value) pairs in each constant pool at the end of the - // section. - // - // We use the MapVector for the map type to ensure stable iteration of - // the sections at the end of the parse. We need to iterate over the - // sections in a stable order to ensure that we have print the - // constant pools in a deterministic order when printing an assembly - // file. - typedef MapVector<const MCSection *, ConstantPool> ConstantPoolMapTy; - ConstantPoolMapTy ConstantPools; - -public: - AssemblerConstantPools() {} - ~AssemblerConstantPools() {} - - void emitAll(MCStreamer &Streamer); - void emitForCurrentSection(MCStreamer &Streamer); - const MCExpr *addEntry(MCStreamer &Streamer, const MCExpr *Expr); - -private: - ConstantPool *getConstantPool(const MCSection *Section); - ConstantPool &getOrCreateConstantPool(const MCSection *Section); -}; -} - -// -// ConstantPool implementation -// -// Emit the contents of the constant pool using the provided streamer. 
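// --- Illustrative sketch: an ldr-pseudo constant pool -------------------------
// The comments in the deleted block above describe the mechanism: addEntry
// stores a (label, value) pair and hands back a label for the ldr to
// reference, and the pool is flushed at the end of the section. The deleted
// implementation follows in the next hunk (it now lives behind
// llvm/MC/ConstantPools.h); here is the same mechanism as a self-contained
// toy, with a callback standing in for the streamer:
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

class ToyConstantPool {
  std::vector<std::pair<std::string, uint32_t>> Entries;
  unsigned NextLabel = 0;

public:
  // Add a value; the returned label is what the ldr-pseudo loads from.
  std::string addEntry(uint32_t Value) {
    std::string Label = ".Lcp" + std::to_string(NextLabel++);
    Entries.emplace_back(Label, Value);
    return Label;
  }

  // Emit every pending (label, 4-byte value) pair, then clear the pool.
  template <typename EmitFn> void emitEntries(EmitFn &&Emit) {
    for (const auto &E : Entries)
      Emit(E.first, E.second);
    Entries.clear();
  }

  bool empty() const { return Entries.empty(); }
};
// --- end sketch ----------------------------------------------------------------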
-void ConstantPool::emitEntries(MCStreamer &Streamer) {
-  if (Entries.empty())
-    return;
-  Streamer.EmitCodeAlignment(4); // align to 4-byte address
-  Streamer.EmitDataRegion(MCDR_DataRegion);
-  for (EntryVecTy::const_iterator I = Entries.begin(), E = Entries.end();
-       I != E; ++I) {
-    Streamer.EmitLabel(I->first);
-    Streamer.EmitValue(I->second, 4);
-  }
-  Streamer.EmitDataRegion(MCDR_DataRegionEnd);
-  Entries.clear();
-}
-
-const MCExpr *ConstantPool::addEntry(const MCExpr *Value, MCContext &Context) {
-  MCSymbol *CPEntryLabel = Context.CreateTempSymbol();
-
-  Entries.push_back(std::make_pair(CPEntryLabel, Value));
-  return MCSymbolRefExpr::Create(CPEntryLabel, Context);
-}
-
-bool ConstantPool::empty() { return Entries.empty(); }
-
-//
-// AssemblerConstantPools implementation
-//
-ConstantPool *
-AssemblerConstantPools::getConstantPool(const MCSection *Section) {
-  ConstantPoolMapTy::iterator CP = ConstantPools.find(Section);
-  if (CP == ConstantPools.end())
-    return nullptr;
-
-  return &CP->second;
-}
-
-ConstantPool &
-AssemblerConstantPools::getOrCreateConstantPool(const MCSection *Section) {
-  return ConstantPools[Section];
-}
-
-static void emitConstantPool(MCStreamer &Streamer, const MCSection *Section,
-                             ConstantPool &CP) {
-  if (!CP.empty()) {
-    Streamer.SwitchSection(Section);
-    CP.emitEntries(Streamer);
-  }
-}
-
-void AssemblerConstantPools::emitAll(MCStreamer &Streamer) {
-  // Dump contents of assembler constant pools.
-  for (ConstantPoolMapTy::iterator CPI = ConstantPools.begin(),
-                                   CPE = ConstantPools.end();
-       CPI != CPE; ++CPI) {
-    const MCSection *Section = CPI->first;
-    ConstantPool &CP = CPI->second;
-
-    emitConstantPool(Streamer, Section, CP);
-  }
-}
-
-void AssemblerConstantPools::emitForCurrentSection(MCStreamer &Streamer) {
-  const MCSection *Section = Streamer.getCurrentSection().first;
-  if (ConstantPool *CP = getConstantPool(Section)) {
-    emitConstantPool(Streamer, Section, *CP);
-  }
-}
-
-const MCExpr *AssemblerConstantPools::addEntry(MCStreamer &Streamer,
-                                               const MCExpr *Expr) {
-  const MCSection *Section = Streamer.getCurrentSection().first;
-  return getOrCreateConstantPool(Section).addEntry(Expr, Streamer.getContext());
-}
-
 //
 // ARMTargetStreamer Implementation
 //
@@ -175,78 +40,34 @@ void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
 // The remaining callbacks should be handled separately by each
 // streamer.
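// --- Illustrative sketch: no-op hooks instead of aborting stubs ---------------
// The hunk below turns every ARMTargetStreamer callback from an
// llvm_unreachable trap into an empty default. That is what lets a generic
// consumer, such as the newly registered null streamer, drive the interface
// without every concrete streamer overriding each hook. The pattern,
// standalone:
struct ToyTargetStreamer {
  virtual ~ToyTargetStreamer() {}
  // Defaults deliberately do nothing, so calling them is always safe.
  virtual void emitFnStart() {}
  virtual void emitPad(long Offset) { (void)Offset; }
};

struct ToyELFTargetStreamer : ToyTargetStreamer {
  // Only streamers that actually produce output override the hooks.
  void emitFnStart() override { /* write real unwind opcodes here */ }
};
// --- end sketch ----------------------------------------------------------------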
-void ARMTargetStreamer::emitFnStart() { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitFnEnd() { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitCantUnwind() { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitHandlerData() { - llvm_unreachable("unimplemented"); -} +void ARMTargetStreamer::emitFnStart() {} +void ARMTargetStreamer::emitFnEnd() {} +void ARMTargetStreamer::emitCantUnwind() {} +void ARMTargetStreamer::emitPersonality(const MCSymbol *Personality) {} +void ARMTargetStreamer::emitPersonalityIndex(unsigned Index) {} +void ARMTargetStreamer::emitHandlerData() {} void ARMTargetStreamer::emitSetFP(unsigned FpReg, unsigned SpReg, - int64_t Offset) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitPad(int64_t Offset) { - llvm_unreachable("unimplemented"); -} -void -ARMTargetStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, - bool isVector) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitUnwindRaw( - int64_t StackOffset, const SmallVectorImpl<uint8_t> &Opcodes) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::switchVendor(StringRef Vendor) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) { - llvm_unreachable("unimplemented"); -} + int64_t Offset) {} +void ARMTargetStreamer::emitMovSP(unsigned Reg, int64_t Offset) {} +void ARMTargetStreamer::emitPad(int64_t Offset) {} +void ARMTargetStreamer::emitRegSave(const SmallVectorImpl<unsigned> &RegList, + bool isVector) {} +void ARMTargetStreamer::emitUnwindRaw(int64_t StackOffset, + const SmallVectorImpl<uint8_t> &Opcodes) { +} +void ARMTargetStreamer::switchVendor(StringRef Vendor) {} +void ARMTargetStreamer::emitAttribute(unsigned Attribute, unsigned Value) {} void ARMTargetStreamer::emitTextAttribute(unsigned Attribute, - StringRef String) { - llvm_unreachable("unimplemented"); -} + StringRef String) {} void ARMTargetStreamer::emitIntTextAttribute(unsigned Attribute, - unsigned IntValue, - StringRef StringValue) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitArch(unsigned Arch) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitObjectArch(unsigned Arch) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitFPU(unsigned FPU) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::finishAttributeSection() { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) { - llvm_unreachable("unimplemented"); -} -void ARMTargetStreamer::AnnotateTLSDescriptorSequence( - const MCSymbolRefExpr *SRE) { - llvm_unreachable("unimplemented"); -} + unsigned IntValue, + StringRef StringValue) {} +void ARMTargetStreamer::emitArch(unsigned Arch) {} +void ARMTargetStreamer::emitObjectArch(unsigned Arch) {} +void ARMTargetStreamer::emitFPU(unsigned FPU) {} +void ARMTargetStreamer::finishAttributeSection() {} +void ARMTargetStreamer::emitInst(uint32_t Inst, char Suffix) {} +void +ARMTargetStreamer::AnnotateTLSDescriptorSequence(const MCSymbolRefExpr *SRE) {} -void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr 
*Value) { - llvm_unreachable("unimplemented"); -} +void ARMTargetStreamer::emitThumbSet(MCSymbol *Symbol, const MCExpr *Value) {} diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index be29dc5..baa97a7 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -21,6 +21,9 @@ using namespace llvm; +Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti) + : ARMFrameLowering(sti) {} + bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ const MachineFrameInfo *FFI = MF.getFrameInfo(); unsigned CFSize = FFI->getMaxCallFrameSize(); diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h index f61874b..a227f8e 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.h +++ b/lib/Target/ARM/Thumb1FrameLowering.h @@ -11,11 +11,10 @@ // //===----------------------------------------------------------------------===// -#ifndef __THUMB_FRAMEINFO_H_ -#define __THUMB_FRAMEINFO_H_ +#ifndef LLVM_ARM_THUMB1FRAMELOWERING_H +#define LLVM_ARM_THUMB1FRAMELOWERING_H #include "ARMFrameLowering.h" -#include "ARMSubtarget.h" #include "Thumb1InstrInfo.h" #include "Thumb1RegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" @@ -24,9 +23,7 @@ namespace llvm { class Thumb1FrameLowering : public ARMFrameLowering { public: - explicit Thumb1FrameLowering(const ARMSubtarget &sti) - : ARMFrameLowering(sti) { - } + explicit Thumb1FrameLowering(const ARMSubtarget &sti); /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 6267ecf..09debe7 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -1010,7 +1010,8 @@ bool Thumb2SizeReduce::runOnMachineFunction(MachineFunction &MF) { AttributeSet FnAttrs = MF.getFunction()->getAttributes(); OptimizeSize = FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); - MinimizeSize = STI->isMinSize(); + MinimizeSize = + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); BlockInfo.clear(); BlockInfo.resize(MF.getNumBlockIDs()); diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 15b574d..f610fbb 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1577,6 +1577,10 @@ void CppWriter::printInstruction(const Instruction *I, nl(Out) << iName << "->setName(\""; printEscapedString(cxi->getName()); Out << "\");"; + nl(Out) << iName << "->setVolatile(" + << (cxi->isVolatile() ? "true" : "false") << ");"; + nl(Out) << iName << "->setWeak(" + << (cxi->isWeak() ? "true" : "false") << ");"; break; } case Instruction::AtomicRMW: { @@ -1607,6 +1611,8 @@ void CppWriter::printInstruction(const Instruction *I, nl(Out) << iName << "->setName(\""; printEscapedString(rmwi->getName()); Out << "\");"; + nl(Out) << iName << "->setVolatile(" + << (rmwi->isVolatile() ? "true" : "false") << ");"; break; } case Instruction::LandingPad: { diff --git a/lib/Target/Hexagon/HexagonFrameLowering.cpp b/lib/Target/Hexagon/HexagonFrameLowering.cpp index d551ca9..21df12f 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.cpp +++ b/lib/Target/Hexagon/HexagonFrameLowering.cpp @@ -165,8 +165,8 @@ void HexagonFrameLowering::emitEpilogue(MachineFunction &MF, } // Replace 'jumpr r31' instruction with dealloc_return for V4 and higher // versions. 
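// --- Illustrative sketch: querying the subtarget instead of caching it --------
// The Hexagon hunk below drops the frame lowering's cached subtarget reference
// and asks the function's target machine at the point of use. The shape of
// that refactor over toy types (not LLVM's classes):
struct ToySubtarget {
  bool HasV4TOps;
};

struct ToyTargetMachine {
  ToySubtarget ST;
  const ToySubtarget &getSubtarget() const { return ST; }
};

struct ToyMachineFunction {
  const ToyTargetMachine *TM;
  const ToyTargetMachine &getTarget() const { return *TM; }
};

// Query at the use site: no stored subtarget state in the lowering object.
static bool canUseDeallocRet(const ToyMachineFunction &MF) {
  return MF.getTarget().getSubtarget().HasV4TOps;
}
// --- end sketch ----------------------------------------------------------------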
- if (STI.hasV4TOps() && MBBI->getOpcode() == Hexagon::JMPret - && !DisableDeallocRet) { + if (MF.getTarget().getSubtarget<HexagonSubtarget>().hasV4TOps() && + MBBI->getOpcode() == Hexagon::JMPret && !DisableDeallocRet) { // Check for RESTORE_DEALLOC_RET_JMP_V4 call. Don't emit an extra DEALLOC // instruction if we encounter it. MachineBasicBlock::iterator BeforeJMPR = diff --git a/lib/Target/Hexagon/HexagonFrameLowering.h b/lib/Target/Hexagon/HexagonFrameLowering.h index 446af16..2d4b0b9 100644 --- a/lib/Target/Hexagon/HexagonFrameLowering.h +++ b/lib/Target/Hexagon/HexagonFrameLowering.h @@ -11,20 +11,16 @@ #define HEXAGON_FRAMEINFO_H #include "Hexagon.h" -#include "HexagonSubtarget.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { class HexagonFrameLowering : public TargetFrameLowering { private: - const HexagonSubtarget &STI; void determineFrameLayout(MachineFunction &MF) const; public: - explicit HexagonFrameLowering(const HexagonSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, 8, 0), STI(sti) { - } + explicit HexagonFrameLowering() : TargetFrameLowering(StackGrowsDown, 8, 0) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index b8e5d24..a460ea4 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -463,9 +463,10 @@ HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass; SmallVector<SDValue, 8> MemOpChains; + const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>( + DAG.getTarget().getRegisterInfo()); SDValue StackPtr = - DAG.getCopyFromReg(Chain, dl, TM.getRegisterInfo()->getStackRegister(), - getPointerTy()); + DAG.getCopyFromReg(Chain, dl, QRI->getStackRegister(), getPointerTy()); // Walk the register/memloc assignments, inserting copies/loads. for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { @@ -720,7 +721,10 @@ SDValue HexagonTargetLowering::LowerINLINEASM(SDValue Op, cast<RegisterSDNode>(Node->getOperand(i))->getReg(); // Check it to be lr - if (Reg == TM.getRegisterInfo()->getRARegister()) { + const HexagonRegisterInfo *QRI = + static_cast<const HexagonRegisterInfo *>( + DAG.getTarget().getRegisterInfo()); + if (Reg == QRI->getRARegister()) { FuncInfo->setHasClobberLR(true); break; } @@ -812,9 +816,9 @@ HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, // The Sub result contains the new stack start address, so it // must be placed in the stack pointer register. 
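// --- Illustrative sketch: the dataflow of LowerDYNAMIC_STACKALLOC -------------
// As the comment above says, the Sub node computes the new stack start and the
// hunk below copies it back into the stack register. The same dataflow in
// plain C++ for a downward-growing stack (Align is assumed to be a power of
// two):
#include <cstdint>

static uint32_t lowerDynamicAlloca(uint32_t &SP, uint32_t Size,
                                   uint32_t Align) {
  uint32_t Bytes = (Size + Align - 1) & ~(Align - 1); // round up to alignment
  SP -= Bytes; // the Sub result: the new stack start
  return SP;   // also the address of the freshly allocated block
}
// --- end sketch ----------------------------------------------------------------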
- SDValue CopyChain = DAG.getCopyToReg(Chain, dl, - TM.getRegisterInfo()->getStackRegister(), - Sub); + const HexagonRegisterInfo *QRI = static_cast<const HexagonRegisterInfo *>( + DAG.getTarget().getRegisterInfo()); + SDValue CopyChain = DAG.getCopyToReg(Chain, dl, QRI->getStackRegister(), Sub); SDValue Ops[2] = { ArgAdjust, CopyChain }; return DAG.getMergeValues(Ops, dl); @@ -944,21 +948,6 @@ HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { } SDValue -HexagonTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue CC = Op.getOperand(4); - SDValue TrueVal = Op.getOperand(2); - SDValue FalseVal = Op.getOperand(3); - SDLoc dl(Op); - SDNode* OpNode = Op.getNode(); - EVT SVT = OpNode->getValueType(0); - - SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i1, LHS, RHS, CC); - return DAG.getNode(ISD::SELECT, dl, SVT, Cond, TrueVal, FalseVal); -} - -SDValue HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { EVT ValTy = Op.getValueType(); SDLoc dl(Op); @@ -975,7 +964,7 @@ HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { SDValue HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MFI->setReturnAddressIsTaken(true); @@ -1001,7 +990,8 @@ HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const { SDValue HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { - const HexagonRegisterInfo *TRI = TM.getRegisterInfo(); + const HexagonRegisterInfo *TRI = + static_cast<const HexagonRegisterInfo *>(DAG.getTarget().getRegisterInfo()); MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); MFI->setFrameAddressIsTaken(true); @@ -1053,429 +1043,422 @@ HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { // TargetLowering Implementation //===----------------------------------------------------------------------===// -HexagonTargetLowering::HexagonTargetLowering(HexagonTargetMachine - &targetmachine) - : TargetLowering(targetmachine, new HexagonTargetObjectFile()), - TM(targetmachine) { - - const HexagonRegisterInfo* QRI = TM.getRegisterInfo(); +HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &targetmachine) + : TargetLowering(targetmachine, new HexagonTargetObjectFile()), + TM(targetmachine) { - // Set up the register classes. - addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); - addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); - - if (QRI->Subtarget.hasV5TOps()) { - addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass); - addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass); - } + const HexagonSubtarget &Subtarget = TM.getSubtarget<HexagonSubtarget>(); - addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass); + // Set up the register classes. 
+ addRegisterClass(MVT::i32, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::i64, &Hexagon::DoubleRegsRegClass); - computeRegisterProperties(); + if (Subtarget.hasV5TOps()) { + addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass); + addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass); + } - // Align loop entry - setPrefLoopAlignment(4); + addRegisterClass(MVT::i1, &Hexagon::PredRegsRegClass); - // Limits for inline expansion of memcpy/memmove - MaxStoresPerMemcpy = 6; - MaxStoresPerMemmove = 6; + computeRegisterProperties(); - // - // Library calls for unsupported operations - // + // Align loop entry + setPrefLoopAlignment(4); - setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf"); - setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf"); - - setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti"); - setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti"); - - setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti"); - setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti"); - - setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3"); - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3"); - setOperationAction(ISD::SREM, MVT::i32, Expand); - - setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3"); - setOperationAction(ISD::SDIV, MVT::i64, Expand); - setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3"); - setOperationAction(ISD::SREM, MVT::i64, Expand); - - setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3"); - setOperationAction(ISD::UDIV, MVT::i32, Expand); - - setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3"); - setOperationAction(ISD::UDIV, MVT::i64, Expand); - - setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3"); - setOperationAction(ISD::UREM, MVT::i32, Expand); - - setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3"); - setOperationAction(ISD::UREM, MVT::i64, Expand); - - setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3"); - setOperationAction(ISD::FDIV, MVT::f32, Expand); - - setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3"); - setOperationAction(ISD::FDIV, MVT::f64, Expand); - - setOperationAction(ISD::FSQRT, MVT::f32, Expand); - setOperationAction(ISD::FSQRT, MVT::f64, Expand); - setOperationAction(ISD::FSIN, MVT::f32, Expand); - setOperationAction(ISD::FSIN, MVT::f64, Expand); - - if (QRI->Subtarget.hasV5TOps()) { - // Hexagon V5 Support. 
- setOperationAction(ISD::FADD, MVT::f32, Legal); - setOperationAction(ISD::FADD, MVT::f64, Legal); - setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); - setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal); - setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal); - setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal); - setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal); - - setCondCodeAction(ISD::SETOGE, MVT::f32, Legal); - setCondCodeAction(ISD::SETOGE, MVT::f64, Legal); - setCondCodeAction(ISD::SETUGE, MVT::f32, Legal); - setCondCodeAction(ISD::SETUGE, MVT::f64, Legal); - - setCondCodeAction(ISD::SETOGT, MVT::f32, Legal); - setCondCodeAction(ISD::SETOGT, MVT::f64, Legal); - setCondCodeAction(ISD::SETUGT, MVT::f32, Legal); - setCondCodeAction(ISD::SETUGT, MVT::f64, Legal); - - setCondCodeAction(ISD::SETOLE, MVT::f32, Legal); - setCondCodeAction(ISD::SETOLE, MVT::f64, Legal); - setCondCodeAction(ISD::SETOLT, MVT::f32, Legal); - setCondCodeAction(ISD::SETOLT, MVT::f64, Legal); - - setOperationAction(ISD::ConstantFP, MVT::f32, Legal); - setOperationAction(ISD::ConstantFP, MVT::f64, Legal); - - setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); - setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); - - setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); - setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); - - setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); - setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); - setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); - setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); - - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); - - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal); - - setOperationAction(ISD::FABS, MVT::f32, Legal); - setOperationAction(ISD::FABS, MVT::f64, Expand); - - setOperationAction(ISD::FNEG, MVT::f32, Legal); - setOperationAction(ISD::FNEG, MVT::f64, Expand); - } else { + // Limits for inline expansion of memcpy/memmove + MaxStoresPerMemcpy = 6; + MaxStoresPerMemmove = 6; - // Expand fp<->uint. 
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); + // + // Library calls for unsupported operations + // - setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); + setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf"); + setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf"); + + setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti"); + setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti"); + + setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti"); + setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti"); + + setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3"); + setOperationAction(ISD::SDIV, MVT::i32, Expand); + setLibcallName(RTLIB::SREM_I32, "__hexagon_umodsi3"); + setOperationAction(ISD::SREM, MVT::i32, Expand); + + setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3"); + setOperationAction(ISD::SDIV, MVT::i64, Expand); + setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3"); + setOperationAction(ISD::SREM, MVT::i64, Expand); + + setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3"); + setOperationAction(ISD::UDIV, MVT::i32, Expand); + + setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3"); + setOperationAction(ISD::UDIV, MVT::i64, Expand); + + setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3"); + setOperationAction(ISD::UREM, MVT::i32, Expand); + + setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3"); + setOperationAction(ISD::UREM, MVT::i64, Expand); + + setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3"); + setOperationAction(ISD::FDIV, MVT::f32, Expand); + + setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3"); + setOperationAction(ISD::FDIV, MVT::f64, Expand); + + setOperationAction(ISD::FSQRT, MVT::f32, Expand); + setOperationAction(ISD::FSQRT, MVT::f64, Expand); + setOperationAction(ISD::FSIN, MVT::f32, Expand); + setOperationAction(ISD::FSIN, MVT::f64, Expand); + + if (Subtarget.hasV5TOps()) { + // Hexagon V5 Support. 
+ setOperationAction(ISD::FADD, MVT::f32, Legal); + setOperationAction(ISD::FADD, MVT::f64, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); + setCondCodeAction(ISD::SETOEQ, MVT::f32, Legal); + setCondCodeAction(ISD::SETOEQ, MVT::f64, Legal); + setCondCodeAction(ISD::SETUEQ, MVT::f32, Legal); + setCondCodeAction(ISD::SETUEQ, MVT::f64, Legal); + + setCondCodeAction(ISD::SETOGE, MVT::f32, Legal); + setCondCodeAction(ISD::SETOGE, MVT::f64, Legal); + setCondCodeAction(ISD::SETUGE, MVT::f32, Legal); + setCondCodeAction(ISD::SETUGE, MVT::f64, Legal); + + setCondCodeAction(ISD::SETOGT, MVT::f32, Legal); + setCondCodeAction(ISD::SETOGT, MVT::f64, Legal); + setCondCodeAction(ISD::SETUGT, MVT::f32, Legal); + setCondCodeAction(ISD::SETUGT, MVT::f64, Legal); + + setCondCodeAction(ISD::SETOLE, MVT::f32, Legal); + setCondCodeAction(ISD::SETOLE, MVT::f64, Legal); + setCondCodeAction(ISD::SETOLT, MVT::f32, Legal); + setCondCodeAction(ISD::SETOLT, MVT::f64, Legal); + + setOperationAction(ISD::ConstantFP, MVT::f32, Legal); + setOperationAction(ISD::ConstantFP, MVT::f64, Legal); + + setOperationAction(ISD::FP_TO_UINT, MVT::i1, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i1, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i1, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); + + setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); + + setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); + setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote); + setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); + setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote); + + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal); + + setOperationAction(ISD::FP_TO_UINT, MVT::i64, Legal); + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Legal); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Legal); + setOperationAction(ISD::SINT_TO_FP, MVT::i64, Legal); + + setOperationAction(ISD::FABS, MVT::f32, Legal); + setOperationAction(ISD::FABS, MVT::f64, Expand); + + setOperationAction(ISD::FNEG, MVT::f32, Legal); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + } else { - setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf"); - setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf"); + // Expand fp<->uint. 
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Expand); + setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); - setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf"); - setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf"); + setOperationAction(ISD::SINT_TO_FP, MVT::i32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); - setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf"); - setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf"); + setLibcallName(RTLIB::SINTTOFP_I64_F32, "__hexagon_floatdisf"); + setLibcallName(RTLIB::UINTTOFP_I64_F32, "__hexagon_floatundisf"); - setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf"); - setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf"); + setLibcallName(RTLIB::UINTTOFP_I32_F32, "__hexagon_floatunsisf"); + setLibcallName(RTLIB::SINTTOFP_I32_F32, "__hexagon_floatsisf"); - setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi"); - setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi"); + setLibcallName(RTLIB::SINTTOFP_I64_F64, "__hexagon_floatdidf"); + setLibcallName(RTLIB::UINTTOFP_I64_F64, "__hexagon_floatundidf"); - setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi"); - setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi"); + setLibcallName(RTLIB::UINTTOFP_I32_F64, "__hexagon_floatunsidf"); + setLibcallName(RTLIB::SINTTOFP_I32_F64, "__hexagon_floatsidf"); - setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi"); - setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi"); + setLibcallName(RTLIB::FPTOUINT_F32_I32, "__hexagon_fixunssfsi"); + setLibcallName(RTLIB::FPTOUINT_F32_I64, "__hexagon_fixunssfdi"); - setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3"); - setOperationAction(ISD::FADD, MVT::f64, Expand); + setLibcallName(RTLIB::FPTOSINT_F64_I64, "__hexagon_fixdfdi"); + setLibcallName(RTLIB::FPTOSINT_F32_I64, "__hexagon_fixsfdi"); - setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); - setOperationAction(ISD::FADD, MVT::f32, Expand); + setLibcallName(RTLIB::FPTOUINT_F64_I32, "__hexagon_fixunsdfsi"); + setLibcallName(RTLIB::FPTOUINT_F64_I64, "__hexagon_fixunsdfdi"); - setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2"); - setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); + setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3"); + setOperationAction(ISD::FADD, MVT::f64, Expand); - setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2"); - setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand); + setLibcallName(RTLIB::ADD_F32, "__hexagon_addsf3"); + setOperationAction(ISD::FADD, MVT::f32, Expand); - setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2"); - setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand); + setLibcallName(RTLIB::FPEXT_F32_F64, "__hexagon_extendsfdf2"); + setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); - setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2"); - setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); + setLibcallName(RTLIB::OEQ_F32, "__hexagon_eqsf2"); + setCondCodeAction(ISD::SETOEQ, MVT::f32, Expand); - setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2"); - setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); + setLibcallName(RTLIB::OEQ_F64, "__hexagon_eqdf2"); + setCondCodeAction(ISD::SETOEQ, MVT::f64, Expand); - setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2"); - setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); + setLibcallName(RTLIB::OGE_F32, "__hexagon_gesf2"); + setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); - setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2"); - setCondCodeAction(ISD::SETOGT, 
MVT::f64, Expand); + setLibcallName(RTLIB::OGE_F64, "__hexagon_gedf2"); + setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); - setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi"); - setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand); + setLibcallName(RTLIB::OGT_F32, "__hexagon_gtsf2"); + setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); - setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi"); - setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand); + setLibcallName(RTLIB::OGT_F64, "__hexagon_gtdf2"); + setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); - setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2"); - setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); + setLibcallName(RTLIB::FPTOSINT_F64_I32, "__hexagon_fixdfsi"); + setOperationAction(ISD::FP_TO_SINT, MVT::f64, Expand); - setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2"); - setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); + setLibcallName(RTLIB::FPTOSINT_F32_I32, "__hexagon_fixsfsi"); + setOperationAction(ISD::FP_TO_SINT, MVT::f32, Expand); - setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2"); - setCondCodeAction(ISD::SETOLT, MVT::f64, Expand); + setLibcallName(RTLIB::OLE_F64, "__hexagon_ledf2"); + setCondCodeAction(ISD::SETOLE, MVT::f64, Expand); - setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2"); - setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); + setLibcallName(RTLIB::OLE_F32, "__hexagon_lesf2"); + setCondCodeAction(ISD::SETOLE, MVT::f32, Expand); - setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3"); - setOperationAction(ISD::FMUL, MVT::f64, Expand); + setLibcallName(RTLIB::OLT_F64, "__hexagon_ltdf2"); + setCondCodeAction(ISD::SETOLT, MVT::f64, Expand); - setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3"); - setOperationAction(ISD::MUL, MVT::f32, Expand); + setLibcallName(RTLIB::OLT_F32, "__hexagon_ltsf2"); + setCondCodeAction(ISD::SETOLT, MVT::f32, Expand); - setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2"); - setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); + setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3"); + setOperationAction(ISD::FMUL, MVT::f64, Expand); - setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2"); + setLibcallName(RTLIB::MUL_F32, "__hexagon_mulsf3"); + setOperationAction(ISD::MUL, MVT::f32, Expand); - setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3"); - setOperationAction(ISD::SUB, MVT::f64, Expand); + setLibcallName(RTLIB::UNE_F64, "__hexagon_nedf2"); + setCondCodeAction(ISD::SETUNE, MVT::f64, Expand); - setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3"); - setOperationAction(ISD::SUB, MVT::f32, Expand); + setLibcallName(RTLIB::UNE_F32, "__hexagon_nesf2"); - setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2"); - setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); + setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3"); + setOperationAction(ISD::SUB, MVT::f64, Expand); - setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2"); - setCondCodeAction(ISD::SETUO, MVT::f64, Expand); + setLibcallName(RTLIB::SUB_F32, "__hexagon_subsf3"); + setOperationAction(ISD::SUB, MVT::f32, Expand); - setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2"); - setCondCodeAction(ISD::SETO, MVT::f64, Expand); + setLibcallName(RTLIB::FPROUND_F64_F32, "__hexagon_truncdfsf2"); + setOperationAction(ISD::FP_ROUND, MVT::f64, Expand); - setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2"); - setCondCodeAction(ISD::SETO, MVT::f32, Expand); + setLibcallName(RTLIB::UO_F64, "__hexagon_unorddf2"); + setCondCodeAction(ISD::SETUO, MVT::f64, Expand); - setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2"); - 
setCondCodeAction(ISD::SETUO, MVT::f32, Expand); + setLibcallName(RTLIB::O_F64, "__hexagon_unorddf2"); + setCondCodeAction(ISD::SETO, MVT::f64, Expand); - setOperationAction(ISD::FABS, MVT::f32, Expand); - setOperationAction(ISD::FABS, MVT::f64, Expand); - setOperationAction(ISD::FNEG, MVT::f32, Expand); - setOperationAction(ISD::FNEG, MVT::f64, Expand); - } + setLibcallName(RTLIB::O_F32, "__hexagon_unordsf2"); + setCondCodeAction(ISD::SETO, MVT::f32, Expand); - setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3"); - setOperationAction(ISD::SREM, MVT::i32, Expand); - - setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); - setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal); - setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); - setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal); - - setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal); - setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal); - setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); - setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal); - - setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); - - // Turn FP extload into load/fextend. - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - // Hexagon has a i1 sign extending load. - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand); - // Turn FP truncstore into trunc + store. - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - - // Custom legalize GlobalAddress nodes into CONST32. - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i8, Custom); - setOperationAction(ISD::BlockAddress, MVT::i32, Custom); - // Truncate action? - setOperationAction(ISD::TRUNCATE, MVT::i64, Expand); - - // Hexagon doesn't have sext_inreg, replace them with shl/sra. - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); - - // Hexagon has no REM or DIVREM operations. - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::SDIVREM, MVT::i64, Expand); - setOperationAction(ISD::UDIVREM, MVT::i64, Expand); - - setOperationAction(ISD::BSWAP, MVT::i64, Expand); - - // Lower SELECT_CC to SETCC and SELECT. - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); - - if (QRI->Subtarget.hasV5TOps()) { - - // We need to make the operation type of SELECT node to be Custom, - // such that we don't go into the infinite loop of - // select -> setcc -> select_cc -> select loop. - setOperationAction(ISD::SELECT, MVT::f32, Custom); - setOperationAction(ISD::SELECT, MVT::f64, Custom); - - setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); - setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setLibcallName(RTLIB::UO_F32, "__hexagon_unordsf2"); + setCondCodeAction(ISD::SETUO, MVT::f32, Expand); - } else { - - // Hexagon has no select or setcc: expand to SELECT_CC. - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); + setOperationAction(ISD::FABS, MVT::f32, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setOperationAction(ISD::FNEG, MVT::f32, Expand); + setOperationAction(ISD::FNEG, MVT::f64, Expand); + } - // This is a workaround documented in DAGCombiner.cpp:2892 We don't - // support SELECT_CC on every type. 
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3"); + setOperationAction(ISD::SREM, MVT::i32, Expand); + + setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); + setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal); + setIndexedLoadAction(ISD::POST_INC, MVT::i32, Legal); + setIndexedLoadAction(ISD::POST_INC, MVT::i64, Legal); + + setIndexedStoreAction(ISD::POST_INC, MVT::i8, Legal); + setIndexedStoreAction(ISD::POST_INC, MVT::i16, Legal); + setIndexedStoreAction(ISD::POST_INC, MVT::i32, Legal); + setIndexedStoreAction(ISD::POST_INC, MVT::i64, Legal); + + setOperationAction(ISD::BUILD_PAIR, MVT::i64, Expand); + + // Turn FP extload into load/fextend. + setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); + // Hexagon has a i1 sign extending load. + setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Expand); + // Turn FP truncstore into trunc + store. + setTruncStoreAction(MVT::f64, MVT::f32, Expand); + + // Custom legalize GlobalAddress nodes into CONST32. + setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + setOperationAction(ISD::GlobalAddress, MVT::i8, Custom); + setOperationAction(ISD::BlockAddress, MVT::i32, Custom); + // Truncate action? + setOperationAction(ISD::TRUNCATE, MVT::i64, Expand); + + // Hexagon doesn't have sext_inreg, replace them with shl/sra. + setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + + // Hexagon has no REM or DIVREM operations. + setOperationAction(ISD::UREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i32, Expand); + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SREM, MVT::i64, Expand); + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + + setOperationAction(ISD::BSWAP, MVT::i64, Expand); + + // Lower SELECT_CC to SETCC and SELECT. + setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + + if (Subtarget.hasV5TOps()) { + + // We need to make the operation type of SELECT node to be Custom, + // such that we don't go into the infinite loop of + // select -> setcc -> select_cc -> select loop. + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); + + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); - } + } else { - if (EmitJumpTables) { - setOperationAction(ISD::BR_JT, MVT::Other, Custom); - } else { - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - } - // Increase jump tables cutover to 5, was 4. - setMinimumJumpTableEntries(5); - - setOperationAction(ISD::BR_CC, MVT::f32, Expand); - setOperationAction(ISD::BR_CC, MVT::f64, Expand); - setOperationAction(ISD::BR_CC, MVT::i1, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Expand); - setOperationAction(ISD::BR_CC, MVT::i64, Expand); - - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); - - setOperationAction(ISD::FSIN , MVT::f64, Expand); - setOperationAction(ISD::FCOS , MVT::f64, Expand); - setOperationAction(ISD::FREM , MVT::f64, Expand); - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); - setOperationAction(ISD::FREM , MVT::f32, Expand); - setOperationAction(ISD::FSINCOS, MVT::f64, Expand); - setOperationAction(ISD::FSINCOS, MVT::f32, Expand); - - // In V4, we have double word add/sub with carry. 
The problem with - // modelling this instruction is that it produces 2 results - Rdd and Px. - // To model update of Px, we will have to use Defs[p0..p3] which will - // cause any predicate live range to spill. So, we pretend we dont't - // have these instructions. - setOperationAction(ISD::ADDE, MVT::i8, Expand); - setOperationAction(ISD::ADDE, MVT::i16, Expand); - setOperationAction(ISD::ADDE, MVT::i32, Expand); - setOperationAction(ISD::ADDE, MVT::i64, Expand); - setOperationAction(ISD::SUBE, MVT::i8, Expand); - setOperationAction(ISD::SUBE, MVT::i16, Expand); - setOperationAction(ISD::SUBE, MVT::i32, Expand); - setOperationAction(ISD::SUBE, MVT::i64, Expand); - setOperationAction(ISD::ADDC, MVT::i8, Expand); - setOperationAction(ISD::ADDC, MVT::i16, Expand); - setOperationAction(ISD::ADDC, MVT::i32, Expand); - setOperationAction(ISD::ADDC, MVT::i64, Expand); - setOperationAction(ISD::SUBC, MVT::i8, Expand); - setOperationAction(ISD::SUBC, MVT::i16, Expand); - setOperationAction(ISD::SUBC, MVT::i32, Expand); - setOperationAction(ISD::SUBC, MVT::i64, Expand); - - setOperationAction(ISD::CTPOP, MVT::i32, Expand); - setOperationAction(ISD::CTPOP, MVT::i64, Expand); - setOperationAction(ISD::CTTZ , MVT::i32, Expand); - setOperationAction(ISD::CTTZ , MVT::i64, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::CTLZ , MVT::i32, Expand); - setOperationAction(ISD::CTLZ , MVT::i64, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - setOperationAction(ISD::ROTL , MVT::i32, Expand); - setOperationAction(ISD::ROTR , MVT::i32, Expand); - setOperationAction(ISD::BSWAP, MVT::i32, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); - setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); - setOperationAction(ISD::FPOW , MVT::f64, Expand); - setOperationAction(ISD::FPOW , MVT::f32, Expand); - - setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); - - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - - setOperationAction(ISD::EH_RETURN, MVT::Other, Custom); - - if (TM.getSubtargetImpl()->isSubtargetV2()) { - setExceptionPointerRegister(Hexagon::R20); - setExceptionSelectorRegister(Hexagon::R21); - } else { - setExceptionPointerRegister(Hexagon::R0); - setExceptionSelectorRegister(Hexagon::R1); - } + // Hexagon has no select or setcc: expand to SELECT_CC. + setOperationAction(ISD::SELECT, MVT::f32, Expand); + setOperationAction(ISD::SELECT, MVT::f64, Expand); + } - // VASTART needs to be custom lowered to use the VarArgsFrameIndex. - setOperationAction(ISD::VASTART , MVT::Other, Custom); + if (EmitJumpTables) { + setOperationAction(ISD::BR_JT, MVT::Other, Custom); + } else { + setOperationAction(ISD::BR_JT, MVT::Other, Expand); + } + // Increase jump tables cutover to 5, was 4. 
+  setMinimumJumpTableEntries(5);
+
+  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
+  setOperationAction(ISD::BR_CC, MVT::f64, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i1, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
+  setOperationAction(ISD::BR_CC, MVT::i64, Expand);
+
+  setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+
+  setOperationAction(ISD::FSIN, MVT::f64, Expand);
+  setOperationAction(ISD::FCOS, MVT::f64, Expand);
+  setOperationAction(ISD::FREM, MVT::f64, Expand);
+  setOperationAction(ISD::FSIN, MVT::f32, Expand);
+  setOperationAction(ISD::FCOS, MVT::f32, Expand);
+  setOperationAction(ISD::FREM, MVT::f32, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
+  setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
+
+  // In V4, we have double word add/sub with carry. The problem with
+  // modelling this instruction is that it produces 2 results - Rdd and Px.
+  // To model update of Px, we will have to use Defs[p0..p3] which will
+  // cause any predicate live range to spill. So, we pretend we don't
+  // have these instructions.
+  setOperationAction(ISD::ADDE, MVT::i8, Expand);
+  setOperationAction(ISD::ADDE, MVT::i16, Expand);
+  setOperationAction(ISD::ADDE, MVT::i32, Expand);
+  setOperationAction(ISD::ADDE, MVT::i64, Expand);
+  setOperationAction(ISD::SUBE, MVT::i8, Expand);
+  setOperationAction(ISD::SUBE, MVT::i16, Expand);
+  setOperationAction(ISD::SUBE, MVT::i32, Expand);
+  setOperationAction(ISD::SUBE, MVT::i64, Expand);
+  setOperationAction(ISD::ADDC, MVT::i8, Expand);
+  setOperationAction(ISD::ADDC, MVT::i16, Expand);
+  setOperationAction(ISD::ADDC, MVT::i32, Expand);
+  setOperationAction(ISD::ADDC, MVT::i64, Expand);
+  setOperationAction(ISD::SUBC, MVT::i8, Expand);
+  setOperationAction(ISD::SUBC, MVT::i16, Expand);
+  setOperationAction(ISD::SUBC, MVT::i32, Expand);
+  setOperationAction(ISD::SUBC, MVT::i64, Expand);
+
+  setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+  setOperationAction(ISD::CTPOP, MVT::i64, Expand);
+  setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ, MVT::i64, Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+  setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+  setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+  setOperationAction(ISD::CTLZ, MVT::i64, Expand);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
+  setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand);
+  setOperationAction(ISD::ROTL, MVT::i32, Expand);
+  setOperationAction(ISD::ROTR, MVT::i32, Expand);
+  setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+  setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+  setOperationAction(ISD::FPOW, MVT::f64, Expand);
+  setOperationAction(ISD::FPOW, MVT::f32, Expand);
+
+  setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+  setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+
+  setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+  setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+
+  setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+  setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+  setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
+
+  if (Subtarget.isSubtargetV2()) {
+    setExceptionPointerRegister(Hexagon::R20);
+    setExceptionSelectorRegister(Hexagon::R21);
+  } else {
+    setExceptionPointerRegister(Hexagon::R0);
+    setExceptionSelectorRegister(Hexagon::R1);
+  }
-  // Use the default
implementation. - setOperationAction(ISD::VAARG , MVT::Other, Expand); - setOperationAction(ISD::VACOPY , MVT::Other, Expand); - setOperationAction(ISD::VAEND , MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE , MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE , MVT::Other, Expand); + // VASTART needs to be custom lowered to use the VarArgsFrameIndex. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + // Use the default implementation. + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32 , Custom); - setOperationAction(ISD::INLINEASM , MVT::Other, Custom); + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + setOperationAction(ISD::INLINEASM, MVT::Other, Custom); - setMinFunctionAlignment(2); + setMinFunctionAlignment(2); - // Needed for DYNAMIC_STACKALLOC expansion. - unsigned StackRegister = TM.getRegisterInfo()->getStackRegister(); - setStackPointerRegisterToSaveRestore(StackRegister); - setSchedulingPreference(Sched::VLIW); + // Needed for DYNAMIC_STACKALLOC expansion. + const HexagonRegisterInfo *QRI = + static_cast<const HexagonRegisterInfo *>(TM.getRegisterInfo()); + setStackPointerRegisterToSaveRestore(QRI->getStackRegister()); + setSchedulingPreference(Sched::VLIW); } - const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { switch (Opcode) { @@ -1577,7 +1560,6 @@ HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::SELECT: return Op; case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); case ISD::INLINEASM: return LowerINLINEASM(Op, DAG); @@ -1641,8 +1623,7 @@ HexagonTargetLowering::getRegForInlineAsmConstraint(const /// specified FP immediate natively. If false, the legalizer will /// materialize the FP immediate as a load from a constant pool. bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { - const HexagonRegisterInfo* QRI = TM.getRegisterInfo(); - return QRI->Subtarget.hasV5TOps(); + return TM.getSubtarget<HexagonSubtarget>().hasV5TOps(); } /// isLegalAddressingMode - Return true if the addressing mode represented by diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 4f27c27..ec16cc8 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -74,8 +74,8 @@ namespace llvm { unsigned& RetSize) const; public: - HexagonTargetMachine &TM; - explicit HexagonTargetLowering(HexagonTargetMachine &targetmachine); + const TargetMachine &TM; + explicit HexagonTargetLowering(const TargetMachine &targetmachine); /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. 
Targets which want to do tail call @@ -124,7 +124,6 @@ namespace llvm { const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index ea6367a..1c95e06 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1538,14 +1538,13 @@ int HexagonInstrInfo::GetDotOldOp(const int opc) const { int NewOp = opc; if (isPredicated(NewOp) && isPredicatedNew(NewOp)) { // Get predicate old form NewOp = Hexagon::getPredOldOpcode(NewOp); - if (NewOp < 0) - assert(0 && "Couldn't change predicate new instruction to its old form."); + assert(NewOp >= 0 && + "Couldn't change predicate new instruction to its old form."); } if (isNewValueStore(NewOp)) { // Convert into non-new-value format NewOp = Hexagon::getNonNVStore(NewOp); - if (NewOp < 0) - assert(0 && "Couldn't change new-value store to its old form."); + assert(NewOp >= 0 && "Couldn't change new-value store to its old form."); } return NewOp; } diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.cpp b/lib/Target/Hexagon/HexagonMachineScheduler.cpp index 7dd6e95..6fcaa20 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.cpp +++ b/lib/Target/Hexagon/HexagonMachineScheduler.cpp @@ -20,7 +20,7 @@ using namespace llvm; #define DEBUG_TYPE "misched" -/// Platform specific modifications to DAG. +/// Platform-specific modifications to DAG. void VLIWMachineScheduler::postprocessDAG() { SUnit* LastSequentialCall = nullptr; // Currently we only catch the situation when compare gets scheduled @@ -150,7 +150,7 @@ void VLIWMachineScheduler::schedule() { buildDAGWithRegPressure(); - // Postprocess the DAG to add platform specific artificial dependencies. + // Postprocess the DAG to add platform-specific artificial dependencies. postprocessDAG(); SmallVector<SUnit*, 8> TopRoots, BotRoots; diff --git a/lib/Target/Hexagon/HexagonMachineScheduler.h b/lib/Target/Hexagon/HexagonMachineScheduler.h index 99100a1..8c41086 100644 --- a/lib/Target/Hexagon/HexagonMachineScheduler.h +++ b/lib/Target/Hexagon/HexagonMachineScheduler.h @@ -100,7 +100,7 @@ public: /// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's /// time to do some work. virtual void schedule() override; - /// Perform platform specific DAG postprocessing. + /// Perform platform-specific DAG postprocessing. 
void postprocessDAG(); }; diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp index 9e1e0fd..b5db997 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp @@ -18,10 +18,8 @@ using namespace llvm; bool llvm::flag_aligned_memcpy; -HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const HexagonTargetMachine - &TM) - : TargetSelectionDAGInfo(TM) { -} +HexagonSelectionDAGInfo::HexagonSelectionDAGInfo(const DataLayout &DL) + : TargetSelectionDAGInfo(&DL) {} HexagonSelectionDAGInfo::~HexagonSelectionDAGInfo() { } diff --git a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h index 8ba6108..b40b303 100644 --- a/lib/Target/Hexagon/HexagonSelectionDAGInfo.h +++ b/lib/Target/Hexagon/HexagonSelectionDAGInfo.h @@ -18,11 +18,9 @@ namespace llvm { -class HexagonTargetMachine; - class HexagonSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit HexagonSelectionDAGInfo(const HexagonTargetMachine &TM); + explicit HexagonSelectionDAGInfo(const DataLayout &DL); ~HexagonSelectionDAGInfo(); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, diff --git a/lib/Target/Hexagon/HexagonSubtarget.cpp b/lib/Target/Hexagon/HexagonSubtarget.cpp index 70c87fa..657893f 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -48,10 +48,8 @@ EnableIEEERndNear( cl::Hidden, cl::ZeroOrMore, cl::init(false), cl::desc("Generate non-chopped conversion from fp to int.")); -HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): - HexagonGenSubtargetInfo(TT, CPU, FS), - CPUString(CPU.str()) { - +HexagonSubtarget & +HexagonSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { // If the programmer has not specified a Hexagon version, default to -mv4. if (CPUString.empty()) CPUString = "hexagonv4"; @@ -70,6 +68,15 @@ HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS): } ParseSubtargetFeatures(CPUString, FS); + return *this; +} + +HexagonSubtarget::HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS, + const TargetMachine &TM) + : HexagonGenSubtargetInfo(TT, CPU, FS), CPUString(CPU.str()), + DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32"), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM), + TSInfo(DL), FrameLowering() { // Initialize scheduling itinerary for the specified CPU. InstrItins = getInstrItineraryForCPU(CPUString); diff --git a/lib/Target/Hexagon/HexagonSubtarget.h b/lib/Target/Hexagon/HexagonSubtarget.h index 690bef0..b184e62 100644 --- a/lib/Target/Hexagon/HexagonSubtarget.h +++ b/lib/Target/Hexagon/HexagonSubtarget.h @@ -14,6 +14,11 @@ #ifndef Hexagon_SUBTARGET_H #define Hexagon_SUBTARGET_H +#include "HexagonFrameLowering.h" +#include "HexagonInstrInfo.h" +#include "HexagonISelLowering.h" +#include "HexagonSelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -28,6 +33,7 @@ namespace llvm { class HexagonSubtarget : public HexagonGenSubtargetInfo { virtual void anchor(); + bool UseMemOps; bool ModeIEEERndNear; @@ -37,16 +43,35 @@ public: }; HexagonArchEnum HexagonArchVersion; +private: std::string CPUString; + const DataLayout DL; // Calculates type size & alignment. 
+  HexagonInstrInfo InstrInfo;
+  HexagonTargetLowering TLInfo;
+  HexagonSelectionDAGInfo TSInfo;
+  HexagonFrameLowering FrameLowering;
   InstrItineraryData InstrItins;
 public:
-  HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS);
+  HexagonSubtarget(StringRef TT, StringRef CPU, StringRef FS,
+                   const TargetMachine &TM);
   /// getInstrItins - Return the instruction itineraries based on subtarget
   /// selection.
   const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+  const HexagonInstrInfo *getInstrInfo() const { return &InstrInfo; }
+  const HexagonRegisterInfo *getRegisterInfo() const {
+    return &InstrInfo.getRegisterInfo();
+  }
+  const HexagonTargetLowering *getTargetLowering() const { return &TLInfo; }
+  const HexagonFrameLowering *getFrameLowering() const {
+    return &FrameLowering;
+  }
+  const HexagonSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; }
+  const DataLayout *getDataLayout() const { return &DL; }
+  HexagonSubtarget &initializeSubtargetDependencies(StringRef CPU,
+                                                    StringRef FS);
   /// ParseSubtargetFeatures - Parses features string setting specified
   /// subtarget options.  Definition of function is auto generated by tblgen.
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index b923764..7831410 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -67,15 +67,10 @@ SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
 HexagonTargetMachine::HexagonTargetMachine(const Target &T, StringRef TT,
                                            StringRef CPU, StringRef FS,
                                            const TargetOptions &Options,
-                                           Reloc::Model RM,
-                                           CodeModel::Model CM,
+                                           Reloc::Model RM, CodeModel::Model CM,
                                            CodeGenOpt::Level OL)
-  : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
-    DL("e-m:e-p:32:32-i1:32-i64:64-a:0-n32") ,
-    Subtarget(TT, CPU, FS), InstrInfo(Subtarget), TLInfo(*this),
-    TSInfo(*this),
-    FrameLowering(Subtarget),
-    InstrItins(&Subtarget.getInstrItineraryData()) {
+    : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
+      Subtarget(TT, CPU, FS, *this) {
   initAsmInfo();
 }
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.h b/lib/Target/Hexagon/HexagonTargetMachine.h
index 70b835e..d88178e 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.h
+++ b/lib/Target/Hexagon/HexagonTargetMachine.h
@@ -14,12 +14,8 @@
 #ifndef HexagonTARGETMACHINE_H
 #define HexagonTARGETMACHINE_H
-#include "HexagonFrameLowering.h"
-#include "HexagonISelLowering.h"
 #include "HexagonInstrInfo.h"
-#include "HexagonSelectionDAGInfo.h"
 #include "HexagonSubtarget.h"
-#include "llvm/IR/DataLayout.h"
 #include "llvm/Target/TargetMachine.h"
 namespace llvm {
@@ -27,13 +23,7 @@ namespace llvm {
 class Module;
 class HexagonTargetMachine : public LLVMTargetMachine {
-  const DataLayout DL; // Calculates type size & alignment.
HexagonSubtarget Subtarget; - HexagonInstrInfo InstrInfo; - HexagonTargetLowering TLInfo; - HexagonSelectionDAGInfo TSInfo; - HexagonFrameLowering FrameLowering; - const InstrItineraryData* InstrItins; public: HexagonTargetMachine(const Target &T, StringRef TT,StringRef CPU, @@ -42,33 +32,29 @@ public: CodeGenOpt::Level OL); const HexagonInstrInfo *getInstrInfo() const override { - return &InstrInfo; + return getSubtargetImpl()->getInstrInfo(); } const HexagonSubtarget *getSubtargetImpl() const override { return &Subtarget; } const HexagonRegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); + return getSubtargetImpl()->getRegisterInfo(); } - const InstrItineraryData* getInstrItineraryData() const override { - return InstrItins; + return &getSubtargetImpl()->getInstrItineraryData(); } - - const HexagonTargetLowering* getTargetLowering() const override { - return &TLInfo; + return getSubtargetImpl()->getTargetLowering(); } - const HexagonFrameLowering* getFrameLowering() const override { - return &FrameLowering; + return getSubtargetImpl()->getFrameLowering(); } - const HexagonSelectionDAGInfo* getSelectionDAGInfo() const override { - return &TSInfo; + return getSubtargetImpl()->getSelectionDAGInfo(); + } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); } - - const DataLayout *getDataLayout() const override { return &DL; } static unsigned getModuleMatchQuality(const Module &M); TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/MSP430/MSP430FrameLowering.h b/lib/Target/MSP430/MSP430FrameLowering.h index d464dd9..fadfeed 100644 --- a/lib/Target/MSP430/MSP430FrameLowering.h +++ b/lib/Target/MSP430/MSP430FrameLowering.h @@ -15,20 +15,15 @@ #define MSP430_FRAMEINFO_H #include "MSP430.h" -#include "MSP430Subtarget.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { - class MSP430Subtarget; - class MSP430FrameLowering : public TargetFrameLowering { protected: - const MSP430Subtarget &STI; public: - explicit MSP430FrameLowering(const MSP430Subtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2, 2), - STI(sti) {} + explicit MSP430FrameLowering() + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2, 2) {} /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index c5901bc..3d3ee92 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -57,11 +57,8 @@ HWMultMode("msp430-hwmult-mode", cl::Hidden, "Assume hardware multiplier cannot be used inside interrupts"), clEnumValEnd)); -MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : - TargetLowering(tm, new TargetLoweringObjectFileELF()), - Subtarget(*tm.getSubtargetImpl()) { - - TD = getDataLayout(); +MSP430TargetLowering::MSP430TargetLowering(const TargetMachine &TM) + : TargetLowering(TM, new TargetLoweringObjectFileELF()) { // Set up the register classes. addRegisterClass(MVT::i8, &MSP430::GR8RegClass); @@ -1032,7 +1029,7 @@ MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { if (ReturnAddrIndex == 0) { // Set up a frame object for the return address. 
- uint64_t SlotSize = TD->getPointerSize(); + uint64_t SlotSize = getDataLayout()->getPointerSize(); ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize, true); FuncInfo->setRAIndex(ReturnAddrIndex); @@ -1055,7 +1052,7 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); SDValue Offset = - DAG.getConstant(TD->getPointerSize(), MVT::i16); + DAG.getConstant(getDataLayout()->getPointerSize(), MVT::i16); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h index 3ced61d..3e2f344 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.h +++ b/lib/Target/MSP430/MSP430ISelLowering.h @@ -66,12 +66,9 @@ namespace llvm { }; } - class MSP430Subtarget; - class MSP430TargetMachine; - class MSP430TargetLowering : public TargetLowering { public: - explicit MSP430TargetLowering(MSP430TargetMachine &TM); + explicit MSP430TargetLowering(const TargetMachine &TM); MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i8; } @@ -170,9 +167,6 @@ namespace llvm { SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override; - - const MSP430Subtarget &Subtarget; - const DataLayout *TD; }; } // namespace llvm diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp index 0c04ddb..ccb6c09 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.cpp +++ b/lib/Target/MSP430/MSP430InstrInfo.cpp @@ -30,9 +30,9 @@ using namespace llvm; // Pin the vtable to this file. void MSP430InstrInfo::anchor() {} -MSP430InstrInfo::MSP430InstrInfo(MSP430TargetMachine &tm) +MSP430InstrInfo::MSP430InstrInfo(MSP430Subtarget &STI) : MSP430GenInstrInfo(MSP430::ADJCALLSTACKDOWN, MSP430::ADJCALLSTACKUP), - RI(tm) {} + RI() {} void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h index 1ffcebb..e6baaef 100644 --- a/lib/Target/MSP430/MSP430InstrInfo.h +++ b/lib/Target/MSP430/MSP430InstrInfo.h @@ -22,7 +22,7 @@ namespace llvm { -class MSP430TargetMachine; +class MSP430Subtarget; /// MSP430II - This namespace holds all of the target specific flags that /// instruction info tracks. @@ -44,7 +44,7 @@ class MSP430InstrInfo : public MSP430GenInstrInfo { const MSP430RegisterInfo RI; virtual void anchor(); public: - explicit MSP430InstrInfo(MSP430TargetMachine &TM); + explicit MSP430InstrInfo(MSP430Subtarget &STI); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp index 341fb64..691bcee 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.cpp +++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp @@ -32,10 +32,8 @@ using namespace llvm; #include "MSP430GenRegisterInfo.inc" // FIXME: Provide proper call frame setup / destroy opcodes. 
-MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm) - : MSP430GenRegisterInfo(MSP430::PCW), TM(tm) { - StackAlign = TM.getFrameLowering()->getStackAlignment(); -} +MSP430RegisterInfo::MSP430RegisterInfo() + : MSP430GenRegisterInfo(MSP430::PCW) {} const MCPhysReg* MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h index a607528..cb01961 100644 --- a/lib/Target/MSP430/MSP430RegisterInfo.h +++ b/lib/Target/MSP430/MSP430RegisterInfo.h @@ -21,18 +21,9 @@ namespace llvm { -class TargetInstrInfo; -class MSP430TargetMachine; - struct MSP430RegisterInfo : public MSP430GenRegisterInfo { -private: - MSP430TargetMachine &TM; - - /// StackAlign - Default stack alignment. - /// - unsigned StackAlign; public: - MSP430RegisterInfo(MSP430TargetMachine &tm); + MSP430RegisterInfo(); /// Code Generation virtual methods... const MCPhysReg * diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp index c700383..3897ef6 100644 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.cpp @@ -16,9 +16,8 @@ using namespace llvm; #define DEBUG_TYPE "msp430-selectiondag-info" -MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const MSP430TargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} +MSP430SelectionDAGInfo::MSP430SelectionDAGInfo(const DataLayout &DL) + : TargetSelectionDAGInfo(&DL) {} MSP430SelectionDAGInfo::~MSP430SelectionDAGInfo() { } diff --git a/lib/Target/MSP430/MSP430SelectionDAGInfo.h b/lib/Target/MSP430/MSP430SelectionDAGInfo.h index fa81948..cb04adc 100644 --- a/lib/Target/MSP430/MSP430SelectionDAGInfo.h +++ b/lib/Target/MSP430/MSP430SelectionDAGInfo.h @@ -22,7 +22,7 @@ class MSP430TargetMachine; class MSP430SelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit MSP430SelectionDAGInfo(const MSP430TargetMachine &TM); + explicit MSP430SelectionDAGInfo(const DataLayout &DL); ~MSP430SelectionDAGInfo(); }; diff --git a/lib/Target/MSP430/MSP430Subtarget.cpp b/lib/Target/MSP430/MSP430Subtarget.cpp index 68ad091..dbddc52 100644 --- a/lib/Target/MSP430/MSP430Subtarget.cpp +++ b/lib/Target/MSP430/MSP430Subtarget.cpp @@ -25,12 +25,15 @@ using namespace llvm; void MSP430Subtarget::anchor() { } -MSP430Subtarget::MSP430Subtarget(const std::string &TT, - const std::string &CPU, - const std::string &FS) : - MSP430GenSubtargetInfo(TT, CPU, FS) { - std::string CPUName = "generic"; - - // Parse features string. - ParseSubtargetFeatures(CPUName, FS); +MSP430Subtarget &MSP430Subtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { + ParseSubtargetFeatures("generic", FS); + return *this; } + +MSP430Subtarget::MSP430Subtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM) + : MSP430GenSubtargetInfo(TT, CPU, FS), + // FIXME: Check DataLayout string. 
+ DL("e-m:e-p:16:16-i32:16:32-n8:16"), FrameLowering(), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM), + TSInfo(DL) {} diff --git a/lib/Target/MSP430/MSP430Subtarget.h b/lib/Target/MSP430/MSP430Subtarget.h index 4d8792e..0152ad1 100644 --- a/lib/Target/MSP430/MSP430Subtarget.h +++ b/lib/Target/MSP430/MSP430Subtarget.h @@ -14,6 +14,12 @@ #ifndef LLVM_TARGET_MSP430_SUBTARGET_H #define LLVM_TARGET_MSP430_SUBTARGET_H +#include "MSP430FrameLowering.h" +#include "MSP430InstrInfo.h" +#include "MSP430ISelLowering.h" +#include "MSP430RegisterInfo.h" +#include "MSP430SelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -26,16 +32,33 @@ class StringRef; class MSP430Subtarget : public MSP430GenSubtargetInfo { virtual void anchor(); bool ExtendedInsts; + const DataLayout DL; // Calculates type size & alignment + MSP430FrameLowering FrameLowering; + MSP430InstrInfo InstrInfo; + MSP430TargetLowering TLInfo; + MSP430SelectionDAGInfo TSInfo; + public: /// This constructor initializes the data members to match that /// of the specified triple. /// MSP430Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); + const std::string &FS, const TargetMachine &TM); + + MSP430Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } + const MSP430InstrInfo *getInstrInfo() const { return &InstrInfo; } + const DataLayout *getDataLayout() const { return &DL; } + const TargetRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + const MSP430TargetLowering *getTargetLowering() const { return &TLInfo; } + const MSP430SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } }; } // End llvm namespace diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp index 50be2be..5ca36f2 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.cpp +++ b/lib/Target/MSP430/MSP430TargetMachine.cpp @@ -24,19 +24,13 @@ extern "C" void LLVMInitializeMSP430Target() { RegisterTargetMachine<MSP430TargetMachine> X(TheMSP430Target); } -MSP430TargetMachine::MSP430TargetMachine(const Target &T, - StringRef TT, - StringRef CPU, - StringRef FS, +MSP430TargetMachine::MSP430TargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), - // FIXME: Check DataLayout string. 
- DL("e-m:e-p:16:16-i32:16:32-n8:16"), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/MSP430/MSP430TargetMachine.h b/lib/Target/MSP430/MSP430TargetMachine.h index ea5d407..efa8403 100644 --- a/lib/Target/MSP430/MSP430TargetMachine.h +++ b/lib/Target/MSP430/MSP430TargetMachine.h @@ -15,13 +15,7 @@ #ifndef LLVM_TARGET_MSP430_TARGETMACHINE_H #define LLVM_TARGET_MSP430_TARGETMACHINE_H -#include "MSP430FrameLowering.h" -#include "MSP430ISelLowering.h" -#include "MSP430InstrInfo.h" -#include "MSP430RegisterInfo.h" -#include "MSP430SelectionDAGInfo.h" #include "MSP430Subtarget.h" -#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -31,11 +25,6 @@ namespace llvm { /// class MSP430TargetMachine : public LLVMTargetMachine { MSP430Subtarget Subtarget; - const DataLayout DL; // Calculates type size & alignment - MSP430InstrInfo InstrInfo; - MSP430TargetLowering TLInfo; - MSP430SelectionDAGInfo TSInfo; - MSP430FrameLowering FrameLowering; public: MSP430TargetMachine(const Target &T, StringRef TT, @@ -44,22 +33,25 @@ public: CodeGenOpt::Level OL); const TargetFrameLowering *getFrameLowering() const override { - return &FrameLowering; + return getSubtargetImpl()->getFrameLowering(); + } + const MSP430InstrInfo *getInstrInfo() const override { + return getSubtargetImpl()->getInstrInfo(); + } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); + } + const MSP430Subtarget *getSubtargetImpl() const override { + return &Subtarget; } - const MSP430InstrInfo *getInstrInfo() const override { return &InstrInfo; } - const DataLayout *getDataLayout() const override { return &DL;} - const MSP430Subtarget *getSubtargetImpl() const override { return &Subtarget; } - const TargetRegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); + return getSubtargetImpl()->getRegisterInfo(); } - const MSP430TargetLowering *getTargetLowering() const override { - return &TLInfo; + return getSubtargetImpl()->getTargetLowering(); } - - const MSP430SelectionDAGInfo* getSelectionDAGInfo() const override { - return &TSInfo; + const MSP430SelectionDAGInfo *getSelectionDAGInfo() const override { + return getSubtargetImpl()->getSelectionDAGInfo(); } TargetPassConfig *createPassConfig(PassManagerBase &PM) override; }; // MSP430TargetMachine. 
diff --git a/lib/Target/Mips/Android.mk b/lib/Target/Mips/Android.mk index 4e8831c..9f437f8 100644 --- a/lib/Target/Mips/Android.mk +++ b/lib/Target/Mips/Android.mk @@ -8,6 +8,7 @@ mips_codegen_TBLGEN_TABLES := \ MipsGenMCPseudoLowering.inc \ MipsGenAsmWriter.inc \ MipsGenDAGISel.inc \ + MipsGenFastISel.inc \ MipsGenCallingConv.inc \ MipsGenSubtargetInfo.inc diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 86fd386..0c06be8 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -72,72 +72,69 @@ class MipsAsmParser : public MCTargetAsmParser { #define GET_ASSEMBLER_HEADER #include "MipsGenAsmMatcher.inc" + unsigned checkTargetMatchPredicate(MCInst &Inst) override; + bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand *> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, + OperandVector &Operands, MCStreamer &Out, + unsigned &ErrorInfo, bool MatchingInlineAsm) override; /// Parse a register as used in CFI directives bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseParenSuffix(StringRef Name, - SmallVectorImpl<MCParsedAsmOperand *> &Operands); + bool ParseParenSuffix(StringRef Name, OperandVector &Operands); - bool ParseBracketSuffix(StringRef Name, - SmallVectorImpl<MCParsedAsmOperand *> &Operands); + bool ParseBracketSuffix(StringRef Name, OperandVector &Operands); - bool - ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand *> &Operands) override; + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; bool ParseDirective(AsmToken DirectiveID) override; - MipsAsmParser::OperandMatchResultTy - parseMemOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands); - - MipsAsmParser::OperandMatchResultTy MatchAnyRegisterNameWithoutDollar( - SmallVectorImpl<MCParsedAsmOperand *> &Operands, StringRef Identifier, - SMLoc S); + MipsAsmParser::OperandMatchResultTy parseMemOperand(OperandVector &Operands); MipsAsmParser::OperandMatchResultTy - MatchAnyRegisterWithoutDollar(SmallVectorImpl<MCParsedAsmOperand *> &Operands, - SMLoc S); + MatchAnyRegisterNameWithoutDollar(OperandVector &Operands, + StringRef Identifier, SMLoc S); MipsAsmParser::OperandMatchResultTy - ParseAnyRegister(SmallVectorImpl<MCParsedAsmOperand *> &Operands); + MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S); - MipsAsmParser::OperandMatchResultTy - ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands); + MipsAsmParser::OperandMatchResultTy ParseAnyRegister(OperandVector &Operands); - MipsAsmParser::OperandMatchResultTy - ParseJumpTarget(SmallVectorImpl<MCParsedAsmOperand *> &Operands); + MipsAsmParser::OperandMatchResultTy ParseImm(OperandVector &Operands); - MipsAsmParser::OperandMatchResultTy - parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands); + MipsAsmParser::OperandMatchResultTy ParseJumpTarget(OperandVector &Operands); - MipsAsmParser::OperandMatchResultTy - ParseLSAImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands); + MipsAsmParser::OperandMatchResultTy parseInvNum(OperandVector &Operands); - bool searchSymbolAlias(SmallVectorImpl<MCParsedAsmOperand *> &Operands); + MipsAsmParser::OperandMatchResultTy ParseLSAImm(OperandVector &Operands); - bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &, - StringRef Mnemonic); + bool searchSymbolAlias(OperandVector &Operands); + + 
bool ParseOperand(OperandVector &, StringRef Mnemonic); bool needsExpansion(MCInst &Inst); - void expandInstruction(MCInst &Inst, SMLoc IDLoc, + // Expands assembly pseudo instructions. + // Returns false on success, true otherwise. + bool expandInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); - void expandLoadImm(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); - void expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); - void expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, + + bool expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); + void expandMemInst(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions, bool isLoad, bool isImmOpnd); - bool reportParseError(StringRef ErrorMsg); - bool reportParseError(SMLoc Loc, StringRef ErrorMsg); + bool reportParseError(Twine ErrorMsg); + bool reportParseError(SMLoc Loc, Twine ErrorMsg); bool parseMemOffset(const MCExpr *&Res, bool isParenExpr); bool parseRelocOperand(const MCExpr *&Res); @@ -159,32 +156,20 @@ class MipsAsmParser : public MCTargetAsmParser { bool parseSetReorderDirective(); bool parseSetNoReorderDirective(); bool parseSetNoMips16Directive(); + bool parseSetFpDirective(); bool parseSetAssignment(); bool parseDataDirective(unsigned Size, SMLoc L); bool parseDirectiveGpWord(); bool parseDirectiveGpDWord(); + bool parseDirectiveModule(); + bool parseDirectiveModuleFP(); + bool parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI, + StringRef Directive); MCSymbolRefExpr::VariantKind getVariantKind(StringRef Symbol); - bool isGP64() const { - return (STI.getFeatureBits() & Mips::FeatureGP64Bit) != 0; - } - - bool isFP64() const { - return (STI.getFeatureBits() & Mips::FeatureFP64Bit) != 0; - } - - bool isN32() const { return STI.getFeatureBits() & Mips::FeatureN32; } - bool isN64() const { return STI.getFeatureBits() & Mips::FeatureN64; } - - bool isMicroMips() const { - return STI.getFeatureBits() & Mips::FeatureMicroMips; - } - - bool parseRegister(unsigned &RegNum); - bool eatComma(StringRef ErrorStr); int matchCPURegisterName(StringRef Symbol); @@ -205,7 +190,7 @@ class MipsAsmParser : public MCTargetAsmParser { unsigned getGPR(int RegNo); - int getATReg(); + int getATReg(SMLoc Loc); bool processInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions); @@ -230,23 +215,85 @@ class MipsAsmParser : public MCTargetAsmParser { } public: + enum MipsMatchResultTy { + Match_RequiresDifferentSrcAndDst = FIRST_TARGET_MATCH_RESULT_TY +#define GET_OPERAND_DIAGNOSTIC_TYPES +#include "MipsGenAsmMatcher.inc" +#undef GET_OPERAND_DIAGNOSTIC_TYPES + + }; + MipsAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, - const MCInstrInfo &MII, - const MCTargetOptions &Options) + const MCInstrInfo &MII, const MCTargetOptions &Options) : MCTargetAsmParser(), STI(sti), Parser(parser) { // Initialize the set of available features. setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); + getTargetStreamer().updateABIInfo(*this); + // Assert exactly one ABI was chosen. 
assert((((STI.getFeatureBits() & Mips::FeatureO32) != 0) + ((STI.getFeatureBits() & Mips::FeatureEABI) != 0) + ((STI.getFeatureBits() & Mips::FeatureN32) != 0) + ((STI.getFeatureBits() & Mips::FeatureN64) != 0)) == 1); + + if (!isABI_O32() && !allowOddSPReg() != 0) + report_fatal_error("-mno-odd-spreg requires the O32 ABI"); } MCAsmParser &getParser() const { return Parser; } MCAsmLexer &getLexer() const { return Parser.getLexer(); } + /// True if all of $fcc0 - $fcc7 exist for the current ISA. + bool hasEightFccRegisters() const { return hasMips4() || hasMips32(); } + + bool isGP64bit() const { return STI.getFeatureBits() & Mips::FeatureGP64Bit; } + bool isFP64bit() const { return STI.getFeatureBits() & Mips::FeatureFP64Bit; } + bool isABI_N32() const { return STI.getFeatureBits() & Mips::FeatureN32; } + bool isABI_N64() const { return STI.getFeatureBits() & Mips::FeatureN64; } + bool isABI_O32() const { return STI.getFeatureBits() & Mips::FeatureO32; } + bool isABI_FPXX() const { return false; } // TODO: add check for FeatureXX + + bool allowOddSPReg() const { + return !(STI.getFeatureBits() & Mips::FeatureNoOddSPReg); + } + + bool inMicroMipsMode() const { + return STI.getFeatureBits() & Mips::FeatureMicroMips; + } + bool hasMips1() const { return STI.getFeatureBits() & Mips::FeatureMips1; } + bool hasMips2() const { return STI.getFeatureBits() & Mips::FeatureMips2; } + bool hasMips3() const { return STI.getFeatureBits() & Mips::FeatureMips3; } + bool hasMips4() const { return STI.getFeatureBits() & Mips::FeatureMips4; } + bool hasMips5() const { return STI.getFeatureBits() & Mips::FeatureMips5; } + bool hasMips32() const { + return (STI.getFeatureBits() & Mips::FeatureMips32); + } + bool hasMips64() const { + return (STI.getFeatureBits() & Mips::FeatureMips64); + } + bool hasMips32r2() const { + return (STI.getFeatureBits() & Mips::FeatureMips32r2); + } + bool hasMips64r2() const { + return (STI.getFeatureBits() & Mips::FeatureMips64r2); + } + bool hasMips32r6() const { + return (STI.getFeatureBits() & Mips::FeatureMips32r6); + } + bool hasMips64r6() const { + return (STI.getFeatureBits() & Mips::FeatureMips64r6); + } + bool hasDSP() const { return (STI.getFeatureBits() & Mips::FeatureDSP); } + bool hasDSPR2() const { return (STI.getFeatureBits() & Mips::FeatureDSPR2); } + bool hasMSA() const { return (STI.getFeatureBits() & Mips::FeatureMSA); } + + bool inMips16Mode() const { + return STI.getFeatureBits() & Mips::FeatureMips16; + } + // TODO: see how can we get this info. + bool mipsSEUsesSoftFloat() const { return false; } + /// Warn if RegNo is the current assembler temporary. void WarnIfAssemblerTemporary(int RegNo, SMLoc Loc); }; @@ -261,9 +308,9 @@ public: /// Broad categories of register classes /// The exact class is finalized by the render method. 
enum RegKind { - RegKind_GPR = 1, /// GPR32 and GPR64 (depending on isGP64()) + RegKind_GPR = 1, /// GPR32 and GPR64 (depending on isGP64bit()) RegKind_FGR = 2, /// FGR32, FGR64, AFGR64 (depending on context and - /// isFP64()) + /// isFP64bit()) RegKind_FCC = 4, /// FCC RegKind_MSA128 = 8, /// MSA128[BHWD] (makes no difference which) RegKind_MSACtrl = 16, /// MSA control registers @@ -289,9 +336,11 @@ private: k_Token /// A simple token } Kind; +public: MipsOperand(KindTy K, MipsAsmParser &Parser) : MCParsedAsmOperand(), Kind(K), AsmParser(Parser) {} +private: /// For diagnostics, and checking the assembler temporary MipsAsmParser &AsmParser; @@ -330,10 +379,11 @@ private: SMLoc StartLoc, EndLoc; /// Internal constructor for register kinds - static MipsOperand *CreateReg(unsigned Index, RegKind RegKind, - const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E, - MipsAsmParser &Parser) { - MipsOperand *Op = new MipsOperand(k_RegisterIndex, Parser); + static std::unique_ptr<MipsOperand> CreateReg(unsigned Index, RegKind RegKind, + const MCRegisterInfo *RegInfo, + SMLoc S, SMLoc E, + MipsAsmParser &Parser) { + auto Op = make_unique<MipsOperand>(k_RegisterIndex, Parser); Op->RegIdx.Index = Index; Op->RegIdx.RegInfo = RegInfo; Op->RegIdx.Kind = RegKind; @@ -521,6 +571,10 @@ public: void addFGR32AsmRegOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateReg(getFGR32Reg())); + // FIXME: We ought to do this for -integrated-as without -via-file-asm too. + if (!AsmParser.allowOddSPReg() && RegIdx.Index & 1) + AsmParser.Error(StartLoc, "-mno-odd-spreg prohibits the use of odd FPU " + "registers"); } void addFGRH32AsmRegOperands(MCInst &Inst, unsigned N) const { @@ -612,6 +666,12 @@ public: return Kind == k_Token; } bool isMem() const override { return Kind == k_Memory; } + bool isConstantMemOff() const { + return isMem() && dyn_cast<MCConstantExpr>(getMemOff()); + } + template <unsigned Bits> bool isMemWithSimmOffset() const { + return isMem() && isConstantMemOff() && isInt<Bits>(getConstantMemOff()); + } bool isInvNum() const { return Kind == k_Immediate; } bool isLSAImm() const { if (!isConstantImm()) @@ -656,9 +716,13 @@ public: return Mem.Off; } - static MipsOperand *CreateToken(StringRef Str, SMLoc S, - MipsAsmParser &Parser) { - MipsOperand *Op = new MipsOperand(k_Token, Parser); + int64_t getConstantMemOff() const { + return static_cast<const MCConstantExpr *>(getMemOff())->getValue(); + } + + static std::unique_ptr<MipsOperand> CreateToken(StringRef Str, SMLoc S, + MipsAsmParser &Parser) { + auto Op = make_unique<MipsOperand>(k_Token, Parser); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -668,74 +732,75 @@ public: /// Create a numeric register (e.g. $1). The exact register remains /// unresolved until an instruction successfully matches - static MipsOperand *CreateNumericReg(unsigned Index, - const MCRegisterInfo *RegInfo, SMLoc S, - SMLoc E, MipsAsmParser &Parser) { + static std::unique_ptr<MipsOperand> + CreateNumericReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, + SMLoc E, MipsAsmParser &Parser) { DEBUG(dbgs() << "CreateNumericReg(" << Index << ", ...)\n"); return CreateReg(Index, RegKind_Numeric, RegInfo, S, E, Parser); } /// Create a register that is definitely a GPR. /// This is typically only used for named registers such as $gp. 
- static MipsOperand *CreateGPRReg(unsigned Index, - const MCRegisterInfo *RegInfo, SMLoc S, - SMLoc E, MipsAsmParser &Parser) { + static std::unique_ptr<MipsOperand> + CreateGPRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E, + MipsAsmParser &Parser) { return CreateReg(Index, RegKind_GPR, RegInfo, S, E, Parser); } /// Create a register that is definitely a FGR. /// This is typically only used for named registers such as $f0. - static MipsOperand *CreateFGRReg(unsigned Index, - const MCRegisterInfo *RegInfo, SMLoc S, - SMLoc E, MipsAsmParser &Parser) { + static std::unique_ptr<MipsOperand> + CreateFGRReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E, + MipsAsmParser &Parser) { return CreateReg(Index, RegKind_FGR, RegInfo, S, E, Parser); } /// Create a register that is definitely an FCC. /// This is typically only used for named registers such as $fcc0. - static MipsOperand *CreateFCCReg(unsigned Index, - const MCRegisterInfo *RegInfo, SMLoc S, - SMLoc E, MipsAsmParser &Parser) { + static std::unique_ptr<MipsOperand> + CreateFCCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E, + MipsAsmParser &Parser) { return CreateReg(Index, RegKind_FCC, RegInfo, S, E, Parser); } /// Create a register that is definitely an ACC. /// This is typically only used for named registers such as $ac0. - static MipsOperand *CreateACCReg(unsigned Index, - const MCRegisterInfo *RegInfo, SMLoc S, - SMLoc E, MipsAsmParser &Parser) { + static std::unique_ptr<MipsOperand> + CreateACCReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, SMLoc E, + MipsAsmParser &Parser) { return CreateReg(Index, RegKind_ACC, RegInfo, S, E, Parser); } /// Create a register that is definitely an MSA128. /// This is typically only used for named registers such as $w0. - static MipsOperand *CreateMSA128Reg(unsigned Index, - const MCRegisterInfo *RegInfo, SMLoc S, - SMLoc E, MipsAsmParser &Parser) { + static std::unique_ptr<MipsOperand> + CreateMSA128Reg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, + SMLoc E, MipsAsmParser &Parser) { return CreateReg(Index, RegKind_MSA128, RegInfo, S, E, Parser); } /// Create a register that is definitely an MSACtrl. /// This is typically only used for named registers such as $msaaccess. 
- static MipsOperand *CreateMSACtrlReg(unsigned Index, - const MCRegisterInfo *RegInfo, SMLoc S, - SMLoc E, MipsAsmParser &Parser) { + static std::unique_ptr<MipsOperand> + CreateMSACtrlReg(unsigned Index, const MCRegisterInfo *RegInfo, SMLoc S, + SMLoc E, MipsAsmParser &Parser) { return CreateReg(Index, RegKind_MSACtrl, RegInfo, S, E, Parser); } - static MipsOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, - MipsAsmParser &Parser) { - MipsOperand *Op = new MipsOperand(k_Immediate, Parser); + static std::unique_ptr<MipsOperand> + CreateImm(const MCExpr *Val, SMLoc S, SMLoc E, MipsAsmParser &Parser) { + auto Op = make_unique<MipsOperand>(k_Immediate, Parser); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static MipsOperand *CreateMem(MipsOperand *Base, const MCExpr *Off, SMLoc S, - SMLoc E, MipsAsmParser &Parser) { - MipsOperand *Op = new MipsOperand(k_Memory, Parser); - Op->Mem.Base = Base; + static std::unique_ptr<MipsOperand> + CreateMem(std::unique_ptr<MipsOperand> Base, const MCExpr *Off, SMLoc S, + SMLoc E, MipsAsmParser &Parser) { + auto Op = make_unique<MipsOperand>(k_Memory, Parser); + Op->Mem.Base = Base.release(); Op->Mem.Off = Off; Op->StartLoc = S; Op->EndLoc = E; @@ -756,7 +821,11 @@ public: return isRegIdx() && RegIdx.Kind & RegKind_CCR && RegIdx.Index <= 31; } bool isFCCAsmReg() const { - return isRegIdx() && RegIdx.Kind & RegKind_FCC && RegIdx.Index <= 7; + if (!(isRegIdx() && RegIdx.Kind & RegKind_FCC)) + return false; + if (!AsmParser.hasEightFccRegisters()) + return RegIdx.Index == 0; + return RegIdx.Index <= 7; } bool isACCAsmReg() const { return isRegIdx() && RegIdx.Kind & RegKind_ACC && RegIdx.Index <= 3; @@ -849,9 +918,10 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, Offset = Inst.getOperand(2); if (!Offset.isImm()) break; // We'll deal with this situation later on when applying fixups. - if (!isIntN(isMicroMips() ? 17 : 18, Offset.getImm())) + if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 1LL << (isMicroMips() ? 1 : 2))) + if (OffsetToAlignment(Offset.getImm(), + 1LL << (inMicroMipsMode() ? 1 : 2))) return Error(IDLoc, "branch to misaligned address"); break; case Mips::BGEZ: @@ -874,14 +944,23 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, Offset = Inst.getOperand(1); if (!Offset.isImm()) break; // We'll deal with this situation later on when applying fixups. - if (!isIntN(isMicroMips() ? 17 : 18, Offset.getImm())) + if (!isIntN(inMicroMipsMode() ? 17 : 18, Offset.getImm())) return Error(IDLoc, "branch target out of range"); - if (OffsetToAlignment(Offset.getImm(), 1LL << (isMicroMips() ? 1 : 2))) + if (OffsetToAlignment(Offset.getImm(), + 1LL << (inMicroMipsMode() ? 1 : 2))) return Error(IDLoc, "branch to misaligned address"); break; } } + // SSNOP is deprecated on MIPS32r6/MIPS64r6 + // We still accept it but it is a normal nop. + if (hasMips32r6() && Inst.getOpcode() == Mips::SSNOP) { + std::string ISA = hasMips64r6() ? "MIPS64r6" : "MIPS32r6"; + Warning(IDLoc, "ssnop is deprecated for " + ISA + " and is equivalent to a " + "nop instruction"); + } + if (MCID.hasDelaySlot() && Options.isReorder()) { // If this instruction has a delay slot and .set reorder is active, // emit a NOP after it. 
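// A minimal sketch of the body this hunk elides, assuming the conventional
// MIPS idiom of materializing nop as "sll $zero, $zero, 0" (illustrative;
// not necessarily this patch's verbatim code):
Instructions.push_back(Inst);
MCInst NopInst;
NopInst.setOpcode(Mips::SLL);
NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
NopInst.addOperand(MCOperand::CreateReg(Mips::ZERO));
NopInst.addOperand(MCOperand::CreateImm(0));
Instructions.push_back(NopInst);
return false;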
@@ -930,7 +1009,7 @@ bool MipsAsmParser::processInstruction(MCInst &Inst, SMLoc IDLoc, } // if load/store if (needsExpansion(Inst)) - expandInstruction(Inst, IDLoc, Instructions); + return expandInstruction(Inst, IDLoc, Instructions); else Instructions.push_back(Inst); @@ -943,17 +1022,27 @@ bool MipsAsmParser::needsExpansion(MCInst &Inst) { case Mips::LoadImm32Reg: case Mips::LoadAddr32Imm: case Mips::LoadAddr32Reg: + case Mips::LoadImm64Reg: return true; default: return false; } } -void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, +bool MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { switch (Inst.getOpcode()) { + default: + assert(0 && "unimplemented expansion"); + return true; case Mips::LoadImm32Reg: return expandLoadImm(Inst, IDLoc, Instructions); + case Mips::LoadImm64Reg: + if (!isGP64bit()) { + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + return expandLoadImm(Inst, IDLoc, Instructions); case Mips::LoadAddr32Imm: return expandLoadAddressImm(Inst, IDLoc, Instructions); case Mips::LoadAddr32Reg: @@ -961,7 +1050,31 @@ void MipsAsmParser::expandInstruction(MCInst &Inst, SMLoc IDLoc, } } -void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, +namespace { +template <int Shift, bool PerformShift> +void createShiftOr(int64_t Value, unsigned RegNo, SMLoc IDLoc, + SmallVectorImpl<MCInst> &Instructions) { + MCInst tmpInst; + if (PerformShift) { + tmpInst.setOpcode(Mips::DSLL); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateImm(16)); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); + tmpInst.clear(); + } + tmpInst.setOpcode(Mips::ORi); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand(MCOperand::CreateReg(RegNo)); + tmpInst.addOperand( + MCOperand::CreateImm(((Value & (0xffffLL << Shift)) >> Shift))); + tmpInst.setLoc(IDLoc); + Instructions.push_back(tmpInst); +} +} + +bool MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { MCInst tmpInst; const MCOperand &ImmOp = Inst.getOperand(1); @@ -969,8 +1082,10 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, const MCOperand &RegOp = Inst.getOperand(0); assert(RegOp.isReg() && "expected register operand kind"); - int ImmValue = ImmOp.getImm(); + int64_t ImmValue = ImmOp.getImm(); tmpInst.setLoc(IDLoc); + // FIXME: gas has a special case for values that are 000...1111, which + // becomes a li -1 and then a dsrl if (0 <= ImmValue && ImmValue <= 65535) { // For 0 <= j <= 65535. // li d,j => ori d,$zero,j @@ -987,25 +1102,76 @@ void MipsAsmParser::expandLoadImm(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateReg(Mips::ZERO)); tmpInst.addOperand(MCOperand::CreateImm(ImmValue)); Instructions.push_back(tmpInst); - } else { - // For any other value of j that is representable as a 32-bit integer. 
+ } else if ((ImmValue & 0xffffffff) == ImmValue) { + // For any value of j that is representable as a 32-bit integer, create + // a sequence of: // li d,j => lui d,hi16(j) // ori d,d,lo16(j) tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateImm((ImmValue & 0xffff0000) >> 16)); Instructions.push_back(tmpInst); - tmpInst.clear(); - tmpInst.setOpcode(Mips::ORi); + createShiftOr<0, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + } else if ((ImmValue & (0xffffLL << 48)) == 0) { + if (!isGP64bit()) { + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + + // <------- lo32 ------> + // <------- hi32 ------> + // <- hi16 -> <- lo16 -> + // _________________________________ + // | | | | + // | 16-bits | 16-bits | 16-bits | + // |__________|__________|__________| + // + // For any value of j that is representable as a 48-bit integer, create + // a sequence of: + // li d,j => lui d,hi16(j) + // ori d,d,hi16(lo32(j)) + // dsll d,d,16 + // ori d,d,lo16(lo32(j)) + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); + tmpInst.addOperand( + MCOperand::CreateImm((ImmValue & (0xffffLL << 32)) >> 32)); + Instructions.push_back(tmpInst); + createShiftOr<16, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + } else { + if (!isGP64bit()) { + Error(IDLoc, "instruction requires a CPU feature not currently enabled"); + return true; + } + + // <------- hi32 ------> <------- lo32 ------> + // <- hi16 -> <- lo16 -> + // ___________________________________________ + // | | | | | + // | 16-bits | 16-bits | 16-bits | 16-bits | + // |__________|__________|__________|__________| + // + // For any value of j that isn't representable as a 48-bit integer, create + // a sequence of: + // li d,j => lui d,hi16(j) + // ori d,d,lo16(hi32(j)) + // dsll d,d,16 + // ori d,d,hi16(lo32(j)) + // dsll d,d,16 + // ori d,d,lo16(lo32(j)) + tmpInst.setOpcode(Mips::LUi); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); tmpInst.addOperand(MCOperand::CreateReg(RegOp.getReg())); - tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); - tmpInst.setLoc(IDLoc); + tmpInst.addOperand( + MCOperand::CreateImm((ImmValue & (0xffffLL << 48)) >> 48)); Instructions.push_back(tmpInst); + createShiftOr<32, false>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + createShiftOr<16, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); + createShiftOr<0, true>(ImmValue, RegOp.getReg(), IDLoc, Instructions); } + return false; } -void +bool MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { MCInst tmpInst; @@ -1046,9 +1212,10 @@ MipsAsmParser::expandLoadAddressReg(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateReg(SrcRegOp.getReg())); Instructions.push_back(tmpInst); } + return false; } -void +bool MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, SmallVectorImpl<MCInst> &Instructions) { MCInst tmpInst; @@ -1080,6 +1247,7 @@ MipsAsmParser::expandLoadAddressImm(MCInst &Inst, SMLoc IDLoc, tmpInst.addOperand(MCOperand::CreateImm(ImmValue & 0xffff)); Instructions.push_back(tmpInst); } + return false; } void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, @@ -1090,8 +1258,6 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, unsigned ImmOffset, HiOffset, LoOffset; const MCExpr *ExprOffset; unsigned TmpRegNum; - unsigned AtRegNum = getReg( - (isGP64()) ?
Mips::GPR64RegClassID : Mips::GPR32RegClassID, getATReg()); // 1st operand is either the source or destination register. assert(Inst.getOperand(0).isReg() && "expected register operand kind"); unsigned RegOpNum = Inst.getOperand(0).getReg(); @@ -1111,10 +1277,46 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, ExprOffset = Inst.getOperand(2).getExpr(); // All instructions will have the same location. TempInst.setLoc(IDLoc); - // 1st instruction in expansion is LUi. For load instruction we can use - // the dst register as a temporary if base and dst are different, - // but for stores we must use $at. - TmpRegNum = (isLoad && (BaseRegNum != RegOpNum)) ? RegOpNum : AtRegNum; + // These are some of the types of expansions we perform here: + // 1) lw $8, sym => lui $8, %hi(sym) + // lw $8, %lo(sym)($8) + // 2) lw $8, offset($9) => lui $8, %hi(offset) + // add $8, $8, $9 + // lw $8, %lo(offset)($9) + // 3) lw $8, offset($8) => lui $at, %hi(offset) + // add $at, $at, $8 + // lw $8, %lo(offset)($at) + // 4) sw $8, sym => lui $at, %hi(sym) + // sw $8, %lo(sym)($at) + // 5) sw $8, offset($8) => lui $at, %hi(offset) + // add $at, $at, $8 + // sw $8, %lo(offset)($at) + // 6) ldc1 $f0, sym => lui $at, %hi(sym) + // ldc1 $f0, %lo(sym)($at) + // + // For load instructions we can use the destination register as a temporary + // if base and dst are different (examples 1 and 2) and if the base register + // is general purpose otherwise we must use $at (example 6) and error if it's + // not available. For stores we must use $at (examples 4 and 5) because we + // must not clobber the source register setting up the offset. + const MCInstrDesc &Desc = getInstDesc(Inst.getOpcode()); + int16_t RegClassOp0 = Desc.OpInfo[0].RegClass; + unsigned RegClassIDOp0 = + getContext().getRegisterInfo()->getRegClass(RegClassOp0).getID(); + bool IsGPR = (RegClassIDOp0 == Mips::GPR32RegClassID) || + (RegClassIDOp0 == Mips::GPR64RegClassID); + if (isLoad && IsGPR && (BaseRegNum != RegOpNum)) + TmpRegNum = RegOpNum; + else { + int AT = getATReg(IDLoc); + // At this point we need AT to perform the expansions and we exit if it is + // not available. + if (!AT) + return; + TmpRegNum = getReg( + (isGP64bit()) ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, AT); + } + TempInst.setOpcode(Mips::LUi); TempInst.addOperand(MCOperand::CreateReg(TmpRegNum)); if (isImmOpnd) @@ -1164,10 +1366,24 @@ void MipsAsmParser::expandMemInst(MCInst &Inst, SMLoc IDLoc, TempInst.clear(); } -bool MipsAsmParser::MatchAndEmitInstruction( - SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand *> &Operands, MCStreamer &Out, - unsigned &ErrorInfo, bool MatchingInlineAsm) { +unsigned MipsAsmParser::checkTargetMatchPredicate(MCInst &Inst) { + // As described by the Mips32r2 spec, the registers Rd and Rs for + // jalr.hb must be different. 
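// For example, "jalr.hb $25, $25" is rejected by the check below; the spec
// makes rs == rd unpredictable because the instruction would not have the
// same effect if it had to be re-executed after an exception.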
+ unsigned Opcode = Inst.getOpcode(); + + if (Opcode == Mips::JALR_HB && + (Inst.getOperand(0).getReg() == Inst.getOperand(1).getReg())) + return Match_RequiresDifferentSrcAndDst; + + return Match_Success; +} + +bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + unsigned &ErrorInfo, + bool MatchingInlineAsm) { + MCInst Inst; SmallVector<MCInst, 8> Instructions; unsigned MatchResult = @@ -1192,7 +1408,7 @@ bool MipsAsmParser::MatchAndEmitInstruction( if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((MipsOperand *)Operands[ErrorInfo])->getStartLoc(); + ErrorLoc = ((MipsOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } @@ -1201,6 +1417,8 @@ bool MipsAsmParser::MatchAndEmitInstruction( } case Match_MnemonicFail: return Error(IDLoc, "invalid instruction"); + case Match_RequiresDifferentSrcAndDst: + return Error(IDLoc, "source and destination must be different"); } return true; } @@ -1254,7 +1472,7 @@ int MipsAsmParser::matchCPURegisterName(StringRef Name) { .Case("t9", 25) .Default(-1); - if (isN32() || isN64()) { + if (isABI_N32() || isABI_N64()) { // Although SGI documentation just cuts out t0-t3 for n32/n64, // GNU pushes the values of t0-t3 to override the o32/o64 values for t4-t7 // We are supporting both cases, so for t0-t3 we'll just push them to t4-t7. @@ -1354,10 +1572,11 @@ bool MipsAssemblerOptions::setATReg(unsigned Reg) { return true; } -int MipsAsmParser::getATReg() { +int MipsAsmParser::getATReg(SMLoc Loc) { int AT = Options.getATRegNum(); if (AT == 0) - TokError("Pseudo instruction requires $at, which is not available"); + reportParseError(Loc, + "Pseudo instruction requires $at, which is not available"); return AT; } @@ -1366,7 +1585,7 @@ unsigned MipsAsmParser::getReg(int RC, int RegNo) { } unsigned MipsAsmParser::getGPR(int RegNo) { - return getReg(isGP64() ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, + return getReg(isGP64bit() ? Mips::GPR64RegClassID : Mips::GPR32RegClassID, RegNo); } @@ -1378,9 +1597,7 @@ int MipsAsmParser::matchRegisterByNumber(unsigned RegNum, unsigned RegClass) { return getReg(RegClass, RegNum); } -bool -MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands, - StringRef Mnemonic) { +bool MipsAsmParser::ParseOperand(OperandVector &Operands, StringRef Mnemonic) { DEBUG(dbgs() << "ParseOperand\n"); // Check if the current operand has a custom associated parser, if so, try to @@ -1431,6 +1648,7 @@ MipsAsmParser::ParseOperand(SmallVectorImpl<MCParsedAsmOperand *> &Operands, case AsmToken::Minus: case AsmToken::Plus: case AsmToken::Integer: + case AsmToken::Tilde: case AsmToken::String: { DEBUG(dbgs() << ".. 
generic integer\n"); OperandMatchResultTy ResTy = ParseImm(Operands); @@ -1578,11 +1796,11 @@ bool MipsAsmParser::parseRelocOperand(const MCExpr *&Res) { bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) { - SmallVector<MCParsedAsmOperand *, 1> Operands; + SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Operands; OperandMatchResultTy ResTy = ParseAnyRegister(Operands); if (ResTy == MatchOperand_Success) { assert(Operands.size() == 1); - MipsOperand &Operand = *static_cast<MipsOperand *>(Operands.front()); + MipsOperand &Operand = static_cast<MipsOperand &>(*Operands.front()); StartLoc = Operand.getStartLoc(); EndLoc = Operand.getEndLoc(); @@ -1592,11 +1810,9 @@ bool MipsAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, // register is a parse error. if (Operand.isGPRAsmReg()) { // Resolve to GPR32 or GPR64 appropriately. - RegNo = isGP64() ? Operand.getGPR64Reg() : Operand.getGPR32Reg(); + RegNo = isGP64bit() ? Operand.getGPR64Reg() : Operand.getGPR32Reg(); } - delete &Operand; - return (RegNo == (unsigned)-1); } @@ -1632,8 +1848,8 @@ bool MipsAsmParser::parseMemOffset(const MCExpr *&Res, bool isParenExpr) { return Result; } -MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( - SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::parseMemOperand(OperandVector &Operands) { DEBUG(dbgs() << "parseMemOperand\n"); const MCExpr *IdVal = nullptr; SMLoc S; @@ -1653,8 +1869,8 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( const AsmToken &Tok = Parser.getTok(); // Get the next token. if (Tok.isNot(AsmToken::LParen)) { - MipsOperand *Mnemonic = static_cast<MipsOperand *>(Operands[0]); - if (Mnemonic->getToken() == "la") { + MipsOperand &Mnemonic = static_cast<MipsOperand &>(*Operands[0]); + if (Mnemonic.getToken() == "la") { SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(MipsOperand::CreateImm(IdVal, S, E, *this)); @@ -1666,9 +1882,10 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( // Zero register assumed, add a memory operand with ZERO as its base. // "Base" will be managed by k_Memory. - MipsOperand *Base = MipsOperand::CreateGPRReg( - 0, getContext().getRegisterInfo(), S, E, *this); - Operands.push_back(MipsOperand::CreateMem(Base, IdVal, S, E, *this)); + auto Base = MipsOperand::CreateGPRReg(0, getContext().getRegisterInfo(), + S, E, *this); + Operands.push_back( + MipsOperand::CreateMem(std::move(Base), IdVal, S, E, *this)); return MatchOperand_Success; } Error(Parser.getTok().getLoc(), "'(' expected"); @@ -1695,7 +1912,8 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( IdVal = MCConstantExpr::Create(0, getContext()); // Replace the register operand with the memory operand. - MipsOperand *op = static_cast<MipsOperand *>(Operands.back()); + std::unique_ptr<MipsOperand> op( + static_cast<MipsOperand *>(Operands.back().release())); // Remove the register from the operands. // "op" will be managed by k_Memory. 
Operands.pop_back(); @@ -1709,12 +1927,11 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::parseMemOperand( getContext()); } - Operands.push_back(MipsOperand::CreateMem(op, IdVal, S, E, *this)); + Operands.push_back(MipsOperand::CreateMem(std::move(op), IdVal, S, E, *this)); return MatchOperand_Success; } -bool MipsAsmParser::searchSymbolAlias( - SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +bool MipsAsmParser::searchSymbolAlias(OperandVector &Operands) { MCSymbol *Sym = getContext().LookupSymbol(Parser.getTok().getIdentifier()); if (Sym) { @@ -1740,9 +1957,8 @@ bool MipsAsmParser::searchSymbolAlias( } else if (Expr->getKind() == MCExpr::Constant) { Parser.Lex(); const MCConstantExpr *Const = static_cast<const MCConstantExpr *>(Expr); - MipsOperand *op = - MipsOperand::CreateImm(Const, S, Parser.getTok().getLoc(), *this); - Operands.push_back(op); + Operands.push_back( + MipsOperand::CreateImm(Const, S, Parser.getTok().getLoc(), *this)); return true; } } @@ -1750,9 +1966,9 @@ bool MipsAsmParser::searchSymbolAlias( } MipsAsmParser::OperandMatchResultTy -MipsAsmParser::MatchAnyRegisterNameWithoutDollar( - SmallVectorImpl<MCParsedAsmOperand *> &Operands, StringRef Identifier, - SMLoc S) { +MipsAsmParser::MatchAnyRegisterNameWithoutDollar(OperandVector &Operands, + StringRef Identifier, + SMLoc S) { int Index = matchCPURegisterName(Identifier); if (Index != -1) { Operands.push_back(MipsOperand::CreateGPRReg( @@ -1799,8 +2015,7 @@ MipsAsmParser::MatchAnyRegisterNameWithoutDollar( } MipsAsmParser::OperandMatchResultTy -MipsAsmParser::MatchAnyRegisterWithoutDollar( - SmallVectorImpl<MCParsedAsmOperand *> &Operands, SMLoc S) { +MipsAsmParser::MatchAnyRegisterWithoutDollar(OperandVector &Operands, SMLoc S) { auto Token = Parser.getLexer().peekTok(false); if (Token.is(AsmToken::Identifier)) { @@ -1822,8 +2037,8 @@ MipsAsmParser::MatchAnyRegisterWithoutDollar( return MatchOperand_NoMatch; } -MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseAnyRegister( - SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::ParseAnyRegister(OperandVector &Operands) { DEBUG(dbgs() << "ParseAnyRegister\n"); auto Token = Parser.getTok(); @@ -1850,7 +2065,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseAnyRegister( } MipsAsmParser::OperandMatchResultTy -MipsAsmParser::ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +MipsAsmParser::ParseImm(OperandVector &Operands) { switch (getLexer().getKind()) { default: return MatchOperand_NoMatch; @@ -1858,6 +2073,7 @@ MipsAsmParser::ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) { case AsmToken::Minus: case AsmToken::Plus: case AsmToken::Integer: + case AsmToken::Tilde: case AsmToken::String: break; } @@ -1872,8 +2088,8 @@ MipsAsmParser::ParseImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) { return MatchOperand_Success; } -MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseJumpTarget( - SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +MipsAsmParser::OperandMatchResultTy +MipsAsmParser::ParseJumpTarget(OperandVector &Operands) { DEBUG(dbgs() << "ParseJumpTarget\n"); SMLoc S = getLexer().getLoc(); @@ -1899,7 +2115,7 @@ MipsAsmParser::OperandMatchResultTy MipsAsmParser::ParseJumpTarget( } MipsAsmParser::OperandMatchResultTy -MipsAsmParser::parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +MipsAsmParser::parseInvNum(OperandVector &Operands) { const MCExpr *IdVal; // If the first token is '$' we may have register operand. 
if (Parser.getTok().is(AsmToken::Dollar)) @@ -1917,7 +2133,7 @@ MipsAsmParser::parseInvNum(SmallVectorImpl<MCParsedAsmOperand *> &Operands) { } MipsAsmParser::OperandMatchResultTy -MipsAsmParser::ParseLSAImm(SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +MipsAsmParser::ParseLSAImm(OperandVector &Operands) { switch (getLexer().getKind()) { default: return MatchOperand_NoMatch; @@ -1996,8 +2212,7 @@ MCSymbolRefExpr::VariantKind MipsAsmParser::getVariantKind(StringRef Symbol) { /// ::= '(', register, ')' /// handle it before we iterate so we don't get tripped up by the lack of /// a comma. -bool MipsAsmParser::ParseParenSuffix( - StringRef Name, SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +bool MipsAsmParser::ParseParenSuffix(StringRef Name, OperandVector &Operands) { if (getLexer().is(AsmToken::LParen)) { Operands.push_back( MipsOperand::CreateToken("(", getLexer().getLoc(), *this)); @@ -2025,8 +2240,8 @@ bool MipsAsmParser::ParseParenSuffix( /// ::= '[', integer, ']' /// handle it before we iterate so we don't get tripped up by the lack of /// a comma. -bool MipsAsmParser::ParseBracketSuffix( - StringRef Name, SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +bool MipsAsmParser::ParseBracketSuffix(StringRef Name, + OperandVector &Operands) { if (getLexer().is(AsmToken::LBrac)) { Operands.push_back( MipsOperand::CreateToken("[", getLexer().getLoc(), *this)); @@ -2048,10 +2263,12 @@ bool MipsAsmParser::ParseBracketSuffix( return false; } -bool MipsAsmParser::ParseInstruction( - ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand *> &Operands) { +bool MipsAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) { DEBUG(dbgs() << "ParseInstruction\n"); + // We have reached first instruction, module directive after + // this is forbidden. + getTargetStreamer().setCanHaveModuleDir(false); // Check if we have valid mnemonic if (!mnemonicIsValid(Name, 0)) { Parser.eatToEndOfStatement(); @@ -2098,13 +2315,13 @@ bool MipsAsmParser::ParseInstruction( return false; } -bool MipsAsmParser::reportParseError(StringRef ErrorMsg) { +bool MipsAsmParser::reportParseError(Twine ErrorMsg) { SMLoc Loc = getLexer().getLoc(); Parser.eatToEndOfStatement(); return Error(Loc, ErrorMsg); } -bool MipsAsmParser::reportParseError(SMLoc Loc, StringRef ErrorMsg) { +bool MipsAsmParser::reportParseError(SMLoc Loc, Twine ErrorMsg) { return Error(Loc, ErrorMsg); } @@ -2238,6 +2455,32 @@ bool MipsAsmParser::parseSetNoMips16Directive() { return false; } +bool MipsAsmParser::parseSetFpDirective() { + MipsABIFlagsSection::FpABIKind FpAbiVal; + // Line can be: .set fp=32 + // .set fp=xx + // .set fp=64 + Parser.Lex(); // Eat fp token + AsmToken Tok = Parser.getTok(); + if (Tok.isNot(AsmToken::Equal)) { + reportParseError("unexpected token in statement"); + return false; + } + Parser.Lex(); // Eat '=' token. + Tok = Parser.getTok(); + + if (!parseFpABIValue(FpAbiVal, ".set")) + return false; + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token in statement"); + return false; + } + getTargetStreamer().emitDirectiveSetFp(FpAbiVal); + Parser.Lex(); // Consume the EndOfStatement. 
+ return false; +} + bool MipsAsmParser::parseSetAssignment() { StringRef Name; const MCExpr *Value; @@ -2296,25 +2539,6 @@ bool MipsAsmParser::parseSetFeature(uint64_t Feature) { return false; } -bool MipsAsmParser::parseRegister(unsigned &RegNum) { - if (!getLexer().is(AsmToken::Dollar)) - return false; - - Parser.Lex(); - - const AsmToken &Reg = Parser.getTok(); - if (Reg.is(AsmToken::Identifier)) { - RegNum = matchCPURegisterName(Reg.getIdentifier()); - } else if (Reg.is(AsmToken::Integer)) { - RegNum = Reg.getIntVal(); - } else { - return false; - } - - Parser.Lex(); - return true; -} - bool MipsAsmParser::eatComma(StringRef ErrorStr) { if (getLexer().isNot(AsmToken::Comma)) { SMLoc Loc = getLexer().getLoc(); @@ -2332,21 +2556,20 @@ bool MipsAsmParser::parseDirectiveCPLoad(SMLoc Loc) { // FIXME: Warn if cpload is used in Mips16 mode. - SmallVector<MCParsedAsmOperand *, 1> Reg; + SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> Reg; OperandMatchResultTy ResTy = ParseAnyRegister(Reg); if (ResTy == MatchOperand_NoMatch || ResTy == MatchOperand_ParseFail) { reportParseError("expected register containing function address"); return false; } - MipsOperand *RegOpnd = static_cast<MipsOperand *>(Reg[0]); - if (!RegOpnd->isGPRAsmReg()) { - reportParseError(RegOpnd->getStartLoc(), "invalid register"); + MipsOperand &RegOpnd = static_cast<MipsOperand &>(*Reg[0]); + if (!RegOpnd.isGPRAsmReg()) { + reportParseError(RegOpnd.getStartLoc(), "invalid register"); return false; } - getTargetStreamer().emitDirectiveCpload(RegOpnd->getGPR32Reg()); - delete RegOpnd; + getTargetStreamer().emitDirectiveCpload(RegOpnd.getGPR32Reg()); return false; } @@ -2355,23 +2578,48 @@ bool MipsAsmParser::parseDirectiveCPSetup() { unsigned Save; bool SaveIsReg = true; - if (!parseRegister(FuncReg)) - return reportParseError("expected register containing function address"); - FuncReg = getGPR(FuncReg); + SmallVector<std::unique_ptr<MCParsedAsmOperand>, 1> TmpReg; + OperandMatchResultTy ResTy = ParseAnyRegister(TmpReg); + if (ResTy == MatchOperand_NoMatch) { + reportParseError("expected register containing function address"); + Parser.eatToEndOfStatement(); + return false; + } + + MipsOperand &FuncRegOpnd = static_cast<MipsOperand &>(*TmpReg[0]); + if (!FuncRegOpnd.isGPRAsmReg()) { + reportParseError(FuncRegOpnd.getStartLoc(), "invalid register"); + Parser.eatToEndOfStatement(); + return false; + } + + FuncReg = FuncRegOpnd.getGPR32Reg(); + TmpReg.clear(); if (!eatComma("expected comma parsing directive")) return true; - if (!parseRegister(Save)) { + ResTy = ParseAnyRegister(TmpReg); + if (ResTy == MatchOperand_NoMatch) { const AsmToken &Tok = Parser.getTok(); if (Tok.is(AsmToken::Integer)) { Save = Tok.getIntVal(); SaveIsReg = false; Parser.Lex(); - } else - return reportParseError("expected save register or stack offset"); - } else - Save = getGPR(Save); + } else { + reportParseError("expected save register or stack offset"); + Parser.eatToEndOfStatement(); + return false; + } + } else { + MipsOperand &SaveOpnd = static_cast<MipsOperand &>(*TmpReg[0]); + if (!SaveOpnd.isGPRAsmReg()) { + reportParseError(SaveOpnd.getStartLoc(), "invalid register"); + Parser.eatToEndOfStatement(); + return false; + } + Save = SaveOpnd.getGPR32Reg(); + } if (!eatComma("expected comma parsing directive")) return true; @@ -2414,6 +2662,8 @@ bool MipsAsmParser::parseDirectiveSet() { return parseSetNoAtDirective(); } else if (Tok.getString() == "at") { return parseSetAtDirective(); + } else if (Tok.getString() == "fp") { + return 
parseSetFpDirective(); } else if (Tok.getString() == "reorder") { return parseSetReorderDirective(); } else if (Tok.getString() == "noreorder") { @@ -2546,6 +2796,134 @@ bool MipsAsmParser::parseDirectiveOption() { return false; } +/// parseDirectiveModule +/// ::= .module oddspreg +/// ::= .module nooddspreg +/// ::= .module fp=value +bool MipsAsmParser::parseDirectiveModule() { + MCAsmLexer &Lexer = getLexer(); + SMLoc L = Lexer.getLoc(); + + if (!getTargetStreamer().getCanHaveModuleDir()) { + // TODO : get a better message. + reportParseError(".module directive must appear before any code"); + return false; + } + + if (Lexer.is(AsmToken::Identifier)) { + StringRef Option = Parser.getTok().getString(); + Parser.Lex(); + + if (Option == "oddspreg") { + getTargetStreamer().emitDirectiveModuleOddSPReg(true, isABI_O32()); + clearFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("Expected end of statement"); + return false; + } + + return false; + } else if (Option == "nooddspreg") { + if (!isABI_O32()) { + Error(L, "'.module nooddspreg' requires the O32 ABI"); + return false; + } + + getTargetStreamer().emitDirectiveModuleOddSPReg(false, isABI_O32()); + setFeatureBits(Mips::FeatureNoOddSPReg, "nooddspreg"); + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("Expected end of statement"); + return false; + } + + return false; + } else if (Option == "fp") { + return parseDirectiveModuleFP(); + } + + return Error(L, "'" + Twine(Option) + "' is not a valid .module option."); + } + + return false; +} + +/// parseDirectiveModuleFP +/// ::= =32 +/// ::= =xx +/// ::= =64 +bool MipsAsmParser::parseDirectiveModuleFP() { + MCAsmLexer &Lexer = getLexer(); + + if (Lexer.isNot(AsmToken::Equal)) { + reportParseError("unexpected token in statement"); + return false; + } + Parser.Lex(); // Eat '=' token. + + MipsABIFlagsSection::FpABIKind FpABI; + if (!parseFpABIValue(FpABI, ".module")) + return false; + + if (getLexer().isNot(AsmToken::EndOfStatement)) { + reportParseError("unexpected token in statement"); + return false; + } + + // Emit appropriate flags. + getTargetStreamer().emitDirectiveModuleFP(FpABI, isABI_O32()); + Parser.Lex(); // Consume the EndOfStatement. 
+ return false; +} + +bool MipsAsmParser::parseFpABIValue(MipsABIFlagsSection::FpABIKind &FpABI, + StringRef Directive) { + MCAsmLexer &Lexer = getLexer(); + + if (Lexer.is(AsmToken::Identifier)) { + StringRef Value = Parser.getTok().getString(); + Parser.Lex(); + + if (Value != "xx") { + reportParseError("unsupported value, expected 'xx', '32' or '64'"); + return false; + } + + if (!isABI_O32()) { + reportParseError("'" + Directive + " fp=xx' requires the O32 ABI"); + return false; + } + + FpABI = MipsABIFlagsSection::FpABIKind::XX; + return true; + } + + if (Lexer.is(AsmToken::Integer)) { + unsigned Value = Parser.getTok().getIntVal(); + Parser.Lex(); + + if (Value != 32 && Value != 64) { + reportParseError("unsupported value, expected 'xx', '32' or '64'"); + return false; + } + + if (Value == 32) { + if (!isABI_O32()) { + reportParseError("'" + Directive + " fp=32' requires the O32 ABI"); + return false; + } + + FpABI = MipsABIFlagsSection::FpABIKind::S32; + } else + FpABI = MipsABIFlagsSection::FpABIKind::S64; + + return true; + } + + return false; +} + bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { StringRef IDVal = DirectiveID.getString(); @@ -2624,6 +3002,9 @@ bool MipsAsmParser::ParseDirective(AsmToken DirectiveID) { if (IDVal == ".cpsetup") return parseDirectiveCPSetup(); + if (IDVal == ".module") + return parseDirectiveModule(); + return true; } diff --git a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp index 95670aa..902b877 100644 --- a/lib/Target/Mips/Disassembler/MipsDisassembler.cpp +++ b/lib/Target/Mips/Disassembler/MipsDisassembler.cpp @@ -57,16 +57,24 @@ class MipsDisassembler : public MipsDisassemblerBase { public: /// Constructor - Initializes the disassembler. /// - MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, - bool bigEndian) : - MipsDisassemblerBase(STI, Ctx, bigEndian) { - IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips; - } + MipsDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx, bool bigEndian) + : MipsDisassemblerBase(STI, Ctx, bigEndian) { + IsMicroMips = STI.getFeatureBits() & Mips::FeatureMicroMips; + } - bool isMips32r6() const { + bool hasMips3() const { return STI.getFeatureBits() & Mips::FeatureMips3; } + bool hasMips32() const { return STI.getFeatureBits() & Mips::FeatureMips32; } + bool hasMips32r6() const { return STI.getFeatureBits() & Mips::FeatureMips32r6; } + bool isGP64() const { return STI.getFeatureBits() & Mips::FeatureGP64Bit; } + + bool hasCOP3() const { + // Only present in MIPS-I and MIPS-II + return !hasMips32() && !hasMips3(); + } + /// getInstruction - See MCDisassembler. 
DecodeStatus getInstruction(MCInst &instr, uint64_t &size, @@ -149,6 +157,10 @@ static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -260,6 +272,11 @@ static DecodeStatus DecodeFMem(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder); + static DecodeStatus DecodeSimm16(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -285,6 +302,9 @@ static DecodeStatus DecodeExtSize(MCInst &Inst, static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); + /// INSVE_[BHWD] have an implicit operand that the generated decoder doesn't /// handle. template <typename InsnType> @@ -316,6 +336,11 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, const void *Decoder); +template <typename InsnType> +static DecodeStatus +DecodeBlezGroupBranch(MCInst &MI, InsnType insn, uint64_t Address, + const void *Decoder); + namespace llvm { extern Target TheMipselTarget, TheMipsTarget, TheMips64Target, TheMips64elTarget; @@ -511,6 +536,7 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, InsnType Rs = fieldFromInstruction(insn, 21, 5); InsnType Rt = fieldFromInstruction(insn, 16, 5); InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + bool HasRs = false; if (Rt == 0) return MCDisassembler::Fail; @@ -518,8 +544,14 @@ static DecodeStatus DecodeBlezlGroupBranch(MCInst &MI, InsnType insn, MI.setOpcode(Mips::BLEZC); else if (Rs == Rt) MI.setOpcode(Mips::BGEZC); - else - return MCDisassembler::Fail; // FIXME: BGEC is not implemented yet. + else { + HasRs = true; + MI.setOpcode(Mips::BGEC); + } + + if (HasRs) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rs))); MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, Rt))); @@ -544,6 +576,8 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, // BLTZC if rs == rt && rt != 0 // BLTC if rs != rt && rs != 0 && rt != 0 + bool HasRs = false; + InsnType Rs = fieldFromInstruction(insn, 21, 5); InsnType Rt = fieldFromInstruction(insn, 16, 5); InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; @@ -554,8 +588,14 @@ static DecodeStatus DecodeBgtzlGroupBranch(MCInst &MI, InsnType insn, MI.setOpcode(Mips::BGTZC); else if (Rs == Rt) MI.setOpcode(Mips::BLTZC); - else - return MCDisassembler::Fail; // FIXME: BLTC is not implemented yet. 
+ else { + MI.setOpcode(Mips::BLTC); + HasRs = true; + } + + if (HasRs) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rs))); MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, Rt))); @@ -595,8 +635,11 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, } else if (Rs == Rt) { MI.setOpcode(Mips::BLTZALC); HasRs = true; - } else - return MCDisassembler::Fail; // BLTUC not implemented yet + } else { + MI.setOpcode(Mips::BLTUC); + HasRs = true; + HasRt = true; + } if (HasRs) MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, @@ -611,6 +654,48 @@ static DecodeStatus DecodeBgtzGroupBranch(MCInst &MI, InsnType insn, return MCDisassembler::Success; } +template <typename InsnType> +static DecodeStatus DecodeBlezGroupBranch(MCInst &MI, InsnType insn, + uint64_t Address, + const void *Decoder) { + // If we are called then we can assume that MIPS32r6/MIPS64r6 is enabled + // (otherwise we would have matched the BLEZL instruction from the earlier + // ISAs instead). + // + // We have: + // 0b000110 sssss ttttt iiiiiiiiiiiiiiii + // Invalid if rt == 0 + // BLEZALC if rs == 0 && rt != 0 + // BGEZALC if rs == rt && rt != 0 + // BGEUC if rs != rt && rs != 0 && rt != 0 + + InsnType Rs = fieldFromInstruction(insn, 21, 5); + InsnType Rt = fieldFromInstruction(insn, 16, 5); + InsnType Imm = SignExtend64(fieldFromInstruction(insn, 0, 16), 16) << 2; + bool HasRs = false; + + if (Rt == 0) + return MCDisassembler::Fail; + else if (Rs == 0) + MI.setOpcode(Mips::BLEZALC); + else if (Rs == Rt) + MI.setOpcode(Mips::BGEZALC); + else { + HasRs = true; + MI.setOpcode(Mips::BGEUC); + } + + if (HasRs) + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rs))); + MI.addOperand(MCOperand::CreateReg(getReg(Decoder, Mips::GPR32RegClassID, + Rt))); + + MI.addOperand(MCOperand::CreateImm(Imm)); + + return MCDisassembler::Success; +} + /// readInstruction - read four bytes from the MemoryObject /// and return the 32-bit word sorted according to the given endianness static DecodeStatus readInstruction32(const MemoryObject &region, @@ -670,6 +755,7 @@ MipsDisassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; if (IsMicroMips) { + DEBUG(dbgs() << "Trying MicroMips32 table (32-bit opcodes):\n"); // Calling the auto-generated decoder function. Result = decodeInstruction(DecoderTableMicroMips32, instr, Insn, Address, this, STI); @@ -680,7 +766,28 @@ MipsDisassembler::getInstruction(MCInst &instr, return MCDisassembler::Fail; } - if (isMips32r6()) { + if (hasCOP3()) { + DEBUG(dbgs() << "Trying COP3_ table (32-bit opcodes):\n"); + Result = + decodeInstruction(DecoderTableCOP3_32, instr, Insn, Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + } + + if (hasMips32r6() && isGP64()) { + DEBUG(dbgs() << "Trying Mips32r6_64r6 (GPR64) table (32-bit opcodes):\n"); + Result = decodeInstruction(DecoderTableMips32r6_64r6_GP6432, instr, Insn, + Address, this, STI); + if (Result != MCDisassembler::Fail) { + Size = 4; + return Result; + } + } + + if (hasMips32r6()) { + DEBUG(dbgs() << "Trying Mips32r6_64r6 table (32-bit opcodes):\n"); Result = decodeInstruction(DecoderTableMips32r6_64r632, instr, Insn, Address, this, STI); if (Result != MCDisassembler::Fail) { @@ -689,6 +796,7 @@ MipsDisassembler::getInstruction(MCInst &instr, } } + DEBUG(dbgs() << "Trying Mips table (32-bit opcodes):\n"); // Calling the auto-generated decoder function.
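// For a standard (non-microMIPS) subtarget the new DEBUG logging makes the
// fallback order explicit: COP3 (MIPS-I/MIPS-II only), then
// Mips32r6_64r6_GP64 when 64-bit GPRs are available, then Mips32r6_64r6,
// and finally the generic Mips table below.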
Result = decodeInstruction(DecoderTableMips32, instr, Insn, Address, this, STI); @@ -840,6 +948,17 @@ static DecodeStatus DecodeFCCRegisterClass(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeFGRCCRegisterClass(MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 31) + return MCDisassembler::Fail; + + unsigned Reg = getReg(Decoder, Mips::FGRCCRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Reg)); + return MCDisassembler::Success; +} + static DecodeStatus DecodeMem(MCInst &Inst, unsigned Insn, uint64_t Address, @@ -965,6 +1084,27 @@ static DecodeStatus DecodeFMem(MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeSpecial3LlSc(MCInst &Inst, + unsigned Insn, + uint64_t Address, + const void *Decoder) { + int64_t Offset = SignExtend64<9>((Insn >> 7) & 0x1ff); + unsigned Rt = fieldFromInstruction(Insn, 16, 5); + unsigned Base = fieldFromInstruction(Insn, 21, 5); + + Rt = getReg(Decoder, Mips::GPR32RegClassID, Rt); + Base = getReg(Decoder, Mips::GPR32RegClassID, Base); + + if(Inst.getOpcode() == Mips::SC_R6 || Inst.getOpcode() == Mips::SCD_R6){ + Inst.addOperand(MCOperand::CreateReg(Rt)); + } + + Inst.addOperand(MCOperand::CreateReg(Rt)); + Inst.addOperand(MCOperand::CreateReg(Base)); + Inst.addOperand(MCOperand::CreateImm(Offset)); + + return MCDisassembler::Success; +} static DecodeStatus DecodeHWRegsRegisterClass(MCInst &Inst, unsigned RegNo, @@ -1197,3 +1337,9 @@ static DecodeStatus DecodeSimm19Lsl2(MCInst &Inst, unsigned Insn, Inst.addOperand(MCOperand::CreateImm(SignExtend32<19>(Insn) << 2)); return MCDisassembler::Success; } + +static DecodeStatus DecodeSimm18Lsl3(MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(SignExtend32<18>(Insn) << 3)); + return MCDisassembler::Success; +} diff --git a/lib/Target/Mips/MCTargetDesc/Android.mk b/lib/Target/Mips/MCTargetDesc/Android.mk index 7ee11a1..c8d18fc 100644 --- a/lib/Target/Mips/MCTargetDesc/Android.mk +++ b/lib/Target/Mips/MCTargetDesc/Android.mk @@ -7,6 +7,7 @@ mips_mc_desc_TBLGEN_TABLES := \ MipsGenSubtargetInfo.inc mips_mc_desc_SRC_FILES := \ + MipsABIFlagsSection.cpp \ MipsAsmBackend.cpp \ MipsELFObjectWriter.cpp \ MipsELFStreamer.cpp \ diff --git a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt index d3e2fd7..c14ee35 100644 --- a/lib/Target/Mips/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/Mips/MCTargetDesc/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMMipsDesc + MipsABIFlagsSection.cpp MipsAsmBackend.cpp MipsELFObjectWriter.cpp MipsELFStreamer.cpp diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp new file mode 100644 index 0000000..52d5dd3 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.cpp @@ -0,0 +1,60 @@ +//===-- MipsABIFlagsSection.cpp - Mips ELF ABI Flags Section ---*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "MipsABIFlagsSection.h" + +using namespace llvm; + +uint8_t MipsABIFlagsSection::getFpABIValue() { + switch (FpABI) { + case FpABIKind::ANY: + return Val_GNU_MIPS_ABI_FP_ANY; + case FpABIKind::XX: + return Val_GNU_MIPS_ABI_FP_XX; + case FpABIKind::S32: + return Val_GNU_MIPS_ABI_FP_DOUBLE; + case FpABIKind::S64: + if (Is32BitABI) + return OddSPReg ? Val_GNU_MIPS_ABI_FP_64 : Val_GNU_MIPS_ABI_FP_64A; + return Val_GNU_MIPS_ABI_FP_DOUBLE; + } + + llvm_unreachable("unexpected fp abi value"); +} + +StringRef MipsABIFlagsSection::getFpABIString(FpABIKind Value) { + switch (Value) { + case FpABIKind::XX: + return "xx"; + case FpABIKind::S32: + return "32"; + case FpABIKind::S64: + return "64"; + default: + llvm_unreachable("unsupported fp abi value"); + } +} + +namespace llvm { +MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection) { + // Write out a Elf_Internal_ABIFlags_v0 struct + OS.EmitIntValue(ABIFlagsSection.getVersionValue(), 2); // version + OS.EmitIntValue(ABIFlagsSection.getISALevelValue(), 1); // isa_level + OS.EmitIntValue(ABIFlagsSection.getISARevisionValue(), 1); // isa_rev + OS.EmitIntValue(ABIFlagsSection.getGPRSizeValue(), 1); // gpr_size + OS.EmitIntValue(ABIFlagsSection.getCPR1SizeValue(), 1); // cpr1_size + OS.EmitIntValue(ABIFlagsSection.getCPR2SizeValue(), 1); // cpr2_size + OS.EmitIntValue(ABIFlagsSection.getFpABIValue(), 1); // fp_abi + OS.EmitIntValue(ABIFlagsSection.getISAExtensionSetValue(), 4); // isa_ext + OS.EmitIntValue(ABIFlagsSection.getASESetValue(), 4); // ases + OS.EmitIntValue(ABIFlagsSection.getFlags1Value(), 4); // flags1 + OS.EmitIntValue(ABIFlagsSection.getFlags2Value(), 4); // flags2 + return OS; +} +} diff --git a/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h new file mode 100644 index 0000000..ab18c44 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/MipsABIFlagsSection.h @@ -0,0 +1,237 @@ +//===-- MipsABIFlagsSection.h - Mips ELF ABI Flags Section -----*- C++ -*--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef MIPSABIFLAGSSECTION_H +#define MIPSABIFLAGSSECTION_H + +#include "llvm/MC/MCStreamer.h" + +namespace llvm { + +class MCStreamer; + +struct MipsABIFlagsSection { + // Values for the xxx_size bytes of an ABI flags structure. + enum AFL_REG { + AFL_REG_NONE = 0x00, // No registers. + AFL_REG_32 = 0x01, // 32-bit registers. + AFL_REG_64 = 0x02, // 64-bit registers. + AFL_REG_128 = 0x03 // 128-bit registers. + }; + + // Masks for the ases word of an ABI flags structure. + enum AFL_ASE { + AFL_ASE_DSP = 0x00000001, // DSP ASE. + AFL_ASE_DSPR2 = 0x00000002, // DSP R2 ASE. + AFL_ASE_EVA = 0x00000004, // Enhanced VA Scheme. + AFL_ASE_MCU = 0x00000008, // MCU (MicroController) ASE. + AFL_ASE_MDMX = 0x00000010, // MDMX ASE. + AFL_ASE_MIPS3D = 0x00000020, // MIPS-3D ASE. + AFL_ASE_MT = 0x00000040, // MT ASE. + AFL_ASE_SMARTMIPS = 0x00000080, // SmartMIPS ASE. + AFL_ASE_VIRT = 0x00000100, // VZ ASE. + AFL_ASE_MSA = 0x00000200, // MSA ASE. + AFL_ASE_MIPS16 = 0x00000400, // MIPS16 ASE. + AFL_ASE_MICROMIPS = 0x00000800, // MICROMIPS ASE. + AFL_ASE_XPA = 0x00001000 // XPA ASE. + }; + + // Values for the isa_ext word of an ABI flags structure. 
+ enum AFL_EXT { + AFL_EXT_XLR = 1, // RMI Xlr instruction. + AFL_EXT_OCTEON2 = 2, // Cavium Networks Octeon2. + AFL_EXT_OCTEONP = 3, // Cavium Networks OcteonP. + AFL_EXT_LOONGSON_3A = 4, // Loongson 3A. + AFL_EXT_OCTEON = 5, // Cavium Networks Octeon. + AFL_EXT_5900 = 6, // MIPS R5900 instruction. + AFL_EXT_4650 = 7, // MIPS R4650 instruction. + AFL_EXT_4010 = 8, // LSI R4010 instruction. + AFL_EXT_4100 = 9, // NEC VR4100 instruction. + AFL_EXT_3900 = 10, // Toshiba R3900 instruction. + AFL_EXT_10000 = 11, // MIPS R10000 instruction. + AFL_EXT_SB1 = 12, // Broadcom SB-1 instruction. + AFL_EXT_4111 = 13, // NEC VR4111/VR4181 instruction. + AFL_EXT_4120 = 14, // NEC VR4120 instruction. + AFL_EXT_5400 = 15, // NEC VR5400 instruction. + AFL_EXT_5500 = 16, // NEC VR5500 instruction. + AFL_EXT_LOONGSON_2E = 17, // ST Microelectronics Loongson 2E. + AFL_EXT_LOONGSON_2F = 18 // ST Microelectronics Loongson 2F. + }; + + // Values for the fp_abi word of an ABI flags structure. + enum Val_GNU_MIPS_ABI { + Val_GNU_MIPS_ABI_FP_ANY = 0, + Val_GNU_MIPS_ABI_FP_DOUBLE = 1, + Val_GNU_MIPS_ABI_FP_XX = 5, + Val_GNU_MIPS_ABI_FP_64 = 6, + Val_GNU_MIPS_ABI_FP_64A = 7 + }; + + enum AFL_FLAGS1 { + AFL_FLAGS1_ODDSPREG = 1 + }; + + // Internal representation of the values used in .module fp=value + enum class FpABIKind { ANY, XX, S32, S64 }; + + // Version of flags structure. + uint16_t Version; + // The level of the ISA: 1-5, 32, 64. + uint8_t ISALevel; + // The revision of ISA: 0 for MIPS V and below, 1-n otherwise. + uint8_t ISARevision; + // The size of general purpose registers. + AFL_REG GPRSize; + // The size of co-processor 1 registers. + AFL_REG CPR1Size; + // The size of co-processor 2 registers. + AFL_REG CPR2Size; + // Processor-specific extension. + uint32_t ISAExtensionSet; + // Mask of ASEs used. + uint32_t ASESet; + + bool OddSPReg; + + bool Is32BitABI; + +protected: + // The floating-point ABI. 
+ FpABIKind FpABI; + +public: + MipsABIFlagsSection() + : Version(0), ISALevel(0), ISARevision(0), GPRSize(AFL_REG_NONE), + CPR1Size(AFL_REG_NONE), CPR2Size(AFL_REG_NONE), ISAExtensionSet(0), + ASESet(0), OddSPReg(false), Is32BitABI(false), FpABI(FpABIKind::ANY) {} + + uint16_t getVersionValue() { return (uint16_t)Version; } + uint8_t getISALevelValue() { return (uint8_t)ISALevel; } + uint8_t getISARevisionValue() { return (uint8_t)ISARevision; } + uint8_t getGPRSizeValue() { return (uint8_t)GPRSize; } + uint8_t getCPR1SizeValue() { return (uint8_t)CPR1Size; } + uint8_t getCPR2SizeValue() { return (uint8_t)CPR2Size; } + uint8_t getFpABIValue(); + uint32_t getISAExtensionSetValue() { return (uint32_t)ISAExtensionSet; } + uint32_t getASESetValue() { return (uint32_t)ASESet; } + + uint32_t getFlags1Value() { + uint32_t Value = 0; + + if (OddSPReg) + Value |= (uint32_t)AFL_FLAGS1_ODDSPREG; + + return Value; + } + + uint32_t getFlags2Value() { return 0; } + + FpABIKind getFpABI() { return FpABI; } + void setFpABI(FpABIKind Value, bool IsABI32Bit) { + FpABI = Value; + Is32BitABI = IsABI32Bit; + } + StringRef getFpABIString(FpABIKind Value); + + template <class PredicateLibrary> + void setISALevelAndRevisionFromPredicates(const PredicateLibrary &P) { + if (P.hasMips64()) { + ISALevel = 64; + if (P.hasMips64r6()) + ISARevision = 6; + else if (P.hasMips64r2()) + ISARevision = 2; + else + ISARevision = 1; + } else if (P.hasMips32()) { + ISALevel = 32; + if (P.hasMips32r6()) + ISARevision = 6; + else if (P.hasMips32r2()) + ISARevision = 2; + else + ISARevision = 1; + } else { + ISARevision = 0; + if (P.hasMips5()) + ISALevel = 5; + else if (P.hasMips4()) + ISALevel = 4; + else if (P.hasMips3()) + ISALevel = 3; + else if (P.hasMips2()) + ISALevel = 2; + else if (P.hasMips1()) + ISALevel = 1; + else + llvm_unreachable("Unknown ISA level!"); + } + } + + template <class PredicateLibrary> + void setGPRSizeFromPredicates(const PredicateLibrary &P) { + GPRSize = P.isGP64bit() ? AFL_REG_64 : AFL_REG_32; + } + + template <class PredicateLibrary> + void setCPR1SizeFromPredicates(const PredicateLibrary &P) { + if (P.mipsSEUsesSoftFloat()) + CPR1Size = AFL_REG_NONE; + else if (P.hasMSA()) + CPR1Size = AFL_REG_128; + else + CPR1Size = P.isFP64bit() ? 
AFL_REG_64 : AFL_REG_32; + } + + template <class PredicateLibrary> + void setASESetFromPredicates(const PredicateLibrary &P) { + ASESet = 0; + if (P.hasDSP()) + ASESet |= AFL_ASE_DSP; + if (P.hasDSPR2()) + ASESet |= AFL_ASE_DSPR2; + if (P.hasMSA()) + ASESet |= AFL_ASE_MSA; + if (P.inMicroMipsMode()) + ASESet |= AFL_ASE_MICROMIPS; + if (P.inMips16Mode()) + ASESet |= AFL_ASE_MIPS16; + } + + template <class PredicateLibrary> + void setFpAbiFromPredicates(const PredicateLibrary &P) { + Is32BitABI = P.isABI_O32(); + + FpABI = FpABIKind::ANY; + if (P.isABI_N32() || P.isABI_N64()) + FpABI = FpABIKind::S64; + else if (P.isABI_O32()) { + if (P.isFP64bit()) + FpABI = FpABIKind::S64; + else if (P.isABI_FPXX()) + FpABI = FpABIKind::XX; + else + FpABI = FpABIKind::S32; + } + } + + template <class PredicateLibrary> + void setAllFromPredicates(const PredicateLibrary &P) { + setISALevelAndRevisionFromPredicates(P); + setGPRSizeFromPredicates(P); + setCPR1SizeFromPredicates(P); + setASESetFromPredicates(P); + setFpAbiFromPredicates(P); + } +}; + +MCStreamer &operator<<(MCStreamer &OS, MipsABIFlagsSection &ABIFlagsSection); +} + +#endif diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index 5375a00..d8e6128 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -70,6 +70,13 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, if (!isIntN(16, Value) && Ctx) Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup"); break; + case Mips::fixup_MIPS_PC19_S2: + // Forcing a signed division because Value can be negative. + Value = (int64_t)Value / 4; + // We now check if Value can be encoded as a 19-bit signed immediate. + if (!isIntN(19, Value) && Ctx) + Ctx->FatalError(Fixup.getLoc(), "out of range PC19 fixup"); + break; case Mips::fixup_Mips_26: // So far we are only using this type for jumps. // The displacement is then divided by 4 to give us a 28 bit @@ -104,6 +111,13 @@ static unsigned adjustFixupValue(const MCFixup &Fixup, uint64_t Value, if (!isIntN(16, Value) && Ctx) Ctx->FatalError(Fixup.getLoc(), "out of range PC16 fixup"); break; + case Mips::fixup_MIPS_PC18_S3: + // Forcing a signed division because Value can be negative. + Value = (int64_t)Value / 8; + // We now check if Value can be encoded as an 18-bit signed immediate. + if (!isIntN(18, Value) && Ctx) + Ctx->FatalError(Fixup.getLoc(), "out of range PC18 fixup"); + break; case Mips::fixup_MIPS_PC21_S2: Value -= 4; // Forcing a signed division because Value can be negative.
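// The PC19_S2 and PC18_S3 cases above share one pattern: scale the signed
// byte offset down by the fixup's shift, then range-check the result. A
// minimal self-contained sketch of that arithmetic (fitsScaledPCRel is a
// hypothetical helper, not an LLVM API):
#include <cstdint>

// True if Offset, divided by 2^Shift, fits in a Bits-wide signed immediate.
static bool fitsScaledPCRel(int64_t Offset, unsigned Shift, unsigned Bits) {
  int64_t Value = Offset / (int64_t(1) << Shift); // signed division; Offset can be negative
  int64_t Limit = int64_t(1) << (Bits - 1);       // the bound isIntN(Bits, Value) checks
  return Value >= -Limit && Value < Limit;
}
// fitsScaledPCRel(Off, 2, 19) models fixup_MIPS_PC19_S2, and
// fitsScaledPCRel(Off, 3, 18) models fixup_MIPS_PC18_S3.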
@@ -247,6 +261,8 @@ getFixupKindInfo(MCFixupKind Kind) const {
     { "fixup_Mips_GOT_LO16", 0, 16, 0 },
     { "fixup_Mips_CALL_HI16", 0, 16, 0 },
     { "fixup_Mips_CALL_LO16", 0, 16, 0 },
+    { "fixup_MIPS_PC18_S3", 0, 18, MCFixupKindInfo::FKF_IsPCRel },
+    { "fixup_MIPS_PC19_S2", 0, 19, MCFixupKindInfo::FKF_IsPCRel },
     { "fixup_MIPS_PC21_S2", 0, 21, MCFixupKindInfo::FKF_IsPCRel },
     { "fixup_MIPS_PC26_S2", 0, 26, MCFixupKindInfo::FKF_IsPCRel },
     { "fixup_MIPS_PCHI16", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
@@ -308,6 +324,8 @@ getFixupKindInfo(MCFixupKind Kind) const {
     { "fixup_Mips_GOT_LO16", 16, 16, 0 },
     { "fixup_Mips_CALL_HI16", 16, 16, 0 },
     { "fixup_Mips_CALL_LO16", 16, 16, 0 },
+    { "fixup_MIPS_PC18_S3", 14, 18, MCFixupKindInfo::FKF_IsPCRel },
+    { "fixup_MIPS_PC19_S2", 13, 19, MCFixupKindInfo::FKF_IsPCRel },
     { "fixup_MIPS_PC21_S2", 11, 21, MCFixupKindInfo::FKF_IsPCRel },
     { "fixup_MIPS_PC26_S2", 6, 26, MCFixupKindInfo::FKF_IsPCRel },
     { "fixup_MIPS_PCHI16", 16, 16, MCFixupKindInfo::FKF_IsPCRel },
diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
index bc695e6..d5c3dbc 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.h
@@ -65,7 +65,7 @@ public:
                             const MCRelaxableFragment *DF,
                             const MCAsmLayout &Layout) const override {
     // FIXME.
-    assert(0 && "RelaxInstruction() unimplemented");
+    llvm_unreachable("RelaxInstruction() unimplemented");
     return false;
   }
diff --git a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
index 74c12ff..49ac256 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsELFObjectWriter.cpp
@@ -193,6 +193,12 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target,
   case Mips::fixup_MICROMIPS_TLS_TPREL_LO16:
     Type = ELF::R_MICROMIPS_TLS_TPREL_LO16;
     break;
+  case Mips::fixup_MIPS_PC19_S2:
+    Type = ELF::R_MIPS_PC19_S2;
+    break;
+  case Mips::fixup_MIPS_PC18_S3:
+    Type = ELF::R_MIPS_PC18_S3;
+    break;
   case Mips::fixup_MIPS_PC21_S2:
     Type = ELF::R_MIPS_PC21_S2;
     break;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
index 3079004..05080f0 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h
@@ -128,6 +128,12 @@ namespace Mips {
     // resulting in - R_MIPS_CALL_LO16
     fixup_Mips_CALL_LO16,

+    // resulting in - R_MIPS_PC18_S3
+    fixup_MIPS_PC18_S3,
+
+    // resulting in - R_MIPS_PC19_S2
+    fixup_MIPS_PC19_S2,
+
     // resulting in - R_MIPS_PC21_S2
     fixup_MIPS_PC21_S2,
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
index 6aa3c76..e415412 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp
@@ -38,7 +38,7 @@ MipsMCAsmInfo::MipsMCAsmInfo(StringRef TT) {
   ZeroDirective = "\t.space\t";
   GPRel32Directive = "\t.gpword\t";
   GPRel64Directive = "\t.gpdword\t";
-  DebugLabelSuffix = "=.";
+  UseAssignmentForEHBegin = true;
   SupportsDebugInformation = true;
   ExceptionsType = ExceptionHandling::DwarfCFI;
   HasLEB128 = true;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
index 85e0bf1..43fc521 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp
@@ -621,11 +621,42 @@ unsigned MipsMCCodeEmitter::getSimm19Lsl2Encoding(const MCInst &MI,
                                          unsigned OpNo,
                                          SmallVectorImpl<MCFixup> &Fixups,
                                          const MCSubtargetInfo &STI) const {
-  assert(MI.getOperand(OpNo).isImm());
-  // The immediate is encoded as 'immediate << 2'.
-  unsigned Res = getMachineOpValue(MI, MI.getOperand(OpNo), Fixups, STI);
-  assert((Res & 3) == 0);
-  return Res >> 2;
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm()) {
+    // The immediate is encoded as 'immediate << 2'.
+    unsigned Res = getMachineOpValue(MI, MO, Fixups, STI);
+    assert((Res & 3) == 0);
+    return Res >> 2;
+  }
+
+  assert(MO.isExpr() &&
+         "getSimm19Lsl2Encoding expects only expressions or an immediate");
+
+  const MCExpr *Expr = MO.getExpr();
+  Fixups.push_back(MCFixup::Create(0, Expr,
+                                   MCFixupKind(Mips::fixup_MIPS_PC19_S2)));
+  return 0;
+}
+
+unsigned
+MipsMCCodeEmitter::getSimm18Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+                                         SmallVectorImpl<MCFixup> &Fixups,
+                                         const MCSubtargetInfo &STI) const {
+  const MCOperand &MO = MI.getOperand(OpNo);
+  if (MO.isImm()) {
+    // The immediate is encoded as 'immediate << 3'.
+    unsigned Res = getMachineOpValue(MI, MO, Fixups, STI);
+    assert((Res & 7) == 0);
+    return Res >> 3;
+  }
+
+  assert(MO.isExpr() &&
+         "getSimm18Lsl3Encoding expects only expressions or an immediate");
+
+  const MCExpr *Expr = MO.getExpr();
+  Fixups.push_back(MCFixup::Create(0, Expr,
+                                   MCFixupKind(Mips::fixup_MIPS_PC18_S3)));
+  return 0;
 }

 #include "MipsGenMCCodeEmitter.inc"
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
index 3f7daab..304167f 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.h
@@ -141,6 +141,10 @@ public:
                                      SmallVectorImpl<MCFixup> &Fixups,
                                      const MCSubtargetInfo &STI) const;

+  unsigned getSimm18Lsl3Encoding(const MCInst &MI, unsigned OpNo,
+                                 SmallVectorImpl<MCFixup> &Fixups,
+                                 const MCSubtargetInfo &STI) const;
+
   unsigned getExprOpValue(const MCExpr *Expr, SmallVectorImpl<MCFixup> &Fixups,
                           const MCSubtargetInfo &STI) const;
diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
index 21ccc3c..5bba3e5 100644
--- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
+++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.cpp
@@ -11,6 +11,7 @@
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCAssembler.h"
 #include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCObjectStreamer.h"

 using namespace llvm;

@@ -83,33 +84,6 @@ MipsMCExpr::EvaluateAsRelocatableImpl(MCValue &Res,
   return getSubExpr()->EvaluateAsRelocatable(Res, Layout);
 }

-// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps
-// that method should be made public?
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbolsImpl(BE->getLHS(), Asm); - AddValueSymbolsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); - break; - } -} - -void MipsMCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbolsImpl(getSubExpr(), Asm); +void MipsMCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h index 8d7aacd..f193dc9 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCExpr.h @@ -49,7 +49,7 @@ public: void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const override; - void AddValueSymbols(MCAssembler *) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 660e5a7..d2b929b 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -133,6 +133,12 @@ createMCAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, return S; } +static MCStreamer *createMipsNullStreamer(MCContext &Ctx) { + MCStreamer *S = llvm::createNullStreamer(Ctx); + new MipsTargetStreamer(*S); + return S; +} + extern "C" void LLVMInitializeMipsTargetMC() { // Register the MC asm info. RegisterMCAsmInfoFn X(TheMipsTarget, createMipsMCAsmInfo); @@ -187,6 +193,12 @@ extern "C" void LLVMInitializeMipsTargetMC() { TargetRegistry::RegisterAsmStreamer(TheMips64Target, createMCAsmStreamer); TargetRegistry::RegisterAsmStreamer(TheMips64elTarget, createMCAsmStreamer); + TargetRegistry::RegisterNullStreamer(TheMipsTarget, createMipsNullStreamer); + TargetRegistry::RegisterNullStreamer(TheMipselTarget, createMipsNullStreamer); + TargetRegistry::RegisterNullStreamer(TheMips64Target, createMipsNullStreamer); + TargetRegistry::RegisterNullStreamer(TheMips64elTarget, + createMipsNullStreamer); + // Register the asm backend. TargetRegistry::RegisterMCAsmBackend(TheMipsTarget, createMipsAsmBackendEB32); diff --git a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp index cd6be73..6cde8f9 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsNaClELFStreamer.cpp @@ -48,7 +48,13 @@ private: bool PendingCall; bool isIndirectJump(const MCInst &MI) { - return MI.getOpcode() == Mips::JR || MI.getOpcode() == Mips::RET; + if (MI.getOpcode() == Mips::JALR) { + // MIPS32r6/MIPS64r6 doesn't have a JR instruction and uses JALR instead. + // JALR is an indirect branch if the link register is $0. 
+ assert(MI.getOperand(0).isReg()); + return MI.getOperand(0).getReg() == Mips::ZERO; + } + return MI.getOpcode() == Mips::JR; } bool isStackPointerFirstOperand(const MCInst &MI) { @@ -56,7 +62,9 @@ private: && MI.getOperand(0).getReg() == Mips::SP); } - bool isCall(unsigned Opcode, bool *IsIndirectCall) { + bool isCall(const MCInst &MI, bool *IsIndirectCall) { + unsigned Opcode = MI.getOpcode(); + *IsIndirectCall = false; switch (Opcode) { @@ -64,12 +72,19 @@ private: return false; case Mips::JAL: + case Mips::BAL: case Mips::BAL_BR: case Mips::BLTZAL: case Mips::BGEZAL: return true; case Mips::JALR: + // JALR is only a call if the link register is not $0. Otherwise it's an + // indirect branch. + assert(MI.getOperand(0).isReg()); + if (MI.getOperand(0).getReg() == Mips::ZERO) + return false; + *IsIndirectCall = true; return true; } @@ -137,24 +152,23 @@ public: &IsStore); bool IsSPFirstOperand = isStackPointerFirstOperand(Inst); if (IsMemAccess || IsSPFirstOperand) { - if (PendingCall) - report_fatal_error("Dangerous instruction in branch delay slot!"); - bool MaskBefore = (IsMemAccess && baseRegNeedsLoadStoreMask(Inst.getOperand(AddrIdx) .getReg())); bool MaskAfter = IsSPFirstOperand && !IsStore; - if (MaskBefore || MaskAfter) + if (MaskBefore || MaskAfter) { + if (PendingCall) + report_fatal_error("Dangerous instruction in branch delay slot!"); sandboxLoadStoreStackChange(Inst, AddrIdx, STI, MaskBefore, MaskAfter); - else - MipsELFStreamer::EmitInstruction(Inst, STI); - return; + return; + } + // fallthrough } // Sandbox calls by aligning call and branch delay to the bundle end. // For indirect calls, emit the mask before the call. bool IsIndirectCall; - if (isCall(Inst.getOpcode(), &IsIndirectCall)) { + if (isCall(Inst, &IsIndirectCall)) { if (PendingCall) report_fatal_error("Dangerous instruction in branch delay slot!"); @@ -203,6 +217,7 @@ bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx, case Mips::LWC1: case Mips::LDC1: case Mips::LL: + case Mips::LL_R6: case Mips::LWL: case Mips::LWR: *AddrIdx = 1; @@ -223,6 +238,7 @@ bool isBasePlusOffsetMemoryAccess(unsigned Opcode, unsigned *AddrIdx, // Store instructions with base address register in position 2. case Mips::SC: + case Mips::SC_R6: *AddrIdx = 2; if (IsStore) *IsStore = true; diff --git a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp index a8fa272..fbe375b 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp @@ -27,10 +27,43 @@ using namespace llvm; -// Pin vtable to this file. 
-void MipsTargetStreamer::anchor() {} - -MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S) : MCTargetStreamer(S) {} +MipsTargetStreamer::MipsTargetStreamer(MCStreamer &S) + : MCTargetStreamer(S), canHaveModuleDirective(true) {} +void MipsTargetStreamer::emitDirectiveSetMicroMips() {} +void MipsTargetStreamer::emitDirectiveSetNoMicroMips() {} +void MipsTargetStreamer::emitDirectiveSetMips16() {} +void MipsTargetStreamer::emitDirectiveSetNoMips16() {} +void MipsTargetStreamer::emitDirectiveSetReorder() {} +void MipsTargetStreamer::emitDirectiveSetNoReorder() {} +void MipsTargetStreamer::emitDirectiveSetMacro() {} +void MipsTargetStreamer::emitDirectiveSetNoMacro() {} +void MipsTargetStreamer::emitDirectiveSetAt() {} +void MipsTargetStreamer::emitDirectiveSetNoAt() {} +void MipsTargetStreamer::emitDirectiveEnd(StringRef Name) {} +void MipsTargetStreamer::emitDirectiveEnt(const MCSymbol &Symbol) {} +void MipsTargetStreamer::emitDirectiveAbiCalls() {} +void MipsTargetStreamer::emitDirectiveNaN2008() {} +void MipsTargetStreamer::emitDirectiveNaNLegacy() {} +void MipsTargetStreamer::emitDirectiveOptionPic0() {} +void MipsTargetStreamer::emitDirectiveOptionPic2() {} +void MipsTargetStreamer::emitFrame(unsigned StackReg, unsigned StackSize, + unsigned ReturnReg) {} +void MipsTargetStreamer::emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) {} +void MipsTargetStreamer::emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) { +} +void MipsTargetStreamer::emitDirectiveSetMips32R2() {} +void MipsTargetStreamer::emitDirectiveSetMips64() {} +void MipsTargetStreamer::emitDirectiveSetMips64R2() {} +void MipsTargetStreamer::emitDirectiveSetDsp() {} +void MipsTargetStreamer::emitDirectiveCpload(unsigned RegNo) {} +void MipsTargetStreamer::emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, + const MCSymbol &Sym, bool IsReg) { +} +void MipsTargetStreamer::emitDirectiveModuleOddSPReg(bool Enabled, + bool IsO32ABI) { + if (!Enabled && !IsO32ABI) + report_fatal_error("+nooddspreg is only valid for O32"); +} MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS) @@ -38,42 +71,52 @@ MipsTargetAsmStreamer::MipsTargetAsmStreamer(MCStreamer &S, void MipsTargetAsmStreamer::emitDirectiveSetMicroMips() { OS << "\t.set\tmicromips\n"; + setCanHaveModuleDir(false); } void MipsTargetAsmStreamer::emitDirectiveSetNoMicroMips() { OS << "\t.set\tnomicromips\n"; + setCanHaveModuleDir(false); } void MipsTargetAsmStreamer::emitDirectiveSetMips16() { OS << "\t.set\tmips16\n"; + setCanHaveModuleDir(false); } void MipsTargetAsmStreamer::emitDirectiveSetNoMips16() { OS << "\t.set\tnomips16\n"; + setCanHaveModuleDir(false); } void MipsTargetAsmStreamer::emitDirectiveSetReorder() { OS << "\t.set\treorder\n"; + setCanHaveModuleDir(false); } void MipsTargetAsmStreamer::emitDirectiveSetNoReorder() { OS << "\t.set\tnoreorder\n"; + setCanHaveModuleDir(false); } void MipsTargetAsmStreamer::emitDirectiveSetMacro() { OS << "\t.set\tmacro\n"; + setCanHaveModuleDir(false); } void MipsTargetAsmStreamer::emitDirectiveSetNoMacro() { OS << "\t.set\tnomacro\n"; + setCanHaveModuleDir(false); } void MipsTargetAsmStreamer::emitDirectiveSetAt() { OS << "\t.set\tat\n"; + setCanHaveModuleDir(false); } void MipsTargetAsmStreamer::emitDirectiveSetNoAt() { OS << "\t.set\tnoat\n"; + setCanHaveModuleDir(false); } void MipsTargetAsmStreamer::emitDirectiveEnd(StringRef Name) { @@ -110,24 +153,28 @@ void MipsTargetAsmStreamer::emitFrame(unsigned StackReg, unsigned StackSize, void 
MipsTargetAsmStreamer::emitDirectiveSetMips32R2() {
   OS << "\t.set\tmips32r2\n";
+  setCanHaveModuleDir(false);
 }

 void MipsTargetAsmStreamer::emitDirectiveSetMips64() {
   OS << "\t.set\tmips64\n";
+  setCanHaveModuleDir(false);
 }

 void MipsTargetAsmStreamer::emitDirectiveSetMips64R2() {
   OS << "\t.set\tmips64r2\n";
+  setCanHaveModuleDir(false);
 }

 void MipsTargetAsmStreamer::emitDirectiveSetDsp() {
   OS << "\t.set\tdsp\n";
+  setCanHaveModuleDir(false);
 }

 // Print a 32 bit hex number with all digits.
 static void printHex32(unsigned Value, raw_ostream &OS) {
   OS << "0x";
   for (int i = 7; i >= 0; i--)
-    OS.write_hex((Value & (0xF << (i*4))) >> (i*4));
+    OS.write_hex((Value & (0xF << (i * 4))) >> (i * 4));
 }

 void MipsTargetAsmStreamer::emitMask(unsigned CPUBitmask,
@@ -147,6 +194,7 @@ void MipsTargetAsmStreamer::emitFMask(unsigned FPUBitmask,
 void MipsTargetAsmStreamer::emitDirectiveCpload(unsigned RegNo) {
   OS << "\t.cpload\t$"
      << StringRef(MipsInstPrinter::getRegisterName(RegNo)).lower() << "\n";
+  setCanHaveModuleDir(false);
 }

 void MipsTargetAsmStreamer::emitDirectiveCpsetup(unsigned RegNo,
@@ -165,6 +213,32 @@
   OS << ", ";

   OS << Sym.getName() << "\n";
+  setCanHaveModuleDir(false);
+}
+
+void MipsTargetAsmStreamer::emitDirectiveModuleFP(
+    MipsABIFlagsSection::FpABIKind Value, bool Is32BitABI) {
+  MipsTargetStreamer::emitDirectiveModuleFP(Value, Is32BitABI);
+
+  OS << "\t.module\tfp=";
+  OS << ABIFlagsSection.getFpABIString(Value) << "\n";
+}
+
+void MipsTargetAsmStreamer::emitDirectiveSetFp(
+    MipsABIFlagsSection::FpABIKind Value) {
+  OS << "\t.set\tfp=";
+  OS << ABIFlagsSection.getFpABIString(Value) << "\n";
+}
+
+void MipsTargetAsmStreamer::emitMipsAbiFlags() {
+  // No action required for text output.
+}
+
+void MipsTargetAsmStreamer::emitDirectiveModuleOddSPReg(bool Enabled,
+                                                        bool IsO32ABI) {
+  MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI);
+
+  OS << "\t.module\t" << (Enabled ? "" : "no") << "oddspreg\n";
 }

 // This part is for ELF object output.
@@ -174,7 +248,7 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S,
   MCAssembler &MCA = getStreamer().getAssembler();
   uint64_t Features = STI.getFeatureBits();
   Triple T(STI.getTargetTriple());
-  Pic = (MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_)
+  Pic = (MCA.getContext().getObjectFileInfo()->getRelocM() == Reloc::PIC_) ?
true : false; @@ -182,16 +258,28 @@ MipsTargetELFStreamer::MipsTargetELFStreamer(MCStreamer &S, unsigned EFlags = 0; // Architecture - if (Features & Mips::FeatureMips64r2) + if (Features & Mips::FeatureMips64r6) + EFlags |= ELF::EF_MIPS_ARCH_64R6; + else if (Features & Mips::FeatureMips64r2) EFlags |= ELF::EF_MIPS_ARCH_64R2; else if (Features & Mips::FeatureMips64) EFlags |= ELF::EF_MIPS_ARCH_64; + else if (Features & Mips::FeatureMips5) + EFlags |= ELF::EF_MIPS_ARCH_5; else if (Features & Mips::FeatureMips4) EFlags |= ELF::EF_MIPS_ARCH_4; + else if (Features & Mips::FeatureMips3) + EFlags |= ELF::EF_MIPS_ARCH_3; + else if (Features & Mips::FeatureMips32r6) + EFlags |= ELF::EF_MIPS_ARCH_32R6; else if (Features & Mips::FeatureMips32r2) EFlags |= ELF::EF_MIPS_ARCH_32R2; else if (Features & Mips::FeatureMips32) EFlags |= ELF::EF_MIPS_ARCH_32; + else if (Features & Mips::FeatureMips2) + EFlags |= ELF::EF_MIPS_ARCH_2; + else + EFlags |= ELF::EF_MIPS_ARCH_1; if (T.isArch64Bit()) { if (Features & Mips::FeatureN32) @@ -244,17 +332,17 @@ void MipsTargetELFStreamer::finish() { ELF::SHF_ALLOC | ELF::SHF_MIPS_NOSTRIP, SectionKind::getMetadata()); OS.SwitchSection(Sec); - OS.EmitIntValue(1, 1); // kind + OS.EmitIntValue(1, 1); // kind OS.EmitIntValue(40, 1); // size - OS.EmitIntValue(0, 2); // section - OS.EmitIntValue(0, 4); // info - OS.EmitIntValue(0, 4); // ri_gprmask - OS.EmitIntValue(0, 4); // pad - OS.EmitIntValue(0, 4); // ri_cpr[0]mask - OS.EmitIntValue(0, 4); // ri_cpr[1]mask - OS.EmitIntValue(0, 4); // ri_cpr[2]mask - OS.EmitIntValue(0, 4); // ri_cpr[3]mask - OS.EmitIntValue(0, 8); // ri_gp_value + OS.EmitIntValue(0, 2); // section + OS.EmitIntValue(0, 4); // info + OS.EmitIntValue(0, 4); // ri_gprmask + OS.EmitIntValue(0, 4); // pad + OS.EmitIntValue(0, 4); // ri_cpr[0]mask + OS.EmitIntValue(0, 4); // ri_cpr[1]mask + OS.EmitIntValue(0, 4); // ri_cpr[2]mask + OS.EmitIntValue(0, 4); // ri_cpr[3]mask + OS.EmitIntValue(0, 8); // ri_gp_value } else { const MCSectionELF *Sec = Context.getELFSection(".reginfo", ELF::SHT_MIPS_REGINFO, ELF::SHF_ALLOC, @@ -268,6 +356,7 @@ void MipsTargetELFStreamer::finish() { OS.EmitIntValue(0, 4); // ri_cpr[3]mask OS.EmitIntValue(0, 4); // ri_gp_value } + emitMipsAbiFlags(); } void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol, @@ -276,11 +365,11 @@ void MipsTargetELFStreamer::emitAssignment(MCSymbol *Symbol, if (Value->getKind() != MCExpr::SymbolRef) return; const MCSymbol &RhsSym = - static_cast<const MCSymbolRefExpr *>(Value)->getSymbol(); + static_cast<const MCSymbolRefExpr *>(Value)->getSymbol(); MCSymbolData &Data = getStreamer().getOrCreateSymbolData(&RhsSym); uint8_t Type = MCELF::GetType(Data); - if ((Type != ELF::STT_FUNC) - || !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2))) + if ((Type != ELF::STT_FUNC) || + !(MCELF::getOther(Data) & (ELF::STO_MIPS_MICROMIPS >> 2))) return; MCSymbolData &SymbolData = getStreamer().getOrCreateSymbolData(Symbol); @@ -305,6 +394,7 @@ void MipsTargetELFStreamer::emitDirectiveSetMicroMips() { void MipsTargetELFStreamer::emitDirectiveSetNoMicroMips() { MicroMipsEnabled = false; + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveSetMips16() { @@ -312,14 +402,17 @@ void MipsTargetELFStreamer::emitDirectiveSetMips16() { unsigned Flags = MCA.getELFHeaderEFlags(); Flags |= ELF::EF_MIPS_ARCH_ASE_M16; MCA.setELFHeaderEFlags(Flags); + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveSetNoMips16() { // FIXME: implement. 
+ setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveSetReorder() { // FIXME: implement. + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveSetNoReorder() { @@ -327,22 +420,27 @@ void MipsTargetELFStreamer::emitDirectiveSetNoReorder() { unsigned Flags = MCA.getELFHeaderEFlags(); Flags |= ELF::EF_MIPS_NOREORDER; MCA.setELFHeaderEFlags(Flags); + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveSetMacro() { // FIXME: implement. + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveSetNoMacro() { // FIXME: implement. + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveSetAt() { // FIXME: implement. + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveSetNoAt() { // FIXME: implement. + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveEnd(StringRef Name) { @@ -411,19 +509,19 @@ void MipsTargetELFStreamer::emitFMask(unsigned FPUBitmask, } void MipsTargetELFStreamer::emitDirectiveSetMips32R2() { - // No action required for ELF output. + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveSetMips64() { - // No action required for ELF output. + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveSetMips64R2() { - // No action required for ELF output. + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveSetDsp() { - // No action required for ELF output. + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) { @@ -473,6 +571,8 @@ void MipsTargetELFStreamer::emitDirectiveCpload(unsigned RegNo) { TmpInst.addOperand(MCOperand::CreateReg(Mips::GP)); TmpInst.addOperand(MCOperand::CreateReg(RegNo)); getStreamer().EmitInstruction(TmpInst, STI); + + setCanHaveModuleDir(false); } void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, @@ -528,4 +628,27 @@ void MipsTargetELFStreamer::emitDirectiveCpsetup(unsigned RegNo, Inst.addOperand(MCOperand::CreateReg(Mips::GP)); Inst.addOperand(MCOperand::CreateReg(RegNo)); getStreamer().EmitInstruction(Inst, STI); + + setCanHaveModuleDir(false); +} + +void MipsTargetELFStreamer::emitMipsAbiFlags() { + MCAssembler &MCA = getStreamer().getAssembler(); + MCContext &Context = MCA.getContext(); + MCStreamer &OS = getStreamer(); + const MCSectionELF *Sec = + Context.getELFSection(".MIPS.abiflags", ELF::SHT_MIPS_ABIFLAGS, + ELF::SHF_ALLOC, SectionKind::getMetadata()); + MCSectionData &ABIShndxSD = MCA.getOrCreateSectionData(*Sec); + ABIShndxSD.setAlignment(8); + OS.SwitchSection(Sec); + + OS << ABIFlagsSection; +} + +void MipsTargetELFStreamer::emitDirectiveModuleOddSPReg(bool Enabled, + bool IsO32ABI) { + MipsTargetStreamer::emitDirectiveModuleOddSPReg(Enabled, IsO32ABI); + + ABIFlagsSection.OddSPReg = Enabled; } diff --git a/lib/Target/Mips/MicroMipsInstrFPU.td b/lib/Target/Mips/MicroMipsInstrFPU.td index d95f9b0..b93017a 100644 --- a/lib/Target/Mips/MicroMipsInstrFPU.td +++ b/lib/Target/Mips/MicroMipsInstrFPU.td @@ -24,13 +24,13 @@ def LDC1_MM : MMRel, LW_FT<"ldc1", AFGR64Opnd, II_LDC1, load>, LW_FM_MM<0x2f>; def SDC1_MM : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM_MM<0x2e>; def LWXC1_MM : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>, - LWXC1_FM_MM<0x48>; + LWXC1_FM_MM<0x48>, INSN_MIPS4_32R2_NOT_32R6_64R6; def SWXC1_MM : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>, - SWXC1_FM_MM<0x88>; + SWXC1_FM_MM<0x88>, INSN_MIPS4_32R2_NOT_32R6_64R6; def LUXC1_MM : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, 
II_LUXC1>, - LWXC1_FM_MM<0x148>, INSN_MIPS5_32R2; + LWXC1_FM_MM<0x148>, INSN_MIPS5_32R2_NOT_32R6_64R6; def SUXC1_MM : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>, - SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2; + SWXC1_FM_MM<0x188>, INSN_MIPS5_32R2_NOT_32R6_64R6; def FCMP_S32_MM : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM_MM<0>; @@ -38,9 +38,9 @@ def FCMP_D32_MM : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM_MM<1>; def BC1F_MM : MMRel, BC1F_FT<"bc1f", brtarget_mm, IIBranch, MIPS_BRANCH_F>, - BC1F_FM_MM<0x1c>; + BC1F_FM_MM<0x1c>, ISA_MIPS1_NOT_32R6_64R6; def BC1T_MM : MMRel, BC1F_FT<"bc1t", brtarget_mm, IIBranch, MIPS_BRANCH_T>, - BC1F_FM_MM<0x1d>; + BC1F_FM_MM<0x1d>, ISA_MIPS1_NOT_32R6_64R6; def CEIL_W_S_MM : MMRel, ABSS_FT<"ceil.w.s", FGR32Opnd, FGR32Opnd, II_CEIL>, ROUND_W_FM_MM<0, 0x6c>; diff --git a/lib/Target/Mips/MicroMipsInstrInfo.td b/lib/Target/Mips/MicroMipsInstrInfo.td index 9904bc6..87a3a3e 100644 --- a/lib/Target/Mips/MicroMipsInstrInfo.td +++ b/lib/Target/Mips/MicroMipsInstrInfo.td @@ -246,7 +246,6 @@ let DecoderNamespace = "MicroMips", Predicates = [InMicroMips] in { } def JR_MM : MMRel, IndirectBranch<"jr", GPR32Opnd>, JR_FM_MM<0x3c>; def JALR_MM : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM_MM<0x03c>; - def RET_MM : MMRel, RetBase<"ret", GPR32Opnd>, JR_FM_MM<0x3c>; /// Branch Instructions def BEQ_MM : MMRel, CBranch<"beq", brtarget_mm, seteq, GPR32Opnd>, diff --git a/lib/Target/Mips/Mips.td b/lib/Target/Mips/Mips.td index ea16331..dd3bc9b 100644 --- a/lib/Target/Mips/Mips.td +++ b/lib/Target/Mips/Mips.td @@ -61,6 +61,8 @@ def FeatureGP64Bit : SubtargetFeature<"gp64", "IsGP64bit", "true", "General Purpose Registers are 64-bit wide.">; def FeatureFP64Bit : SubtargetFeature<"fp64", "IsFP64bit", "true", "Support 64-bit FP registers.">; +def FeatureFPXX : SubtargetFeature<"fpxx", "IsFPXX", "true", + "Support for FPXX.">; def FeatureNaN2008 : SubtargetFeature<"nan2008", "IsNaN2008bit", "true", "IEEE 754-2008 NaN encoding.">; def FeatureSingleFloat : SubtargetFeature<"single-float", "IsSingleFloat", @@ -73,6 +75,9 @@ def FeatureN64 : SubtargetFeature<"n64", "MipsABI", "N64", "Enable n64 ABI">; def FeatureEABI : SubtargetFeature<"eabi", "MipsABI", "EABI", "Enable eabi ABI">; +def FeatureNoOddSPReg : SubtargetFeature<"nooddspreg", "UseOddSPReg", "false", + "Disable odd numbered single-precision " + "registers">; def FeatureVFPU : SubtargetFeature<"vfpu", "HasVFPU", "true", "Enable vector FPU instructions.">; def FeatureMips1 : SubtargetFeature<"mips1", "MipsArchVersion", "Mips1", diff --git a/lib/Target/Mips/Mips16FrameLowering.cpp b/lib/Target/Mips/Mips16FrameLowering.cpp index c01d03a..93706c2 100644 --- a/lib/Target/Mips/Mips16FrameLowering.cpp +++ b/lib/Target/Mips/Mips16FrameLowering.cpp @@ -16,6 +16,7 @@ #include "Mips16InstrInfo.h" #include "MipsInstrInfo.h" #include "MipsRegisterInfo.h" +#include "MipsSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -28,6 +29,9 @@ using namespace llvm; +Mips16FrameLowering::Mips16FrameLowering(const MipsSubtarget &STI) + : MipsFrameLowering(STI, STI.stackAlignment()) {} + void Mips16FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); diff --git a/lib/Target/Mips/Mips16FrameLowering.h b/lib/Target/Mips/Mips16FrameLowering.h index 3f7829d..1fb7eda 100644 --- a/lib/Target/Mips/Mips16FrameLowering.h +++ b/lib/Target/Mips/Mips16FrameLowering.h @@ 
-19,8 +19,7 @@ namespace llvm { class Mips16FrameLowering : public MipsFrameLowering { public: - explicit Mips16FrameLowering(const MipsSubtarget &STI) - : MipsFrameLowering(STI, STI.stackAlignment()) {} + explicit Mips16FrameLowering(const MipsSubtarget &STI); /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp index 4e86a27..6672aef 100644 --- a/lib/Target/Mips/Mips16ISelDAGToDAG.cpp +++ b/lib/Target/Mips/Mips16ISelDAGToDAG.cpp @@ -37,7 +37,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-isel" bool Mips16DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - if (!Subtarget.inMips16Mode()) + if (!Subtarget->inMips16Mode()) return false; return MipsDAGToDAGISel::runOnMachineFunction(MF); } @@ -226,9 +226,9 @@ bool Mips16DAGToDAGISel::selectAddr16( const LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(Parent); if (LS) { - if (LS->getMemoryVT() == MVT::f32 && Subtarget.hasMips4_32r2()) + if (LS->getMemoryVT() == MVT::f32 && Subtarget->hasMips4_32r2()) return false; - if (LS->getMemoryVT() == MVT::f64 && Subtarget.hasMips4_32r2()) + if (LS->getMemoryVT() == MVT::f64 && Subtarget->hasMips4_32r2()) return false; } } diff --git a/lib/Target/Mips/Mips16ISelLowering.cpp b/lib/Target/Mips/Mips16ISelLowering.cpp index 9102450..81a05df 100644 --- a/lib/Target/Mips/Mips16ISelLowering.cpp +++ b/lib/Target/Mips/Mips16ISelLowering.cpp @@ -120,13 +120,6 @@ static const Mips16IntrinsicHelperType Mips16IntrinsicHelper[] = { Mips16TargetLowering::Mips16TargetLowering(MipsTargetMachine &TM) : MipsTargetLowering(TM) { - // - // set up as if mips32 and then revert so we can test the mechanism - // for switching - addRegisterClass(MVT::i32, &Mips::GPR32RegClass); - addRegisterClass(MVT::f32, &Mips::FGR32RegClass); - computeRegisterProperties(); - clearRegisterClasses(); // Set up the register classes addRegisterClass(MVT::i32, &Mips::CPU16RegsRegClass); diff --git a/lib/Target/Mips/Mips16ISelLowering.h b/lib/Target/Mips/Mips16ISelLowering.h index df88333..2a5eec5 100644 --- a/lib/Target/Mips/Mips16ISelLowering.h +++ b/lib/Target/Mips/Mips16ISelLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef Mips16ISELLOWERING_H -#define Mips16ISELLOWERING_H +#ifndef MIPS16ISELLOWERING_H +#define MIPS16ISELLOWERING_H #include "MipsISelLowering.h" diff --git a/lib/Target/Mips/Mips16InstrInfo.td b/lib/Target/Mips/Mips16InstrInfo.td index 11166c4..5e4eebb 100644 --- a/lib/Target/Mips/Mips16InstrInfo.td +++ b/lib/Target/Mips/Mips16InstrInfo.td @@ -1370,9 +1370,11 @@ def : Mips16Pat<(MipsJmpLink (i32 texternalsym:$dst)), (Jal16 texternalsym:$dst)>; // Indirect branch -def: Mips16Pat< - (brind CPU16Regs:$rs), - (JrcRx16 CPU16Regs:$rs)>; +def: Mips16Pat<(brind CPU16Regs:$rs), (JrcRx16 CPU16Regs:$rs)> { + // Ensure that the addition of MIPS32r6/MIPS64r6 support does not change + // MIPS16's behaviour. 
+ let AddedComplexity = 1; +} // Jump and Link (Call) let isCall=1, hasDelaySlot=0 in diff --git a/lib/Target/Mips/Mips32r6InstrFormats.td b/lib/Target/Mips/Mips32r6InstrFormats.td index a3f9df5..e4ec96a 100644 --- a/lib/Target/Mips/Mips32r6InstrFormats.td +++ b/lib/Target/Mips/Mips32r6InstrFormats.td @@ -39,7 +39,10 @@ def OPGROUP_DAUI : OPGROUP<0b011101>; def OPGROUP_PCREL : OPGROUP<0b111011>; def OPGROUP_REGIMM : OPGROUP<0b000001>; def OPGROUP_SPECIAL : OPGROUP<0b000000>; +// The spec occasionally names this value LL, LLD, SC, or SCD. def OPGROUP_SPECIAL3 : OPGROUP<0b011111>; +// The spec names this constant LWC2, LDC2, SWC2, and SDC2 in different places. +def OPGROUP_COP2LDST : OPGROUP<0b010010>; class OPCODE2<bits<2> Val> { bits<2> Value = Val; @@ -48,6 +51,11 @@ def OPCODE2_ADDIUPC : OPCODE2<0b00>; def OPCODE2_LWPC : OPCODE2<0b01>; def OPCODE2_LWUPC : OPCODE2<0b10>; +class OPCODE3<bits<3> Val> { + bits<3> Value = Val; +} +def OPCODE3_LDPC : OPCODE3<0b110>; + class OPCODE5<bits<5> Val> { bits<5> Value = Val; } @@ -59,6 +67,13 @@ def OPCODE5_BC1EQZ : OPCODE5<0b01001>; def OPCODE5_BC1NEZ : OPCODE5<0b01101>; def OPCODE5_BC2EQZ : OPCODE5<0b01001>; def OPCODE5_BC2NEZ : OPCODE5<0b01101>; +def OPCODE5_BGEZAL : OPCODE5<0b10001>; +// The next four constants are unnamed in the spec. These names are taken from +// the OPGROUP names they are used with. +def OPCODE5_LDC2 : OPCODE5<0b01110>; +def OPCODE5_LWC2 : OPCODE5<0b01010>; +def OPCODE5_SDC2 : OPCODE5<0b01111>; +def OPCODE5_SWC2 : OPCODE5<0b01011>; class OPCODE6<bits<6> Val> { bits<6> Value = Val; @@ -67,6 +82,22 @@ def OPCODE6_ALIGN : OPCODE6<0b100000>; def OPCODE6_DALIGN : OPCODE6<0b100100>; def OPCODE6_BITSWAP : OPCODE6<0b100000>; def OPCODE6_DBITSWAP : OPCODE6<0b100100>; +def OPCODE6_JALR : OPCODE6<0b001001>; +def OPCODE6_CACHE : OPCODE6<0b100101>; +def OPCODE6_PREF : OPCODE6<0b110101>; +// The next four constants are unnamed in the spec. These names are taken from +// the OPGROUP names they are used with. +def OPCODE6_LL : OPCODE6<0b110110>; +def OPCODE6_LLD : OPCODE6<0b110111>; +def OPCODE6_SC : OPCODE6<0b100110>; +def OPCODE6_SCD : OPCODE6<0b100111>; +def OPCODE6_CLO : OPCODE6<0b010001>; +def OPCODE6_CLZ : OPCODE6<0b010000>; +def OPCODE6_DCLO : OPCODE6<0b010011>; +def OPCODE6_DCLZ : OPCODE6<0b010010>; +def OPCODE6_LSA : OPCODE6<0b000101>; +def OPCODE6_DLSA : OPCODE6<0b010101>; +def OPCODE6_SDBBP : OPCODE6<0b001110>; class FIELD_FMT<bits<5> Val> { bits<5> Value = Val; @@ -77,22 +108,23 @@ def FIELD_FMT_D : FIELD_FMT<0b10001>; class FIELD_CMP_COND<bits<5> Val> { bits<5> Value = Val; } -def FIELD_CMP_COND_F : FIELD_CMP_COND<0b00000>; +// Note: The CMP_COND_FMT names differ from the C_COND_FMT names. 
+def FIELD_CMP_COND_AF : FIELD_CMP_COND<0b00000>; def FIELD_CMP_COND_UN : FIELD_CMP_COND<0b00001>; def FIELD_CMP_COND_EQ : FIELD_CMP_COND<0b00010>; def FIELD_CMP_COND_UEQ : FIELD_CMP_COND<0b00011>; -def FIELD_CMP_COND_OLT : FIELD_CMP_COND<0b00100>; +def FIELD_CMP_COND_LT : FIELD_CMP_COND<0b00100>; def FIELD_CMP_COND_ULT : FIELD_CMP_COND<0b00101>; -def FIELD_CMP_COND_OLE : FIELD_CMP_COND<0b00110>; +def FIELD_CMP_COND_LE : FIELD_CMP_COND<0b00110>; def FIELD_CMP_COND_ULE : FIELD_CMP_COND<0b00111>; -def FIELD_CMP_COND_SF : FIELD_CMP_COND<0b01000>; -def FIELD_CMP_COND_NGLE : FIELD_CMP_COND<0b01001>; +def FIELD_CMP_COND_SAF : FIELD_CMP_COND<0b01000>; +def FIELD_CMP_COND_SUN : FIELD_CMP_COND<0b01001>; def FIELD_CMP_COND_SEQ : FIELD_CMP_COND<0b01010>; -def FIELD_CMP_COND_NGL : FIELD_CMP_COND<0b01011>; -def FIELD_CMP_COND_LT : FIELD_CMP_COND<0b01100>; -def FIELD_CMP_COND_NGE : FIELD_CMP_COND<0b01101>; -def FIELD_CMP_COND_LE : FIELD_CMP_COND<0b01110>; -def FIELD_CMP_COND_NGT : FIELD_CMP_COND<0b01111>; +def FIELD_CMP_COND_SUEQ : FIELD_CMP_COND<0b01011>; +def FIELD_CMP_COND_SLT : FIELD_CMP_COND<0b01100>; +def FIELD_CMP_COND_SULT : FIELD_CMP_COND<0b01101>; +def FIELD_CMP_COND_SLE : FIELD_CMP_COND<0b01110>; +def FIELD_CMP_COND_SULE : FIELD_CMP_COND<0b01111>; class FIELD_CMP_FORMAT<bits<5> Val> { bits<5> Value = Val; @@ -139,6 +171,17 @@ class DAUI_FM : AUI_FM { let Inst{31-26} = OPGROUP_DAUI.Value; } +class BAL_FM : MipsR6Inst { + bits<16> offset; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_REGIMM.Value; + let Inst{25-21} = 0b00000; + let Inst{20-16} = OPCODE5_BGEZAL.Value; + let Inst{15-0} = offset; +} + class COP1_2R_FM<bits<6> funct, FIELD_FMT Format> : MipsR6Inst { bits<5> fs; bits<5> fd; @@ -216,6 +259,18 @@ class PCREL19_FM<OPCODE2 Operation> : MipsR6Inst { let Inst{18-0} = imm; } +class PCREL18_FM<OPCODE3 Operation> : MipsR6Inst { + bits<5> rs; + bits<18> imm; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_PCREL.Value; + let Inst{25-21} = rs; + let Inst{20-18} = Operation.Value; + let Inst{17-0} = imm; +} + class SPECIAL3_2R_FM<OPCODE6 Operation> : MipsR6Inst { bits<5> rd; bits<5> rt; @@ -230,6 +285,36 @@ class SPECIAL3_2R_FM<OPCODE6 Operation> : MipsR6Inst { let Inst{5-0} = Operation.Value; } +class SPECIAL3_MEM_FM<OPCODE6 Operation> : MipsR6Inst { + bits<21> addr; + bits<5> hint; + bits<5> base = addr{20-16}; + bits<9> offset = addr{8-0}; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL3.Value; + let Inst{25-21} = base; + let Inst{20-16} = hint; + let Inst{15-7} = offset; + let Inst{6} = 0; + let Inst{5-0} = Operation.Value; +} + +class SPECIAL_2R_FM<OPCODE6 Operation> : MipsR6Inst { + bits<5> rd; + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL.Value; + let Inst{25-21} = rs; + let Inst{20-16} = 0b00000; + let Inst{15-11} = rd; + let Inst{10-6} = 0b00001; + let Inst{5-0} = Operation.Value; +} + class SPECIAL_3R_FM<bits<5> mulop, bits<6> funct> : MipsR6Inst { bits<5> rd; bits<5> rs; @@ -245,6 +330,16 @@ class SPECIAL_3R_FM<bits<5> mulop, bits<6> funct> : MipsR6Inst { let Inst{5-0} = funct; } +class SPECIAL_SDBBP_FM : MipsR6Inst { + bits<20> code_; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL.Value; + let Inst{25-6} = code_; + let Inst{5-0} = OPCODE6_SDBBP.Value; +} + // This class is ambiguous with other branches: // BEQC/BNEC require that rs > rt class CMP_BRANCH_2R_OFF16_FM<OPGROUP funct> : MipsR6Inst { @@ -355,6 +450,40 @@ class SPECIAL3_DALIGN_FM<OPCODE6 Operation> : MipsR6Inst { let Inst{5-0} = Operation.Value; } +class 
SPECIAL3_LL_SC_FM<OPCODE6 Operation> : MipsR6Inst { + bits<5> rt; + bits<21> addr; + bits<5> base = addr{20-16}; + bits<9> offset = addr{8-0}; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL3.Value; + let Inst{25-21} = base; + let Inst{20-16} = rt; + let Inst{15-7} = offset; + let Inst{5-0} = Operation.Value; + + string DecoderMethod = "DecodeSpecial3LlSc"; +} + +class SPECIAL_LSA_FM<OPCODE6 Operation> : MipsR6Inst { + bits<5> rd; + bits<5> rs; + bits<5> rt; + bits<2> imm2; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL.Value; + let Inst{25-21} = rs; + let Inst{20-16} = rt; + let Inst{15-11} = rd; + let Inst{10-8} = 0b000; + let Inst{7-6} = imm2; + let Inst{5-0} = Operation.Value; +} + class REGIMM_FM<OPCODE5 Operation> : MipsR6Inst { bits<5> rs; bits<16> imm; @@ -384,3 +513,31 @@ class COP1_CMP_CONDN_FM<FIELD_CMP_FORMAT Format, let Inst{4-0} = Cond.Value; } +class JR_HB_R6_FM<OPCODE6 Operation> : MipsR6Inst { + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_SPECIAL.Value; + let Inst{25-21} = rs; + let Inst{20-16} = 0; + let Inst{15-11} = 0; + let Inst{10} = 1; + let Inst{9-6} = 0; + let Inst{5-0} = Operation.Value; +} + +class COP2LDST_FM<OPCODE5 Operation> : MipsR6Inst { + bits<5> rt; + bits<21> addr; + bits<5> base = addr{20-16}; + bits<11> offset = addr{10-0}; + + bits<32> Inst; + + let Inst{31-26} = OPGROUP_COP2LDST.Value; + let Inst{25-21} = Operation.Value; + let Inst{20-16} = rt; + let Inst{15-11} = base; + let Inst{10-0} = offset; +} diff --git a/lib/Target/Mips/Mips32r6InstrInfo.td b/lib/Target/Mips/Mips32r6InstrInfo.td index ffaf965..d06e5ca 100644 --- a/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/lib/Target/Mips/Mips32r6InstrInfo.td @@ -14,39 +14,8 @@ include "Mips32r6InstrFormats.td" // Notes about removals/changes from MIPS32r6: -// Unclear: ssnop -// Reencoded: cache, pref -// Reencoded: clo, clz // Reencoded: jr -> jalr // Reencoded: jr.hb -> jalr.hb -// Reencoded: ldc2 -// Reencoded: ll, sc -// Reencoded: lwc2 -// Reencoded: sdbbp -// Reencoded: sdc2 -// Reencoded: swc2 -// Removed: bc1any2, bc1any4 -// Removed: bc2[ft] -// Removed: bc2f, bc2t -// Removed: bgezal -// Removed: bltzal -// Removed: c.cond.fmt, bc1[ft] -// Removed: div, divu -// Removed: jalx -// Removed: ldxc1 -// Removed: luxc1 -// Removed: lwxc1 -// Removed: madd.[ds], nmadd.[ds], nmsub.[ds], sub.[ds] -// Removed: mfhi, mflo, mthi, mtlo, madd, maddu, msub, msubu, mul -// Removed: movf, movt -// Removed: movf.fmt, movt.fmt, movn.fmt, movz.fmt -// Removed: movn, movz -// Removed: mult, multu -// Removed: prefx -// Removed: sdxc1 -// Removed: suxc1 -// Removed: swxc1 -// Rencoded: [ls][wd]c2 def brtarget21 : Operand<OtherVT> { let EncoderMethod = "getBranchTarget21OpValue"; @@ -84,6 +53,7 @@ class ALUIPC_ENC : PCREL16_FM<OPCODE5_ALUIPC>; class AUI_ENC : AUI_FM; class AUIPC_ENC : PCREL16_FM<OPCODE5_AUIPC>; +class BAL_ENC : BAL_FM; class BALC_ENC : BRANCH_OFF26_FM<0b111010>; class BC_ENC : BRANCH_OFF26_FM<0b110010>; class BEQC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_ADDI>, @@ -97,11 +67,20 @@ class BNEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_DADDI>, class BLTZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BGTZL>, DecodeDisambiguates<"BgtzlGroupBranch">; +class BGEC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BLEZL>, + DecodeDisambiguatedBy<"BlezlGroupBranch">; +class BGEUC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BLEZ>, + DecodeDisambiguatedBy<"BlezGroupBranch">; class BGEZC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BLEZL>, DecodeDisambiguates<"BlezlGroupBranch">; class BGTZALC_ENC : 
CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BGTZ>, DecodeDisambiguatedBy<"BgtzGroupBranch">; +class BLTC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BGTZL>, + DecodeDisambiguatedBy<"BgtzlGroupBranch">; +class BLTUC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_BGTZ>, + DecodeDisambiguatedBy<"BgtzGroupBranch">; + class BLEZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BLEZL>, DecodeDisambiguatedBy<"BlezlGroupBranch">; class BLTZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BGTZ>, @@ -110,7 +89,8 @@ class BGTZC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BGTZL>, DecodeDisambiguatedBy<"BgtzlGroupBranch">; class BEQZC_ENC : CMP_BRANCH_OFF21_FM<0b110110>; -class BGEZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BLEZ>; +class BGEZALC_ENC : CMP_BRANCH_1R_BOTH_OFF16_FM<OPGROUP_BLEZ>, + DecodeDisambiguates<"BlezGroupBranch">; class BNEZC_ENC : CMP_BRANCH_OFF21_FM<0b111110>; class BC1EQZ_ENC : COP1_BCCZ_FM<OPCODE5_BC1EQZ>; @@ -120,9 +100,10 @@ class BC2NEZ_ENC : COP2_BCCZ_FM<OPCODE5_BC2NEZ>; class JIALC_ENC : JMP_IDX_COMPACT_FM<0b111110>; class JIC_ENC : JMP_IDX_COMPACT_FM<0b110110>; - +class JR_HB_R6_ENC : JR_HB_R6_FM<OPCODE6_JALR>; class BITSWAP_ENC : SPECIAL3_2R_FM<OPCODE6_BITSWAP>; -class BLEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BLEZ>; +class BLEZALC_ENC : CMP_BRANCH_1R_RT_OFF16_FM<OPGROUP_BLEZ>, + DecodeDisambiguatedBy<"BlezGroupBranch">; class BNVC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_DADDI>, DecodeDisambiguatedBy<"DaddiGroupBranch">; class BOVC_ENC : CMP_BRANCH_2R_OFF16_FM<OPGROUP_ADDI>, @@ -170,12 +151,23 @@ class RINT_D_ENC : COP1_2R_FM<0b011010, FIELD_FMT_D>; class CLASS_S_ENC : COP1_2R_FM<0b011011, FIELD_FMT_S>; class CLASS_D_ENC : COP1_2R_FM<0b011011, FIELD_FMT_D>; -class CMP_CONDN_DESC_BASE<string CondStr, string Typestr, RegisterOperand FGROpnd> { - dag OutOperandList = (outs FGROpnd:$fd); - dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft); - string AsmString = !strconcat("cmp.", CondStr, ".", Typestr, "\t$fd, $fs, $ft"); - list<dag> Pattern = []; -} +class CACHE_ENC : SPECIAL3_MEM_FM<OPCODE6_CACHE>; +class PREF_ENC : SPECIAL3_MEM_FM<OPCODE6_PREF>; + +class LDC2_R6_ENC : COP2LDST_FM<OPCODE5_LDC2>; +class LWC2_R6_ENC : COP2LDST_FM<OPCODE5_LWC2>; +class SDC2_R6_ENC : COP2LDST_FM<OPCODE5_SDC2>; +class SWC2_R6_ENC : COP2LDST_FM<OPCODE5_SWC2>; + +class LSA_R6_ENC : SPECIAL_LSA_FM<OPCODE6_LSA>; + +class LL_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_LL>; +class SC_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_SC>; + +class CLO_R6_ENC : SPECIAL_2R_FM<OPCODE6_CLO>; +class CLZ_R6_ENC : SPECIAL_2R_FM<OPCODE6_CLZ>; + +class SDBBP_R6_ENC : SPECIAL_SDBBP_FM; //===----------------------------------------------------------------------===// // @@ -183,56 +175,65 @@ class CMP_CONDN_DESC_BASE<string CondStr, string Typestr, RegisterOperand FGROpn // //===----------------------------------------------------------------------===// +class CMP_CONDN_DESC_BASE<string CondStr, string Typestr, + RegisterOperand FGROpnd, + SDPatternOperator Op = null_frag> { + dag OutOperandList = (outs FGRCCOpnd:$fd); + dag InOperandList = (ins FGROpnd:$fs, FGROpnd:$ft); + string AsmString = !strconcat("cmp.", CondStr, ".", Typestr, "\t$fd, $fs, $ft"); + list<dag> Pattern = [(set FGRCCOpnd:$fd, (Op FGROpnd:$fs, FGROpnd:$ft))]; +} + multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string Typestr, RegisterOperand FGROpnd>{ - def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_F>, - CMP_CONDN_DESC_BASE<"f", Typestr, FGROpnd>, + def CMP_F_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_AF>, + CMP_CONDN_DESC_BASE<"af", Typestr, FGROpnd>, ISA_MIPS32R6; def CMP_UN_#NAME : 
COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UN>, - CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd>, + CMP_CONDN_DESC_BASE<"un", Typestr, FGROpnd, setuo>, ISA_MIPS32R6; def CMP_EQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_EQ>, - CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd>, + CMP_CONDN_DESC_BASE<"eq", Typestr, FGROpnd, setoeq>, ISA_MIPS32R6; def CMP_UEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_UEQ>, - CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd>, - ISA_MIPS32R6; - def CMP_OLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_OLT>, - CMP_CONDN_DESC_BASE<"olt", Typestr, FGROpnd>, + CMP_CONDN_DESC_BASE<"ueq", Typestr, FGROpnd, setueq>, ISA_MIPS32R6; + def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>, + CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd, setolt>, + ISA_MIPS32R6; def CMP_ULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULT>, - CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd>, - ISA_MIPS32R6; - def CMP_OLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_OLE>, - CMP_CONDN_DESC_BASE<"ole", Typestr, FGROpnd>, + CMP_CONDN_DESC_BASE<"ult", Typestr, FGROpnd, setult>, ISA_MIPS32R6; + def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>, + CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd, setole>, + ISA_MIPS32R6; def CMP_ULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_ULE>, - CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd>, + CMP_CONDN_DESC_BASE<"ule", Typestr, FGROpnd, setule>, + ISA_MIPS32R6; + def CMP_SAF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SAF>, + CMP_CONDN_DESC_BASE<"saf", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_SUN_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUN>, + CMP_CONDN_DESC_BASE<"sun", Typestr, FGROpnd>, ISA_MIPS32R6; - def CMP_SF_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SF>, - CMP_CONDN_DESC_BASE<"sf", Typestr, FGROpnd>, - ISA_MIPS32R6; - def CMP_NGLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGLE>, - CMP_CONDN_DESC_BASE<"ngle", Typestr, FGROpnd>, - ISA_MIPS32R6; def CMP_SEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SEQ>, CMP_CONDN_DESC_BASE<"seq", Typestr, FGROpnd>, ISA_MIPS32R6; - def CMP_NGL_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGL>, - CMP_CONDN_DESC_BASE<"ngl", Typestr, FGROpnd>, - ISA_MIPS32R6; - def CMP_LT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LT>, - CMP_CONDN_DESC_BASE<"lt", Typestr, FGROpnd>, - ISA_MIPS32R6; - def CMP_NGE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGE>, - CMP_CONDN_DESC_BASE<"nge", Typestr, FGROpnd>, + def CMP_SUEQ_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SUEQ>, + CMP_CONDN_DESC_BASE<"sueq", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_SLT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLT>, + CMP_CONDN_DESC_BASE<"slt", Typestr, FGROpnd>, ISA_MIPS32R6; - def CMP_LE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_LE>, - CMP_CONDN_DESC_BASE<"le", Typestr, FGROpnd>, - ISA_MIPS32R6; - def CMP_NGT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_NGT>, - CMP_CONDN_DESC_BASE<"ngt", Typestr, FGROpnd>, + def CMP_SULT_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULT>, + CMP_CONDN_DESC_BASE<"sult", Typestr, FGROpnd>, + ISA_MIPS32R6; + def CMP_SLE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SLE>, + CMP_CONDN_DESC_BASE<"sle", Typestr, FGROpnd>, ISA_MIPS32R6; + def CMP_SULE_#NAME : COP1_CMP_CONDN_FM<Format, FIELD_CMP_COND_SULE>, + CMP_CONDN_DESC_BASE<"sule", Typestr, FGROpnd>, + ISA_MIPS32R6; } //===----------------------------------------------------------------------===// @@ -241,16 +242,17 @@ multiclass CMP_CC_M <FIELD_CMP_FORMAT Format, string 
Typestr, // //===----------------------------------------------------------------------===// -class PCREL19_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { +class PCREL_DESC_BASE<string instr_asm, RegisterOperand GPROpnd, + Operand ImmOpnd> { dag OutOperandList = (outs GPROpnd:$rs); - dag InOperandList = (ins simm19_lsl2:$imm); + dag InOperandList = (ins ImmOpnd:$imm); string AsmString = !strconcat(instr_asm, "\t$rs, $imm"); list<dag> Pattern = []; } -class ADDIUPC_DESC : PCREL19_DESC_BASE<"addiupc", GPR32Opnd>; -class LWPC_DESC: PCREL19_DESC_BASE<"lwpc", GPR32Opnd>; -class LWUPC_DESC: PCREL19_DESC_BASE<"lwupc", GPR32Opnd>; +class ADDIUPC_DESC : PCREL_DESC_BASE<"addiupc", GPR32Opnd, simm19_lsl2>; +class LWPC_DESC: PCREL_DESC_BASE<"lwpc", GPR32Opnd, simm19_lsl2>; +class LWUPC_DESC: PCREL_DESC_BASE<"lwupc", GPR32Opnd, simm19_lsl2>; class ALIGN_DESC_BASE<string instr_asm, RegisterOperand GPROpnd, Operand ImmOpnd> { @@ -318,15 +320,26 @@ class CMP_CBR_RT_Z_DESC_BASE<string instr_asm, DAGOperand opnd, list<Register> Defs = [AT]; } +class BAL_DESC : BC_DESC_BASE<"bal", brtarget> { + bit isCall = 1; + bit hasDelaySlot = 1; + list<Register> Defs = [RA]; +} + class BALC_DESC : BC_DESC_BASE<"balc", brtarget26> { bit isCall = 1; list<Register> Defs = [RA]; } class BC_DESC : BC_DESC_BASE<"bc", brtarget26>; +class BGEC_DESC : CMP_BC_DESC_BASE<"bgec", brtarget, GPR32Opnd>; +class BGEUC_DESC : CMP_BC_DESC_BASE<"bgeuc", brtarget, GPR32Opnd>; class BEQC_DESC : CMP_BC_DESC_BASE<"beqc", brtarget, GPR32Opnd>; class BNEC_DESC : CMP_BC_DESC_BASE<"bnec", brtarget, GPR32Opnd>; +class BLTC_DESC : CMP_BC_DESC_BASE<"bltc", brtarget, GPR32Opnd>; +class BLTUC_DESC : CMP_BC_DESC_BASE<"bltuc", brtarget, GPR32Opnd>; + class BLTZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzc", brtarget, GPR32Opnd>; class BGEZC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bgezc", brtarget, GPR32Opnd>; @@ -380,6 +393,14 @@ class JIC_DESC : JMP_IDX_COMPACT_DESC_BASE<"jic", jmpoffset16, GPR32Opnd> { list<Register> Defs = [AT]; } +class JR_HB_R6_DESC : JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> { + bit isBranch = 1; + bit isIndirectBranch = 1; + bit hasDelaySlot = 1; + bit isTerminator=1; + bit isBarrier=1; +} + class BITSWAP_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rt); @@ -389,17 +410,22 @@ class BITSWAP_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { class BITSWAP_DESC : BITSWAP_DESC_BASE<"bitswap", GPR32Opnd>; -class DIVMOD_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { +class DIVMOD_DESC_BASE<string instr_asm, RegisterOperand GPROpnd, + SDPatternOperator Op=null_frag> { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); - list<dag> Pattern = []; + list<dag> Pattern = [(set GPROpnd:$rd, (Op GPROpnd:$rs, GPROpnd:$rt))]; + + // This instruction doesn't trap division by zero itself. We must insert + // teq instructions as well. 
+ bit usesCustomInserter = 1; } -class DIV_DESC : DIVMOD_DESC_BASE<"div", GPR32Opnd>; -class DIVU_DESC : DIVMOD_DESC_BASE<"divu", GPR32Opnd>; -class MOD_DESC : DIVMOD_DESC_BASE<"mod", GPR32Opnd>; -class MODU_DESC : DIVMOD_DESC_BASE<"modu", GPR32Opnd>; +class DIV_DESC : DIVMOD_DESC_BASE<"div", GPR32Opnd, sdiv>; +class DIVU_DESC : DIVMOD_DESC_BASE<"divu", GPR32Opnd, udiv>; +class MOD_DESC : DIVMOD_DESC_BASE<"mod", GPR32Opnd, srem>; +class MODU_DESC : DIVMOD_DESC_BASE<"modu", GPR32Opnd, urem>; class BEQZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"beqzalc", brtarget, GPR32Opnd> { list<Register> Defs = [RA]; @@ -424,28 +450,35 @@ class BLTZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bltzalc", brtarget, GPR32Opnd> { class BNEZALC_DESC : CMP_CBR_RT_Z_DESC_BASE<"bnezalc", brtarget, GPR32Opnd> { list<Register> Defs = [RA]; } -class MUL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { + +class MUL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd, + SDPatternOperator Op=null_frag> { dag OutOperandList = (outs GPROpnd:$rd); dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt); string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt"); - list<dag> Pattern = []; + list<dag> Pattern = [(set GPROpnd:$rd, (Op GPROpnd:$rs, GPROpnd:$rt))]; } -class MUH_DESC : MUL_R6_DESC_BASE<"muh", GPR32Opnd>; -class MUHU_DESC : MUL_R6_DESC_BASE<"muhu", GPR32Opnd>; -class MUL_R6_DESC : MUL_R6_DESC_BASE<"mul", GPR32Opnd>; +class MUH_DESC : MUL_R6_DESC_BASE<"muh", GPR32Opnd, mulhs>; +class MUHU_DESC : MUL_R6_DESC_BASE<"muhu", GPR32Opnd, mulhu>; +class MUL_R6_DESC : MUL_R6_DESC_BASE<"mul", GPR32Opnd, mul>; class MULU_DESC : MUL_R6_DESC_BASE<"mulu", GPR32Opnd>; -class COP1_4R_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> { +class COP1_SEL_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> { dag OutOperandList = (outs FGROpnd:$fd); - dag InOperandList = (ins FGROpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft); + dag InOperandList = (ins FGRCCOpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft); string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft"); - list<dag> Pattern = []; + list<dag> Pattern = [(set FGROpnd:$fd, (select FGRCCOpnd:$fd_in, + FGROpnd:$ft, + FGROpnd:$fs))]; string Constraints = "$fd_in = $fd"; } -class SEL_D_DESC : COP1_4R_DESC_BASE<"sel.d", FGR64Opnd>; -class SEL_S_DESC : COP1_4R_DESC_BASE<"sel.s", FGR32Opnd>; +class SEL_D_DESC : COP1_SEL_DESC_BASE<"sel.d", FGR64Opnd> { + // We must insert a SUBREG_TO_REG around $fd_in + bit usesCustomInserter = 1; +} +class SEL_S_DESC : COP1_SEL_DESC_BASE<"sel.s", FGR32Opnd>; class SELEQNE_Z_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { dag OutOperandList = (outs GPROpnd:$rd); @@ -457,6 +490,14 @@ class SELEQNE_Z_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { class SELEQZ_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR32Opnd>; class SELNEZ_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR32Opnd>; +class COP1_4R_DESC_BASE<string instr_asm, RegisterOperand FGROpnd> { + dag OutOperandList = (outs FGROpnd:$fd); + dag InOperandList = (ins FGROpnd:$fd_in, FGROpnd:$fs, FGROpnd:$ft); + string AsmString = !strconcat(instr_asm, "\t$fd, $fs, $ft"); + list<dag> Pattern = []; + string Constraints = "$fd_in = $fd"; +} + class MADDF_S_DESC : COP1_4R_DESC_BASE<"maddf.s", FGR32Opnd>; class MADDF_D_DESC : COP1_4R_DESC_BASE<"maddf.d", FGR64Opnd>; class MSUBF_S_DESC : COP1_4R_DESC_BASE<"msubf.s", FGR32Opnd>; @@ -503,6 +544,96 @@ class RINT_D_DESC : CLASS_RINT_DESC_BASE<"rint.d", FGR64Opnd>; class CLASS_S_DESC : CLASS_RINT_DESC_BASE<"class.s", FGR32Opnd>; class CLASS_D_DESC : CLASS_RINT_DESC_BASE<"class.d", 
FGR64Opnd>; +class CACHE_HINT_DESC<string instr_asm, Operand MemOpnd, + RegisterOperand GPROpnd> { + dag OutOperandList = (outs); + dag InOperandList = (ins MemOpnd:$addr, uimm5:$hint); + string AsmString = !strconcat(instr_asm, "\t$hint, $addr"); + list<dag> Pattern = []; +} + +class CACHE_DESC : CACHE_HINT_DESC<"cache", mem_simm9, GPR32Opnd>; +class PREF_DESC : CACHE_HINT_DESC<"pref", mem_simm9, GPR32Opnd>; + +class COP2LD_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> { + dag OutOperandList = (outs COPOpnd:$rt); + dag InOperandList = (ins mem_simm11:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list<dag> Pattern = []; + bit mayLoad = 1; +} + +class LDC2_R6_DESC : COP2LD_DESC_BASE<"ldc2", COP2Opnd>; +class LWC2_R6_DESC : COP2LD_DESC_BASE<"lwc2", COP2Opnd>; + +class COP2ST_DESC_BASE<string instr_asm, RegisterOperand COPOpnd> { + dag OutOperandList = (outs); + dag InOperandList = (ins COPOpnd:$rt, mem_simm11:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list<dag> Pattern = []; + bit mayStore = 1; +} + +class SDC2_R6_DESC : COP2ST_DESC_BASE<"sdc2", COP2Opnd>; +class SWC2_R6_DESC : COP2ST_DESC_BASE<"swc2", COP2Opnd>; + +class LSA_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd, + Operand ImmOpnd> { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rs, GPROpnd:$rt, ImmOpnd:$imm2); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs, $rt, $imm2"); + list<dag> Pattern = []; +} + +class LSA_R6_DESC : LSA_R6_DESC_BASE<"lsa", GPR32Opnd, uimm2>; + +class LL_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { + dag OutOperandList = (outs GPROpnd:$rt); + dag InOperandList = (ins mem_simm9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list<dag> Pattern = []; + bit mayLoad = 1; +} + +class LL_R6_DESC : LL_R6_DESC_BASE<"ll", GPR32Opnd>; + +class SC_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { + dag OutOperandList = (outs GPROpnd:$dst); + dag InOperandList = (ins GPROpnd:$rt, mem_simm9:$addr); + string AsmString = !strconcat(instr_asm, "\t$rt, $addr"); + list<dag> Pattern = []; + bit mayStore = 1; + string Constraints = "$rt = $dst"; +} + +class SC_R6_DESC : SC_R6_DESC_BASE<"sc", GPR32Opnd>; + +class CLO_CLZ_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rs); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs"); +} + +class CLO_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> : + CLO_CLZ_R6_DESC_BASE<instr_asm, GPROpnd> { + list<dag> Pattern = [(set GPROpnd:$rd, (ctlz (not GPROpnd:$rs)))]; +} + +class CLZ_R6_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> : + CLO_CLZ_R6_DESC_BASE<instr_asm, GPROpnd> { + list<dag> Pattern = [(set GPROpnd:$rd, (ctlz GPROpnd:$rs))]; +} + +class CLO_R6_DESC : CLO_R6_DESC_BASE<"clo", GPR32Opnd>; +class CLZ_R6_DESC : CLZ_R6_DESC_BASE<"clz", GPR32Opnd>; + +class SDBBP_R6_DESC { + dag OutOperandList = (outs); + dag InOperandList = (ins uimm20:$code_); + string AsmString = "sdbbp\t$code_"; + list<dag> Pattern = []; +} + //===----------------------------------------------------------------------===// // // Instruction Definitions @@ -514,6 +645,7 @@ def ALIGN : ALIGN_ENC, ALIGN_DESC, ISA_MIPS32R6; def ALUIPC : ALUIPC_ENC, ALUIPC_DESC, ISA_MIPS32R6; def AUI : AUI_ENC, AUI_DESC, ISA_MIPS32R6; def AUIPC : AUIPC_ENC, AUIPC_DESC, ISA_MIPS32R6; +def BAL : BAL_ENC, BAL_DESC, ISA_MIPS32R6; def BALC : BALC_ENC, BALC_DESC, ISA_MIPS32R6; def BC1EQZ : 
BC1EQZ_ENC, BC1EQZ_DESC, ISA_MIPS32R6; def BC1NEZ : BC1NEZ_ENC, BC1NEZ_DESC, ISA_MIPS32R6; @@ -523,8 +655,8 @@ def BC : BC_ENC, BC_DESC, ISA_MIPS32R6; def BEQC : BEQC_ENC, BEQC_DESC, ISA_MIPS32R6; def BEQZALC : BEQZALC_ENC, BEQZALC_DESC, ISA_MIPS32R6; def BEQZC : BEQZC_ENC, BEQZC_DESC, ISA_MIPS32R6; -def BGEC; // Also aliased to blec with operands swapped -def BGEUC; // Also aliased to bleuc with operands swapped +def BGEC : BGEC_ENC, BGEC_DESC, ISA_MIPS32R6; +def BGEUC : BGEUC_ENC, BGEUC_DESC, ISA_MIPS32R6; def BGEZALC : BGEZALC_ENC, BGEZALC_DESC, ISA_MIPS32R6; def BGEZC : BGEZC_ENC, BGEZC_DESC, ISA_MIPS32R6; def BGTZALC : BGTZALC_ENC, BGTZALC_DESC, ISA_MIPS32R6; @@ -532,8 +664,8 @@ def BGTZC : BGTZC_ENC, BGTZC_DESC, ISA_MIPS32R6; def BITSWAP : BITSWAP_ENC, BITSWAP_DESC, ISA_MIPS32R6; def BLEZALC : BLEZALC_ENC, BLEZALC_DESC, ISA_MIPS32R6; def BLEZC : BLEZC_ENC, BLEZC_DESC, ISA_MIPS32R6; -def BLTC; // Also aliased to bgtc with operands swapped -def BLTUC; // Also aliased to bgtuc with operands swapped +def BLTC : BLTC_ENC, BLTC_DESC, ISA_MIPS32R6; +def BLTUC : BLTUC_ENC, BLTUC_DESC, ISA_MIPS32R6; def BLTZALC : BLTZALC_ENC, BLTZALC_DESC, ISA_MIPS32R6; def BLTZC : BLTZC_ENC, BLTZC_DESC, ISA_MIPS32R6; def BNEC : BNEC_ENC, BNEC_DESC, ISA_MIPS32R6; @@ -541,15 +673,22 @@ def BNEZALC : BNEZALC_ENC, BNEZALC_DESC, ISA_MIPS32R6; def BNEZC : BNEZC_ENC, BNEZC_DESC, ISA_MIPS32R6; def BNVC : BNVC_ENC, BNVC_DESC, ISA_MIPS32R6; def BOVC : BOVC_ENC, BOVC_DESC, ISA_MIPS32R6; +def CACHE_R6 : CACHE_ENC, CACHE_DESC, ISA_MIPS32R6; def CLASS_D : CLASS_D_ENC, CLASS_D_DESC, ISA_MIPS32R6; def CLASS_S : CLASS_S_ENC, CLASS_S_DESC, ISA_MIPS32R6; +def CLO_R6 : CLO_R6_ENC, CLO_R6_DESC, ISA_MIPS32R6; +def CLZ_R6 : CLZ_R6_ENC, CLZ_R6_DESC, ISA_MIPS32R6; defm S : CMP_CC_M<FIELD_CMP_FORMAT_S, "s", FGR32Opnd>; defm D : CMP_CC_M<FIELD_CMP_FORMAT_D, "d", FGR64Opnd>; def DIV : DIV_ENC, DIV_DESC, ISA_MIPS32R6; def DIVU : DIVU_ENC, DIVU_DESC, ISA_MIPS32R6; def JIALC : JIALC_ENC, JIALC_DESC, ISA_MIPS32R6; def JIC : JIC_ENC, JIC_DESC, ISA_MIPS32R6; -// def LSA; // See MSA +def JR_HB_R6 : JR_HB_R6_ENC, JR_HB_R6_DESC, ISA_MIPS32R6; +def LDC2_R6 : LDC2_R6_ENC, LDC2_R6_DESC, ISA_MIPS32R6; +def LL_R6 : LL_R6_ENC, LL_R6_DESC, ISA_MIPS32R6; +def LSA_R6 : LSA_R6_ENC, LSA_R6_DESC, ISA_MIPS32R6; +def LWC2_R6 : LWC2_R6_ENC, LWC2_R6_DESC, ISA_MIPS32R6; def LWPC : LWPC_ENC, LWPC_DESC, ISA_MIPS32R6; def LWUPC : LWUPC_ENC, LWUPC_DESC, ISA_MIPS32R6; def MADDF_S : MADDF_S_ENC, MADDF_S_DESC, ISA_MIPS32R6; @@ -571,13 +710,115 @@ def MUHU : MUHU_ENC, MUHU_DESC, ISA_MIPS32R6; def MUL_R6 : MUL_R6_ENC, MUL_R6_DESC, ISA_MIPS32R6; def MULU : MULU_ENC, MULU_DESC, ISA_MIPS32R6; def NAL; // BAL with rd=0 +def PREF_R6 : PREF_ENC, PREF_DESC, ISA_MIPS32R6; def RINT_D : RINT_D_ENC, RINT_D_DESC, ISA_MIPS32R6; def RINT_S : RINT_S_ENC, RINT_S_DESC, ISA_MIPS32R6; -def SELEQZ : SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6; +def SC_R6 : SC_R6_ENC, SC_R6_DESC, ISA_MIPS32R6; +def SDBBP_R6 : SDBBP_R6_ENC, SDBBP_R6_DESC, ISA_MIPS32R6; +def SDC2_R6 : SDC2_R6_ENC, SDC2_R6_DESC, ISA_MIPS32R6; +def SELEQZ : SELEQZ_ENC, SELEQZ_DESC, ISA_MIPS32R6, GPR_32; def SELEQZ_D : SELEQZ_D_ENC, SELEQZ_D_DESC, ISA_MIPS32R6; def SELEQZ_S : SELEQZ_S_ENC, SELEQZ_S_DESC, ISA_MIPS32R6; -def SELNEZ : SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6; +def SELNEZ : SELNEZ_ENC, SELNEZ_DESC, ISA_MIPS32R6, GPR_32; def SELNEZ_D : SELNEZ_D_ENC, SELNEZ_D_DESC, ISA_MIPS32R6; def SELNEZ_S : SELNEZ_S_ENC, SELNEZ_S_DESC, ISA_MIPS32R6; def SEL_D : SEL_D_ENC, SEL_D_DESC, ISA_MIPS32R6; def SEL_S : SEL_S_ENC, SEL_S_DESC, 
ISA_MIPS32R6; +def SWC2_R6 : SWC2_R6_ENC, SWC2_R6_DESC, ISA_MIPS32R6; + +//===----------------------------------------------------------------------===// +// +// Instruction Aliases +// +//===----------------------------------------------------------------------===// + +def : MipsInstAlias<"sdbbp", (SDBBP_R6 0)>, ISA_MIPS32R6; +def : MipsInstAlias<"jr $rs", (JALR ZERO, GPR32Opnd:$rs), 1>, ISA_MIPS32R6; + +//===----------------------------------------------------------------------===// +// +// Patterns and Pseudo Instructions +// +//===----------------------------------------------------------------------===// + +// f32 comparisons supported via another comparison +def : MipsPat<(setone f32:$lhs, f32:$rhs), + (NOR (CMP_UEQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; +def : MipsPat<(seto f32:$lhs, f32:$rhs), + (NOR (CMP_UN_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; +def : MipsPat<(setune f32:$lhs, f32:$rhs), + (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; +def : MipsPat<(seteq f32:$lhs, f32:$rhs), (CMP_EQ_S f32:$lhs, f32:$rhs)>, + ISA_MIPS32R6; +def : MipsPat<(setgt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$rhs, f32:$lhs)>, + ISA_MIPS32R6; +def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>, + ISA_MIPS32R6; +def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>, + ISA_MIPS32R6; +def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>, + ISA_MIPS32R6; +def : MipsPat<(setne f32:$lhs, f32:$rhs), + (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; + +// f64 comparisons supported via another comparison +def : MipsPat<(setone f64:$lhs, f64:$rhs), + (NOR (CMP_UEQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; +def : MipsPat<(seto f64:$lhs, f64:$rhs), + (NOR (CMP_UN_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; +def : MipsPat<(setune f64:$lhs, f64:$rhs), + (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; +def : MipsPat<(seteq f64:$lhs, f64:$rhs), (CMP_EQ_D f64:$lhs, f64:$rhs)>, + ISA_MIPS32R6; +def : MipsPat<(setgt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$rhs, f64:$lhs)>, + ISA_MIPS32R6; +def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>, + ISA_MIPS32R6; +def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>, + ISA_MIPS32R6; +def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>, + ISA_MIPS32R6; +def : MipsPat<(setne f64:$lhs, f64:$rhs), + (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; + +// i32 selects +def : MipsPat<(select i32:$cond, i32:$t, i32:$f), + (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>, + ISA_MIPS32R6; +def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, i32:$f), + (OR (SELEQZ i32:$t, i32:$cond), (SELNEZ i32:$f, i32:$cond))>, + ISA_MIPS32R6; +def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, i32:$f), + (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>, + ISA_MIPS32R6; +def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i32:$t, i32:$f), + (OR (SELEQZ i32:$t, (XORi i32:$cond, immZExt16:$imm)), + (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>, + ISA_MIPS32R6; +def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i32:$t, i32:$f), + (OR (SELNEZ i32:$t, (XORi i32:$cond, immZExt16:$imm)), + (SELEQZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>, + ISA_MIPS32R6; +def : MipsPat<(select (i32 (setgt i32:$cond, immSExt16Plus1:$imm)), i32:$t, + i32:$f), + (OR (SELEQZ i32:$t, (SLTi i32:$cond, (Plus1 imm:$imm))), + (SELNEZ i32:$f, (SLTi i32:$cond, (Plus1 imm:$imm))))>, + ISA_MIPS32R6;
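Editor's note on the i32 select patterns above: MIPS32r6 has no conditional-move instructions, so select is built branchlessly from a SELNEZ/SELEQZ pair whose results are merged with OR; exactly one side of the pair is non-zero. A minimal C++ sketch of the dataflow these MipsPat rules expand to (function and variable names here are illustrative, not from the patch):

    #include <cstdint>

    // res = cond ? t : f, built the way the patterns above expand it.
    uint32_t selectR6(uint32_t cond, uint32_t t, uint32_t f) {
      uint32_t pickT = (cond != 0) ? t : 0; // selnez $tmp1, $t, $cond
      uint32_t pickF = (cond == 0) ? f : 0; // seleqz $tmp2, $f, $cond
      return pickT | pickF;                 // or $rd, $tmp1, $tmp2
    }

The OR is safe even when t or f is itself zero, since the discarded side always contributes zero.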
+def : MipsPat<(select (i32 (setugt i32:$cond, immSExt16Plus1:$imm)), + i32:$t, i32:$f), + (OR (SELEQZ i32:$t, (SLTiu i32:$cond, (Plus1 imm:$imm))), + (SELNEZ i32:$f, (SLTiu i32:$cond, (Plus1 imm:$imm))))>, + ISA_MIPS32R6; + +def : MipsPat<(select i32:$cond, i32:$t, immz), + (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6; +def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, immz), + (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6; +def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, immz), + (SELEQZ i32:$t, i32:$cond)>, ISA_MIPS32R6; +def : MipsPat<(select i32:$cond, immz, i32:$f), + (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6; +def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i32:$f), + (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6; +def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i32:$f), + (SELNEZ i32:$f, i32:$cond)>, ISA_MIPS32R6; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 924b325..f0b6814 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -23,6 +23,8 @@ def uimm16_64 : Operand<i64> { // Signed Operand def simm10_64 : Operand<i64>; +def imm64: Operand<i64>; + // Transformation Function - get Imm - 32. def Subtract32 : SDNodeXForm<imm, [{ return getImm(N, (unsigned)N->getZExtValue() - 32); @@ -36,6 +38,9 @@ def immZExt6 : ImmLeaf<i32, [{return Imm == (Imm & 0x3f);}]>; def immSExt10_64 : PatLeaf<(i64 imm), [{ return isInt<10>(N->getSExtValue()); }]>; +def immZExt16_64 : PatLeaf<(i64 imm), + [{ return isInt<16>(N->getZExtValue()); }]>; + //===----------------------------------------------------------------------===// // Instructions specific format //===----------------------------------------------------------------------===// @@ -62,7 +67,7 @@ let isPseudo = 1, isCodeGenOnly = 1 in { let DecoderNamespace = "Mips64" in { /// Arithmetic Instructions (ALU Immediate) def DADDi : ArithLogicI<"daddi", simm16_64, GPR64Opnd>, ADDI_FM<0x18>, - ISA_MIPS3; + ISA_MIPS3_NOT_32R6_64R6; def DADDiu : ArithLogicI<"daddiu", simm16_64, GPR64Opnd, II_DADDIU, immSExt16, add>, ADDI_FM<0x19>, IsAsCheapAsAMove, ISA_MIPS3; @@ -164,49 +169,58 @@ def SDR : StoreLeftRight<"sdr", MipsSDR, GPR64Opnd, II_SDR>, LW_FM<0x2d>, ISA_MIPS3_NOT_32R6_64R6; /// Load-linked, Store-conditional -def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>, ISA_MIPS3; -def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>, ISA_MIPS3; +def LLD : LLBase<"lld", GPR64Opnd>, LW_FM<0x34>, ISA_MIPS3_NOT_32R6_64R6; +def SCD : SCBase<"scd", GPR64Opnd>, LW_FM<0x3c>, ISA_MIPS3_NOT_32R6_64R6; /// Jump and Branch Instructions let isCodeGenOnly = 1 in { -def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>; -def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>; -def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>; -def BGEZ64 : CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>; -def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>; -def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>; -def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>; -def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM; -def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>; -def TAILCALL64_R : TailCallReg<GPR64Opnd, JR, GPR32Opnd>; + def JR64 : IndirectBranch<"jr", GPR64Opnd>, MTLO_FM<8>; + def BEQ64 : CBranch<"beq", brtarget, seteq, GPR64Opnd>, BEQ_FM<4>; + def BNE64 : CBranch<"bne", brtarget, setne, GPR64Opnd>, BEQ_FM<5>; + def BGEZ64 : 
CBranchZero<"bgez", brtarget, setge, GPR64Opnd>, BGEZ_FM<1, 1>; + def BGTZ64 : CBranchZero<"bgtz", brtarget, setgt, GPR64Opnd>, BGEZ_FM<7, 0>; + def BLEZ64 : CBranchZero<"blez", brtarget, setle, GPR64Opnd>, BGEZ_FM<6, 0>; + def BLTZ64 : CBranchZero<"bltz", brtarget, setlt, GPR64Opnd>, BGEZ_FM<1, 0>; + def JALR64 : JumpLinkReg<"jalr", GPR64Opnd>, JALR_FM; + def JALR64Pseudo : JumpLinkRegPseudo<GPR64Opnd, JALR, RA, GPR32Opnd>; + def TAILCALL64_R : TailCallReg<GPR64Opnd, JR, GPR32Opnd>; } +def PseudoReturn64 : PseudoReturnBase<GPR64Opnd>; +def PseudoIndirectBranch64 : PseudoIndirectBranchBase<GPR64Opnd>; + /// Multiply and Divide Instructions. def DMULT : Mult<"dmult", II_DMULT, GPR64Opnd, [HI0_64, LO0_64]>, - MULT_FM<0, 0x1c>, ISA_MIPS3; + MULT_FM<0, 0x1c>, ISA_MIPS3_NOT_32R6_64R6; def DMULTu : Mult<"dmultu", II_DMULTU, GPR64Opnd, [HI0_64, LO0_64]>, - MULT_FM<0, 0x1d>, ISA_MIPS3; + MULT_FM<0, 0x1d>, ISA_MIPS3_NOT_32R6_64R6; def PseudoDMULT : MultDivPseudo<DMULT, ACC128, GPR64Opnd, MipsMult, - II_DMULT>; + II_DMULT>, ISA_MIPS3_NOT_32R6_64R6; def PseudoDMULTu : MultDivPseudo<DMULTu, ACC128, GPR64Opnd, MipsMultu, - II_DMULTU>; + II_DMULTU>, ISA_MIPS3_NOT_32R6_64R6; def DSDIV : Div<"ddiv", II_DDIV, GPR64Opnd, [HI0_64, LO0_64]>, - MULT_FM<0, 0x1e>, ISA_MIPS3; + MULT_FM<0, 0x1e>, ISA_MIPS3_NOT_32R6_64R6; def DUDIV : Div<"ddivu", II_DDIVU, GPR64Opnd, [HI0_64, LO0_64]>, - MULT_FM<0, 0x1f>, ISA_MIPS3; + MULT_FM<0, 0x1f>, ISA_MIPS3_NOT_32R6_64R6; def PseudoDSDIV : MultDivPseudo<DSDIV, ACC128, GPR64Opnd, MipsDivRem, - II_DDIV, 0, 1, 1>; + II_DDIV, 0, 1, 1>, ISA_MIPS3_NOT_32R6_64R6; def PseudoDUDIV : MultDivPseudo<DUDIV, ACC128, GPR64Opnd, MipsDivRemU, - II_DDIVU, 0, 1, 1>; + II_DDIVU, 0, 1, 1>, ISA_MIPS3_NOT_32R6_64R6; let isCodeGenOnly = 1 in { -def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI0_64]>, MTLO_FM<0x11>; -def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO0_64]>, MTLO_FM<0x13>; -def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, AC0_64>, MFLO_FM<0x10>; -def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, AC0_64>, MFLO_FM<0x12>; -def PseudoMFHI64 : PseudoMFLOHI<GPR64, ACC128, MipsMFHI>; -def PseudoMFLO64 : PseudoMFLOHI<GPR64, ACC128, MipsMFLO>; -def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>; +def MTHI64 : MoveToLOHI<"mthi", GPR64Opnd, [HI0_64]>, MTLO_FM<0x11>, + ISA_MIPS3_NOT_32R6_64R6; +def MTLO64 : MoveToLOHI<"mtlo", GPR64Opnd, [LO0_64]>, MTLO_FM<0x13>, + ISA_MIPS3_NOT_32R6_64R6; +def MFHI64 : MoveFromLOHI<"mfhi", GPR64Opnd, AC0_64>, MFLO_FM<0x10>, + ISA_MIPS3_NOT_32R6_64R6; +def MFLO64 : MoveFromLOHI<"mflo", GPR64Opnd, AC0_64>, MFLO_FM<0x12>, + ISA_MIPS3_NOT_32R6_64R6; +def PseudoMFHI64 : PseudoMFLOHI<GPR64, ACC128, MipsMFHI>, + ISA_MIPS3_NOT_32R6_64R6; +def PseudoMFLO64 : PseudoMFLOHI<GPR64, ACC128, MipsMFLO>, + ISA_MIPS3_NOT_32R6_64R6; +def PseudoMTLOHI64 : PseudoMTLOHI<ACC128, GPR64>, ISA_MIPS3_NOT_32R6_64R6; /// Sign Ext In Register Instructions. 
def SEB64 : SignExtInReg<"seb", i8, GPR64Opnd, II_SEB>, SEB_FM<0x10, 0x20>, @@ -216,8 +230,8 @@ def SEH64 : SignExtInReg<"seh", i16, GPR64Opnd, II_SEH>, SEB_FM<0x18, 0x20>, } /// Count Leading -def DCLZ : CountLeading0<"dclz", GPR64Opnd>, CLO_FM<0x24>, ISA_MIPS64; -def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>, ISA_MIPS64; +def DCLZ : CountLeading0<"dclz", GPR64Opnd>, CLO_FM<0x24>, ISA_MIPS64_NOT_64R6; +def DCLO : CountLeading1<"dclo", GPR64Opnd>, CLO_FM<0x25>, ISA_MIPS64_NOT_64R6; /// Double Word Swap Bytes/HalfWords def DSBH : SubwordSwap<"dsbh", GPR64Opnd>, SEB_FM<2, 0x24>, ISA_MIPS64R2; @@ -431,13 +445,13 @@ def : MipsInstAlias<"daddu $rs, $rt, $imm", 0>; def : MipsInstAlias<"dadd $rs, $rt, $imm", (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, simm16_64:$imm), - 0>; + 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"daddu $rs, $imm", (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm), 0>; def : MipsInstAlias<"dadd $rs, $imm", (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, simm16_64:$imm), - 0>; + 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"add $rs, $imm", (ADDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>; @@ -450,10 +464,22 @@ def : MipsInstAlias<"dsll $rd, $rt, $rs", def : MipsInstAlias<"dsubu $rt, $rs, $imm", (DADDiu GPR64Opnd:$rt, GPR64Opnd:$rs, InvertedImOperand64:$imm), 0>; +def : MipsInstAlias<"dsubi $rs, $rt, $imm", + (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, + InvertedImOperand64:$imm), + 0>, ISA_MIPS3_NOT_32R6_64R6; +def : MipsInstAlias<"dsubi $rs, $imm", + (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, + InvertedImOperand64:$imm), + 0>, ISA_MIPS3_NOT_32R6_64R6; +def : MipsInstAlias<"dsub $rs, $rt, $imm", + (DADDi GPR64Opnd:$rs, GPR64Opnd:$rt, + InvertedImOperand64:$imm), + 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"dsub $rs, $imm", (DADDi GPR64Opnd:$rs, GPR64Opnd:$rs, InvertedImOperand64:$imm), - 0>; + 0>, ISA_MIPS3_NOT_32R6_64R6; def : MipsInstAlias<"dsubu $rs, $imm", (DADDiu GPR64Opnd:$rs, GPR64Opnd:$rs, InvertedImOperand64:$imm), @@ -465,6 +491,11 @@ def : MipsInstAlias<"dsrl $rd, $rt, $rs", (DSRLV GPR64Opnd:$rd, GPR64Opnd:$rt, GPR32Opnd:$rs), 0>, ISA_MIPS3; +class LoadImm64< string instr_asm, Operand Od, RegisterOperand RO> : + MipsAsmPseudoInst<(outs RO:$rt), (ins Od:$imm64), + !strconcat(instr_asm, "\t$rt, $imm64")> ; +def LoadImm64Reg : LoadImm64<"dli", imm64, GPR64Opnd>; + /// Move between CPU and coprocessor registers let DecoderNamespace = "Mips64", Predicates = [HasMips64] in { def DMFC0 : MFC3OP<"dmfc0", GPR64Opnd>, MFC3OP_FM<0x10, 1>; diff --git a/lib/Target/Mips/Mips64r6InstrInfo.td b/lib/Target/Mips/Mips64r6InstrInfo.td index f971218..63cf60b 100644 --- a/lib/Target/Mips/Mips64r6InstrInfo.td +++ b/lib/Target/Mips/Mips64r6InstrInfo.td @@ -13,10 +13,6 @@ // Notes about removals/changes from MIPS32r6: // Reencoded: dclo, dclz -// Reencoded: lld, scd -// Removed: daddi -// Removed: ddiv, ddivu, dmult, dmultu -// Removed: div, divu //===----------------------------------------------------------------------===// // @@ -29,14 +25,20 @@ class DAUI_ENC : DAUI_FM; class DAHI_ENC : REGIMM_FM<OPCODE5_DAHI>; class DATI_ENC : REGIMM_FM<OPCODE5_DATI>; class DBITSWAP_ENC : SPECIAL3_2R_FM<OPCODE6_DBITSWAP>; +class DCLO_R6_ENC : SPECIAL_2R_FM<OPCODE6_DCLO>; +class DCLZ_R6_ENC : SPECIAL_2R_FM<OPCODE6_DCLZ>; class DDIV_ENC : SPECIAL_3R_FM<0b00010, 0b011110>; class DDIVU_ENC : SPECIAL_3R_FM<0b00010, 0b011111>; +class DLSA_R6_ENC : SPECIAL_LSA_FM<OPCODE6_DLSA>; class DMOD_ENC : SPECIAL_3R_FM<0b00011, 0b011110>; class DMODU_ENC : SPECIAL_3R_FM<0b00011, 0b011111>; -class DMUH_ENC : 
SPECIAL_3R_FM<0b00011, 0b111000>; -class DMUHU_ENC : SPECIAL_3R_FM<0b00011, 0b111001>; -class DMUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b111000>; -class DMULU_ENC : SPECIAL_3R_FM<0b00010, 0b111001>; +class DMUH_ENC : SPECIAL_3R_FM<0b00011, 0b011100>; +class DMUHU_ENC : SPECIAL_3R_FM<0b00011, 0b011101>; +class DMUL_R6_ENC : SPECIAL_3R_FM<0b00010, 0b011100>; +class DMULU_ENC : SPECIAL_3R_FM<0b00010, 0b011101>; +class LDPC_ENC : PCREL18_FM<OPCODE3_LDPC>; +class LLD_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_LLD>; +class SCD_R6_ENC : SPECIAL3_LL_SC_FM<OPCODE6_SCD>; //===----------------------------------------------------------------------===// // @@ -56,14 +58,22 @@ class DAHI_DESC : AHI_ATI_DESC_BASE<"dahi", GPR64Opnd>; class DATI_DESC : AHI_ATI_DESC_BASE<"dati", GPR64Opnd>; class DAUI_DESC : AUI_DESC_BASE<"daui", GPR64Opnd>; class DBITSWAP_DESC : BITSWAP_DESC_BASE<"dbitswap", GPR64Opnd>; -class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd>; -class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd>; -class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd>; -class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd>; -class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd>; -class DMUHU_DESC : MUL_R6_DESC_BASE<"dmuhu", GPR64Opnd>; -class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd>; +class DCLO_R6_DESC : CLO_R6_DESC_BASE<"dclo", GPR64Opnd>; +class DCLZ_R6_DESC : CLZ_R6_DESC_BASE<"dclz", GPR64Opnd>; +class DDIV_DESC : DIVMOD_DESC_BASE<"ddiv", GPR64Opnd, sdiv>; +class DDIVU_DESC : DIVMOD_DESC_BASE<"ddivu", GPR64Opnd, udiv>; +class DLSA_R6_DESC : LSA_R6_DESC_BASE<"dlsa", GPR64Opnd, uimm2>; +class DMOD_DESC : DIVMOD_DESC_BASE<"dmod", GPR64Opnd, srem>; +class DMODU_DESC : DIVMOD_DESC_BASE<"dmodu", GPR64Opnd, urem>; +class DMUH_DESC : MUL_R6_DESC_BASE<"dmuh", GPR64Opnd, mulhs>; +class DMUHU_DESC : MUL_R6_DESC_BASE<"dmuhu", GPR64Opnd, mulhu>; +class DMUL_R6_DESC : MUL_R6_DESC_BASE<"dmul", GPR64Opnd, mul>; class DMULU_DESC : MUL_R6_DESC_BASE<"dmulu", GPR64Opnd>; +class LDPC_DESC : PCREL_DESC_BASE<"ldpc", GPR64Opnd, simm18_lsl3>; +class LLD_R6_DESC : LL_R6_DESC_BASE<"lld", GPR64Opnd>; +class SCD_R6_DESC : SC_R6_DESC_BASE<"scd", GPR64Opnd>; +class SELEQZ64_DESC : SELEQNE_Z_DESC_BASE<"seleqz", GPR64Opnd>; +class SELNEZ64_DESC : SELEQNE_Z_DESC_BASE<"selnez", GPR64Opnd>; //===----------------------------------------------------------------------===// // @@ -76,13 +86,132 @@ def DALIGN : DALIGN_ENC, DALIGN_DESC, ISA_MIPS64R6; def DATI : DATI_ENC, DATI_DESC, ISA_MIPS64R6; def DAUI : DAUI_ENC, DAUI_DESC, ISA_MIPS64R6; def DBITSWAP : DBITSWAP_ENC, DBITSWAP_DESC, ISA_MIPS64R6; +def DCLO_R6 : DCLO_R6_ENC, DCLO_R6_DESC, ISA_MIPS64R6; +def DCLZ_R6 : DCLZ_R6_ENC, DCLZ_R6_DESC, ISA_MIPS64R6; def DDIV : DDIV_ENC, DDIV_DESC, ISA_MIPS64R6; def DDIVU : DDIVU_ENC, DDIVU_DESC, ISA_MIPS64R6; -// def DLSA; // See MSA +def DLSA_R6 : DLSA_R6_ENC, DLSA_R6_DESC, ISA_MIPS64R6; def DMOD : DMOD_ENC, DMOD_DESC, ISA_MIPS64R6; def DMODU : DMODU_ENC, DMODU_DESC, ISA_MIPS64R6; def DMUH: DMUH_ENC, DMUH_DESC, ISA_MIPS64R6; def DMUHU: DMUHU_ENC, DMUHU_DESC, ISA_MIPS64R6; def DMUL_R6: DMUL_R6_ENC, DMUL_R6_DESC, ISA_MIPS64R6; def DMULU: DMULU_ENC, DMULU_DESC, ISA_MIPS64R6; -def LDPC; +def LDPC: LDPC_ENC, LDPC_DESC, ISA_MIPS64R6; +def LLD_R6 : LLD_R6_ENC, LLD_R6_DESC, ISA_MIPS32R6; +def SCD_R6 : SCD_R6_ENC, SCD_R6_DESC, ISA_MIPS32R6; +let DecoderNamespace = "Mips32r6_64r6_GP64" in { + def SELEQZ64 : SELEQZ_ENC, SELEQZ64_DESC, ISA_MIPS32R6, GPR_64; + def SELNEZ64 : SELNEZ_ENC, SELNEZ64_DESC, ISA_MIPS32R6, GPR_64; +} + 
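Editor's note: DLSA above (like the 32-bit LSA it mirrors) folds the shift-and-add of a scaled array index into one instruction. A rough C++ model, assuming the R6 definition where the 2-bit imm2 field encodes shift amounts 1 through 4 (the helper name is hypothetical):

    #include <cstdint>

    // dlsa $rd, $rs, $rt, imm2  =>  rd = (rs << (imm2 + 1)) + rt
    // e.g. imm2 = 2 scales an index by 8 for an array of 64-bit elements.
    uint64_t dlsa(uint64_t rs, uint64_t rt, unsigned imm2) {
      return (rs << (imm2 + 1)) + rt;
    }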
+//===----------------------------------------------------------------------===// +// +// Instruction Aliases +// +//===----------------------------------------------------------------------===// + +def : MipsInstAlias<"jr $rs", (JALR64 ZERO_64, GPR64Opnd:$rs), 1>, ISA_MIPS64R6; + +//===----------------------------------------------------------------------===// +// +// Patterns and Pseudo Instructions +// +//===----------------------------------------------------------------------===// + +// i64 selects +def : MipsPat<(select i64:$cond, i64:$t, i64:$f), + (OR64 (SELNEZ64 i64:$t, i64:$cond), + (SELEQZ64 i64:$f, i64:$cond))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (seteq i64:$cond, immz)), i64:$t, i64:$f), + (OR64 (SELEQZ64 i64:$t, i64:$cond), + (SELNEZ64 i64:$f, i64:$cond))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (setne i64:$cond, immz)), i64:$t, i64:$f), + (OR64 (SELNEZ64 i64:$t, i64:$cond), + (SELEQZ64 i64:$f, i64:$cond))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (seteq i64:$cond, immZExt16_64:$imm)), i64:$t, i64:$f), + (OR64 (SELEQZ64 i64:$t, (XORi64 i64:$cond, immZExt16_64:$imm)), + (SELNEZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (setne i64:$cond, immZExt16_64:$imm)), i64:$t, i64:$f), + (OR64 (SELNEZ64 i64:$t, (XORi64 i64:$cond, immZExt16_64:$imm)), + (SELEQZ64 i64:$f, (XORi64 i64:$cond, immZExt16_64:$imm)))>, + ISA_MIPS64R6; +def : MipsPat< + (select (i32 (setgt i64:$cond, immSExt16Plus1:$imm)), i64:$t, i64:$f), + (OR64 (SELEQZ64 i64:$t, + (SUBREG_TO_REG (i64 0), (SLTi64 i64:$cond, (Plus1 imm:$imm)), + sub_32)), + (SELNEZ64 i64:$f, + (SUBREG_TO_REG (i64 0), (SLTi64 i64:$cond, (Plus1 imm:$imm)), + sub_32)))>, + ISA_MIPS64R6; +def : MipsPat< + (select (i32 (setugt i64:$cond, immSExt16Plus1:$imm)), i64:$t, i64:$f), + (OR64 (SELEQZ64 i64:$t, + (SUBREG_TO_REG (i64 0), (SLTiu64 i64:$cond, (Plus1 imm:$imm)), + sub_32)), + (SELNEZ64 i64:$f, + (SUBREG_TO_REG (i64 0), (SLTiu64 i64:$cond, (Plus1 imm:$imm)), + sub_32)))>, + ISA_MIPS64R6; + +def : MipsPat<(select (i32 (setne i64:$cond, immz)), i64:$t, immz), + (SELNEZ64 i64:$t, i64:$cond)>, ISA_MIPS64R6; +def : MipsPat<(select (i32 (seteq i64:$cond, immz)), i64:$t, immz), + (SELEQZ64 i64:$t, i64:$cond)>, ISA_MIPS64R6; +def : MipsPat<(select (i32 (setne i64:$cond, immz)), immz, i64:$f), + (SELEQZ64 i64:$f, i64:$cond)>, ISA_MIPS64R6; +def : MipsPat<(select (i32 (seteq i64:$cond, immz)), immz, i64:$f), + (SELNEZ64 i64:$f, i64:$cond)>, ISA_MIPS64R6; + +// i64 selects from an i32 comparison +// One complicating factor here is that bits 32-63 of an i32 are undefined. +// FIXME: Ideally, setcc would always produce an i64 on MIPS64 targets. +// This would allow us to remove the sign-extensions here. 
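Editor's note, making the FIXME above concrete: the i32 condition sits in a 64-bit register whose bits 32-63 hold no defined value, so every use in the patterns below is wrapped in SLL64_32, a shift-by-zero that on MIPS64 sign-extends the low word. What that wrapper guarantees, as a C++ sketch (hypothetical helper name, a sketch only):

    #include <cstdint>

    // After sll $rd, $rs, 0 the upper word is a copy of bit 31, so a 64-bit
    // selnez/seleqz reads zero exactly when the original i32 value was zero.
    int64_t canonicalizeI32InGpr64(int64_t raw) {
      return static_cast<int64_t>(static_cast<int32_t>(raw));
    }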
+def : MipsPat<(select i32:$cond, i64:$t, i64:$f), + (OR64 (SELNEZ64 i64:$t, (SLL64_32 i32:$cond)), + (SELEQZ64 i64:$f, (SLL64_32 i32:$cond)))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i64:$t, i64:$f), + (OR64 (SELEQZ64 i64:$t, (SLL64_32 i32:$cond)), + (SELNEZ64 i64:$f, (SLL64_32 i32:$cond)))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (setne i32:$cond, immz)), i64:$t, i64:$f), + (OR64 (SELNEZ64 i64:$t, (SLL64_32 i32:$cond)), + (SELEQZ64 i64:$f, (SLL64_32 i32:$cond)))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i64:$t, i64:$f), + (OR64 (SELEQZ64 i64:$t, (SLL64_32 (XORi i32:$cond, + immZExt16:$imm))), + (SELNEZ64 i64:$f, (SLL64_32 (XORi i32:$cond, + immZExt16:$imm))))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i64:$t, i64:$f), + (OR64 (SELNEZ64 i64:$t, (SLL64_32 (XORi i32:$cond, + immZExt16:$imm))), + (SELEQZ64 i64:$f, (SLL64_32 (XORi i32:$cond, + immZExt16:$imm))))>, + ISA_MIPS64R6; + +def : MipsPat<(select i32:$cond, i64:$t, immz), + (SELNEZ64 i64:$t, (SLL64_32 i32:$cond))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (setne i32:$cond, immz)), i64:$t, immz), + (SELNEZ64 i64:$t, (SLL64_32 i32:$cond))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i64:$t, immz), + (SELEQZ64 i64:$t, (SLL64_32 i32:$cond))>, + ISA_MIPS64R6; +def : MipsPat<(select i32:$cond, immz, i64:$f), + (SELEQZ64 i64:$f, (SLL64_32 i32:$cond))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i64:$f), + (SELEQZ64 i64:$f, (SLL64_32 i32:$cond))>, + ISA_MIPS64R6; +def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i64:$f), + (SELNEZ64 i64:$f, (SLL64_32 i32:$cond))>, + ISA_MIPS64R6; diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 6df90aa..1fb75a2 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -91,7 +91,46 @@ bool MipsAsmPrinter::lowerOperand(const MachineOperand &MO, MCOperand &MCOp) { #include "MipsGenMCPseudoLowering.inc" +// Lower PseudoReturn/PseudoIndirectBranch/PseudoIndirectBranch64 to JR, JR_MM, +// JALR, or JALR64 as appropriate for the target +void MipsAsmPrinter::emitPseudoIndirectBranch(MCStreamer &OutStreamer, + const MachineInstr *MI) { + bool HasLinkReg = false; + MCInst TmpInst0; + + if (Subtarget->hasMips64r6()) { + // MIPS64r6 should use (JALR64 ZERO_64, $rs) + TmpInst0.setOpcode(Mips::JALR64); + HasLinkReg = true; + } else if (Subtarget->hasMips32r6()) { + // MIPS32r6 should use (JALR ZERO, $rs) + TmpInst0.setOpcode(Mips::JALR); + HasLinkReg = true; + } else if (Subtarget->inMicroMipsMode()) + // microMIPS should use (JR_MM $rs) + TmpInst0.setOpcode(Mips::JR_MM); + else { + // Everything else should use (JR $rs) + TmpInst0.setOpcode(Mips::JR); + } + + MCOperand MCOp; + + if (HasLinkReg) { + unsigned ZeroReg = Subtarget->isGP64bit() ?
Mips::ZERO_64 : Mips::ZERO; + TmpInst0.addOperand(MCOperand::CreateReg(ZeroReg)); + } + + lowerOperand(MI->getOperand(0), MCOp); + TmpInst0.addOperand(MCOp); + + EmitToStreamer(OutStreamer, TmpInst0); +} + void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { + MipsTargetStreamer &TS = getTargetStreamer(); + TS.setCanHaveModuleDir(false); + if (MI->isDebugValue()) { SmallString<128> Str; raw_svector_ostream OS(Str); @@ -141,6 +180,14 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { if (emitPseudoExpansionLowering(OutStreamer, &*I)) continue; + if (I->getOpcode() == Mips::PseudoReturn || + I->getOpcode() == Mips::PseudoReturn64 || + I->getOpcode() == Mips::PseudoIndirectBranch || + I->getOpcode() == Mips::PseudoIndirectBranch64) { + emitPseudoIndirectBranch(OutStreamer, &*I); + continue; + } + // The inMips16Mode() test is not permanent. // Some instructions are marked as pseudo right now which // would make the test fail for the wrong reason but @@ -657,6 +704,13 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { OutContext.getELFSection(".gcc_compiled_long64", ELF::SHT_PROGBITS, 0, SectionKind::getDataRel())); } + + getTargetStreamer().updateABIInfo(*Subtarget); + getTargetStreamer().emitDirectiveModuleFP(); + + if (Subtarget->isABI_O32()) + getTargetStreamer().emitDirectiveModuleOddSPReg(Subtarget->useOddSPReg(), + Subtarget->isABI_O32()); } void MipsAsmPrinter::EmitJal(MCSymbol *Symbol) { @@ -852,7 +906,7 @@ void MipsAsmPrinter::EmitFPCallStub( TS.emitDirectiveSetNoMicroMips(); // // .ent __call_stub_fp_xxxx - // .type __call_stub_fp_xxxx,@function + // .type __call_stub_fp_xxxx,@function // __call_stub_fp_xxxx: // std::string x = "__call_stub_fp_" + std::string(Symbol); diff --git a/lib/Target/Mips/MipsAsmPrinter.h b/lib/Target/Mips/MipsAsmPrinter.h index e82b145..967aa0b 100644 --- a/lib/Target/Mips/MipsAsmPrinter.h +++ b/lib/Target/Mips/MipsAsmPrinter.h @@ -40,6 +40,12 @@ private: bool emitPseudoExpansionLowering(MCStreamer &OutStreamer, const MachineInstr *MI); + // Emit PseudoReturn, PseudoReturn64, PseudoIndirectBranch, + // and PseudoIndirectBranch64 as a JR, JR_MM, JALR, or JALR64 as appropriate + // for the target. + void emitPseudoIndirectBranch(MCStreamer &OutStreamer, + const MachineInstr *MI); + // lowerOperand - Convert a MachineOperand into the equivalent MCOperand. 
bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index c83d880..007213c 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -239,6 +239,11 @@ def RetCC_Mips : CallingConv<[ def CSR_SingleFloatOnly : CalleeSavedRegs<(add (sequence "F%u", 31, 20), RA, FP, (sequence "S%u", 7, 0))>; +def CSR_O32_FPXX : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP, + (sequence "S%u", 7, 0))> { + let OtherPreserved = (add (decimate (sequence "F%u", 30, 20), 2)); +} + def CSR_O32 : CalleeSavedRegs<(add (sequence "D%u", 15, 10), RA, FP, (sequence "S%u", 7, 0))>; diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index 13fa546..151ef13 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -124,6 +124,7 @@ private: unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getLSAImmEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSimm19Lsl2Encoding(const MachineInstr &MI, unsigned OpNo) const; + unsigned getSimm18Lsl3Encoding(const MachineInstr &MI, unsigned OpNo) const; /// Expand pseudo instructions with accumulator register operands. void expandACCInstr(MachineBasicBlock::instr_iterator MI, @@ -273,6 +274,12 @@ unsigned MipsCodeEmitter::getLSAImmEncoding(const MachineInstr &MI, return 0; } +unsigned MipsCodeEmitter::getSimm18Lsl3Encoding(const MachineInstr &MI, + unsigned OpNo) const { + llvm_unreachable("Unimplemented function."); + return 0; +} + unsigned MipsCodeEmitter::getSimm19Lsl2Encoding(const MachineInstr &MI, unsigned OpNo) const { llvm_unreachable("Unimplemented function."); diff --git a/lib/Target/Mips/MipsCondMov.td b/lib/Target/Mips/MipsCondMov.td index 7177f65..690f626 100644 --- a/lib/Target/Mips/MipsCondMov.td +++ b/lib/Target/Mips/MipsCondMov.td @@ -104,136 +104,162 @@ multiclass MovnPats<RegisterClass CRC, RegisterClass DRC, Instruction MOVNInst, // Instantiation of instructions. 
def MOVZ_I_I : MMRel, CMov_I_I_FT<"movz", GPR32Opnd, GPR32Opnd, II_MOVZ>, - ADD_FM<0, 0xa>, INSN_MIPS4_32; + ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6; let isCodeGenOnly = 1 in { def MOVZ_I_I64 : CMov_I_I_FT<"movz", GPR32Opnd, GPR64Opnd, II_MOVZ>, - ADD_FM<0, 0xa>; + ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6; def MOVZ_I64_I : CMov_I_I_FT<"movz", GPR64Opnd, GPR32Opnd, II_MOVZ>, - ADD_FM<0, 0xa>; + ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6; def MOVZ_I64_I64 : CMov_I_I_FT<"movz", GPR64Opnd, GPR64Opnd, II_MOVZ>, - ADD_FM<0, 0xa>; + ADD_FM<0, 0xa>, INSN_MIPS4_32_NOT_32R6_64R6; } def MOVN_I_I : MMRel, CMov_I_I_FT<"movn", GPR32Opnd, GPR32Opnd, II_MOVN>, - ADD_FM<0, 0xb>, INSN_MIPS4_32; + ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6; let isCodeGenOnly = 1 in { def MOVN_I_I64 : CMov_I_I_FT<"movn", GPR32Opnd, GPR64Opnd, II_MOVN>, - ADD_FM<0, 0xb>; + ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6; def MOVN_I64_I : CMov_I_I_FT<"movn", GPR64Opnd, GPR32Opnd, II_MOVN>, - ADD_FM<0, 0xb>; + ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6; def MOVN_I64_I64 : CMov_I_I_FT<"movn", GPR64Opnd, GPR64Opnd, II_MOVN>, - ADD_FM<0, 0xb>; + ADD_FM<0, 0xb>, INSN_MIPS4_32_NOT_32R6_64R6; } def MOVZ_I_S : MMRel, CMov_I_F_FT<"movz.s", GPR32Opnd, FGR32Opnd, II_MOVZ_S>, - CMov_I_F_FM<18, 16>, INSN_MIPS4_32; + CMov_I_F_FM<18, 16>, INSN_MIPS4_32_NOT_32R6_64R6; let isCodeGenOnly = 1 in def MOVZ_I64_S : CMov_I_F_FT<"movz.s", GPR64Opnd, FGR32Opnd, II_MOVZ_S>, - CMov_I_F_FM<18, 16>, AdditionalRequires<[HasMips64]>; + CMov_I_F_FM<18, 16>, INSN_MIPS4_32_NOT_32R6_64R6, + AdditionalRequires<[HasMips64]>; def MOVN_I_S : MMRel, CMov_I_F_FT<"movn.s", GPR32Opnd, FGR32Opnd, II_MOVN_S>, - CMov_I_F_FM<19, 16>, INSN_MIPS4_32; + CMov_I_F_FM<19, 16>, INSN_MIPS4_32_NOT_32R6_64R6; let isCodeGenOnly = 1 in def MOVN_I64_S : CMov_I_F_FT<"movn.s", GPR64Opnd, FGR32Opnd, II_MOVN_S>, - CMov_I_F_FM<19, 16>, AdditionalRequires<[IsGP64bit]>; + CMov_I_F_FM<19, 16>, INSN_MIPS4_32_NOT_32R6_64R6, + AdditionalRequires<[IsGP64bit]>; def MOVZ_I_D32 : MMRel, CMov_I_F_FT<"movz.d", GPR32Opnd, AFGR64Opnd, II_MOVZ_D>, CMov_I_F_FM<18, 17>, - INSN_MIPS4_32, FGR_32; + INSN_MIPS4_32_NOT_32R6_64R6, FGR_32; def MOVN_I_D32 : MMRel, CMov_I_F_FT<"movn.d", GPR32Opnd, AFGR64Opnd, II_MOVN_D>, CMov_I_F_FM<19, 17>, - INSN_MIPS4_32, FGR_32; + INSN_MIPS4_32_NOT_32R6_64R6, FGR_32; let DecoderNamespace = "Mips64" in { def MOVZ_I_D64 : CMov_I_F_FT<"movz.d", GPR32Opnd, FGR64Opnd, II_MOVZ_D>, - CMov_I_F_FM<18, 17>, INSN_MIPS4_32, FGR_64; + CMov_I_F_FM<18, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; def MOVN_I_D64 : CMov_I_F_FT<"movn.d", GPR32Opnd, FGR64Opnd, II_MOVN_D>, - CMov_I_F_FM<19, 17>, INSN_MIPS4_32, FGR_64; + CMov_I_F_FM<19, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; let isCodeGenOnly = 1 in { - def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd, - II_MOVZ_D>, CMov_I_F_FM<18, 17>, FGR_64; - def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd, - II_MOVN_D>, CMov_I_F_FM<19, 17>, FGR_64; + def MOVZ_I64_D64 : CMov_I_F_FT<"movz.d", GPR64Opnd, FGR64Opnd, II_MOVZ_D>, + CMov_I_F_FM<18, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; + def MOVN_I64_D64 : CMov_I_F_FT<"movn.d", GPR64Opnd, FGR64Opnd, II_MOVN_D>, + CMov_I_F_FM<19, 17>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; } } def MOVT_I : MMRel, CMov_F_I_FT<"movt", GPR32Opnd, II_MOVT, MipsCMovFP_T>, - CMov_F_I_FM<1>, INSN_MIPS4_32; + CMov_F_I_FM<1>, INSN_MIPS4_32_NOT_32R6_64R6; let isCodeGenOnly = 1 in def MOVT_I64 : CMov_F_I_FT<"movt", GPR64Opnd, II_MOVT, MipsCMovFP_T>, - CMov_F_I_FM<1>, AdditionalRequires<[IsGP64bit]>; + 
CMov_F_I_FM<1>, INSN_MIPS4_32_NOT_32R6_64R6, + AdditionalRequires<[IsGP64bit]>; def MOVF_I : MMRel, CMov_F_I_FT<"movf", GPR32Opnd, II_MOVF, MipsCMovFP_F>, - CMov_F_I_FM<0>, INSN_MIPS4_32; + CMov_F_I_FM<0>, INSN_MIPS4_32_NOT_32R6_64R6; let isCodeGenOnly = 1 in def MOVF_I64 : CMov_F_I_FT<"movf", GPR64Opnd, II_MOVF, MipsCMovFP_F>, - CMov_F_I_FM<0>, AdditionalRequires<[IsGP64bit]>; + CMov_F_I_FM<0>, INSN_MIPS4_32_NOT_32R6_64R6, + AdditionalRequires<[IsGP64bit]>; def MOVT_S : MMRel, CMov_F_F_FT<"movt.s", FGR32Opnd, II_MOVT_S, MipsCMovFP_T>, - CMov_F_F_FM<16, 1>, INSN_MIPS4_32; + CMov_F_F_FM<16, 1>, INSN_MIPS4_32_NOT_32R6_64R6; def MOVF_S : MMRel, CMov_F_F_FT<"movf.s", FGR32Opnd, II_MOVF_S, MipsCMovFP_F>, - CMov_F_F_FM<16, 0>, INSN_MIPS4_32; + CMov_F_F_FM<16, 0>, INSN_MIPS4_32_NOT_32R6_64R6; def MOVT_D32 : MMRel, CMov_F_F_FT<"movt.d", AFGR64Opnd, II_MOVT_D, MipsCMovFP_T>, CMov_F_F_FM<17, 1>, - INSN_MIPS4_32, FGR_32; + INSN_MIPS4_32_NOT_32R6_64R6, FGR_32; def MOVF_D32 : MMRel, CMov_F_F_FT<"movf.d", AFGR64Opnd, II_MOVF_D, MipsCMovFP_F>, CMov_F_F_FM<17, 0>, - INSN_MIPS4_32, FGR_32; + INSN_MIPS4_32_NOT_32R6_64R6, FGR_32; let DecoderNamespace = "Mips64" in { def MOVT_D64 : CMov_F_F_FT<"movt.d", FGR64Opnd, II_MOVT_D, MipsCMovFP_T>, - CMov_F_F_FM<17, 1>, INSN_MIPS4_32, FGR_64; + CMov_F_F_FM<17, 1>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; def MOVF_D64 : CMov_F_F_FT<"movf.d", FGR64Opnd, II_MOVF_D, MipsCMovFP_F>, - CMov_F_F_FM<17, 0>, INSN_MIPS4_32, FGR_64; + CMov_F_F_FM<17, 0>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; } // Instantiation of conditional move patterns. -defm : MovzPats0<GPR32, GPR32, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>; -defm : MovzPats1<GPR32, GPR32, MOVZ_I_I, XOR>; -defm : MovzPats2<GPR32, GPR32, MOVZ_I_I, XORi>; +defm : MovzPats0<GPR32, GPR32, MOVZ_I_I, SLT, SLTu, SLTi, SLTiu>, + INSN_MIPS4_32_NOT_32R6_64R6; +defm : MovzPats1<GPR32, GPR32, MOVZ_I_I, XOR>, INSN_MIPS4_32_NOT_32R6_64R6; +defm : MovzPats2<GPR32, GPR32, MOVZ_I_I, XORi>, INSN_MIPS4_32_NOT_32R6_64R6; -defm : MovzPats0<GPR32, GPR64, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>, GPR_64; +defm : MovzPats0<GPR32, GPR64, MOVZ_I_I64, SLT, SLTu, SLTi, SLTiu>, + INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; defm : MovzPats0<GPR64, GPR32, MOVZ_I_I, SLT64, SLTu64, SLTi64, SLTiu64>, - GPR_64; + INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; defm : MovzPats0<GPR64, GPR64, MOVZ_I_I64, SLT64, SLTu64, SLTi64, SLTiu64>, + INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; +defm : MovzPats1<GPR32, GPR64, MOVZ_I_I64, XOR>, + INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; +defm : MovzPats1<GPR64, GPR32, MOVZ_I64_I, XOR64>, + INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; +defm : MovzPats1<GPR64, GPR64, MOVZ_I64_I64, XOR64>, + INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; +defm : MovzPats2<GPR32, GPR64, MOVZ_I_I64, XORi>, + INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; +defm : MovzPats2<GPR64, GPR32, MOVZ_I64_I, XORi64>, + INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; +defm : MovzPats2<GPR64, GPR64, MOVZ_I64_I64, XORi64>, + INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; + +defm : MovnPats<GPR32, GPR32, MOVN_I_I, XOR>, INSN_MIPS4_32_NOT_32R6_64R6; + +defm : MovnPats<GPR32, GPR64, MOVN_I_I64, XOR>, INSN_MIPS4_32_NOT_32R6_64R6, + GPR_64; +defm : MovnPats<GPR64, GPR32, MOVN_I64_I, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6, + GPR_64; +defm : MovnPats<GPR64, GPR64, MOVN_I64_I64, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; -defm : MovzPats1<GPR32, GPR64, MOVZ_I_I64, XOR>, GPR_64; -defm : MovzPats1<GPR64, GPR32, MOVZ_I64_I, XOR64>, GPR_64; -defm : MovzPats1<GPR64, GPR64, MOVZ_I64_I64, XOR64>, GPR_64; -defm : MovzPats2<GPR32, GPR64, MOVZ_I_I64, XORi>, GPR_64; -defm : 
MovzPats2<GPR64, GPR32, MOVZ_I64_I, XORi64>, GPR_64; -defm : MovzPats2<GPR64, GPR64, MOVZ_I64_I64, XORi64>, GPR_64; - -defm : MovnPats<GPR32, GPR32, MOVN_I_I, XOR>; - -defm : MovnPats<GPR32, GPR64, MOVN_I_I64, XOR>, GPR_64; -defm : MovnPats<GPR64, GPR32, MOVN_I64_I, XOR64>, GPR_64; -defm : MovnPats<GPR64, GPR64, MOVN_I64_I64, XOR64>, GPR_64; -defm : MovzPats0<GPR32, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>; -defm : MovzPats1<GPR32, FGR32, MOVZ_I_S, XOR>; -defm : MovnPats<GPR32, FGR32, MOVN_I_S, XOR>; +defm : MovzPats0<GPR32, FGR32, MOVZ_I_S, SLT, SLTu, SLTi, SLTiu>, + INSN_MIPS4_32_NOT_32R6_64R6; +defm : MovzPats1<GPR32, FGR32, MOVZ_I_S, XOR>, INSN_MIPS4_32_NOT_32R6_64R6; +defm : MovnPats<GPR32, FGR32, MOVN_I_S, XOR>, INSN_MIPS4_32_NOT_32R6_64R6; defm : MovzPats0<GPR64, FGR32, MOVZ_I_S, SLT64, SLTu64, SLTi64, SLTiu64>, + INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; +defm : MovzPats1<GPR64, FGR32, MOVZ_I64_S, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6, + GPR_64; +defm : MovnPats<GPR64, FGR32, MOVN_I64_S, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6, GPR_64; -defm : MovzPats1<GPR64, FGR32, MOVZ_I64_S, XOR64>, GPR_64; -defm : MovnPats<GPR64, FGR32, MOVN_I64_S, XOR64>, GPR_64; -defm : MovzPats0<GPR32, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>, FGR_32; -defm : MovzPats1<GPR32, AFGR64, MOVZ_I_D32, XOR>, FGR_32; -defm : MovnPats<GPR32, AFGR64, MOVN_I_D32, XOR>, FGR_32; +defm : MovzPats0<GPR32, AFGR64, MOVZ_I_D32, SLT, SLTu, SLTi, SLTiu>, + INSN_MIPS4_32_NOT_32R6_64R6, FGR_32; +defm : MovzPats1<GPR32, AFGR64, MOVZ_I_D32, XOR>, INSN_MIPS4_32_NOT_32R6_64R6, + FGR_32; +defm : MovnPats<GPR32, AFGR64, MOVN_I_D32, XOR>, INSN_MIPS4_32_NOT_32R6_64R6, + FGR_32; -defm : MovzPats0<GPR32, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>, FGR_64; +defm : MovzPats0<GPR32, FGR64, MOVZ_I_D64, SLT, SLTu, SLTi, SLTiu>, + INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; defm : MovzPats0<GPR64, FGR64, MOVZ_I_D64, SLT64, SLTu64, SLTi64, SLTiu64>, + INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; +defm : MovzPats1<GPR32, FGR64, MOVZ_I_D64, XOR>, INSN_MIPS4_32_NOT_32R6_64R6, + FGR_64; +defm : MovzPats1<GPR64, FGR64, MOVZ_I64_D64, XOR64>, + INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; +defm : MovnPats<GPR32, FGR64, MOVN_I_D64, XOR>, INSN_MIPS4_32_NOT_32R6_64R6, + FGR_64; +defm : MovnPats<GPR64, FGR64, MOVN_I64_D64, XOR64>, INSN_MIPS4_32_NOT_32R6_64R6, FGR_64; -defm : MovzPats1<GPR32, FGR64, MOVZ_I_D64, XOR>, FGR_64; -defm : MovzPats1<GPR64, FGR64, MOVZ_I64_D64, XOR64>, FGR_64; -defm : MovnPats<GPR32, FGR64, MOVN_I_D64, XOR>, FGR_64; -defm : MovnPats<GPR64, FGR64, MOVN_I64_D64, XOR64>, FGR_64; diff --git a/lib/Target/Mips/MipsDSPInstrFormats.td b/lib/Target/Mips/MipsDSPInstrFormats.td index cf09113..b5d52ce 100644 --- a/lib/Target/Mips/MipsDSPInstrFormats.td +++ b/lib/Target/Mips/MipsDSPInstrFormats.td @@ -7,9 +7,9 @@ // //===----------------------------------------------------------------------===// -def HasDSP : Predicate<"Subtarget.hasDSP()">, +def HasDSP : Predicate<"Subtarget->hasDSP()">, AssemblerPredicate<"FeatureDSP">; -def HasDSPR2 : Predicate<"Subtarget.hasDSPR2()">, +def HasDSPR2 : Predicate<"Subtarget->hasDSPR2()">, AssemblerPredicate<"FeatureDSPR2">; // Fields. 
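Editor's note: the HasDSP/HasDSPR2 predicates switch from "Subtarget." to "Subtarget->" because this patch turns cached subtarget references into pointers that are re-fetched per machine function (see the MipsDAGToDAGISel and MipsFastISel hunks below). A self-contained C++ analogue of that idiom, with all LLVM types reduced to stand-ins:

    // Illustrative only: a selector that refreshes its subtarget pointer on
    // every function instead of binding a reference once at construction.
    struct Subtarget { bool hasDSP; };
    struct MachineFunctionStub { Subtarget ST; };

    class SelectorSketch {
      const Subtarget *Subtarget_ = nullptr; // was: const Subtarget &
    public:
      bool runOnMachineFunction(MachineFunctionStub &MF) {
        Subtarget_ = &MF.ST; // re-queried per function, as in the patch
        return Subtarget_->hasDSP;
      }
    };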
diff --git a/lib/Target/Mips/MipsDelaySlotFiller.cpp b/lib/Target/Mips/MipsDelaySlotFiller.cpp index d6c7cac..bcfbc12 100644 --- a/lib/Target/Mips/MipsDelaySlotFiller.cpp +++ b/lib/Target/Mips/MipsDelaySlotFiller.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetInstrInfo.h" @@ -177,6 +178,13 @@ namespace { for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) Changed |= runOnMachineBasicBlock(*FI); + + // This pass invalidates liveness information when it reorders + // instructions to fill delay slots. Without this, -verify-machineinstrs + // will fail. + if (Changed) + F.getRegInfo().invalidateLiveness(); + return Changed; } diff --git a/lib/Target/Mips/MipsFastISel.cpp b/lib/Target/Mips/MipsFastISel.cpp index 268a0ed..617801b 100644 --- a/lib/Target/Mips/MipsFastISel.cpp +++ b/lib/Target/Mips/MipsFastISel.cpp @@ -12,6 +12,7 @@ #include "MipsISelLowering.h" #include "MipsMachineFunction.h" #include "MipsSubtarget.h" +#include "MipsTargetMachine.h" using namespace llvm; @@ -36,11 +37,11 @@ class MipsFastISel final : public FastISel { /// Subtarget - Keep a pointer to the MipsSubtarget around so that we can /// make the right decision when generating code for different targets. - const MipsSubtarget *Subtarget; Module &M; const TargetMachine &TM; const TargetInstrInfo &TII; const TargetLowering &TLI; + const MipsSubtarget *Subtarget; MipsFunctionInfo *MFI; // Convenience variables to avoid some queries. @@ -54,8 +55,8 @@ public: : FastISel(funcInfo, libInfo), M(const_cast<Module &>(*funcInfo.Fn->getParent())), TM(funcInfo.MF->getTarget()), TII(*TM.getInstrInfo()), - TLI(*TM.getTargetLowering()) { - Subtarget = &TM.getSubtarget<MipsSubtarget>(); + TLI(*TM.getTargetLowering()), + Subtarget(&TM.getSubtarget<MipsSubtarget>()) { MFI = funcInfo.MF->getInfo<MipsFunctionInfo>(); Context = &funcInfo.Fn->getContext(); TargetSupported = ((Subtarget->getRelocationModel() == Reloc::PIC_) && @@ -68,8 +69,11 @@ public: bool ComputeAddress(const Value *Obj, Address &Addr); private: + bool EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, + unsigned Alignment = 0); bool EmitStore(MVT VT, unsigned SrcReg, Address &Addr, unsigned Alignment = 0); + bool SelectLoad(const Instruction *I); bool SelectRet(const Instruction *I); bool SelectStore(const Instruction *I); @@ -80,6 +84,36 @@ private: unsigned MaterializeGV(const GlobalValue *GV, MVT VT); unsigned MaterializeInt(const Constant *C, MVT VT); unsigned Materialize32BitInt(int64_t Imm, const TargetRegisterClass *RC); + + // For some reason, this default is not generated by TableGen, so we + // explicitly generate it here.
+ // + unsigned FastEmitInst_riir(uint64_t inst, const TargetRegisterClass *RC, + unsigned Op0, bool Op0IsKill, uint64_t imm1, + uint64_t imm2, unsigned Op3, bool Op3IsKill) { + return 0; + } + + MachineInstrBuilder EmitInst(unsigned Opc) { + return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); + } + + MachineInstrBuilder EmitInst(unsigned Opc, unsigned DstReg) { + return BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + DstReg); + } + + MachineInstrBuilder EmitInstStore(unsigned Opc, unsigned SrcReg, + unsigned MemReg, int64_t MemOffset) { + return EmitInst(Opc).addReg(SrcReg).addReg(MemReg).addImm(MemOffset); + } + + MachineInstrBuilder EmitInstLoad(unsigned Opc, unsigned DstReg, + unsigned MemReg, int64_t MemOffset) { + return EmitInst(Opc, DstReg).addReg(MemReg).addImm(MemOffset); + } + +#include "MipsGenFastISel.inc" }; bool MipsFastISel::isTypeLegal(Type *Ty, MVT &VT) { @@ -100,6 +134,8 @@ bool MipsFastISel::isLoadTypeLegal(Type *Ty, MVT &VT) { // We will extend this in a later patch: // If this is a type than can be sign or zero-extended to a basic operation // go ahead and accept it now. + if (VT == MVT::i8 || VT == MVT::i16) + return true; return false; } @@ -116,6 +152,45 @@ bool MipsFastISel::ComputeAddress(const Value *Obj, Address &Addr) { return Addr.Base.Reg != 0; } +bool MipsFastISel::EmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, + unsigned Alignment) { + // + // more cases will be handled here in following patches. + // + unsigned Opc; + switch (VT.SimpleTy) { + case MVT::i32: { + ResultReg = createResultReg(&Mips::GPR32RegClass); + Opc = Mips::LW; + break; + } + case MVT::i16: { + ResultReg = createResultReg(&Mips::GPR32RegClass); + Opc = Mips::LHu; + break; + } + case MVT::i8: { + ResultReg = createResultReg(&Mips::GPR32RegClass); + Opc = Mips::LBu; + break; + } + case MVT::f32: { + ResultReg = createResultReg(&Mips::FGR32RegClass); + Opc = Mips::LWC1; + break; + } + case MVT::f64: { + ResultReg = createResultReg(&Mips::AFGR64RegClass); + Opc = Mips::LDC1; + break; + } + default: + return false; + } + EmitInstLoad(Opc, ResultReg, Addr.Base.Reg, Addr.Offset); + return true; +} + // Materialize a constant into a register, and return the register // number (or zero if we failed to handle it). unsigned MipsFastISel::TargetMaterializeConstant(const Constant *C) { @@ -141,12 +216,49 @@ bool MipsFastISel::EmitStore(MVT VT, unsigned SrcReg, Address &Addr, // // more cases will be handled here in following patches. // - if (VT != MVT::i32) + unsigned Opc; + switch (VT.SimpleTy) { + case MVT::i8: + Opc = Mips::SB; + break; + case MVT::i16: + Opc = Mips::SH; + break; + case MVT::i32: + Opc = Mips::SW; + break; + case MVT::f32: + Opc = Mips::SWC1; + break; + case MVT::f64: + Opc = Mips::SDC1; + break; + default: return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::SW)) - .addReg(SrcReg) - .addReg(Addr.Base.Reg) - .addImm(Addr.Offset); + } + EmitInstStore(Opc, SrcReg, Addr.Base.Reg, Addr.Offset); + return true; +} + +bool MipsFastISel::SelectLoad(const Instruction *I) { + // Atomic loads need special handling. + if (cast<LoadInst>(I)->isAtomic()) + return false; + + // Verify we have a legal type before going any further. + MVT VT; + if (!isLoadTypeLegal(I->getType(), VT)) + return false; + + // See if we can handle this address. 
+ Address Addr; + if (!ComputeAddress(I->getOperand(0), Addr)) + return false; + + unsigned ResultReg; + if (!EmitLoad(VT, ResultReg, Addr, cast<LoadInst>(I)->getAlignment())) + return false; + UpdateValueMap(I, ResultReg); return true; } @@ -186,8 +298,7 @@ bool MipsFastISel::SelectRet(const Instruction *I) { if (Ret->getNumOperands() > 0) { return false; } - unsigned RetOpc = Mips::RetRA; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(RetOpc)); + EmitInst(Mips::RetRA); return true; } @@ -197,6 +308,8 @@ bool MipsFastISel::TargetSelectInstruction(const Instruction *I) { switch (I->getOpcode()) { default: break; + case Instruction::Load: + return SelectLoad(I); case Instruction::Store: return SelectStore(I); case Instruction::Ret: @@ -207,6 +320,22 @@ bool MipsFastISel::TargetSelectInstruction(const Instruction *I) { } unsigned MipsFastISel::MaterializeFP(const ConstantFP *CFP, MVT VT) { + int64_t Imm = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); + if (VT == MVT::f32) { + const TargetRegisterClass *RC = &Mips::FGR32RegClass; + unsigned DestReg = createResultReg(RC); + unsigned TempReg = Materialize32BitInt(Imm, &Mips::GPR32RegClass); + EmitInst(Mips::MTC1, DestReg).addReg(TempReg); + return DestReg; + } else if (VT == MVT::f64) { + const TargetRegisterClass *RC = &Mips::AFGR64RegClass; + unsigned DestReg = createResultReg(RC); + unsigned TempReg1 = Materialize32BitInt(Imm >> 32, &Mips::GPR32RegClass); + unsigned TempReg2 = + Materialize32BitInt(Imm & 0xFFFFFFFF, &Mips::GPR32RegClass); + EmitInst(Mips::BuildPairF64, DestReg).addReg(TempReg2).addReg(TempReg1); + return DestReg; + } return 0; } @@ -221,9 +350,8 @@ unsigned MipsFastISel::MaterializeGV(const GlobalValue *GV, MVT VT) { // TLS not supported at this time. if (IsThreadLocal) return 0; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LW), DestReg) - .addReg(MFI->getGlobalBaseReg()) - .addGlobalAddress(GV, 0, MipsII::MO_GOT); + EmitInst(Mips::LW, DestReg).addReg(MFI->getGlobalBaseReg()).addGlobalAddress( + GV, 0, MipsII::MO_GOT); return DestReg; } unsigned MipsFastISel::MaterializeInt(const Constant *C, MVT VT) { @@ -245,15 +373,10 @@ unsigned MipsFastISel::Materialize32BitInt(int64_t Imm, if (isInt<16>(Imm)) { unsigned Opc = Mips::ADDiu; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(Mips::ZERO) - .addImm(Imm); + EmitInst(Opc, ResultReg).addReg(Mips::ZERO).addImm(Imm); return ResultReg; } else if (isUInt<16>(Imm)) { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::ORi), - ResultReg) - .addReg(Mips::ZERO) - .addImm(Imm); + EmitInst(Mips::ORi, ResultReg).addReg(Mips::ZERO).addImm(Imm); return ResultReg; } unsigned Lo = Imm & 0xFFFF; @@ -261,16 +384,10 @@ unsigned MipsFastISel::Materialize32BitInt(int64_t Imm, if (Lo) { // Both Lo and Hi have nonzero bits. 
unsigned TmpReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LUi), - TmpReg).addImm(Hi); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::ORi), - ResultReg) - .addReg(TmpReg) - .addImm(Lo); - + EmitInst(Mips::LUi, TmpReg).addImm(Hi); + EmitInst(Mips::ORi, ResultReg).addReg(TmpReg).addImm(Lo); } else { - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Mips::LUi), - ResultReg).addImm(Hi); + EmitInst(Mips::LUi, ResultReg).addImm(Hi); } return ResultReg; } diff --git a/lib/Target/Mips/MipsFrameLowering.h b/lib/Target/Mips/MipsFrameLowering.h index e10a3a5..8e9196c 100644 --- a/lib/Target/Mips/MipsFrameLowering.h +++ b/lib/Target/Mips/MipsFrameLowering.h @@ -15,7 +15,6 @@ #define MIPS_FRAMEINFO_H #include "Mips.h" -#include "MipsSubtarget.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 90cff63..0bdabf3 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -47,6 +47,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// bool MipsDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { + Subtarget = &TM.getSubtarget<MipsSubtarget>(); bool Ret = SelectionDAGISel::runOnMachineFunction(MF); processFunctionAfterISel(MF); @@ -202,7 +203,7 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { #ifndef NDEBUG case ISD::LOAD: case ISD::STORE: - assert((Subtarget.systemSupportsUnalignedAccess() || + assert((Subtarget->systemSupportsUnalignedAccess() || cast<MemSDNode>(Node)->getMemoryVT().getSizeInBits() / 8 <= cast<MemSDNode>(Node)->getAlignment()) && "Unexpected unaligned loads/stores."); diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h index 13becb6..2a6c875 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -32,7 +32,7 @@ namespace llvm { class MipsDAGToDAGISel : public SelectionDAGISel { public: explicit MipsDAGToDAGISel(MipsTargetMachine &TM) - : SelectionDAGISel(TM), Subtarget(TM.getSubtarget<MipsSubtarget>()) {} + : SelectionDAGISel(TM), Subtarget(&TM.getSubtarget<MipsSubtarget>()) {} // Pass Name const char *getPassName() const override { @@ -46,7 +46,7 @@ protected: /// Keep a pointer to the MipsSubtarget around so that we can make the right /// decision when generating code for different targets. - const MipsSubtarget &Subtarget; + const MipsSubtarget *Subtarget; private: // Include the pieces autogenerated from the target description. diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index bfe5ea1..b7af2d4 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -215,6 +215,11 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM) // setcc operations results (slt, sgt, ...). setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + // The cmp.cond.fmt instruction in MIPS32r6/MIPS64r6 uses 0 and -1 like MSA + // does. Integer booleans still use 0 and 1. 
+ if (Subtarget->hasMips32r6()) + setBooleanContents(ZeroOrOneBooleanContent, + ZeroOrNegativeOneBooleanContent); // Load extended operations for i1 types must be promoted setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); @@ -251,7 +256,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f64, Custom); setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); - if (isGP64bit()) { + if (Subtarget->isGP64bit()) { setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); setOperationAction(ISD::BlockAddress, MVT::i64, Custom); setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); @@ -263,14 +268,14 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); } - if (!isGP64bit()) { + if (!Subtarget->isGP64bit()) { setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } setOperationAction(ISD::ADD, MVT::i32, Custom); - if (isGP64bit()) + if (Subtarget->isGP64bit()) setOperationAction(ISD::ADD, MVT::i64, Custom); setOperationAction(ISD::SDIV, MVT::i32, Expand); @@ -287,7 +292,8 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::BR_CC, MVT::f64, Expand); setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::i64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); @@ -368,7 +374,7 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM) if (!Subtarget->hasMips64r2()) setOperationAction(ISD::BSWAP, MVT::i64, Expand); - if (isGP64bit()) { + if (Subtarget->isGP64bit()) { setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Custom); setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Custom); setLoadExtAction(ISD::EXTLOAD, MVT::i32, Custom); @@ -384,12 +390,13 @@ MipsTargetLowering::MipsTargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::OR); setTargetDAGCombine(ISD::ADD); - setMinFunctionAlignment(isGP64bit() ? 3 : 2); + setMinFunctionAlignment(Subtarget->isGP64bit() ? 3 : 2); - setStackPointerRegisterToSaveRestore(isN64() ? Mips::SP_64 : Mips::SP); + setStackPointerRegisterToSaveRestore(Subtarget->isABI_N64() ? Mips::SP_64 + : Mips::SP); - setExceptionPointerRegister(isN64() ? Mips::A0_64 : Mips::A0); - setExceptionSelectorRegister(isN64() ? Mips::A1_64 : Mips::A1); + setExceptionPointerRegister(Subtarget->isABI_N64() ? Mips::A0_64 : Mips::A0); + setExceptionSelectorRegister(Subtarget->isABI_N64() ? Mips::A1_64 : Mips::A1); MaxStoresPerMemcpy = 16; @@ -815,10 +822,10 @@ addLiveIn(MachineFunction &MF, unsigned PReg, const TargetRegisterClass *RC) return VReg; } -static MachineBasicBlock *expandPseudoDIV(MachineInstr *MI, - MachineBasicBlock &MBB, - const TargetInstrInfo &TII, - bool Is64Bit) { +static MachineBasicBlock *insertDivByZeroTrap(MachineInstr *MI, + MachineBasicBlock &MBB, + const TargetInstrInfo &TII, + bool Is64Bit) { if (NoZeroDivCheck) return &MBB; @@ -836,6 +843,10 @@ static MachineBasicBlock *expandPseudoDIV(MachineInstr *MI, // Clear Divisor's kill flag.
Divisor.setIsKill(false); + + // We would normally delete the original instruction here but in this case + // we only needed to inject an additional instruction rather than replace it. + return &MBB; } @@ -918,10 +929,22 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return emitAtomicCmpSwap(MI, BB, 8); case Mips::PseudoSDIV: case Mips::PseudoUDIV: - return expandPseudoDIV(MI, *BB, *getTargetMachine().getInstrInfo(), false); + case Mips::DIV: + case Mips::DIVU: + case Mips::MOD: + case Mips::MODU: + return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(), + false); case Mips::PseudoDSDIV: case Mips::PseudoDUDIV: - return expandPseudoDIV(MI, *BB, *getTargetMachine().getInstrInfo(), true); + case Mips::DDIV: + case Mips::DDIVU: + case Mips::DMOD: + case Mips::DMODU: + return insertDivByZeroTrap(MI, *BB, *getTargetMachine().getInstrInfo(), + true); + case Mips::SEL_D: + return emitSEL_D(MI, BB); } } @@ -941,16 +964,20 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned LL, SC, AND, NOR, ZERO, BEQ; if (Size == 4) { - LL = isMicroMips ? Mips::LL_MM : Mips::LL; - SC = isMicroMips ? Mips::SC_MM : Mips::SC; + if (isMicroMips) { + LL = Mips::LL_MM; + SC = Mips::SC_MM; + } else { + LL = Subtarget->hasMips32r6() ? Mips::LL_R6 : Mips::LL; + SC = Subtarget->hasMips32r6() ? Mips::SC_R6 : Mips::SC; + } AND = Mips::AND; NOR = Mips::NOR; ZERO = Mips::ZERO; BEQ = Mips::BEQ; - } - else { - LL = Mips::LLD; - SC = Mips::SCD; + } else { + LL = Subtarget->hasMips64r6() ? Mips::LLD_R6 : Mips::LLD; + SC = Subtarget->hasMips64r6() ? Mips::SCD_R6 : Mips::SCD; AND = Mips::AND64; NOR = Mips::NOR64; ZERO = Mips::ZERO_64; @@ -1012,11 +1039,39 @@ MipsTargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, return exitMBB; } -MachineBasicBlock * -MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Size, unsigned BinOpcode, - bool Nand) const { +MachineBasicBlock *MipsTargetLowering::emitSignExtendToI32InReg( + MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned DstReg, + unsigned SrcReg) const { + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); + + if (Subtarget->hasMips32r2() && Size == 1) { + BuildMI(BB, DL, TII->get(Mips::SEB), DstReg).addReg(SrcReg); + return BB; + } + + if (Subtarget->hasMips32r2() && Size == 2) { + BuildMI(BB, DL, TII->get(Mips::SEH), DstReg).addReg(SrcReg); + return BB; + } + + MachineFunction *MF = BB->getParent(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + const TargetRegisterClass *RC = getRegClassFor(MVT::i32); + unsigned ScrReg = RegInfo.createVirtualRegister(RC); + + assert(Size < 32); + int64_t ShiftImm = 32 - (Size * 8); + + BuildMI(BB, DL, TII->get(Mips::SLL), ScrReg).addReg(SrcReg).addImm(ShiftImm); + BuildMI(BB, DL, TII->get(Mips::SRA), DstReg).addReg(ScrReg).addImm(ShiftImm); + + return BB; +} + +MachineBasicBlock *MipsTargetLowering::emitAtomicBinaryPartword( + MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, + bool Nand) const { assert((Size == 1 || Size == 2) && "Unsupported size for EmitAtomicBinaryPartial."); @@ -1046,7 +1101,6 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI, unsigned StoreVal = RegInfo.createVirtualRegister(RC); unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); unsigned SrlRes = RegInfo.createVirtualRegister(RC); - unsigned SllRes = RegInfo.createVirtualRegister(RC); unsigned Success =
RegInfo.createVirtualRegister(RC); // insert new blocks after the current block @@ -1152,19 +1206,14 @@ MipsTargetLowering::emitAtomicBinaryPartword(MachineInstr *MI, // sinkMBB: // and maskedoldval1,oldval,mask // srl srlres,maskedoldval1,shiftamt - // sll sllres,srlres,24 - // sra dest,sllres,24 + // sign_extend dest,srlres BB = sinkMBB; - int64_t ShiftImm = (Size == 1) ? 24 : 16; BuildMI(BB, DL, TII->get(Mips::AND), MaskedOldVal1) .addReg(OldVal).addReg(Mask); BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) .addReg(MaskedOldVal1).addReg(ShiftAmt); - BuildMI(BB, DL, TII->get(Mips::SLL), SllRes) - .addReg(SrlRes).addImm(ShiftImm); - BuildMI(BB, DL, TII->get(Mips::SRA), Dest) - .addReg(SllRes).addImm(ShiftImm); + BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes); MI->eraseFromParent(); // The instruction is gone now. @@ -1285,7 +1334,6 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI, unsigned MaskedOldVal1 = RegInfo.createVirtualRegister(RC); unsigned StoreVal = RegInfo.createVirtualRegister(RC); unsigned SrlRes = RegInfo.createVirtualRegister(RC); - unsigned SllRes = RegInfo.createVirtualRegister(RC); unsigned Success = RegInfo.createVirtualRegister(RC); // insert new blocks after the current block @@ -1382,23 +1430,44 @@ MipsTargetLowering::emitAtomicCmpSwapPartword(MachineInstr *MI, // sinkMBB: // srl srlres,maskedoldval0,shiftamt - // sll sllres,srlres,24 - // sra dest,sllres,24 + // sign_extend dest,srlres BB = sinkMBB; - int64_t ShiftImm = (Size == 1) ? 24 : 16; BuildMI(BB, DL, TII->get(Mips::SRLV), SrlRes) .addReg(MaskedOldVal0).addReg(ShiftAmt); - BuildMI(BB, DL, TII->get(Mips::SLL), SllRes) - .addReg(SrlRes).addImm(ShiftImm); - BuildMI(BB, DL, TII->get(Mips::SRA), Dest) - .addReg(SllRes).addImm(ShiftImm); + BB = emitSignExtendToI32InReg(MI, BB, Size, Dest, SrlRes); MI->eraseFromParent(); // The instruction is gone now. return exitMBB; } +MachineBasicBlock *MipsTargetLowering::emitSEL_D(MachineInstr *MI, + MachineBasicBlock *BB) const { + MachineFunction *MF = BB->getParent(); + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineRegisterInfo &RegInfo = MF->getRegInfo(); + DebugLoc DL = MI->getDebugLoc(); + MachineBasicBlock::iterator II(MI); + + unsigned Fc = MI->getOperand(1).getReg(); + const auto &FGR64RegClass = TRI->getRegClass(Mips::FGR64RegClassID); + + unsigned Fc2 = RegInfo.createVirtualRegister(FGR64RegClass); + + BuildMI(*BB, II, DL, TII->get(Mips::SUBREG_TO_REG), Fc2) + .addImm(0) + .addReg(Fc) + .addImm(Mips::sub_lo); + + // We don't erase the original instruction, we just replace the condition + // register with the 64-bit super-register. + MI->getOperand(1).setReg(Fc2); + + return BB; +} + //===----------------------------------------------------------------------===// // Misc Lower Operation implementation //===----------------------------------------------------------------------===// @@ -1421,7 +1490,8 @@ SDValue MipsTargetLowering::lowerBR_JT(SDValue Op, SelectionDAG &DAG) const { 0); Chain = Addr.getValue(1); - if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || isN64()) { + if ((getTargetMachine().getRelocationModel() == Reloc::PIC_) || + Subtarget->isABI_N64()) { // For PIC, the sequence is: // BRIND(load(Jumptable + index) + RelocBase) // RelocBase can be JumpTable, GOT or some sort of global base. 
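The emitSignExtendToI32InReg helper used in the rewritten sinkMBB blocks falls back to the sll/sra pair the old code emitted inline whenever the subtarget lacks MIPS32r2's seb/seh. As a minimal standalone sketch (plain C++, not LLVM API) of the value that shift pair computes:

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low Size*8 bits of Val to a full i32; this is exactly
    // what "sll $t, $v, 32-8*Size" followed by "sra $d, $t, 32-8*Size" does.
    // (The right shift of a negative value is arithmetic on MIPS and on
    // mainstream C++ implementations, so the sketch matches the hardware.)
    int32_t signExtendToI32(uint32_t Val, unsigned Size) {
      assert(Size == 1 || Size == 2);
      const unsigned ShiftImm = 32 - Size * 8; // 24 for bytes, 16 for halfwords
      return static_cast<int32_t>(Val << ShiftImm) >> ShiftImm;
    }

For example, signExtendToI32(0xFF, 1) yields -1, the same result seb produces directly on MIPS32r2.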
@@ -1439,6 +1509,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(2); SDLoc DL(Op); + assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6()); SDValue CondRes = createFPCmp(DAG, Op.getOperand(1)); // Return if flag is not set by a floating point comparison. @@ -1458,6 +1529,7 @@ SDValue MipsTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue MipsTargetLowering:: lowerSELECT(SDValue Op, SelectionDAG &DAG) const { + assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6()); SDValue Cond = createFPCmp(DAG, Op.getOperand(0)); // Return if flag is not set by a floating point comparison. @@ -1483,6 +1555,7 @@ lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const } SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const { + assert(!Subtarget->hasMips32r6() && !Subtarget->hasMips64r6()); SDValue Cond = createFPCmp(DAG, Op); assert(Cond.getOpcode() == MipsISD::FPCmp && @@ -1502,7 +1575,8 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op); const GlobalValue *GV = N->getGlobal(); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64()) { + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && + !Subtarget->isABI_N64()) { const MipsTargetObjectFile &TLOF = (const MipsTargetObjectFile&)getObjFileLowering(); @@ -1521,15 +1595,18 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op, } if (GV->hasInternalLinkage() || (GV->hasLocalLinkage() && !isa<Function>(GV))) - return getAddrLocal(N, Ty, DAG, isN32() || isN64()); + return getAddrLocal(N, Ty, DAG, + Subtarget->isABI_N32() || Subtarget->isABI_N64()); if (LargeGOT) return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16, MipsII::MO_GOT_LO16, DAG.getEntryNode(), MachinePointerInfo::getGOT()); - return getAddrGlobal(N, Ty, DAG, (isN32() || isN64()) ? MipsII::MO_GOT_DISP - : MipsII::MO_GOT16, + return getAddrGlobal(N, Ty, DAG, + (Subtarget->isABI_N32() || Subtarget->isABI_N64()) + ? 
MipsII::MO_GOT_DISP + : MipsII::MO_GOT16, DAG.getEntryNode(), MachinePointerInfo::getGOT()); } @@ -1538,10 +1615,12 @@ SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op, BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op); EVT Ty = Op.getValueType(); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64()) + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && + !Subtarget->isABI_N64()) return getAddrNonPIC(N, Ty, DAG); - return getAddrLocal(N, Ty, DAG, isN32() || isN64()); + return getAddrLocal(N, Ty, DAG, + Subtarget->isABI_N32() || Subtarget->isABI_N64()); } SDValue MipsTargetLowering:: @@ -1579,7 +1658,7 @@ lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(DL).setChain(DAG.getEntryNode()) - .setCallee(CallingConv::C, PtrTy, TlsGetAddr, &Args, 0); + .setCallee(CallingConv::C, PtrTy, TlsGetAddr, std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); SDValue Ret = CallResult.first; @@ -1629,10 +1708,12 @@ lowerJumpTable(SDValue Op, SelectionDAG &DAG) const JumpTableSDNode *N = cast<JumpTableSDNode>(Op); EVT Ty = Op.getValueType(); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64()) + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && + !Subtarget->isABI_N64()) return getAddrNonPIC(N, Ty, DAG); - return getAddrLocal(N, Ty, DAG, isN32() || isN64()); + return getAddrLocal(N, Ty, DAG, + Subtarget->isABI_N32() || Subtarget->isABI_N64()); } SDValue MipsTargetLowering:: @@ -1650,10 +1731,12 @@ lowerConstantPool(SDValue Op, SelectionDAG &DAG) const ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op); EVT Ty = Op.getValueType(); - if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && !isN64()) + if (getTargetMachine().getRelocationModel() != Reloc::PIC_ && + !Subtarget->isABI_N64()) return getAddrNonPIC(N, Ty, DAG); - return getAddrLocal(N, Ty, DAG, isN32() || isN64()); + return getAddrLocal(N, Ty, DAG, + Subtarget->isABI_N32() || Subtarget->isABI_N64()); } SDValue MipsTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const { @@ -1784,8 +1867,9 @@ lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); SDLoc DL(Op); - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, - isN64() ? Mips::FP_64 : Mips::FP, VT); + SDValue FrameAddr = + DAG.getCopyFromReg(DAG.getEntryNode(), DL, + Subtarget->isABI_N64() ? Mips::FP_64 : Mips::FP, VT); return FrameAddr; } @@ -1801,7 +1885,7 @@ SDValue MipsTargetLowering::lowerRETURNADDR(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); MVT VT = Op.getSimpleValueType(); - unsigned RA = isN64() ? Mips::RA_64 : Mips::RA; + unsigned RA = Subtarget->isABI_N64() ? Mips::RA_64 : Mips::RA; MFI->setReturnAddressIsTaken(true); // Return RA, which contains the return address. Mark it an implicit live-in. @@ -1823,12 +1907,12 @@ SDValue MipsTargetLowering::lowerEH_RETURN(SDValue Op, SelectionDAG &DAG) SDValue Offset = Op.getOperand(1); SDValue Handler = Op.getOperand(2); SDLoc DL(Op); - EVT Ty = isN64() ? MVT::i64 : MVT::i32; + EVT Ty = Subtarget->isABI_N64() ? MVT::i64 : MVT::i32; // Store stack offset in V1, store jump target in V0. Glue CopyToReg and // EH_RETURN nodes, so that instructions are emitted back-to-back. - unsigned OffsetReg = isN64() ? Mips::V1_64 : Mips::V1; - unsigned AddrReg = isN64() ? Mips::V0_64 : Mips::V0; + unsigned OffsetReg = Subtarget->isABI_N64() ? 
Mips::V1_64 : Mips::V1; + unsigned AddrReg = Subtarget->isABI_N64() ? Mips::V0_64 : Mips::V0; Chain = DAG.getCopyToReg(Chain, DL, OffsetReg, Offset, SDValue()); Chain = DAG.getCopyToReg(Chain, DL, AddrReg, Handler, Chain.getValue(1)); return DAG.getNode(MipsISD::EH_RETURN, DL, MVT::Other, Chain, @@ -2256,8 +2340,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops, // in PIC mode) allow symbols to be resolved via lazy binding. // The lazy binding stub requires GP to point to the GOT. if (IsPICCall && !InternalLinkage) { - unsigned GPReg = isN64() ? Mips::GP_64 : Mips::GP; - EVT Ty = isN64() ? MVT::i64 : MVT::i32; + unsigned GPReg = Subtarget->isABI_N64() ? Mips::GP_64 : Mips::GP; + EVT Ty = Subtarget->isABI_N64() ? MVT::i64 : MVT::i32; RegsToPass.push_back(std::make_pair(GPReg, getGlobalReg(CLI.DAG, Ty))); } @@ -2326,8 +2410,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, getTargetMachine(), ArgLocs, *DAG.getContext()); MipsCC::SpecialCallingConvType SpecialCallingConv = getSpecialCallingConv(Callee); - MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo, - SpecialCallingConv); + MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(), + CCInfo, SpecialCallingConv); MipsCCInfo.analyzeCallOperands(Outs, IsVarArg, Subtarget->mipsSEUsesSoftFloat(), @@ -2360,7 +2444,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getCALLSEQ_START(Chain, NextStackOffsetVal, DL); SDValue StackPtr = DAG.getCopyFromReg( - Chain, DL, isN64() ? Mips::SP_64 : Mips::SP, getPointerTy()); + Chain, DL, Subtarget->isABI_N64() ? Mips::SP_64 : Mips::SP, + getPointerTy()); // With EABI it is possible to have 16 args on registers. std::deque< std::pair<unsigned, SDValue> > RegsToPass; @@ -2446,8 +2531,9 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it.
- bool IsPICCall = (isN64() || IsPIC); // true if calls are translated to - // jalr $25 + bool IsPICCall = + (Subtarget->isABI_N64() || IsPIC); // true if calls are translated to + // jalr $25 bool GlobalOrExternal = false, InternalLinkage = false; SDValue CalleeLo; EVT Ty = Callee.getValueType(); @@ -2458,7 +2544,8 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InternalLinkage = Val->hasInternalLinkage(); if (InternalLinkage) - Callee = getAddrLocal(G, Ty, DAG, isN32() || isN64()); + Callee = getAddrLocal(G, Ty, DAG, + Subtarget->isABI_N32() || Subtarget->isABI_N64()); else if (LargeGOT) Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16, MipsII::MO_CALL_LO16, Chain, @@ -2474,7 +2561,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) { const char *Sym = S->getSymbol(); - if (!isN64() && !IsPIC) // !N64 && static + if (!Subtarget->isABI_N64() && !IsPIC) // !N64 && static Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(), MipsII::MO_NO_FLAG); else if (LargeGOT) @@ -2525,7 +2612,8 @@ MipsTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), getTargetMachine(), RVLocs, *DAG.getContext()); - MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo); + MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(), + CCInfo); MipsCCInfo.analyzeCallResult(Ins, Subtarget->mipsSEUsesSoftFloat(), CallNode, RetTy); @@ -2572,7 +2660,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo); + MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(), + CCInfo); Function::const_arg_iterator FuncArg = DAG.getMachineFunction().getFunction()->arg_begin(); bool UseSoftFloat = Subtarget->mipsSEUsesSoftFloat(); @@ -2634,7 +2723,8 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, (RegVT == MVT::i64 && ValVT == MVT::f64) || (RegVT == MVT::f64 && ValVT == MVT::i64)) ArgValue = DAG.getNode(ISD::BITCAST, DL, ValVT, ArgValue); - else if (isO32() && RegVT == MVT::i32 && ValVT == MVT::f64) { + else if (Subtarget->isABI_O32() && RegVT == MVT::i32 && + ValVT == MVT::f64) { unsigned Reg2 = addLiveIn(DAG.getMachineFunction(), getNextIntArgReg(ArgReg), RC); SDValue ArgValue2 = DAG.getCopyFromReg(Chain, DL, Reg2, RegVT); @@ -2672,7 +2762,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, unsigned Reg = MipsFI->getSRetReturnReg(); if (!Reg) { Reg = MF.getRegInfo().createVirtualRegister( - getRegClassFor(isN64() ? MVT::i64 : MVT::i32)); + getRegClassFor(Subtarget->isABI_N64() ? MVT::i64 : MVT::i32)); MipsFI->setSRetReturnReg(Reg); } SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[i]); @@ -2723,7 +2813,8 @@ MipsTargetLowering::LowerReturn(SDValue Chain, // CCState - Info about the registers and stack slot. CCState CCInfo(CallConv, IsVarArg, MF, getTargetMachine(), RVLocs, *DAG.getContext()); - MipsCC MipsCCInfo(CallConv, isO32(), Subtarget->isFP64bit(), CCInfo); + MipsCC MipsCCInfo(CallConv, Subtarget->isABI_O32(), Subtarget->isFP64bit(), + CCInfo); // Analyze return values. 
MipsCCInfo.analyzeReturn(Outs, Subtarget->mipsSEUsesSoftFloat(), @@ -2759,7 +2850,7 @@ MipsTargetLowering::LowerReturn(SDValue Chain, if (!Reg) llvm_unreachable("sret virtual register not created in the entry block"); SDValue Val = DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); - unsigned V0 = isN64() ? Mips::V0_64 : Mips::V0; + unsigned V0 = Subtarget->isABI_N64() ? Mips::V0_64 : Mips::V0; Chain = DAG.getCopyToReg(Chain, DL, V0, Val, Flag); Flag = Chain.getValue(1); @@ -2980,9 +3071,9 @@ getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const return std::make_pair(0U, &Mips::CPU16RegsRegClass); return std::make_pair(0U, &Mips::GPR32RegClass); } - if (VT == MVT::i64 && !isGP64bit()) + if (VT == MVT::i64 && !Subtarget->isGP64bit()) return std::make_pair(0U, &Mips::GPR32RegClass); - if (VT == MVT::i64 && isGP64bit()) + if (VT == MVT::i64 && Subtarget->isGP64bit()) return std::make_pair(0U, &Mips::GPR64RegClass); // This will generate an error message return std::make_pair(0U, nullptr); @@ -3169,7 +3260,7 @@ bool MipsTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { } unsigned MipsTargetLowering::getJumpTableEncoding() const { - if (isN64()) + if (Subtarget->isABI_N64()) return MachineJumpTableInfo::EK_GPRel64BlockAddress; return TargetLowering::getJumpTableEncoding(); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 4ac33bf..4701bc4 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -17,7 +17,6 @@ #include "MCTargetDesc/MipsBaseInfo.h" #include "Mips.h" -#include "MipsSubtarget.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/Function.h" @@ -210,6 +209,7 @@ namespace llvm { // TargetLowering Implementation //===--------------------------------------------------------------------===// class MipsFunctionInfo; + class MipsSubtarget; class MipsTargetLowering : public TargetLowering { bool isMicroMips; @@ -438,12 +438,6 @@ namespace llvm { // Subtarget Info const MipsSubtarget *Subtarget; - bool hasMips64() const { return Subtarget->hasMips64(); } - bool isGP64bit() const { return Subtarget->isGP64bit(); } - bool isO32() const { return Subtarget->isABI_O32(); } - bool isN32() const { return Subtarget->isABI_N32(); } - bool isN64() const { return Subtarget->isABI_N64(); } - private: // Create a TargetGlobalAddress node. SDValue getTargetNode(GlobalAddressSDNode *N, EVT Ty, SelectionDAG &DAG, @@ -598,6 +592,12 @@ namespace llvm { unsigned getJumpTableEncoding() const override; + /// Emit a sign-extension using sll/sra, seb, or seh appropriately. + MachineBasicBlock *emitSignExtendToI32InReg(MachineInstr *MI, + MachineBasicBlock *BB, + unsigned Size, unsigned DstReg, + unsigned SrcReg) const; + MachineBasicBlock *emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand = false) const; MachineBasicBlock *emitAtomicBinaryPartword(MachineInstr *MI, @@ -607,6 +607,7 @@ namespace llvm { MachineBasicBlock *BB, unsigned Size) const; MachineBasicBlock *emitAtomicCmpSwapPartword(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const; + MachineBasicBlock *emitSEL_D(MachineInstr *MI, MachineBasicBlock *BB) const; }; /// Create MipsTargetLowering objects.
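With the isN64()/isO32() wrappers deleted from MipsISelLowering.h, every call site now queries the subtarget explicitly, and the same "pick the 64-bit super-register under N64" ternary recurs in lowerFRAMEADDR, lowerRETURNADDR, lowerEH_RETURN, and getOpndList. A hypothetical helper could factor out that pattern; this is a sketch only (pickABIReg is not part of the patch, though isABI_N64() and the Mips register names are real):

    // Select the 64-bit super-register when targeting the N64 ABI.
    static unsigned pickABIReg(const MipsSubtarget &ST, unsigned Reg32,
                               unsigned Reg64) {
      return ST.isABI_N64() ? Reg64 : Reg32;
    }

    // Usage, mirroring lowerRETURNADDR:
    //   unsigned RA = pickABIReg(*Subtarget, Mips::RA, Mips::RA_64);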
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 32cda3b..2260d53 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -57,13 +57,13 @@ let PrintMethod = "printFCCOperand", DecoderMethod = "DecodeCondCode" in // Feature predicates. //===----------------------------------------------------------------------===// -def IsFP64bit : Predicate<"Subtarget.isFP64bit()">, +def IsFP64bit : Predicate<"Subtarget->isFP64bit()">, AssemblerPredicate<"FeatureFP64Bit">; -def NotFP64bit : Predicate<"!Subtarget.isFP64bit()">, +def NotFP64bit : Predicate<"!Subtarget->isFP64bit()">, AssemblerPredicate<"!FeatureFP64Bit">; -def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">, +def IsSingleFloat : Predicate<"Subtarget->isSingleFloat()">, AssemblerPredicate<"FeatureSingleFloat">; -def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">, +def IsNotSingleFloat : Predicate<"!Subtarget->isSingleFloat()">, AssemblerPredicate<"!FeatureSingleFloat">; //===----------------------------------------------------------------------===// @@ -153,6 +153,15 @@ class MTC1_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC, InstSE<(outs DstRC:$fs), (ins SrcRC:$rt), !strconcat(opstr, "\t$rt, $fs"), [(set DstRC:$fs, (OpNode SrcRC:$rt))], Itin, FrmFR, opstr>; +class MTC1_64_FT<string opstr, RegisterOperand DstRC, RegisterOperand SrcRC, + InstrItinClass Itin> : + InstSE<(outs DstRC:$fs), (ins DstRC:$fs_in, SrcRC:$rt), + !strconcat(opstr, "\t$rt, $fs"), [], Itin, FrmFR, opstr> { + // $fs_in is part of a white lie to work around a widespread bug in the FPU + // implementation. See expandBuildPairF64 for details. + let Constraints = "$fs = $fs_in"; +} + class LW_FT<string opstr, RegisterOperand RC, InstrItinClass Itin, SDPatternOperator OpNode= null_frag> : InstSE<(outs RC:$rt), (ins mem:$addr), !strconcat(opstr, "\t$rt, $addr"), @@ -249,11 +258,11 @@ multiclass C_COND_M<string TypeStr, RegisterOperand RC, bits<5> fmt, def C_NGT_#NAME : C_COND_FT<"ngt", TypeStr, RC, itin>, C_COND_FM<fmt, 15>; } -defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>; -defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, +defm S : C_COND_M<"s", FGR32Opnd, 16, II_C_CC_S>, ISA_MIPS1_NOT_32R6_64R6; +defm D32 : C_COND_M<"d", AFGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, AdditionalRequires<[NotFP64bit]>; let DecoderNamespace = "Mips64" in -defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, +defm D64 : C_COND_M<"d", FGR64Opnd, 17, II_C_CC_D>, ISA_MIPS1_NOT_32R6_64R6, AdditionalRequires<[IsFP64bit]>; //===----------------------------------------------------------------------===// @@ -355,8 +364,12 @@ def MTC1 : MMRel, MTC1_FT<"mtc1", FGR32Opnd, GPR32Opnd, II_MTC1, bitconvert>, MFC1_FM<4>; def MFHC1 : MMRel, MFC1_FT<"mfhc1", GPR32Opnd, FGRH32Opnd, II_MFHC1>, MFC1_FM<3>, ISA_MIPS32R2; -def MTHC1 : MMRel, MTC1_FT<"mthc1", FGRH32Opnd, GPR32Opnd, II_MTHC1>, - MFC1_FM<7>, ISA_MIPS32R2; +def MTHC1_D32 : MMRel, MTC1_64_FT<"mthc1", FGR64Opnd, GPR32Opnd, II_MTHC1>, + MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[NotFP64bit]>; +def MTHC1_D64 : MTC1_64_FT<"mthc1", AFGR64Opnd, GPR32Opnd, II_MTHC1>, + MFC1_FM<7>, ISA_MIPS32R2, AdditionalRequires<[IsFP64bit]> { + let DecoderNamespace = "Mips64"; +} def DMFC1 : MFC1_FT<"dmfc1", GPR64Opnd, FGR64Opnd, II_DMFC1, bitconvert>, MFC1_FM<1>, ISA_MIPS3; def DMTC1 : MTC1_FT<"dmtc1", FGR64Opnd, GPR64Opnd, II_DMTC1, @@ -390,56 +403,64 @@ def SDC1 : MMRel, SW_FT<"sdc1", AFGR64Opnd, II_SDC1, store>, LW_FM<0x3d>, // Cop2 Memory Instructions // FIXME: 
These aren't really FPU instructions and as such don't belong in this // file -def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>; -def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>; -def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>, ISA_MIPS2; -def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>, ISA_MIPS2; +def LWC2 : LW_FT<"lwc2", COP2Opnd, NoItinerary, load>, LW_FM<0x32>, + ISA_MIPS1_NOT_32R6_64R6; +def SWC2 : SW_FT<"swc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3a>, + ISA_MIPS1_NOT_32R6_64R6; +def LDC2 : LW_FT<"ldc2", COP2Opnd, NoItinerary, load>, LW_FM<0x36>, + ISA_MIPS2_NOT_32R6_64R6; +def SDC2 : SW_FT<"sdc2", COP2Opnd, NoItinerary, store>, LW_FM<0x3e>, + ISA_MIPS2_NOT_32R6_64R6; // Cop3 Memory Instructions // FIXME: These aren't really FPU instructions and as such don't belong in this // file -def LWC3 : LW_FT<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>; -def SWC3 : SW_FT<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>; -def LDC3 : LW_FT<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>, ISA_MIPS2; -def SDC3 : SW_FT<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>, ISA_MIPS2; +let DecoderNamespace = "COP3_" in { + def LWC3 : LW_FT<"lwc3", COP3Opnd, NoItinerary, load>, LW_FM<0x33>; + def SWC3 : SW_FT<"swc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3b>; + def LDC3 : LW_FT<"ldc3", COP3Opnd, NoItinerary, load>, LW_FM<0x37>, + ISA_MIPS2; + def SDC3 : SW_FT<"sdc3", COP3Opnd, NoItinerary, store>, LW_FM<0x3f>, + ISA_MIPS2; +} // Indexed loads and stores. // Base register + offset register addressing mode (indicated by "x" in the // instruction mnemonic) is disallowed under NaCl. let AdditionalPredicates = [IsNotNaCl] in { def LWXC1 : MMRel, LWXC1_FT<"lwxc1", FGR32Opnd, II_LWXC1, load>, LWXC1_FM<0>, - INSN_MIPS4_32R2; + INSN_MIPS4_32R2_NOT_32R6_64R6; def SWXC1 : MMRel, SWXC1_FT<"swxc1", FGR32Opnd, II_SWXC1, store>, SWXC1_FM<8>, - INSN_MIPS4_32R2; + INSN_MIPS4_32R2_NOT_32R6_64R6; } let AdditionalPredicates = [NotInMicroMips, IsNotNaCl] in { def LDXC1 : LWXC1_FT<"ldxc1", AFGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>, - INSN_MIPS4_32R2, FGR_32; + INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; def SDXC1 : SWXC1_FT<"sdxc1", AFGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>, - INSN_MIPS4_32R2, FGR_32; + INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_32; } let DecoderNamespace="Mips64" in { def LDXC164 : LWXC1_FT<"ldxc1", FGR64Opnd, II_LDXC1, load>, LWXC1_FM<1>, - INSN_MIPS4_32R2, FGR_64; + INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; def SDXC164 : SWXC1_FT<"sdxc1", FGR64Opnd, II_SDXC1, store>, SWXC1_FM<9>, - INSN_MIPS4_32R2, FGR_64; + INSN_MIPS4_32R2_NOT_32R6_64R6, FGR_64; } // Load/store doubleword indexed unaligned. 
let AdditionalPredicates = [IsNotNaCl] in { def LUXC1 : MMRel, LWXC1_FT<"luxc1", AFGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>, - INSN_MIPS5_32R2, FGR_32; + INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_32; def SUXC1 : MMRel, SWXC1_FT<"suxc1", AFGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>, - INSN_MIPS5_32R2, FGR_32; + INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_32; } let DecoderNamespace="Mips64" in { def LUXC164 : LWXC1_FT<"luxc1", FGR64Opnd, II_LUXC1>, LWXC1_FM<0x5>, - INSN_MIPS5_32R2, FGR_64; + INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_64; def SUXC164 : SWXC1_FT<"suxc1", FGR64Opnd, II_SUXC1>, SWXC1_FM<0xd>, - INSN_MIPS5_32R2, FGR_64; + INSN_MIPS5_32R2_NOT_32R6_64R6, FGR_64; } /// Floating-point Arithmetic @@ -457,42 +478,42 @@ def FSUB_S : MMRel, ADDS_FT<"sub.s", FGR32Opnd, II_SUB_S, 0, fsub>, defm FSUB : ADDS_M<"sub.d", II_SUB_D, 0, fsub>, ADDS_FM<0x01, 17>; def MADD_S : MMRel, MADDS_FT<"madd.s", FGR32Opnd, II_MADD_S, fadd>, - MADDS_FM<4, 0>, ISA_MIPS32R2; + MADDS_FM<4, 0>, ISA_MIPS32R2_NOT_32R6_64R6; def MSUB_S : MMRel, MADDS_FT<"msub.s", FGR32Opnd, II_MSUB_S, fsub>, - MADDS_FM<5, 0>, ISA_MIPS32R2; + MADDS_FM<5, 0>, ISA_MIPS32R2_NOT_32R6_64R6; let AdditionalPredicates = [NoNaNsFPMath] in { def NMADD_S : MMRel, NMADDS_FT<"nmadd.s", FGR32Opnd, II_NMADD_S, fadd>, - MADDS_FM<6, 0>, ISA_MIPS32R2; + MADDS_FM<6, 0>, ISA_MIPS32R2_NOT_32R6_64R6; def NMSUB_S : MMRel, NMADDS_FT<"nmsub.s", FGR32Opnd, II_NMSUB_S, fsub>, - MADDS_FM<7, 0>, ISA_MIPS32R2; + MADDS_FM<7, 0>, ISA_MIPS32R2_NOT_32R6_64R6; } def MADD_D32 : MMRel, MADDS_FT<"madd.d", AFGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, ISA_MIPS32R2, FGR_32; + MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32; def MSUB_D32 : MMRel, MADDS_FT<"msub.d", AFGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>, ISA_MIPS32R2, FGR_32; + MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32; let AdditionalPredicates = [NoNaNsFPMath] in { def NMADD_D32 : MMRel, NMADDS_FT<"nmadd.d", AFGR64Opnd, II_NMADD_D, fadd>, - MADDS_FM<6, 1>, ISA_MIPS32R2, FGR_32; + MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32; def NMSUB_D32 : MMRel, NMADDS_FT<"nmsub.d", AFGR64Opnd, II_NMSUB_D, fsub>, - MADDS_FM<7, 1>, ISA_MIPS32R2, FGR_32; + MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_32; } let isCodeGenOnly=1 in { def MADD_D64 : MADDS_FT<"madd.d", FGR64Opnd, II_MADD_D, fadd>, - MADDS_FM<4, 1>, ISA_MIPS32R2, FGR_64; + MADDS_FM<4, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64; def MSUB_D64 : MADDS_FT<"msub.d", FGR64Opnd, II_MSUB_D, fsub>, - MADDS_FM<5, 1>, ISA_MIPS32R2, FGR_64; + MADDS_FM<5, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64; } let AdditionalPredicates = [NoNaNsFPMath], isCodeGenOnly=1 in { def NMADD_D64 : NMADDS_FT<"nmadd.d", FGR64Opnd, II_NMADD_D, fadd>, - MADDS_FM<6, 1>, ISA_MIPS32R2, FGR_64; + MADDS_FM<6, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64; def NMSUB_D64 : NMADDS_FT<"nmsub.d", FGR64Opnd, II_NMSUB_D, fsub>, - MADDS_FM<7, 1>, ISA_MIPS32R2, FGR_64; + MADDS_FM<7, 1>, ISA_MIPS32R2_NOT_32R6_64R6, FGR_64; } //===----------------------------------------------------------------------===// @@ -504,9 +525,9 @@ def MIPS_BRANCH_F : PatLeaf<(i32 0)>; def MIPS_BRANCH_T : PatLeaf<(i32 1)>; def BC1F : MMRel, BC1F_FT<"bc1f", brtarget, IIBranch, MIPS_BRANCH_F>, - BC1F_FM<0, 0>; + BC1F_FM<0, 0>, ISA_MIPS1_NOT_32R6_64R6; def BC1T : MMRel, BC1F_FT<"bc1t", brtarget, IIBranch, MIPS_BRANCH_T>, - BC1F_FM<0, 1>; + BC1F_FM<0, 1>, ISA_MIPS1_NOT_32R6_64R6; //===----------------------------------------------------------------------===// // Floating Point Flag Conditions @@ -531,12 +552,13 @@ def MIPS_FCOND_LE : PatLeaf<(i32 14)>; def MIPS_FCOND_NGT :
PatLeaf<(i32 15)>; /// Floating Point Compare -def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>; +def FCMP_S32 : MMRel, CEQS_FT<"s", FGR32, II_C_CC_S, MipsFPCmp>, CEQS_FM<16>, + ISA_MIPS1_NOT_32R6_64R6; def FCMP_D32 : MMRel, CEQS_FT<"d", AFGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, - AdditionalRequires<[NotFP64bit]>; + ISA_MIPS1_NOT_32R6_64R6, AdditionalRequires<[NotFP64bit]>; let DecoderNamespace = "Mips64" in def FCMP_D64 : CEQS_FT<"d", FGR64, II_C_CC_D, MipsFPCmp>, CEQS_FM<17>, - AdditionalRequires<[IsFP64bit]>; + ISA_MIPS1_NOT_32R6_64R6, AdditionalRequires<[IsFP64bit]>; //===----------------------------------------------------------------------===// // Floating Point Pseudo-Instructions @@ -569,8 +591,10 @@ def ExtractElementF64_64 : ExtractElementF64Base<FGR64Opnd>, //===----------------------------------------------------------------------===// // InstAliases. //===----------------------------------------------------------------------===// -def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>; -def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>; +def : MipsInstAlias<"bc1t $offset", (BC1T FCC0, brtarget:$offset)>, + ISA_MIPS1_NOT_32R6_64R6; +def : MipsInstAlias<"bc1f $offset", (BC1F FCC0, brtarget:$offset)>, + ISA_MIPS1_NOT_32R6_64R6; //===----------------------------------------------------------------------===// // Floating Point Patterns diff --git a/lib/Target/Mips/MipsInstrFormats.td b/lib/Target/Mips/MipsInstrFormats.td index 0377eab..6a01ae5 100644 --- a/lib/Target/Mips/MipsInstrFormats.td +++ b/lib/Target/Mips/MipsInstrFormats.td @@ -844,6 +844,44 @@ class BARRIER_FM<bits<5> op> : StdArch { let Inst{5-0} = 0; // SLL } +class SDBBP_FM : StdArch { + bits<20> code_; + + bits<32> Inst; + + let Inst{31-26} = 0b011100; // SPECIAL2 + let Inst{25-6} = code_; + let Inst{5-0} = 0b111111; // SDBBP +} + +class JR_HB_FM<bits<6> op> : StdArch{ + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = 0; // SPECIAL + let Inst{25-21} = rs; + let Inst{20-11} = 0; + let Inst{10} = 1; + let Inst{9-6} = 0; + let Inst{5-0} = op; +} + +class JALR_HB_FM<bits<6> op> : StdArch { + bits<5> rd; + bits<5> rs; + + bits<32> Inst; + + let Inst{31-26} = 0; // SPECIAL + let Inst{25-21} = rs; + let Inst{20-16} = 0; + let Inst{15-11} = rd; + let Inst{10} = 1; + let Inst{9-6} = 0; + let Inst{5-0} = op; +} + class COP0_TLB_FM<bits<6> op> : StdArch { bits<32> Inst; @@ -852,3 +890,17 @@ class COP0_TLB_FM<bits<6> op> : StdArch { let Inst{24-6} = 0; let Inst{5-0} = op; // Operation } + +class CACHEOP_FM<bits<6> op> : StdArch { + bits<21> addr; + bits<5> hint; + bits<5> base = addr{20-16}; + bits<16> offset = addr{15-0}; + + bits<32> Inst; + + let Inst{31-26} = op; + let Inst{25-21} = base; + let Inst{20-16} = hint; + let Inst{15-0} = offset; +} diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 0d3cb75..8e9472c 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -146,61 +146,61 @@ def MipsSDR : SDNode<"MipsISD::SDR", SDTStore, //===----------------------------------------------------------------------===// // Mips Instruction Predicate Definitions. 
//===----------------------------------------------------------------------===// -def HasMips2 : Predicate<"Subtarget.hasMips2()">, +def HasMips2 : Predicate<"Subtarget->hasMips2()">, AssemblerPredicate<"FeatureMips2">; -def HasMips3_32 : Predicate<"Subtarget.hasMips3_32()">, +def HasMips3_32 : Predicate<"Subtarget->hasMips3_32()">, AssemblerPredicate<"FeatureMips3_32">; -def HasMips3_32r2 : Predicate<"Subtarget.hasMips3_32r2()">, +def HasMips3_32r2 : Predicate<"Subtarget->hasMips3_32r2()">, AssemblerPredicate<"FeatureMips3_32r2">; -def HasMips3 : Predicate<"Subtarget.hasMips3()">, +def HasMips3 : Predicate<"Subtarget->hasMips3()">, AssemblerPredicate<"FeatureMips3">; -def HasMips4_32 : Predicate<"Subtarget.hasMips4_32()">, +def HasMips4_32 : Predicate<"Subtarget->hasMips4_32()">, AssemblerPredicate<"FeatureMips4_32">; -def HasMips4_32r2 : Predicate<"Subtarget.hasMips4_32r2()">, +def HasMips4_32r2 : Predicate<"Subtarget->hasMips4_32r2()">, AssemblerPredicate<"FeatureMips4_32r2">; -def HasMips5_32r2 : Predicate<"Subtarget.hasMips5_32r2()">, +def HasMips5_32r2 : Predicate<"Subtarget->hasMips5_32r2()">, AssemblerPredicate<"FeatureMips5_32r2">; -def HasMips32 : Predicate<"Subtarget.hasMips32()">, +def HasMips32 : Predicate<"Subtarget->hasMips32()">, AssemblerPredicate<"FeatureMips32">; -def HasMips32r2 : Predicate<"Subtarget.hasMips32r2()">, +def HasMips32r2 : Predicate<"Subtarget->hasMips32r2()">, AssemblerPredicate<"FeatureMips32r2">; -def HasMips32r6 : Predicate<"Subtarget.hasMips32r6()">, +def HasMips32r6 : Predicate<"Subtarget->hasMips32r6()">, AssemblerPredicate<"FeatureMips32r6">; -def NotMips32r6 : Predicate<"!Subtarget.hasMips32r6()">, +def NotMips32r6 : Predicate<"!Subtarget->hasMips32r6()">, AssemblerPredicate<"!FeatureMips32r6">; -def IsGP64bit : Predicate<"Subtarget.isGP64bit()">, +def IsGP64bit : Predicate<"Subtarget->isGP64bit()">, AssemblerPredicate<"FeatureGP64Bit">; -def IsGP32bit : Predicate<"!Subtarget.isGP64bit()">, +def IsGP32bit : Predicate<"!Subtarget->isGP64bit()">, AssemblerPredicate<"!FeatureGP64Bit">; -def HasMips64 : Predicate<"Subtarget.hasMips64()">, +def HasMips64 : Predicate<"Subtarget->hasMips64()">, AssemblerPredicate<"FeatureMips64">; -def HasMips64r2 : Predicate<"Subtarget.hasMips64r2()">, +def HasMips64r2 : Predicate<"Subtarget->hasMips64r2()">, AssemblerPredicate<"FeatureMips64r2">; -def HasMips64r6 : Predicate<"Subtarget.hasMips64r6()">, +def HasMips64r6 : Predicate<"Subtarget->hasMips64r6()">, AssemblerPredicate<"FeatureMips64r6">; -def NotMips64r6 : Predicate<"!Subtarget.hasMips64r6()">, +def NotMips64r6 : Predicate<"!Subtarget->hasMips64r6()">, AssemblerPredicate<"!FeatureMips64r6">; -def IsN64 : Predicate<"Subtarget.isABI_N64()">, +def IsN64 : Predicate<"Subtarget->isABI_N64()">, AssemblerPredicate<"FeatureN64">; -def InMips16Mode : Predicate<"Subtarget.inMips16Mode()">, +def InMips16Mode : Predicate<"Subtarget->inMips16Mode()">, AssemblerPredicate<"FeatureMips16">; -def HasCnMips : Predicate<"Subtarget.hasCnMips()">, +def HasCnMips : Predicate<"Subtarget->hasCnMips()">, AssemblerPredicate<"FeatureCnMips">; def RelocStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">, AssemblerPredicate<"FeatureMips32">; def RelocPIC : Predicate<"TM.getRelocationModel() == Reloc::PIC_">, AssemblerPredicate<"FeatureMips32">; def NoNaNsFPMath : Predicate<"TM.Options.NoNaNsFPMath">; -def HasStdEnc : Predicate<"Subtarget.hasStandardEncoding()">, +def HasStdEnc : Predicate<"Subtarget->hasStandardEncoding()">, AssemblerPredicate<"!FeatureMips16">; -def NotDSP : 
Predicate<"!Subtarget.hasDSP()">; -def InMicroMips : Predicate<"Subtarget.inMicroMipsMode()">, +def NotDSP : Predicate<"!Subtarget->hasDSP()">; +def InMicroMips : Predicate<"Subtarget->inMicroMipsMode()">, AssemblerPredicate<"FeatureMicroMips">; -def NotInMicroMips : Predicate<"!Subtarget.inMicroMipsMode()">, +def NotInMicroMips : Predicate<"!Subtarget->inMicroMipsMode()">, AssemblerPredicate<"!FeatureMicroMips">; -def IsLE : Predicate<"Subtarget.isLittle()">; -def IsBE : Predicate<"!Subtarget.isLittle()">; -def IsNotNaCl : Predicate<"!Subtarget.isTargetNaCl()">; +def IsLE : Predicate<"Subtarget->isLittle()">; +def IsBE : Predicate<"!Subtarget->isLittle()">; +def IsNotNaCl : Predicate<"!Subtarget->isTargetNaCl()">; //===----------------------------------------------------------------------===// // Mips GPR size adjectives. @@ -232,8 +232,17 @@ class ISA_MIPS3_NOT_32R6_64R6 { list<Predicate> InsnPredicates = [HasMips3, NotMips32r6, NotMips64r6]; } class ISA_MIPS32 { list<Predicate> InsnPredicates = [HasMips32]; } +class ISA_MIPS32_NOT_32R6_64R6 { + list<Predicate> InsnPredicates = [HasMips32, NotMips32r6, NotMips64r6]; +} class ISA_MIPS32R2 { list<Predicate> InsnPredicates = [HasMips32r2]; } +class ISA_MIPS32R2_NOT_32R6_64R6 { + list<Predicate> InsnPredicates = [HasMips32r2, NotMips32r6, NotMips64r6]; +} class ISA_MIPS64 { list<Predicate> InsnPredicates = [HasMips64]; } +class ISA_MIPS64_NOT_64R6 { + list<Predicate> InsnPredicates = [HasMips64, NotMips64r6]; +} class ISA_MIPS64R2 { list<Predicate> InsnPredicates = [HasMips64r2]; } class ISA_MIPS32R6 { list<Predicate> InsnPredicates = [HasMips32r6]; } class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; } @@ -241,17 +250,32 @@ class ISA_MIPS64R6 { list<Predicate> InsnPredicates = [HasMips64r6]; } // The portions of MIPS-III that were also added to MIPS32 class INSN_MIPS3_32 { list<Predicate> InsnPredicates = [HasMips3_32]; } +// The portions of MIPS-III that were also added to MIPS32 but were removed in +// MIPS32r6 and MIPS64r6. +class INSN_MIPS3_32_NOT_32R6_64R6 { + list<Predicate> InsnPredicates = [HasMips3_32, NotMips32r6, NotMips64r6]; +} + // The portions of MIPS-III that were also added to MIPS32 class INSN_MIPS3_32R2 { list<Predicate> InsnPredicates = [HasMips3_32r2]; } -// The portions of MIPS-IV that were also added to MIPS32 -class INSN_MIPS4_32 { list<Predicate> InsnPredicates = [HasMips4_32]; } +// The portions of MIPS-IV that were also added to MIPS32 but were removed in +// MIPS32r6 and MIPS64r6. +class INSN_MIPS4_32_NOT_32R6_64R6 { + list<Predicate> InsnPredicates = [HasMips4_32, NotMips32r6, NotMips64r6]; +} -// The portions of MIPS-IV that were also added to MIPS32R2 -class INSN_MIPS4_32R2 { list<Predicate> InsnPredicates = [HasMips4_32r2]; } +// The portions of MIPS-IV that were also added to MIPS32r2 but were removed in +// MIPS32r6 and MIPS64r6. +class INSN_MIPS4_32R2_NOT_32R6_64R6 { + list<Predicate> InsnPredicates = [HasMips4_32r2, NotMips32r6, NotMips64r6]; +} -// The portions of MIPS-V that were also added to MIPS32R2 -class INSN_MIPS5_32R2 { list<Predicate> InsnPredicates = [HasMips5_32r2]; } +// The portions of MIPS-V that were also added to MIPS32r2 but were removed in +// MIPS32r6 and MIPS64r6. 
+class INSN_MIPS5_32R2_NOT_32R6_64R6 { + list<Predicate> InsnPredicates = [HasMips5_32r2, NotMips32r6, NotMips64r6]; +} //===----------------------------------------------------------------------===// @@ -328,7 +352,9 @@ def calltarget : Operand<iPTR> { let ParserMatchClass = MipsJumpTargetAsmOperand; } +def simm9 : Operand<i32>; def simm10 : Operand<i32>; +def simm11 : Operand<i32>; def simm16 : Operand<i32> { let DecoderMethod= "DecodeSimm16"; @@ -337,6 +363,13 @@ def simm16 : Operand<i32> { def simm19_lsl2 : Operand<i32> { let EncoderMethod = "getSimm19Lsl2Encoding"; let DecoderMethod = "DecodeSimm19Lsl2"; + let ParserMatchClass = MipsJumpTargetAsmOperand; +} + +def simm18_lsl3 : Operand<i32> { + let EncoderMethod = "getSimm18Lsl3Encoding"; + let DecoderMethod = "DecodeSimm18Lsl3"; + let ParserMatchClass = MipsJumpTargetAsmOperand; } def simm20 : Operand<i32> { @@ -386,6 +419,15 @@ def MipsMemAsmOperand : AsmOperandClass { let ParserMethod = "parseMemOperand"; } +def MipsMemSimm11AsmOperand : AsmOperandClass { + let Name = "MemOffsetSimm11"; + let SuperClasses = [MipsMemAsmOperand]; + let RenderMethod = "addMemOperands"; + let ParserMethod = "parseMemOperand"; + let PredicateMethod = "isMemWithSimmOffset<11>"; + //let DiagnosticType = "Simm11"; +} + def MipsInvertedImmoperand : AsmOperandClass { let Name = "InvNum"; let RenderMethod = "addImmOperands"; @@ -417,6 +459,17 @@ def mem_msa : mem_generic { let EncoderMethod = "getMSAMemEncoding"; } +def mem_simm9 : mem_generic { + let MIOperandInfo = (ops ptr_rc, simm9); + let EncoderMethod = "getMemEncoding"; +} + +def mem_simm11 : mem_generic { + let MIOperandInfo = (ops ptr_rc, simm11); + let EncoderMethod = "getMemEncoding"; + let ParserMatchClass = MipsMemSimm11AsmOperand; +} + def mem_ea : Operand<iPTR> { let PrintMethod = "printMemOperandEA"; let MIOperandInfo = (ops ptr_rc, simm16); @@ -690,20 +743,11 @@ class JumpFR<string opstr, RegisterOperand RO, FrmR, opstr>; // Indirect branch -class IndirectBranch<string opstr, RegisterOperand RO> : - JumpFR<opstr, RO, brind> { +class IndirectBranch<string opstr, RegisterOperand RO> : JumpFR<opstr, RO> { let isBranch = 1; let isIndirectBranch = 1; } -// Return instruction -class RetBase<string opstr, RegisterOperand RO>: JumpFR<opstr, RO> { - let isReturn = 1; - let isCodeGenOnly = 1; - let hasCtrlDep = 1; - let hasExtraSrcRegAllocReq = 1; -} - // Jump and Link (Call) let isCall=1, hasDelaySlot=1, Defs = [RA] in { class JumpLink<string opstr, DAGOperand opnd> : @@ -1042,7 +1086,7 @@ def SUBu : MMRel, ArithLogicR<"subu", GPR32Opnd, 0, II_SUBU, sub>, ADD_FM<0, 0x23>; let Defs = [HI0, LO0] in def MUL : MMRel, ArithLogicR<"mul", GPR32Opnd, 1, II_MUL, mul>, - ADD_FM<0x1c, 2>, ISA_MIPS32; + ADD_FM<0x1c, 2>, ISA_MIPS32_NOT_32R6_64R6; def ADD : MMRel, ArithLogicR<"add", GPR32Opnd>, ADD_FM<0, 0x20>; def SUB : MMRel, ArithLogicR<"sub", GPR32Opnd>, ADD_FM<0, 0x22>; def SLT : MMRel, SetCC_R<"slt", setlt, GPR32Opnd>, ADD_FM<0, 0x2a>; @@ -1103,7 +1147,7 @@ def SWR : StoreLeftRight<"swr", MipsSWR, GPR32Opnd, II_SWR>, LW_FM<0x2e>, ISA_MIPS1_NOT_32R6_64R6; } -def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM; +def SYNC : MMRel, SYNC_FT<"sync">, SYNC_FM, ISA_MIPS32; def TEQ : MMRel, TEQ_FT<"teq", GPR32Opnd>, TEQ_FM<0x34>; def TGE : MMRel, TEQ_FT<"tge", GPR32Opnd>, TEQ_FM<0x30>; def TGEU : MMRel, TEQ_FT<"tgeu", GPR32Opnd>, TEQ_FM<0x31>; @@ -1127,6 +1171,7 @@ def TNEI : MMRel, TEQI_FT<"tnei", GPR32Opnd>, TEQI_FM<0xe>, def BREAK : MMRel, BRK_FT<"break">, BRK_FM<0xd>; def SYSCALL : MMRel, SYS_FT<"syscall">, SYS_FM<0xc>; 
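The ISA_*_NOT_32R6_64R6 adjective classes defined above attach predicate lists such as [HasMips32, NotMips32r6, NotMips64r6] to instructions that MIPS32r6/MIPS64r6 removed, and TableGen lowers each listed Predicate to the Subtarget query named in its definition. Conceptually, a def tagged ISA_MIPS32_NOT_32R6_64R6 (e.g. MUL above) is available exactly when a check like this hand-written C++ sketch returns true (illustrative only, not generated code, though the accessors are the real MipsSubtarget methods referenced by the Predicate<...> definitions):

    // Equivalent of the [HasMips32, NotMips32r6, NotMips64r6] predicate list.
    static bool hasMips32ButNotR6(const MipsSubtarget &ST) {
      return ST.hasMips32() && !ST.hasMips32r6() && !ST.hasMips64r6();
    }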
def TRAP : TrapBase<BREAK>; +def SDBBP : SYS_FT<"sdbbp">, SDBBP_FM, ISA_MIPS32_NOT_32R6_64R6; def ERET : MMRel, ER_FT<"eret">, ER_FM<0x18>, INSN_MIPS3_32; def DERET : MMRel, ER_FT<"deret">, ER_FM<0x1f>, ISA_MIPS32; @@ -1139,8 +1184,8 @@ let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably def WAIT : WAIT_FT<"wait">, WAIT_FM; /// Load-linked, Store-conditional -def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>, ISA_MIPS2; -def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, ISA_MIPS2; +def LL : LLBase<"ll", GPR32Opnd>, LW_FM<0x30>, ISA_MIPS2_NOT_32R6_64R6; +def SC : SCBase<"sc", GPR32Opnd>, LW_FM<0x38>, ISA_MIPS2_NOT_32R6_64R6; } /// Jump and Branch Instructions @@ -1161,17 +1206,49 @@ def B : UncondBranch<BEQ>; def JAL : MMRel, JumpLink<"jal", calltarget>, FJ<3>; let AdditionalPredicates = [NotInMicroMips] in { -def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM; -def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>; + def JALR : JumpLinkReg<"jalr", GPR32Opnd>, JALR_FM; + def JALRPseudo : JumpLinkRegPseudo<GPR32Opnd, JALR, RA>; } -def JALX : JumpLink<"jalx", calltarget>, FJ<0x1D>; -def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>; -def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>; + +// FIXME: JALX really requires either MIPS16 or microMIPS in addition to MIPS32. +def JALX : JumpLink<"jalx", calltarget>, FJ<0x1D>, ISA_MIPS32_NOT_32R6_64R6; +def BGEZAL : MMRel, BGEZAL_FT<"bgezal", brtarget, GPR32Opnd>, BGEZAL_FM<0x11>, + ISA_MIPS1_NOT_32R6_64R6; +def BLTZAL : MMRel, BGEZAL_FT<"bltzal", brtarget, GPR32Opnd>, BGEZAL_FM<0x10>, + ISA_MIPS1_NOT_32R6_64R6; def BAL_BR : BAL_BR_Pseudo<BGEZAL>; def TAILCALL : TailCall<J>; def TAILCALL_R : TailCallReg<GPR32Opnd, JR>; -def RET : MMRel, RetBase<"ret", GPR32Opnd>, MTLO_FM<8>; +// Indirect branches are matched as PseudoIndirectBranch/PseudoIndirectBranch64 +// then are expanded to JR, JR64, JALR, or JALR64 depending on the ISA. +class PseudoIndirectBranchBase<RegisterOperand RO> : + MipsPseudo<(outs), (ins RO:$rs), [(brind RO:$rs)], IIBranch> { + let isTerminator=1; + let isBarrier=1; + let hasDelaySlot = 1; + let isBranch = 1; + let isIndirectBranch = 1; +} + +def PseudoIndirectBranch : PseudoIndirectBranchBase<GPR32Opnd>; + +// Return instructions are matched as a RetRA instruction, then are expanded +// into PseudoReturn/PseudoReturn64 after register allocation. Finally, +// MipsAsmPrinter expands this into JR, JR64, JALR, or JALR64 depending on the +// ISA. +class PseudoReturnBase<RegisterOperand RO> : MipsPseudo<(outs), (ins RO:$rs), + [], IIBranch> { + let isTerminator = 1; + let isBarrier = 1; + let hasDelaySlot = 1; + let isReturn = 1; + let isCodeGenOnly = 1; + let hasCtrlDep = 1; + let hasExtraSrcRegAllocReq = 1; +} + +def PseudoReturn : PseudoReturnBase<GPR32Opnd>; // Exception handling related node and instructions. // The conversion sequence is: @@ -1196,20 +1273,24 @@ let Uses = [V0, V1], isTerminator = 1, isReturn = 1, isBarrier = 1 in { /// Multiply and Divide Instructions.
def MULT : MMRel, Mult<"mult", II_MULT, GPR32Opnd, [HI0, LO0]>, - MULT_FM<0, 0x18>; + MULT_FM<0, 0x18>, ISA_MIPS1_NOT_32R6_64R6; def MULTu : MMRel, Mult<"multu", II_MULTU, GPR32Opnd, [HI0, LO0]>, - MULT_FM<0, 0x19>; + MULT_FM<0, 0x19>, ISA_MIPS1_NOT_32R6_64R6; def SDIV : MMRel, Div<"div", II_DIV, GPR32Opnd, [HI0, LO0]>, - MULT_FM<0, 0x1a>; + MULT_FM<0, 0x1a>, ISA_MIPS1_NOT_32R6_64R6; def UDIV : MMRel, Div<"divu", II_DIVU, GPR32Opnd, [HI0, LO0]>, - MULT_FM<0, 0x1b>; + MULT_FM<0, 0x1b>, ISA_MIPS1_NOT_32R6_64R6; -def MTHI : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>; -def MTLO : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>; +def MTHI : MMRel, MoveToLOHI<"mthi", GPR32Opnd, [HI0]>, MTLO_FM<0x11>, + ISA_MIPS1_NOT_32R6_64R6; +def MTLO : MMRel, MoveToLOHI<"mtlo", GPR32Opnd, [LO0]>, MTLO_FM<0x13>, + ISA_MIPS1_NOT_32R6_64R6; let EncodingPredicates = []<Predicate>, // FIXME: Lack of HasStdEnc is probably a bug AdditionalPredicates = [NotInMicroMips] in { -def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>, MFLO_FM<0x10>; -def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>, MFLO_FM<0x12>; +def MFHI : MMRel, MoveFromLOHI<"mfhi", GPR32Opnd, AC0>, MFLO_FM<0x10>, + ISA_MIPS1_NOT_32R6_64R6; +def MFLO : MMRel, MoveFromLOHI<"mflo", GPR32Opnd, AC0>, MFLO_FM<0x12>, + ISA_MIPS1_NOT_32R6_64R6; } /// Sign Ext In Register Instructions. @@ -1219,8 +1300,10 @@ def SEH : MMRel, SignExtInReg<"seh", i16, GPR32Opnd, II_SEH>, SEB_FM<0x18, 0x20>, ISA_MIPS32R2; /// Count Leading -def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>, ISA_MIPS32; -def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>, ISA_MIPS32; +def CLZ : MMRel, CountLeading0<"clz", GPR32Opnd>, CLO_FM<0x20>, + ISA_MIPS32_NOT_32R6_64R6; +def CLO : MMRel, CountLeading1<"clo", GPR32Opnd>, CLO_FM<0x21>, + ISA_MIPS32_NOT_32R6_64R6; /// Word Swap Bytes Within Halfwords def WSBH : MMRel, SubwordSwap<"wsbh", GPR32Opnd>, SEB_FM<2, 0x20>, ISA_MIPS32R2; @@ -1235,27 +1318,37 @@ def NOP : PseudoSE<(outs), (ins), []>, PseudoInstExpansion<(SLL ZERO, ZERO, 0)>; def LEA_ADDiu : MMRel, EffectiveAddress<"addiu", GPR32Opnd>, LW_FM<9>; // MADD*/MSUB* -def MADD : MMRel, MArithR<"madd", II_MADD, 1>, MULT_FM<0x1c, 0>, ISA_MIPS32; -def MADDU : MMRel, MArithR<"maddu", II_MADDU, 1>, MULT_FM<0x1c, 1>, ISA_MIPS32; -def MSUB : MMRel, MArithR<"msub", II_MSUB>, MULT_FM<0x1c, 4>, ISA_MIPS32; -def MSUBU : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM<0x1c, 5>, ISA_MIPS32; +def MADD : MMRel, MArithR<"madd", II_MADD, 1>, MULT_FM<0x1c, 0>, + ISA_MIPS32_NOT_32R6_64R6; +def MADDU : MMRel, MArithR<"maddu", II_MADDU, 1>, MULT_FM<0x1c, 1>, + ISA_MIPS32_NOT_32R6_64R6; +def MSUB : MMRel, MArithR<"msub", II_MSUB>, MULT_FM<0x1c, 4>, + ISA_MIPS32_NOT_32R6_64R6; +def MSUBU : MMRel, MArithR<"msubu", II_MSUBU>, MULT_FM<0x1c, 5>, + ISA_MIPS32_NOT_32R6_64R6; let AdditionalPredicates = [NotDSP] in { -def PseudoMULT : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, II_MULT>; -def PseudoMULTu : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, II_MULTU>; -def PseudoMFHI : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>; -def PseudoMFLO : PseudoMFLOHI<GPR32, ACC64, MipsMFLO>; -def PseudoMTLOHI : PseudoMTLOHI<ACC64, GPR32>; -def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd, II_MADD>; -def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu, II_MADDU>; -def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub, II_MSUB>; -def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu, II_MSUBU>; +def PseudoMULT : MultDivPseudo<MULT, ACC64, GPR32Opnd, MipsMult, II_MULT>, + ISA_MIPS1_NOT_32R6_64R6; +def 
PseudoMULTu : MultDivPseudo<MULTu, ACC64, GPR32Opnd, MipsMultu, II_MULTU>, + ISA_MIPS1_NOT_32R6_64R6; +def PseudoMFHI : PseudoMFLOHI<GPR32, ACC64, MipsMFHI>, ISA_MIPS1_NOT_32R6_64R6; +def PseudoMFLO : PseudoMFLOHI<GPR32, ACC64, MipsMFLO>, ISA_MIPS1_NOT_32R6_64R6; +def PseudoMTLOHI : PseudoMTLOHI<ACC64, GPR32>, ISA_MIPS1_NOT_32R6_64R6; +def PseudoMADD : MAddSubPseudo<MADD, MipsMAdd, II_MADD>, + ISA_MIPS32_NOT_32R6_64R6; +def PseudoMADDU : MAddSubPseudo<MADDU, MipsMAddu, II_MADDU>, + ISA_MIPS32_NOT_32R6_64R6; +def PseudoMSUB : MAddSubPseudo<MSUB, MipsMSub, II_MSUB>, + ISA_MIPS32_NOT_32R6_64R6; +def PseudoMSUBU : MAddSubPseudo<MSUBU, MipsMSubu, II_MSUBU>, + ISA_MIPS32_NOT_32R6_64R6; } def PseudoSDIV : MultDivPseudo<SDIV, ACC64, GPR32Opnd, MipsDivRem, II_DIV, - 0, 1, 1>; + 0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6; def PseudoUDIV : MultDivPseudo<UDIV, ACC64, GPR32Opnd, MipsDivRemU, II_DIVU, - 0, 1, 1>; + 0, 1, 1>, ISA_MIPS1_NOT_32R6_64R6; def RDHWR : ReadHardware<GPR32Opnd, HWRegsOpnd>, RDHWR_FM; @@ -1274,6 +1367,46 @@ def SSNOP : Barrier<"ssnop">, BARRIER_FM<1>; def EHB : Barrier<"ehb">, BARRIER_FM<3>; def PAUSE : Barrier<"pause">, BARRIER_FM<5>, ISA_MIPS32R2; +// JR_HB and JALR_HB are defined here using the new style naming +// scheme because some of this code is shared with Mips32r6InstrInfo.td +// and because of that it doesn't follow the naming convention of the +// rest of the file. To avoid a mixture of old vs new style, the new +// style was chosen. +class JR_HB_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { + dag OutOperandList = (outs); + dag InOperandList = (ins GPROpnd:$rs); + string AsmString = !strconcat(instr_asm, "\t$rs"); + list<dag> Pattern = []; +} + +class JALR_HB_DESC_BASE<string instr_asm, RegisterOperand GPROpnd> { + dag OutOperandList = (outs GPROpnd:$rd); + dag InOperandList = (ins GPROpnd:$rs); + string AsmString = !strconcat(instr_asm, "\t$rd, $rs"); + list<dag> Pattern = []; +} + +class JR_HB_DESC : InstSE<(outs), (ins), "", [], NoItinerary, FrmJ>, + JR_HB_DESC_BASE<"jr.hb", GPR32Opnd> { + let isBranch=1; + let isIndirectBranch=1; + let hasDelaySlot=1; + let isTerminator=1; + let isBarrier=1; +} + +class JALR_HB_DESC : InstSE<(outs), (ins), "", [], NoItinerary, FrmJ>, + JALR_HB_DESC_BASE<"jalr.hb", GPR32Opnd> { + let isIndirectBranch=1; + let hasDelaySlot=1; +} + +class JR_HB_ENC : JR_HB_FM<8>; +class JALR_HB_ENC : JALR_HB_FM<9>; + +def JR_HB : JR_HB_DESC, JR_HB_ENC, ISA_MIPS32_NOT_32R6_64R6; +def JALR_HB : JALR_HB_DESC, JALR_HB_ENC, ISA_MIPS32; + class TLB<string asmstr> : InstSE<(outs), (ins), asmstr, [], NoItinerary, FrmOther>; def TLBP : TLB<"tlbp">, COP0_TLB_FM<0x08>; @@ -1281,6 +1414,15 @@ def TLBR : TLB<"tlbr">, COP0_TLB_FM<0x01>; def TLBWI : TLB<"tlbwi">, COP0_TLB_FM<0x02>; def TLBWR : TLB<"tlbwr">, COP0_TLB_FM<0x06>; +class CacheOp<string instr_asm, Operand MemOpnd, RegisterOperand GPROpnd> : + InstSE<(outs), (ins MemOpnd:$addr, uimm5:$hint), + !strconcat(instr_asm, "\t$hint, $addr"), [], NoItinerary, FrmOther>; + +def CACHE : CacheOp<"cache", mem, GPR32Opnd>, CACHEOP_FM<0b101111>, + INSN_MIPS3_32_NOT_32R6_64R6; +def PREF : CacheOp<"pref", mem, GPR32Opnd>, CACHEOP_FM<0b110011>, + INSN_MIPS3_32_NOT_32R6_64R6; + //===----------------------------------------------------------------------===// // Instruction aliases //===----------------------------------------------------------------------===// @@ -1289,19 +1431,23 @@ def : MipsInstAlias<"move $dst, $src", GPR_32 { let AdditionalPredicates = [NotInMicroMips]; } -def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, 
brtarget:$offset), 0>; +def : MipsInstAlias<"bal $offset", (BGEZAL ZERO, brtarget:$offset), 0>, + ISA_MIPS1_NOT_32R6_64R6; def : MipsInstAlias<"addu $rs, $rt, $imm", (ADDiu GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; def : MipsInstAlias<"add $rs, $rt, $imm", (ADDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; def : MipsInstAlias<"and $rs, $rt, $imm", (ANDi GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm), 0>; +def : MipsInstAlias<"and $rs, $imm", + (ANDi GPR32Opnd:$rs, GPR32Opnd:$rs, simm16:$imm), 0>; def : MipsInstAlias<"j $rs", (JR GPR32Opnd:$rs), 0>; let Predicates = [NotInMicroMips] in { def : MipsInstAlias<"jalr $rs", (JALR RA, GPR32Opnd:$rs), 0>; } def : MipsInstAlias<"jal $rs", (JALR RA, GPR32Opnd:$rs), 0>; def : MipsInstAlias<"jal $rd,$rs", (JALR GPR32Opnd:$rd, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"jalr.hb $rs", (JALR_HB RA, GPR32Opnd:$rs), 1>, ISA_MIPS32; def : MipsInstAlias<"not $rt, $rs", (NOR GPR32Opnd:$rt, GPR32Opnd:$rs, ZERO), 0>; def : MipsInstAlias<"neg $rt, $rs", @@ -1318,6 +1464,8 @@ def : MipsInstAlias<"xor $rs, $rt, $imm", (XORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; def : MipsInstAlias<"or $rs, $rt, $imm", (ORi GPR32Opnd:$rs, GPR32Opnd:$rt, uimm16:$imm), 0>; +def : MipsInstAlias<"or $rs, $imm", + (ORi GPR32Opnd:$rs, GPR32Opnd:$rs, uimm16:$imm), 0>; def : MipsInstAlias<"nop", (SLL ZERO, ZERO, 0), 1>; def : MipsInstAlias<"mfc0 $rt, $rd", (MFC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; def : MipsInstAlias<"mtc0 $rt, $rd", (MTC0 GPR32Opnd:$rt, GPR32Opnd:$rd, 0), 0>; @@ -1360,6 +1508,9 @@ def : MipsInstAlias<"sra $rd, $rt, $rs", (SRAV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>; def : MipsInstAlias<"srl $rd, $rt, $rs", (SRLV GPR32Opnd:$rd, GPR32Opnd:$rt, GPR32Opnd:$rs), 0>; +def : MipsInstAlias<"sdbbp", (SDBBP 0)>, ISA_MIPS32_NOT_32R6_64R6; +def : MipsInstAlias<"sync", + (SYNC 0), 1>, ISA_MIPS2; //===----------------------------------------------------------------------===// // Assembler Pseudo Instructions //===----------------------------------------------------------------------===// @@ -1412,6 +1563,10 @@ let AdditionalPredicates = [NotDSP] in { (ADDiu GPR32:$src, imm:$imm)>; } +// SYNC +def : MipsPat<(MipsSync (i32 immz)), + (SYNC 0)>, ISA_MIPS2; + // Call def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)), (JAL tglobaladdr:$dst)>; diff --git a/lib/Target/Mips/MipsLongBranch.cpp b/lib/Target/Mips/MipsLongBranch.cpp index acfe76e..c6838a3 100644 --- a/lib/Target/Mips/MipsLongBranch.cpp +++ b/lib/Target/Mips/MipsLongBranch.cpp @@ -15,6 +15,7 @@ #include "Mips.h" #include "MCTargetDesc/MipsBaseInfo.h" +#include "MCTargetDesc/MipsMCNaCl.h" #include "MipsTargetMachine.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -64,7 +65,8 @@ namespace { : MachineFunctionPass(ID), TM(tm), IsPIC(TM.getRelocationModel() == Reloc::PIC_), ABI(TM.getSubtarget<MipsSubtarget>().getTargetABI()), - LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 : 9)) {} + LongBranchSeqSize(!IsPIC ? 2 : (ABI == MipsSubtarget::N64 ? 10 : + (!TM.getSubtarget<MipsSubtarget>().isTargetNaCl() ? 9 : 10))) {} const char *getPassName() const override { return "Mips Long Branch"; @@ -264,6 +266,13 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { LongBrMBB->addSuccessor(BalTgtMBB); BalTgtMBB->addSuccessor(TgtMBB); + // We must select between the MIPS32r6/MIPS64r6 BAL (which is a normal + // instruction) and the pre-MIPS32r6/MIPS64r6 definition (which is a + // pseudo-instruction wrapping BGEZAL).
+ + const MipsSubtarget &Subtarget = TM.getSubtarget<MipsSubtarget>(); + unsigned BalOp = Subtarget.hasMips32r6() ? Mips::BAL : Mips::BAL_BR; + if (ABI != MipsSubtarget::N64) { // $longbr: // addiu $sp, $sp, -8 @@ -305,9 +314,11 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { BuildMI(*LongBrMBB, Pos, DL, TII->get(Mips::LONG_BRANCH_LUi), Mips::AT) .addMBB(TgtMBB).addMBB(BalTgtMBB); MIBundleBuilder(*LongBrMBB, Pos) - .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB)) - .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_ADDiu), Mips::AT) - .addReg(Mips::AT).addMBB(TgtMBB).addMBB(BalTgtMBB)); + .append(BuildMI(*MF, DL, TII->get(BalOp)).addMBB(BalTgtMBB)) + .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_ADDiu), Mips::AT) + .addReg(Mips::AT) + .addMBB(TgtMBB) + .addMBB(BalTgtMBB)); Pos = BalTgtMBB->begin(); @@ -316,10 +327,23 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::LW), Mips::RA) .addReg(Mips::SP).addImm(0); - MIBundleBuilder(*BalTgtMBB, Pos) - .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT)) - .append(BuildMI(*MF, DL, TII->get(Mips::ADDiu), Mips::SP) - .addReg(Mips::SP).addImm(8)); + if (!TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) { + MIBundleBuilder(*BalTgtMBB, Pos) + .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT)) + .append(BuildMI(*MF, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(8)); + } else { + // In NaCl, modifying the sp is not allowed in a branch delay slot. + BuildMI(*BalTgtMBB, Pos, DL, TII->get(Mips::ADDiu), Mips::SP) + .addReg(Mips::SP).addImm(8); + + MIBundleBuilder(*BalTgtMBB, Pos) + .append(BuildMI(*MF, DL, TII->get(Mips::JR)).addReg(Mips::AT)) + .append(BuildMI(*MF, DL, TII->get(Mips::NOP))); + + // Bundle-align the target of indirect branch JR. + TgtMBB->setAlignment(MIPS_NACL_BUNDLE_ALIGN); + } } else { // $longbr: // daddiu $sp, $sp, -16 @@ -364,11 +388,12 @@ void MipsLongBranch::expandToLongBranch(MBBInfo &I) { .addReg(Mips::AT_64).addImm(16); MIBundleBuilder(*LongBrMBB, Pos) - .append(BuildMI(*MF, DL, TII->get(Mips::BAL_BR)).addMBB(BalTgtMBB)) - .append(BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_DADDiu), - Mips::AT_64).addReg(Mips::AT_64) - .addMBB(TgtMBB, MipsII::MO_ABS_LO) - .addMBB(BalTgtMBB)); + .append(BuildMI(*MF, DL, TII->get(BalOp)).addMBB(BalTgtMBB)) + .append( + BuildMI(*MF, DL, TII->get(Mips::LONG_BRANCH_DADDiu), Mips::AT_64) + .addReg(Mips::AT_64) + .addMBB(TgtMBB, MipsII::MO_ABS_LO) + .addMBB(BalTgtMBB)); Pos = BalTgtMBB->begin(); @@ -450,9 +475,18 @@ bool MipsLongBranch::runOnMachineFunction(MachineFunction &F) { continue; int ShVal = TM.getSubtarget<MipsSubtarget>().inMicroMipsMode() ? 2 : 4; + int64_t Offset = computeOffset(I->Br) / ShVal; + + if (TM.getSubtarget<MipsSubtarget>().isTargetNaCl()) { + // The offset calculation does not include sandboxing instructions + // that will be added later in the MC layer. Since at this point we + // don't know the exact amount of code that "sandboxing" will add, we + // conservatively estimate that code will not grow more than 100%. + Offset *= 2; + } // Check if offset fits into 16-bit immediate field of branches.
- if (!ForceLongBranch && isInt<16>(computeOffset(I->Br) / ShVal)) + if (!ForceLongBranch && isInt<16>(Offset)) continue; I->HasLongBranch = true; diff --git a/lib/Target/Mips/MipsMSAInstrFormats.td b/lib/Target/Mips/MipsMSAInstrFormats.td index 6bd0366..bff2d0f 100644 --- a/lib/Target/Mips/MipsMSAInstrFormats.td +++ b/lib/Target/Mips/MipsMSAInstrFormats.td @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -def HasMSA : Predicate<"Subtarget.hasMSA()">, +def HasMSA : Predicate<"Subtarget->hasMSA()">, AssemblerPredicate<"FeatureMSA">; class MSAInst : MipsInst<(outs), (ins), "", [], NoItinerary, FrmOther> { diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index e9101cc..8c16f82 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -15,7 +15,6 @@ #define MIPS_MACHINE_FUNCTION_INFO_H #include "Mips16HardFloatInfo.h" -#include "MipsSubtarget.h" #include "llvm/ADT/StringMap.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index 83d25ab..084449b 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -93,6 +93,9 @@ MipsRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (Subtarget.isFP64bit()) return CSR_O32_FP64_SaveList; + if (Subtarget.isFPXX()) + return CSR_O32_FPXX_SaveList; + return CSR_O32_SaveList; } @@ -110,6 +113,9 @@ MipsRegisterInfo::getCallPreservedMask(CallingConv::ID) const { if (Subtarget.isFP64bit()) return CSR_O32_FP64_RegMask; + if (Subtarget.isFPXX()) + return CSR_O32_FPXX_RegMask; + return CSR_O32_RegMask; } @@ -201,6 +207,11 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(Mips::GP_64); } + if (Subtarget.isABI_O32() && !Subtarget.useOddSPReg()) { + for (const auto &Reg : Mips::OddSPRegClass) + Reserved.set(Reg); + } + return Reserved; } diff --git a/lib/Target/Mips/MipsRegisterInfo.td b/lib/Target/Mips/MipsRegisterInfo.td index 875a596..6323da3 100644 --- a/lib/Target/Mips/MipsRegisterInfo.td +++ b/lib/Target/Mips/MipsRegisterInfo.td @@ -340,6 +340,12 @@ def AFGR64 : RegisterClass<"Mips", [f64], 64, (add def FGR64 : RegisterClass<"Mips", [f64], 64, (sequence "D%u_64", 0, 31)>; +// Used to reserve odd registers when given -mattr=+nooddspreg +def OddSP : RegisterClass<"Mips", [f32], 32, + (add (decimate (sequence "F%u", 1, 31), 2), + (decimate (sequence "F_HI%u", 1, 31), 2))>, + Unallocatable; + // FP control registers. def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>, Unallocatable; @@ -348,6 +354,10 @@ def CCR : RegisterClass<"Mips", [i32], 32, (sequence "FCR%u", 0, 31)>, def FCC : RegisterClass<"Mips", [i32], 32, (sequence "FCC%u", 0, 7)>, Unallocatable; +// MIPS32r6/MIPS64r6 store FPU condition codes in normal FGR registers. +// This class allows us to represent this in codegen patterns. +def FGRCC : RegisterClass<"Mips", [i32], 32, (sequence "F%u", 0, 31)>; + def MSA128B: RegisterClass<"Mips", [v16i8], 128, (sequence "W%u", 0, 31)>; def MSA128H: RegisterClass<"Mips", [v8i16, v8f16], 128, @@ -512,6 +522,12 @@ def FGR32Opnd : RegisterOperand<FGR32> { let ParserMatchClass = FGR32AsmOperand; } +def FGRCCOpnd : RegisterOperand<FGRCC> { + // The assembler doesn't use register classes so we can re-use + // FGR32AsmOperand. 
+ let ParserMatchClass = FGR32AsmOperand; +} + def FGRH32Opnd : RegisterOperand<FGRH32> { let ParserMatchClass = FGRH32AsmOperand; } diff --git a/lib/Target/Mips/MipsSEFrameLowering.cpp b/lib/Target/Mips/MipsSEFrameLowering.cpp index 6ad5821..6573070 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.cpp +++ b/lib/Target/Mips/MipsSEFrameLowering.cpp @@ -16,6 +16,7 @@ #include "MipsAnalyzeImmediate.h" #include "MipsMachineFunction.h" #include "MipsSEInstrInfo.h" +#include "MipsSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -257,6 +258,9 @@ bool ExpandPseudo::expandCopyACC(MachineBasicBlock &MBB, Iter I, return true; } +MipsSEFrameLowering::MipsSEFrameLowering(const MipsSubtarget &STI) + : MipsFrameLowering(STI, STI.stackAlignment()) {} + unsigned MipsSEFrameLowering::ehDataReg(unsigned I) const { static const unsigned EhDataReg[] = { Mips::A0, Mips::A1, Mips::A2, Mips::A3 diff --git a/lib/Target/Mips/MipsSEFrameLowering.h b/lib/Target/Mips/MipsSEFrameLowering.h index 5d2801f..e832848 100644 --- a/lib/Target/Mips/MipsSEFrameLowering.h +++ b/lib/Target/Mips/MipsSEFrameLowering.h @@ -20,8 +20,7 @@ namespace llvm { class MipsSEFrameLowering : public MipsFrameLowering { public: - explicit MipsSEFrameLowering(const MipsSubtarget &STI) - : MipsFrameLowering(STI, STI.stackAlignment()) {} + explicit MipsSEFrameLowering(const MipsSubtarget &STI); /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index d5385be..6f35947 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -37,7 +37,7 @@ using namespace llvm; #define DEBUG_TYPE "mips-isel" bool MipsSEDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { - if (Subtarget.inMips16Mode()) + if (Subtarget->inMips16Mode()) return false; return MipsDAGToDAGISel::runOnMachineFunction(MF); } @@ -134,7 +134,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { unsigned V0, V1, GlobalBaseReg = MipsFI->getGlobalBaseReg(); const TargetRegisterClass *RC; - if (Subtarget.isABI_N64()) + if (Subtarget->isABI_N64()) RC = (const TargetRegisterClass*)&Mips::GPR64RegClass; else RC = (const TargetRegisterClass*)&Mips::GPR32RegClass; @@ -142,7 +142,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { V0 = RegInfo.createVirtualRegister(RC); V1 = RegInfo.createVirtualRegister(RC); - if (Subtarget.isABI_N64()) { + if (Subtarget->isABI_N64()) { MF.getRegInfo().addLiveIn(Mips::T9_64); MBB.addLiveIn(Mips::T9_64); @@ -174,7 +174,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { MF.getRegInfo().addLiveIn(Mips::T9); MBB.addLiveIn(Mips::T9); - if (Subtarget.isABI_N32()) { + if (Subtarget->isABI_N32()) { // lui $v0, %hi(%neg(%gp_rel(fname))) // addu $v1, $v0, $t9 // addiu $globalbasereg, $v1, %lo(%neg(%gp_rel(fname))) @@ -187,7 +187,7 @@ void MipsSEDAGToDAGISel::initGlobalBaseReg(MachineFunction &MF) { return; } - assert(Subtarget.isABI_O32()); + assert(Subtarget->isABI_O32()); // For O32 ABI, the following instruction sequence is emitted to initialize // the global base register: @@ -408,7 +408,7 @@ bool MipsSEDAGToDAGISel::selectIntAddrMSA(SDValue Addr, SDValue &Base, // * MSA is enabled // * N is a ISD::BUILD_VECTOR representing a constant splat bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const { - if (!Subtarget.hasMSA()) + if 
(!Subtarget->hasMSA()) return false; BuildVectorSDNode *Node = dyn_cast<BuildVectorSDNode>(N); @@ -422,7 +422,7 @@ bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const { if (!Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 8, - !Subtarget.isLittle())) + !Subtarget->isLittle())) return false; Imm = SplatValue; @@ -648,7 +648,7 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { } case ISD::ADDE: { - if (Subtarget.hasDSP()) // Select DSP instructions, ADDSC and ADDWC. + if (Subtarget->hasDSP()) // Select DSP instructions, ADDSC and ADDWC. break; SDValue InFlag = Node->getOperand(2); Result = selectAddESubE(Mips::ADDu, InFlag, InFlag.getValue(0), DL, Node); @@ -658,11 +658,11 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { case ISD::ConstantFP: { ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(Node); if (Node->getValueType(0) == MVT::f64 && CN->isExactlyValue(+0.0)) { - if (Subtarget.isGP64bit()) { + if (Subtarget->isGP64bit()) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, Mips::ZERO_64, MVT::i64); Result = CurDAG->getMachineNode(Mips::DMTC1, DL, MVT::f64, Zero); - } else if (Subtarget.isFP64bit()) { + } else if (Subtarget->isFP64bit()) { SDValue Zero = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, Mips::ZERO, MVT::i32); Result = CurDAG->getMachineNode(Mips::BuildPairF64_64, DL, MVT::f64, @@ -813,12 +813,12 @@ std::pair<bool, SDNode*> MipsSEDAGToDAGISel::selectNode(SDNode *Node) { EVT ResVecTy = BVN->getValueType(0); EVT ViaVecTy; - if (!Subtarget.hasMSA() || !BVN->getValueType(0).is128BitVector()) + if (!Subtarget->hasMSA() || !BVN->getValueType(0).is128BitVector()) return std::make_pair(false, nullptr); if (!BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs, 8, - !Subtarget.isLittle())) + !Subtarget->isLittle())) return std::make_pair(false, nullptr); switch (SplatBitSize) { diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 969d730..be4ca86 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -39,7 +39,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) // Set up the register classes addRegisterClass(MVT::i32, &Mips::GPR32RegClass); - if (isGP64bit()) + if (Subtarget->isGP64bit()) addRegisterClass(MVT::i64, &Mips::GPR64RegClass); if (Subtarget->hasDSP() || Subtarget->hasMSA()) { @@ -120,10 +120,10 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) if (Subtarget->hasCnMips()) setOperationAction(ISD::MUL, MVT::i64, Legal); - else if (isGP64bit()) + else if (Subtarget->isGP64bit()) setOperationAction(ISD::MUL, MVT::i64, Custom); - if (isGP64bit()) { + if (Subtarget->isGP64bit()) { setOperationAction(ISD::MULHS, MVT::i64, Custom); setOperationAction(ISD::MULHU, MVT::i64, Custom); } @@ -152,6 +152,76 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::STORE, MVT::f64, Custom); } + if (Subtarget->hasMips32r6()) { + // MIPS32r6 replaces the accumulator-based multiplies with a three register + // instruction + setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); + setOperationAction(ISD::MUL, MVT::i32, Legal); + setOperationAction(ISD::MULHS, MVT::i32, Legal); + setOperationAction(ISD::MULHU, MVT::i32, Legal); + + // MIPS32r6 replaces the accumulator-based division/remainder with separate + // three register division and remainder 
instructions. + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + setOperationAction(ISD::SDIV, MVT::i32, Legal); + setOperationAction(ISD::UDIV, MVT::i32, Legal); + setOperationAction(ISD::SREM, MVT::i32, Legal); + setOperationAction(ISD::UREM, MVT::i32, Legal); + + // MIPS32r6 replaces conditional moves with an equivalent that removes the + // need for three GPR read ports. + setOperationAction(ISD::SETCC, MVT::i32, Legal); + setOperationAction(ISD::SELECT, MVT::i32, Legal); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + + setOperationAction(ISD::SETCC, MVT::f32, Legal); + setOperationAction(ISD::SELECT, MVT::f32, Legal); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + + assert(Subtarget->isFP64bit() && "FR=1 is required for MIPS32r6"); + setOperationAction(ISD::SETCC, MVT::f64, Legal); + setOperationAction(ISD::SELECT, MVT::f64, Legal); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + + setOperationAction(ISD::BRCOND, MVT::Other, Legal); + + // Floating point > and >= are supported via < and <= + setCondCodeAction(ISD::SETOGE, MVT::f32, Expand); + setCondCodeAction(ISD::SETOGT, MVT::f32, Expand); + setCondCodeAction(ISD::SETUGE, MVT::f32, Expand); + setCondCodeAction(ISD::SETUGT, MVT::f32, Expand); + + setCondCodeAction(ISD::SETOGE, MVT::f64, Expand); + setCondCodeAction(ISD::SETOGT, MVT::f64, Expand); + setCondCodeAction(ISD::SETUGE, MVT::f64, Expand); + setCondCodeAction(ISD::SETUGT, MVT::f64, Expand); + } + + if (Subtarget->hasMips64r6()) { + // MIPS64r6 replaces the accumulator-based multiplies with a three register + // instruction + setOperationAction(ISD::MUL, MVT::i64, Legal); + setOperationAction(ISD::MULHS, MVT::i64, Legal); + setOperationAction(ISD::MULHU, MVT::i64, Legal); + + // MIPS32r6 replaces the accumulator-based division/remainder with separate + // three register division and remainder instructions. + setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + setOperationAction(ISD::UDIVREM, MVT::i64, Expand); + setOperationAction(ISD::SDIV, MVT::i64, Legal); + setOperationAction(ISD::UDIV, MVT::i64, Legal); + setOperationAction(ISD::SREM, MVT::i64, Legal); + setOperationAction(ISD::UREM, MVT::i64, Legal); + + // MIPS64r6 replaces conditional moves with an equivalent that removes the + // need for three GPR read ports. + setOperationAction(ISD::SETCC, MVT::i64, Legal); + setOperationAction(ISD::SELECT, MVT::i64, Legal); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + } + computeRegisterProperties(); } @@ -160,6 +230,14 @@ llvm::createMipsSETargetLowering(MipsTargetMachine &TM) { return new MipsSETargetLowering(TM); } +const TargetRegisterClass * +MipsSETargetLowering::getRepRegClassFor(MVT VT) const { + if (VT == MVT::Untyped) + return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass : &Mips::ACC64RegClass; + + return TargetLowering::getRepRegClassFor(VT); +} + // Enable MSA support for the given integer type and Register class. 
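// (Presumably one call per element width with the matching MSA128* register
// class; the helper registers that class and marks the usual integer vector
// operations legal for the type.)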
void MipsSETargetLowering:: addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { @@ -449,8 +527,8 @@ static SDValue performADDECombine(SDNode *N, SelectionDAG &DAG, if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && - selectMADD(N, &DAG)) + if (Subtarget->hasMips32() && !Subtarget->hasMips32r6() && + N->getValueType(0) == MVT::i32 && selectMADD(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -1178,6 +1256,9 @@ SDValue MipsSETargetLowering::lowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDValue MipsSETargetLowering::lowerMulDiv(SDValue Op, unsigned NewOpc, bool HasLo, bool HasHi, SelectionDAG &DAG) const { + // MIPS32r6/MIPS64r6 removed accumulator based multiplies. + assert(!Subtarget->hasMips32r6()); + EVT Ty = Op.getOperand(0).getValueType(); SDLoc DL(Op); SDValue Mult = DAG.getNode(NewOpc, DL, MVT::Untyped, @@ -1651,7 +1732,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_copy_s_w: return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); case Intrinsic::mips_copy_s_d: - if (hasMips64()) + if (Subtarget->hasMips64()) // Lower directly into VEXTRACT_SEXT_ELT since i64 is legal on Mips64. return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_SEXT_ELT); else { @@ -1666,7 +1747,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_copy_u_w: return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); case Intrinsic::mips_copy_u_d: - if (hasMips64()) + if (Subtarget->hasMips64()) // Lower directly into VEXTRACT_ZEXT_ELT since i64 is legal on Mips64. return lowerMSACopyIntr(Op, DAG, MipsISD::VEXTRACT_ZEXT_ELT); else { @@ -2943,8 +3024,8 @@ MipsSETargetLowering::emitINSERT_DF_VIDX(MachineInstr *MI, unsigned SrcValReg = MI->getOperand(3).getReg(); const TargetRegisterClass *VecRC = nullptr; - const TargetRegisterClass *GPRRC = isGP64bit() ? &Mips::GPR64RegClass - : &Mips::GPR32RegClass; + const TargetRegisterClass *GPRRC = + Subtarget->isGP64bit() ? &Mips::GPR64RegClass : &Mips::GPR32RegClass; unsigned EltLog2Size; unsigned InsertOp = 0; unsigned InsveOp = 0; diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index 03a20ef..13ef6fc 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef MipsSEISELLOWERING_H -#define MipsSEISELLOWERING_H +#ifndef MIPSSEISELLOWERING_H +#define MIPSSEISELLOWERING_H #include "MipsISelLowering.h" #include "MipsRegisterInfo.h" @@ -46,13 +46,7 @@ namespace llvm { return false; } - const TargetRegisterClass *getRepRegClassFor(MVT VT) const override { - if (VT == MVT::Untyped) - return Subtarget->hasDSP() ? &Mips::ACC64DSPRegClass : - &Mips::ACC64RegClass; - - return TargetLowering::getRepRegClassFor(VT); - } + const TargetRegisterClass *getRepRegClassFor(MVT VT) const override; private: bool isEligibleForTailCallOptimization(const MipsCC &MipsCCInfo, diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp index f6f364f..32da749 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.cpp +++ b/lib/Target/Mips/MipsSEInstrInfo.cpp @@ -272,7 +272,7 @@ bool MipsSEInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { default: return false; case Mips::RetRA: - expandRetRA(MBB, MI, Mips::RET); + expandRetRA(MBB, MI); break; case Mips::PseudoMFHI: Opc = isMicroMips ? 
Mips::MFHI16_MM : Mips::MFHI; @@ -428,9 +428,14 @@ unsigned MipsSEInstrInfo::getAnalyzableBrOpc(unsigned Opc) const { } void MipsSEInstrInfo::expandRetRA(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned Opc) const { - BuildMI(MBB, I, I->getDebugLoc(), get(Opc)).addReg(Mips::RA); + MachineBasicBlock::iterator I) const { + const auto &Subtarget = TM.getSubtarget<MipsSubtarget>(); + + if (Subtarget.isGP64bit()) + BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn64)) + .addReg(Mips::RA_64); + else + BuildMI(MBB, I, I->getDebugLoc(), get(Mips::PseudoReturn)).addReg(Mips::RA); } std::pair<bool, bool> @@ -542,20 +547,31 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, const MCInstrDesc& Mtc1Tdd = get(Mips::MTC1); DebugLoc dl = I->getDebugLoc(); const TargetRegisterInfo &TRI = getRegisterInfo(); + bool HasMTHC1 = TM.getSubtarget<MipsSubtarget>().hasMips32r2() || + TM.getSubtarget<MipsSubtarget>().hasMips32r6(); - // For FP32 mode: - // mtc1 Lo, $fp - // mtc1 Hi, $fp + 1 - // For FP64 mode: + // When mthc1 is available, use: // mtc1 Lo, $fp // mthc1 Hi, $fp + // + // Otherwise, for FP64: + // spill + reload via ldc1 + // This has not been implemented since FP64 on MIPS32 and earlier is not + // supported. + // + // Otherwise, for FP32: + // mtc1 Lo, $fp + // mtc1 Hi, $fp + 1 BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_lo)) .addReg(LoReg); - if (FP64) { - // FIXME: The .addReg(DstReg, RegState::Implicit) is a white lie used to - // temporarily work around a widespread bug in the -mfp64 support. + if (HasMTHC1 || FP64) { + assert(TM.getSubtarget<MipsSubtarget>().hasMips32r2() && + "MTHC1 requires MIPS32r2"); + + // FIXME: The .addReg(DstReg) is a white lie used to temporarily work + // around a widespread bug in the -mfp64 support. // The problem is that none of the 32-bit fpu ops mention the fact // that they clobber the upper 32-bits of the 64-bit FPR. Fixing that // requires a major overhaul of the FPU implementation which can't @@ -565,9 +581,9 @@ void MipsSEInstrInfo::expandBuildPairF64(MachineBasicBlock &MBB, // We therefore pretend that it reads the bottom 32-bits to // artificially create a dependency and prevent the scheduler // changing the behaviour of the code. - BuildMI(MBB, I, dl, get(Mips::MTHC1), TRI.getSubReg(DstReg, Mips::sub_hi)) - .addReg(HiReg) - .addReg(DstReg, RegState::Implicit); + BuildMI(MBB, I, dl, get(FP64 ? Mips::MTHC1_D64 : Mips::MTHC1_D32), DstReg) + .addReg(DstReg) + .addReg(HiReg); } else BuildMI(MBB, I, dl, Mtc1Tdd, TRI.getSubReg(DstReg, Mips::sub_hi)) .addReg(HiReg); @@ -580,17 +596,16 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, // indirect jump to TargetReg const MipsSubtarget &STI = TM.getSubtarget<MipsSubtarget>(); unsigned ADDU = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; - unsigned JR = STI.isABI_N64() ? Mips::JR64 : Mips::JR; - unsigned SP = STI.isABI_N64() ? Mips::SP_64 : Mips::SP; - unsigned RA = STI.isABI_N64() ? Mips::RA_64 : Mips::RA; - unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9; - unsigned ZERO = STI.isABI_N64() ? Mips::ZERO_64 : Mips::ZERO; + unsigned SP = STI.isGP64bit() ? Mips::SP_64 : Mips::SP; + unsigned RA = STI.isGP64bit() ? Mips::RA_64 : Mips::RA; + unsigned T9 = STI.isGP64bit() ? Mips::T9_64 : Mips::T9; + unsigned ZERO = STI.isGP64bit() ? 
Mips::ZERO_64 : Mips::ZERO; unsigned OffsetReg = I->getOperand(0).getReg(); unsigned TargetReg = I->getOperand(1).getReg(); // addu $ra, $v0, $zero // addu $sp, $sp, $v1 - // jr $ra + // jr $ra (via RetRA) if (TM.getRelocationModel() == Reloc::PIC_) BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), T9) .addReg(TargetReg).addReg(ZERO); @@ -598,7 +613,7 @@ void MipsSEInstrInfo::expandEhReturn(MachineBasicBlock &MBB, .addReg(TargetReg).addReg(ZERO); BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(ADDU), SP) .addReg(SP).addReg(OffsetReg); - BuildMI(MBB, I, I->getDebugLoc(), TM.getInstrInfo()->get(JR)).addReg(RA); + expandRetRA(MBB, I); } const MipsInstrInfo *llvm::createMipsSEInstrInfo(MipsTargetMachine &TM) { diff --git a/lib/Target/Mips/MipsSEInstrInfo.h b/lib/Target/Mips/MipsSEInstrInfo.h index aa68552..9ac94ce 100644 --- a/lib/Target/Mips/MipsSEInstrInfo.h +++ b/lib/Target/Mips/MipsSEInstrInfo.h @@ -81,8 +81,7 @@ public: private: unsigned getAnalyzableBrOpc(unsigned Opc) const override; - void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned Opc) const; + void expandRetRA(MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const; std::pair<bool, bool> compareOpndSize(unsigned Opc, const MachineFunction &MF) const; diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.cpp b/lib/Target/Mips/MipsSelectionDAGInfo.cpp index 0d4398e..edd8f67 100644 --- a/lib/Target/Mips/MipsSelectionDAGInfo.cpp +++ b/lib/Target/Mips/MipsSelectionDAGInfo.cpp @@ -16,9 +16,8 @@ using namespace llvm; #define DEBUG_TYPE "mips-selectiondag-info" -MipsSelectionDAGInfo::MipsSelectionDAGInfo(const MipsTargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} +MipsSelectionDAGInfo::MipsSelectionDAGInfo(const DataLayout &DL) + : TargetSelectionDAGInfo(&DL) {} MipsSelectionDAGInfo::~MipsSelectionDAGInfo() { } diff --git a/lib/Target/Mips/MipsSelectionDAGInfo.h b/lib/Target/Mips/MipsSelectionDAGInfo.h index 6cafb55..2b3d527 100644 --- a/lib/Target/Mips/MipsSelectionDAGInfo.h +++ b/lib/Target/Mips/MipsSelectionDAGInfo.h @@ -22,7 +22,7 @@ class MipsTargetMachine; class MipsSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit MipsSelectionDAGInfo(const MipsTargetMachine &TM); + explicit MipsSelectionDAGInfo(const DataLayout &DL); ~MipsSelectionDAGInfo(); }; diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp index 74ec064..693daa3 100644 --- a/lib/Target/Mips/MipsSubtarget.cpp +++ b/lib/Target/Mips/MipsSubtarget.cpp @@ -60,11 +60,9 @@ Mips16ConstantIslands( /// Select the Mips CPU for the given triple and cpu name. /// FIXME: Merge with the copy in MipsMCTargetDesc.cpp -static inline StringRef selectMipsCPU(StringRef TT, StringRef CPU) { +static StringRef selectMipsCPU(Triple TT, StringRef CPU) { if (CPU.empty() || CPU == "generic") { - Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::mips || - TheTriple.getArch() == Triple::mipsel) + if (TT.getArch() == Triple::mips || TT.getArch() == Triple::mipsel) CPU = "mips32"; else CPU = "mips64"; @@ -74,39 +72,56 @@ static inline StringRef selectMipsCPU(StringRef TT, StringRef CPU) { void MipsSubtarget::anchor() { } +static std::string computeDataLayout(const MipsSubtarget &ST) { + std::string Ret = ""; + + // There are both little and big endian mips. + if (ST.isLittle()) + Ret += "e"; + else + Ret += "E"; + + Ret += "-m:m"; + + // Pointers are 32 bit on some ABIs. 
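// (Of the ABIs handled here, only N64 has 64-bit pointers; O32 and N32 are
// both ILP32-style, hence the -p:32:32 component added below.)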
+ if (!ST.isABI_N64()) + Ret += "-p:32:32"; + + // 8 and 16 bit integers only need no have natural alignment, but try to + // align them to 32 bits. 64 bit integers have natural alignment. + Ret += "-i8:8:32-i16:16:32-i64:64"; + + // 32 bit registers are always available and the stack is at least 64 bit + // aligned. On N64 64 bit registers are also available and the stack is + // 128 bit aligned. + if (ST.isABI_N64() || ST.isABI_N32()) + Ret += "-n32:64-S128"; + else + Ret += "-n32-S64"; + + return Ret; +} + MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, const std::string &FS, bool little, Reloc::Model _RM, MipsTargetMachine *_TM) : MipsGenSubtargetInfo(TT, CPU, FS), MipsArchVersion(Mips32), MipsABI(UnknownABI), IsLittle(little), IsSingleFloat(false), - IsFP64bit(false), IsNaN2008bit(false), IsGP64bit(false), HasVFPU(false), - HasCnMips(false), IsLinux(true), HasMips3_32(false), HasMips3_32r2(false), - HasMips4_32(false), HasMips4_32r2(false), HasMips5_32r2(false), - InMips16Mode(false), InMips16HardFloat(Mips16HardFloat), - InMicroMipsMode(false), HasDSP(false), HasDSPR2(false), - AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), HasMSA(false), - RM(_RM), OverrideMode(NoOverride), TM(_TM), TargetTriple(TT) { - std::string CPUName = CPU; - CPUName = selectMipsCPU(TT, CPUName); - - // Parse features string. - ParseSubtargetFeatures(CPUName, FS); - - if (InMips16Mode && !TM->Options.UseSoftFloat) { - // Hard float for mips16 means essentially to compile as soft float - // but to use a runtime library for soft float that is written with - // native mips32 floating point instructions (those runtime routines - // run in mips32 hard float mode). - TM->Options.UseSoftFloat = true; - TM->Options.FloatABIType = FloatABI::Soft; - InMips16HardFloat = true; - } + IsFPXX(false), IsFP64bit(false), UseOddSPReg(true), IsNaN2008bit(false), + IsGP64bit(false), HasVFPU(false), HasCnMips(false), IsLinux(true), + HasMips3_32(false), HasMips3_32r2(false), HasMips4_32(false), + HasMips4_32r2(false), HasMips5_32r2(false), InMips16Mode(false), + InMips16HardFloat(Mips16HardFloat), InMicroMipsMode(false), HasDSP(false), + HasDSPR2(false), AllowMixed16_32(Mixed16_32 | Mips_Os16), Os16(Mips_Os16), + HasMSA(false), RM(_RM), OverrideMode(NoOverride), TM(_TM), + TargetTriple(TT), + DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS, TM))), + TSInfo(DL), JITInfo(), InstrInfo(MipsInstrInfo::create(*TM)), + FrameLowering(MipsFrameLowering::create(*TM, *this)), + TLInfo(MipsTargetLowering::create(*TM)) { PreviousInMips16Mode = InMips16Mode; - // Initialize scheduling itinerary for the specified CPU. - InstrItins = getInstrItineraryForCPU(CPUName); - // Don't even attempt to generate code for MIPS-I, MIPS-II, MIPS-III, and // MIPS-V. They have not been tested and currently exist for the integrated // assembler only. @@ -137,6 +152,11 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU, "See -mattr=+fp64.", false); + if (!isABI_O32() && !useOddSPReg()) + report_fatal_error("-mattr=+nooddspreg is not currently permitted for a " + "the O32 ABI.", + false); + if (hasMips32r6()) { StringRef ISA = hasMips64r6() ? 
"MIPS64r6" : "MIPS32r6"; @@ -167,6 +187,29 @@ MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, return OptLevel >= CodeGenOpt::Aggressive; } +MipsSubtarget & +MipsSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine *TM) { + std::string CPUName = selectMipsCPU(TargetTriple, CPU); + + // Parse features string. + ParseSubtargetFeatures(CPUName, FS); + // Initialize scheduling itinerary for the specified CPU. + InstrItins = getInstrItineraryForCPU(CPUName); + + if (InMips16Mode && !TM->Options.UseSoftFloat) { + // Hard float for mips16 means essentially to compile as soft float + // but to use a runtime library for soft float that is written with + // native mips32 floating point instructions (those runtime routines + // run in mips32 hard float mode). + TM->Options.UseSoftFloat = true; + TM->Options.FloatABIType = FloatABI::Soft; + InMips16HardFloat = true; + } + + return *this; +} + //FIXME: This logic for reseting the subtarget along with // the helper classes can probably be simplified but there are a lot of // cases so we will defer rewriting this to later. @@ -186,14 +229,14 @@ void MipsSubtarget::resetSubtarget(MachineFunction *MF) { return; OverrideMode = Mips16Override; PreviousInMips16Mode = true; - TM->setHelperClassesMips16(); + setHelperClassesMips16(); return; } else if (ChangeToNoMips16) { if (!PreviousInMips16Mode) return; OverrideMode = NoMips16Override; PreviousInMips16Mode = false; - TM->setHelperClassesMipsSE(); + setHelperClassesMipsSE(); return; } else { if (OverrideMode == NoOverride) @@ -201,16 +244,52 @@ void MipsSubtarget::resetSubtarget(MachineFunction *MF) { OverrideMode = NoOverride; DEBUG(dbgs() << "back to default" << "\n"); if (inMips16Mode() && !PreviousInMips16Mode) { - TM->setHelperClassesMips16(); + setHelperClassesMips16(); PreviousInMips16Mode = true; } else if (!inMips16Mode() && PreviousInMips16Mode) { - TM->setHelperClassesMipsSE(); + setHelperClassesMipsSE(); PreviousInMips16Mode = false; } return; } } +void MipsSubtarget::setHelperClassesMips16() { + InstrInfoSE.swap(InstrInfo); + FrameLoweringSE.swap(FrameLowering); + TLInfoSE.swap(TLInfo); + if (!InstrInfo16) { + InstrInfo.reset(MipsInstrInfo::create(*TM)); + FrameLowering.reset(MipsFrameLowering::create(*TM, *this)); + TLInfo.reset(MipsTargetLowering::create(*TM)); + } else { + InstrInfo16.swap(InstrInfo); + FrameLowering16.swap(FrameLowering); + TLInfo16.swap(TLInfo); + } + assert(TLInfo && "null target lowering 16"); + assert(InstrInfo && "null instr info 16"); + assert(FrameLowering && "null frame lowering 16"); +} + +void MipsSubtarget::setHelperClassesMipsSE() { + InstrInfo16.swap(InstrInfo); + FrameLowering16.swap(FrameLowering); + TLInfo16.swap(TLInfo); + if (!InstrInfoSE) { + InstrInfo.reset(MipsInstrInfo::create(*TM)); + FrameLowering.reset(MipsFrameLowering::create(*TM, *this)); + TLInfo.reset(MipsTargetLowering::create(*TM)); + } else { + InstrInfoSE.swap(InstrInfo); + FrameLoweringSE.swap(FrameLowering); + TLInfoSE.swap(TLInfo); + } + assert(TLInfo && "null target lowering in SE"); + assert(InstrInfo && "null instr info SE"); + assert(FrameLowering && "null frame lowering SE"); +} + bool MipsSubtarget::mipsSEUsesSoftFloat() const { return TM->Options.UseSoftFloat && !InMips16HardFloat; } diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h index 373f481..a3dcf03 100644 --- a/lib/Target/Mips/MipsSubtarget.h +++ b/lib/Target/Mips/MipsSubtarget.h @@ -14,6 +14,12 @@ #ifndef MIPSSUBTARGET_H #define 
MIPSSUBTARGET_H +#include "MipsFrameLowering.h" +#include "MipsISelLowering.h" +#include "MipsInstrInfo.h" +#include "MipsJITInfo.h" +#include "MipsSelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -56,9 +62,16 @@ protected: // floating point registers instead of only using even ones. bool IsSingleFloat; + // IsFPXX - MIPS O32 modeless ABI. + bool IsFPXX; + // IsFP64bit - The target processor has 64-bit floating point registers. bool IsFP64bit; + /// Are odd single-precision registers permitted? + /// This corresponds to -modd-spreg and -mno-odd-spreg + bool UseOddSPReg; + // IsNan2008 - IEEE 754-2008 NaN encoding. bool IsNaN2008bit; @@ -132,6 +145,20 @@ protected: MipsTargetMachine *TM; Triple TargetTriple; + + const DataLayout DL; // Calculates type size & alignment + const MipsSelectionDAGInfo TSInfo; + MipsJITInfo JITInfo; + std::unique_ptr<const MipsInstrInfo> InstrInfo; + std::unique_ptr<const MipsFrameLowering> FrameLowering; + std::unique_ptr<const MipsTargetLowering> TLInfo; + std::unique_ptr<const MipsInstrInfo> InstrInfo16; + std::unique_ptr<const MipsFrameLowering> FrameLowering16; + std::unique_ptr<const MipsTargetLowering> TLInfo16; + std::unique_ptr<const MipsInstrInfo> InstrInfoSE; + std::unique_ptr<const MipsFrameLowering> FrameLoweringSE; + std::unique_ptr<const MipsTargetLowering> TLInfoSE; + public: bool enablePostRAScheduler(CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, @@ -142,6 +169,7 @@ public: bool isABI_N64() const { return MipsABI == N64; } bool isABI_N32() const { return MipsABI == N32; } bool isABI_O32() const { return MipsABI == O32; } + bool isABI_FPXX() const { return false; } // TODO: add check for FPXX unsigned getTargetABI() const { return MipsABI; } /// This constructor initializes the data members to match that @@ -154,23 +182,36 @@ public: /// subtarget options. Definition of function is auto generated by tblgen. 
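/// (This is the tblgen-generated decoder for the +feature,-feature string;
/// the initializeSubtargetDependencies() helper defined earlier in this
/// patch now funnels through it before looking up the itineraries.)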
void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + bool hasMips1() const { return MipsArchVersion >= Mips1; } bool hasMips2() const { return MipsArchVersion >= Mips2; } bool hasMips3() const { return MipsArchVersion >= Mips3; } + bool hasMips4() const { return MipsArchVersion >= Mips4; } + bool hasMips5() const { return MipsArchVersion >= Mips5; } bool hasMips4_32() const { return HasMips4_32; } bool hasMips4_32r2() const { return HasMips4_32r2; } - bool hasMips32() const { return MipsArchVersion >= Mips32; } - bool hasMips32r2() const { return MipsArchVersion == Mips32r2 || - MipsArchVersion == Mips64r2; } - bool hasMips32r6() const { return MipsArchVersion == Mips32r6 || - MipsArchVersion == Mips64r6; } + bool hasMips32() const { + return MipsArchVersion >= Mips32 && MipsArchVersion != Mips3 && + MipsArchVersion != Mips4 && MipsArchVersion != Mips5; + } + bool hasMips32r2() const { + return MipsArchVersion == Mips32r2 || MipsArchVersion == Mips32r6 || + MipsArchVersion == Mips64r2 || MipsArchVersion == Mips64r6; + } + bool hasMips32r6() const { + return MipsArchVersion == Mips32r6 || MipsArchVersion == Mips64r6; + } bool hasMips64() const { return MipsArchVersion >= Mips64; } - bool hasMips64r2() const { return MipsArchVersion == Mips64r2; } + bool hasMips64r2() const { + return MipsArchVersion == Mips64r2 || MipsArchVersion == Mips64r6; + } bool hasMips64r6() const { return MipsArchVersion == Mips64r6; } bool hasCnMips() const { return HasCnMips; } bool isLittle() const { return IsLittle; } + bool isFPXX() const { return IsFPXX; } bool isFP64bit() const { return IsFP64bit; } + bool useOddSPReg() const { return UseOddSPReg; } bool isNaN2008() const { return IsNaN2008bit; } bool isNotFP64bit() const { return !IsFP64bit; } bool isGP64bit() const { return IsGP64bit; } @@ -234,12 +275,31 @@ public: /// \brief Reset the subtarget for the Mips target. void resetSubtarget(MachineFunction *MF); + MipsSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS, + const TargetMachine *TM); + /// Does the system support unaligned memory access. /// /// MIPS32r6/MIPS64r6 require full unaligned access support but does not /// specify which component of the system provides it. Hardware, software, and /// hybrid implementations are all valid. bool systemSupportsUnalignedAccess() const { return hasMips32r6(); } + + // Set helper classes + void setHelperClassesMips16(); + void setHelperClassesMipsSE(); + + MipsJITInfo *getJITInfo() { return &JITInfo; } + const MipsSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + const DataLayout *getDataLayout() const { return &DL; } + const MipsInstrInfo *getInstrInfo() const { return InstrInfo.get(); } + const TargetFrameLowering *getFrameLowering() const { + return FrameLowering.get(); + } + const MipsRegisterInfo *getRegisterInfo() const { + return &InstrInfo->getRegisterInfo(); + } + const MipsTargetLowering *getTargetLowering() const { return TLInfo.get(); } }; } // End llvm namespace diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp index 984c58e..425dbf1 100644 --- a/lib/Target/Mips/MipsTargetMachine.cpp +++ b/lib/Target/Mips/MipsTargetMachine.cpp @@ -45,93 +45,21 @@ extern "C" void LLVMInitializeMipsTarget() { RegisterTargetMachine<MipselTargetMachine> B(TheMips64elTarget); } -static std::string computeDataLayout(const MipsSubtarget &ST) { - std::string Ret = ""; - - // There are both little and big endian mips. 
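// (computeDataLayout leaves this file with its content unchanged; it now
// lives in MipsSubtarget.cpp, where the subtarget constructor evaluates it
// before building the DataLayout-dependent TSInfo member. More broadly, the
// MipsTargetMachine hunks here invert ownership: the subtarget now owns the
// InstrInfo/FrameLowering/TLInfo helpers and the target machine merely
// forwards its getters. A minimal standalone sketch of that pattern,
// assuming simplified stand-in types rather than the real LLVM classes:)

#include <cassert>
#include <memory>

struct InstrInfo { virtual ~InstrInfo() = default; };
struct SEInstrInfo : InstrInfo {};
struct Mips16InstrInfo : InstrInfo {};

// The subtarget owns the helpers and can swap variants per function, a
// simplified version of setHelperClassesMips16()/setHelperClassesMipsSE().
class Subtarget {
  std::unique_ptr<InstrInfo> Current{new SEInstrInfo()};
  std::unique_ptr<InstrInfo> Stashed16;

public:
  const InstrInfo *getInstrInfo() const { return Current.get(); }

  void switchToMips16() {
    if (!Stashed16)
      Stashed16.reset(new Mips16InstrInfo());
    Current.swap(Stashed16); // stash the SE helper, activate the 16 one
    assert(Current && "null instr info 16");
  }
};

// The target machine owns only the subtarget and forwards every getter.
class TargetMachine {
  Subtarget ST;

public:
  const Subtarget *getSubtargetImpl() const { return &ST; }
  const InstrInfo *getInstrInfo() const {
    return getSubtargetImpl()->getInstrInfo();
  }
};

int main() {
  TargetMachine TM;
  return TM.getInstrInfo() ? 0 : 1;
}

// Keeping the stashed variant alive rather than reconstructing it is what
// makes the per-function Mips16/MipsSE switching in resetSubtarget() cheap.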
- if (ST.isLittle()) - Ret += "e"; - else - Ret += "E"; - - Ret += "-m:m"; - - // Pointers are 32 bit on some ABIs. - if (!ST.isABI_N64()) - Ret += "-p:32:32"; - - // 8 and 16 bit integers only need no have natural alignment, but try to - // align them to 32 bits. 64 bit integers have natural alignment. - Ret += "-i8:8:32-i16:16:32-i64:64"; - - // 32 bit registers are always available and the stack is at least 64 bit - // aligned. On N64 64 bit registers are also available and the stack is - // 128 bit aligned. - if (ST.isABI_N64() || ST.isABI_N32()) - Ret += "-n32:64-S128"; - else - Ret += "-n32-S64"; - - return Ret; -} - // On function prologue, the stack is created by decrementing // its pointer. Once decremented, all references are done with positive // offset from the stack/frame pointer, using StackGrowsUp enables // an easier handling. // Using CodeModel::Large enables different CALL behavior. -MipsTargetMachine:: -MipsTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool isLittle) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, isLittle, RM, this), - DL(computeDataLayout(Subtarget)), - InstrInfo(MipsInstrInfo::create(*this)), - FrameLowering(MipsFrameLowering::create(*this, Subtarget)), - TLInfo(MipsTargetLowering::create(*this)), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()), JITInfo() { +MipsTargetMachine::MipsTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool isLittle) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, isLittle, RM, this) { initAsmInfo(); } - -void MipsTargetMachine::setHelperClassesMips16() { - InstrInfoSE.swap(InstrInfo); - FrameLoweringSE.swap(FrameLowering); - TLInfoSE.swap(TLInfo); - if (!InstrInfo16) { - InstrInfo.reset(MipsInstrInfo::create(*this)); - FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget)); - TLInfo.reset(MipsTargetLowering::create(*this)); - } else { - InstrInfo16.swap(InstrInfo); - FrameLowering16.swap(FrameLowering); - TLInfo16.swap(TLInfo); - } - assert(TLInfo && "null target lowering 16"); - assert(InstrInfo && "null instr info 16"); - assert(FrameLowering && "null frame lowering 16"); -} - -void MipsTargetMachine::setHelperClassesMipsSE() { - InstrInfo16.swap(InstrInfo); - FrameLowering16.swap(FrameLowering); - TLInfo16.swap(TLInfo); - if (!InstrInfoSE) { - InstrInfo.reset(MipsInstrInfo::create(*this)); - FrameLowering.reset(MipsFrameLowering::create(*this, Subtarget)); - TLInfo.reset(MipsTargetLowering::create(*this)); - } else { - InstrInfoSE.swap(InstrInfo); - FrameLoweringSE.swap(FrameLowering); - TLInfoSE.swap(TLInfo); - } - assert(TLInfo && "null target lowering in SE"); - assert(InstrInfo && "null instr info SE"); - assert(FrameLowering && "null frame lowering SE"); -} void MipsebTargetMachine::anchor() { } MipsebTargetMachine:: diff --git a/lib/Target/Mips/MipsTargetMachine.h b/lib/Target/Mips/MipsTargetMachine.h index a5aa39b..a0e7d43 100644 --- a/lib/Target/Mips/MipsTargetMachine.h +++ b/lib/Target/Mips/MipsTargetMachine.h @@ -14,15 +14,9 @@ #ifndef MIPSTARGETMACHINE_H #define MIPSTARGETMACHINE_H -#include "MipsFrameLowering.h" -#include "MipsISelLowering.h" -#include "MipsInstrInfo.h" -#include "MipsJITInfo.h" -#include "MipsSelectionDAGInfo.h" #include "MipsSubtarget.h" #include 
"llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" @@ -32,68 +26,47 @@ class MipsRegisterInfo; class MipsTargetMachine : public LLVMTargetMachine { MipsSubtarget Subtarget; - const DataLayout DL; // Calculates type size & alignment - std::unique_ptr<const MipsInstrInfo> InstrInfo; - std::unique_ptr<const MipsFrameLowering> FrameLowering; - std::unique_ptr<const MipsTargetLowering> TLInfo; - std::unique_ptr<const MipsInstrInfo> InstrInfo16; - std::unique_ptr<const MipsFrameLowering> FrameLowering16; - std::unique_ptr<const MipsTargetLowering> TLInfo16; - std::unique_ptr<const MipsInstrInfo> InstrInfoSE; - std::unique_ptr<const MipsFrameLowering> FrameLoweringSE; - std::unique_ptr<const MipsTargetLowering> TLInfoSE; - MipsSelectionDAGInfo TSInfo; - const InstrItineraryData &InstrItins; - MipsJITInfo JITInfo; public: - MipsTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool isLittle); + MipsTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, + const TargetOptions &Options, Reloc::Model RM, + CodeModel::Model CM, CodeGenOpt::Level OL, bool isLittle); virtual ~MipsTargetMachine() {} void addAnalysisPasses(PassManagerBase &PM) override; - const MipsInstrInfo *getInstrInfo() const override - { return InstrInfo.get(); } - const TargetFrameLowering *getFrameLowering() const override - { return FrameLowering.get(); } - const MipsSubtarget *getSubtargetImpl() const override - { return &Subtarget; } - const DataLayout *getDataLayout() const override - { return &DL;} - + const MipsInstrInfo *getInstrInfo() const override { + return getSubtargetImpl()->getInstrInfo(); + } + const TargetFrameLowering *getFrameLowering() const override { + return getSubtargetImpl()->getFrameLowering(); + } + const MipsSubtarget *getSubtargetImpl() const override { return &Subtarget; } const InstrItineraryData *getInstrItineraryData() const override { - return Subtarget.inMips16Mode() ? nullptr : &InstrItins; + return Subtarget.inMips16Mode() + ? nullptr + : &getSubtargetImpl()->getInstrItineraryData(); + } + MipsJITInfo *getJITInfo() override { + return Subtarget.getJITInfo(); } - - MipsJITInfo *getJITInfo() override { return &JITInfo; } - const MipsRegisterInfo *getRegisterInfo() const override { - return &InstrInfo->getRegisterInfo(); + return getSubtargetImpl()->getRegisterInfo(); } - const MipsTargetLowering *getTargetLowering() const override { - return TLInfo.get(); + return getSubtargetImpl()->getTargetLowering(); + } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); } - const MipsSelectionDAGInfo* getSelectionDAGInfo() const override { - return &TSInfo; + return getSubtargetImpl()->getSelectionDAGInfo(); } // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &JCE) override; - - // Set helper classes - void setHelperClassesMips16(); - - void setHelperClassesMipsSE(); - - }; /// MipsebTargetMachine - Mips32/64 big endian target machine. 
diff --git a/lib/Target/Mips/MipsTargetStreamer.h b/lib/Target/Mips/MipsTargetStreamer.h index 4ad37ac..99f7d4c 100644 --- a/lib/Target/Mips/MipsTargetStreamer.h +++ b/lib/Target/Mips/MipsTargetStreamer.h @@ -12,46 +12,83 @@ #include "llvm/MC/MCELFStreamer.h" #include "llvm/MC/MCStreamer.h" +#include "MCTargetDesc/MipsABIFlagsSection.h" namespace llvm { -class MipsTargetStreamer : public MCTargetStreamer { - virtual void anchor(); +struct MipsABIFlagsSection; + +class MipsTargetStreamer : public MCTargetStreamer { public: MipsTargetStreamer(MCStreamer &S); - virtual void emitDirectiveSetMicroMips() = 0; - virtual void emitDirectiveSetNoMicroMips() = 0; - virtual void emitDirectiveSetMips16() = 0; - virtual void emitDirectiveSetNoMips16() = 0; - - virtual void emitDirectiveSetReorder() = 0; - virtual void emitDirectiveSetNoReorder() = 0; - virtual void emitDirectiveSetMacro() = 0; - virtual void emitDirectiveSetNoMacro() = 0; - virtual void emitDirectiveSetAt() = 0; - virtual void emitDirectiveSetNoAt() = 0; - virtual void emitDirectiveEnd(StringRef Name) = 0; - - virtual void emitDirectiveEnt(const MCSymbol &Symbol) = 0; - virtual void emitDirectiveAbiCalls() = 0; - virtual void emitDirectiveNaN2008() = 0; - virtual void emitDirectiveNaNLegacy() = 0; - virtual void emitDirectiveOptionPic0() = 0; - virtual void emitDirectiveOptionPic2() = 0; + virtual void emitDirectiveSetMicroMips(); + virtual void emitDirectiveSetNoMicroMips(); + virtual void emitDirectiveSetMips16(); + virtual void emitDirectiveSetNoMips16(); + + virtual void emitDirectiveSetReorder(); + virtual void emitDirectiveSetNoReorder(); + virtual void emitDirectiveSetMacro(); + virtual void emitDirectiveSetNoMacro(); + virtual void emitDirectiveSetAt(); + virtual void emitDirectiveSetNoAt(); + virtual void emitDirectiveEnd(StringRef Name); + + virtual void emitDirectiveEnt(const MCSymbol &Symbol); + virtual void emitDirectiveAbiCalls(); + virtual void emitDirectiveNaN2008(); + virtual void emitDirectiveNaNLegacy(); + virtual void emitDirectiveOptionPic0(); + virtual void emitDirectiveOptionPic2(); virtual void emitFrame(unsigned StackReg, unsigned StackSize, - unsigned ReturnReg) = 0; - virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff) = 0; - virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff) = 0; + unsigned ReturnReg); + virtual void emitMask(unsigned CPUBitmask, int CPUTopSavedRegOff); + virtual void emitFMask(unsigned FPUBitmask, int FPUTopSavedRegOff); - virtual void emitDirectiveSetMips32R2() = 0; - virtual void emitDirectiveSetMips64() = 0; - virtual void emitDirectiveSetMips64R2() = 0; - virtual void emitDirectiveSetDsp() = 0; + virtual void emitDirectiveSetMips32R2(); + virtual void emitDirectiveSetMips64(); + virtual void emitDirectiveSetMips64R2(); + virtual void emitDirectiveSetDsp(); // PIC support - virtual void emitDirectiveCpload(unsigned RegNo) = 0; + virtual void emitDirectiveCpload(unsigned RegNo); virtual void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, - const MCSymbol &Sym, bool IsReg) = 0; + const MCSymbol &Sym, bool IsReg); + + /// Emit a '.module fp=value' directive using the given values. + /// Updates the .MIPS.abiflags section + virtual void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value, + bool Is32BitABI) { + ABIFlagsSection.setFpABI(Value, Is32BitABI); + } + + /// Emit a '.module fp=value' directive using the current values of the + /// .MIPS.abiflags section. 
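/// (It simply replays whatever FP ABI value was last recorded in
/// ABIFlagsSection, so the textual directive and the emitted
/// .MIPS.abiflags section cannot drift apart.)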
+ void emitDirectiveModuleFP() { + emitDirectiveModuleFP(ABIFlagsSection.getFpABI(), + ABIFlagsSection.Is32BitABI); + } + + virtual void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI); + virtual void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value){}; + virtual void emitMipsAbiFlags(){}; + void setCanHaveModuleDir(bool Can) { canHaveModuleDirective = Can; } + bool getCanHaveModuleDir() { return canHaveModuleDirective; } + + // This method enables template classes to set internal abi flags + // structure values. + template <class PredicateLibrary> + void updateABIInfo(const PredicateLibrary &P) { + ABIFlagsSection.setAllFromPredicates(P); + } + + MipsABIFlagsSection &getABIFlagsSection() { return ABIFlagsSection; } + +protected: + MipsABIFlagsSection ABIFlagsSection; + +private: + bool canHaveModuleDirective; }; // This part is for ascii assembly output @@ -93,6 +130,13 @@ public: virtual void emitDirectiveCpload(unsigned RegNo); void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg) override; + + // ABI Flags + void emitDirectiveModuleFP(MipsABIFlagsSection::FpABIKind Value, + bool Is32BitABI) override; + void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override; + void emitDirectiveSetFp(MipsABIFlagsSection::FpABIKind Value) override; + void emitMipsAbiFlags() override; }; // This part is for ELF object output @@ -144,6 +188,10 @@ public: void emitDirectiveCpsetup(unsigned RegNo, int RegOrOffset, const MCSymbol &Sym, bool IsReg) override; + // ABI Flags + void emitDirectiveModuleOddSPReg(bool Enabled, bool IsO32ABI) override; + void emitMipsAbiFlags() override; + protected: bool isO32() const { return STI.getFeatureBits() & Mips::FeatureO32; } bool isN32() const { return STI.getFeatureBits() & Mips::FeatureN32; } diff --git a/lib/Target/NVPTX/NVPTX.td b/lib/Target/NVPTX/NVPTX.td index d78b4e8..93fabf6 100644 --- a/lib/Target/NVPTX/NVPTX.td +++ b/lib/Target/NVPTX/NVPTX.td @@ -34,12 +34,18 @@ def SM30 : SubtargetFeature<"sm_30", "SmVersion", "30", "Target SM 3.0">; def SM35 : SubtargetFeature<"sm_35", "SmVersion", "35", "Target SM 3.5">; +def SM50 : SubtargetFeature<"sm_50", "SmVersion", "50", + "Target SM 5.0">; // PTX Versions def PTX30 : SubtargetFeature<"ptx30", "PTXVersion", "30", "Use PTX version 3.0">; def PTX31 : SubtargetFeature<"ptx31", "PTXVersion", "31", "Use PTX version 3.1">; +def PTX32 : SubtargetFeature<"ptx32", "PTXVersion", "32", + "Use PTX version 3.2">; +def PTX40 : SubtargetFeature<"ptx40", "PTXVersion", "40", + "Use PTX version 4.0">; //===----------------------------------------------------------------------===// // NVPTX supported processors. 
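// (sm_50, the first Maxwell target, is appended to this list in the next
// hunk; it is only expressible in PTX ISA 4.0 or later, which is why the
// ptx32/ptx40 version features are added above.)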
@@ -52,6 +58,7 @@ def : Proc<"sm_20", [SM20]>; def : Proc<"sm_21", [SM21]>; def : Proc<"sm_30", [SM30]>; def : Proc<"sm_35", [SM35]>; +def : Proc<"sm_50", [SM50]>; def NVPTXInstrInfo : InstrInfo { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 4ec575f..decf02a 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -734,23 +734,7 @@ void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { << " func_retval0"; } else { if ((Ty->getTypeID() == Type::StructTyID) || isa<VectorType>(Ty)) { - SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*TLI, Ty, vtparts); - unsigned totalsz = 0; - for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { - unsigned elems = 1; - EVT elemtype = vtparts[i]; - if (vtparts[i].isVector()) { - elems = vtparts[i].getVectorNumElements(); - elemtype = vtparts[i].getVectorElementType(); - } - for (unsigned j = 0, je = elems; j != je; ++j) { - unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) - sz = 8; - totalsz += sz / 8; - } - } + unsigned totalsz = TD->getTypeAllocSize(Ty); unsigned retAlignment = 0; if (!llvm::getAlign(*F, 0, retAlignment)) retAlignment = TD->getABITypeAlignment(Ty); @@ -1321,6 +1305,10 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { // external global variable with init -> .visible // external without init -> .extern // appending -> not allowed, assert. +// for any linkage other than +// internal, private, linker_private, +// linker_private_weak, linker_private_weak_def_auto, +// we emit -> .weak. void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, raw_ostream &O) { @@ -1346,6 +1334,9 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, msg.append(V->getName().str()); msg.append("has unsupported appending linkage type"); llvm_unreachable(msg.c_str()); + } else if (!V->hasInternalLinkage() && + !V->hasPrivateLinkage()) { + O << ".weak "; } } } @@ -1356,10 +1347,15 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, // Skip meta data if (GVar->hasSection()) { - if (GVar->getSection() == "llvm.metadata") + if (GVar->getSection() == StringRef("llvm.metadata")) return; } + // Skip LLVM intrinsic global variables + if (GVar->getName().startswith("llvm.") || + GVar->getName().startswith("nvvm.")) + return; + const DataLayout *TD = TM.getDataLayout(); // GlobalVariables are always constant pointers themselves. @@ -1371,6 +1367,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, O << ".visible "; else O << ".extern "; + } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() || + GVar->hasAvailableExternallyLinkage() || + GVar->hasCommonLinkage()) { + O << ".weak "; } if (llvm::isTexture(*GVar)) { @@ -1438,7 +1438,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, O << "linear"; break; case 2: - assert(0 && "Anisotropic filtering is not supported"); + llvm_unreachable("Anisotropic filtering is not supported"); default: O << "nearest"; break; @@ -1480,6 +1480,11 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, O << "."; emitPTXAddressSpace(PTy->getAddressSpace(), O); + + if (isManaged(*GVar)) { + O << " .attribute(.managed)"; + } + if (GVar->getAlignment() == 0) O << " .align " << (int) TD->getPrefTypeAlignment(ETy); else @@ -1497,13 +1502,24 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, // Ptx allows variable initilization only for constant and global state // spaces. 
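// (Initializers in any other state space, e.g. .shared or .local, are
// rejected below with report_fatal_error unless they are zero; the
// frontend zero-initializes variables that lack an explicit initial value,
// so that common case must stay silent.)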
- if (((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || - (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && - GVar->hasInitializer()) { - const Constant *Initializer = GVar->getInitializer(); - if (!Initializer->isNullValue()) { - O << " = "; - printScalarConstant(Initializer, O); + if (GVar->hasInitializer()) { + if ((PTy->getAddressSpace() == llvm::ADDRESS_SPACE_GLOBAL) || + (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) { + const Constant *Initializer = GVar->getInitializer(); + // 'undef' is treated as there is no value spefied. + if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) { + O << " = "; + printScalarConstant(Initializer, O); + } + } else { + // The frontend adds zero-initializer to variables that don't have an + // initial value, so skip warning for this case. + if (!GVar->getInitializer()->isNullValue()) { + std::string warnMsg = "initial value of '" + GVar->getName().str() + + "' is not allowed in addrspace(" + + llvm::utostr_32(PTy->getAddressSpace()) + ")"; + report_fatal_error(warnMsg.c_str()); + } } } } else { @@ -1562,7 +1578,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, } break; default: - assert(0 && "type not supported yet"); + llvm_unreachable("type not supported yet"); } } @@ -1682,7 +1698,7 @@ void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, O << "]"; break; default: - assert(0 && "type not supported yet"); + llvm_unreachable("type not supported yet"); } return; } diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.cpp b/lib/Target/NVPTX/NVPTXFrameLowering.cpp index 9030584..8b088412 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.cpp +++ b/lib/Target/NVPTX/NVPTXFrameLowering.cpp @@ -26,6 +26,10 @@ using namespace llvm; +NVPTXFrameLowering::NVPTXFrameLowering(NVPTXSubtarget &STI) + : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), + is64bit(STI.is64Bit()) {} + bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; } void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { @@ -43,17 +47,21 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const { // cvta.local %SP, %SPL; if (is64bit) { unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int64RegsRegClass); - MachineInstr *MI = BuildMI( - MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64), - NVPTX::VRFrame).addReg(LocalReg); - BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64), + MachineInstr *MI = + BuildMI(MBB, MBBI, dl, + MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes_64), + NVPTX::VRFrame).addReg(LocalReg); + BuildMI(MBB, MI, dl, + MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR_64), LocalReg).addImm(MF.getFunctionNumber()); } else { unsigned LocalReg = MRI.createVirtualRegister(&NVPTX::Int32RegsRegClass); - MachineInstr *MI = BuildMI( - MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes), - NVPTX::VRFrame).addReg(LocalReg); - BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR), + MachineInstr *MI = + BuildMI(MBB, MBBI, dl, + MF.getTarget().getInstrInfo()->get(NVPTX::cvta_local_yes), + NVPTX::VRFrame).addReg(LocalReg); + BuildMI(MBB, MI, dl, + MF.getTarget().getInstrInfo()->get(NVPTX::MOV_DEPOT_ADDR), LocalReg).addImm(MF.getFunctionNumber()); } } diff --git a/lib/Target/NVPTX/NVPTXFrameLowering.h b/lib/Target/NVPTX/NVPTXFrameLowering.h index 2ae6d72..56fb673 100644 --- a/lib/Target/NVPTX/NVPTXFrameLowering.h +++ b/lib/Target/NVPTX/NVPTXFrameLowering.h @@ -17,16 +17,12 @@ #include 
"llvm/Target/TargetFrameLowering.h" namespace llvm { -class NVPTXTargetMachine; - +class NVPTXSubtarget; class NVPTXFrameLowering : public TargetFrameLowering { - NVPTXTargetMachine &tm; bool is64bit; public: - explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit) - : TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm), - is64bit(_is64bit) {} + explicit NVPTXFrameLowering(NVPTXSubtarget &STI); bool hasFP(const MachineFunction &MF) const override; void emitPrologue(MachineFunction &MF) const override; diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp index 023dd5e..faa9fdb 100644 --- a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -84,7 +84,7 @@ bool GenericToNVVM::runOnModule(Module &M) { GlobalVariable *GV = I++; if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC && !llvm::isTexture(*GV) && !llvm::isSurface(*GV) && - !GV->getName().startswith("llvm.")) { + !llvm::isSampler(*GV) && !GV->getName().startswith("llvm.")) { GlobalVariable *NewGV = new GlobalVariable( M, GV->getType()->getElementType(), GV->isConstant(), GV->getLinkage(), diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp index cd30880..0dfbf10 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp @@ -24,11 +24,14 @@ using namespace llvm; #define DEBUG_TYPE "nvptx-isel" -static cl::opt<int> -FMAContractLevel("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, - cl::desc("NVPTX Specific: FMA contraction (0: don't do it" - " 1: do it 2: do it aggressively"), - cl::init(2)); +unsigned FMAContractLevel = 0; + +static cl::opt<unsigned, true> +FMAContractLevelOpt("nvptx-fma-level", cl::ZeroOrMore, cl::Hidden, + cl::desc("NVPTX Specific: FMA contraction (0: don't do it" + " 1: do it 2: do it aggressively"), + cl::location(FMAContractLevel), + cl::init(2)); static cl::opt<int> UsePrecDivF32( "nvptx-prec-divf32", cl::ZeroOrMore, cl::Hidden, @@ -138,7 +141,7 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { case NVPTXISD::LDGV4: case NVPTXISD::LDUV2: case NVPTXISD::LDUV4: - ResNode = SelectLDGLDUVector(N); + ResNode = SelectLDGLDU(N); break; case NVPTXISD::StoreV2: case NVPTXISD::StoreV4: @@ -164,6 +167,9 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { case ISD::INTRINSIC_WO_CHAIN: ResNode = SelectIntrinsicNoChain(N); break; + case ISD::INTRINSIC_W_CHAIN: + ResNode = SelectIntrinsicChain(N); + break; case NVPTXISD::Tex1DFloatI32: case NVPTXISD::Tex1DFloatFloat: case NVPTXISD::Tex1DFloatFloatLevel: @@ -253,6 +259,12 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { case NVPTXISD::Suld3DV4I32Trap: ResNode = SelectSurfaceIntrinsic(N); break; + case ISD::AND: + case ISD::SRA: + case ISD::SRL: + // Try to select BFE + ResNode = SelectBFE(N); + break; case ISD::ADDRSPACECAST: ResNode = SelectAddrSpaceCast(N); break; @@ -264,6 +276,21 @@ SDNode *NVPTXDAGToDAGISel::Select(SDNode *N) { return SelectCode(N); } +SDNode *NVPTXDAGToDAGISel::SelectIntrinsicChain(SDNode *N) { + unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + switch (IID) { + default: + return NULL; + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_p: + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_p: + return SelectLDGLDU(N); + } +} + static unsigned int getCodeAddrSpace(MemSDNode *N, const NVPTXSubtarget &Subtarget) { const Value 
*Src = N->getMemOperand()->getValue(); @@ -981,22 +1008,101 @@ SDNode *NVPTXDAGToDAGISel::SelectLoadVector(SDNode *N) { return LD; } -SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { +SDNode *NVPTXDAGToDAGISel::SelectLDGLDU(SDNode *N) { SDValue Chain = N->getOperand(0); - SDValue Op1 = N->getOperand(1); + SDValue Op1; + MemSDNode *Mem; + bool IsLDG = true; + + // If this is an LDG intrinsic, the address is the third operand. Its its an + // LDG/LDU SD node (from custom vector handling), then its the second operand + if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) { + Op1 = N->getOperand(2); + Mem = cast<MemIntrinsicSDNode>(N); + unsigned IID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); + switch (IID) { + default: + return NULL; + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_p: + IsLDG = true; + break; + case Intrinsic::nvvm_ldu_global_f: + case Intrinsic::nvvm_ldu_global_i: + case Intrinsic::nvvm_ldu_global_p: + IsLDG = false; + break; + } + } else { + Op1 = N->getOperand(1); + Mem = cast<MemSDNode>(N); + } + unsigned Opcode; SDLoc DL(N); SDNode *LD; - MemSDNode *Mem = cast<MemSDNode>(N); SDValue Base, Offset, Addr; - EVT EltVT = Mem->getMemoryVT().getVectorElementType(); + EVT EltVT = Mem->getMemoryVT(); + if (EltVT.isVector()) { + EltVT = EltVT.getVectorElementType(); + } if (SelectDirectAddr(Op1, Addr)) { switch (N->getOpcode()) { default: return nullptr; + case ISD::INTRINSIC_W_CHAIN: + if (IsLDG) { + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return nullptr; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8avar; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16avar; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32avar; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64avar; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32avar; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64avar; + break; + } + } else { + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return nullptr; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8avar; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16avar; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32avar; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64avar; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32avar; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64avar; + break; + } + } + break; case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1092,6 +1198,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::INTRINSIC_W_CHAIN: + if (IsLDG) { + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return nullptr; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari64; + break; + } + } else { + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return nullptr; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari64; + break; + case MVT::i32: + 
Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari64; + break; + } + } + break; case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1181,6 +1336,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::INTRINSIC_W_CHAIN: + if (IsLDG) { + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return nullptr; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8ari; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16ari; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32ari; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64ari; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32ari; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64ari; + break; + } + } else { + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return nullptr; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8ari; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16ari; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32ari; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64ari; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32ari; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64ari; + break; + } + } + break; case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1276,6 +1480,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::INTRINSIC_W_CHAIN: + if (IsLDG) { + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return nullptr; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg64; + break; + } + } else { + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return nullptr; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg64; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg64; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg64; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg64; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg64; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg64; + break; + } + } + break; case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1365,6 +1618,55 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { switch (N->getOpcode()) { default: return nullptr; + case ISD::INTRINSIC_W_CHAIN: + if (IsLDG) { + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return nullptr; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i8areg; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i16areg; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i32areg; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_i64areg; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f32areg; + break; + 
case MVT::f64: + Opcode = NVPTX::INT_PTX_LDG_GLOBAL_f64areg; + break; + } + } else { + switch (EltVT.getSimpleVT().SimpleTy) { + default: + return nullptr; + case MVT::i8: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i8areg; + break; + case MVT::i16: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i16areg; + break; + case MVT::i32: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i32areg; + break; + case MVT::i64: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_i64areg; + break; + case MVT::f32: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f32areg; + break; + case MVT::f64: + Opcode = NVPTX::INT_PTX_LDU_GLOBAL_f64areg; + break; + } + } + break; case NVPTXISD::LDGV2: switch (EltVT.getSimpleVT().SimpleTy) { default: @@ -1457,7 +1759,7 @@ SDNode *NVPTXDAGToDAGISel::SelectLDGLDUVector(SDNode *N) { } MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1); - MemRefs0[0] = cast<MemSDNode>(N)->getMemOperand(); + MemRefs0[0] = Mem->getMemOperand(); cast<MachineSDNode>(LD)->setMemRefs(MemRefs0, MemRefs0 + 1); return LD; @@ -2959,6 +3261,214 @@ SDNode *NVPTXDAGToDAGISel::SelectSurfaceIntrinsic(SDNode *N) { return Ret; } +/// SelectBFE - Look for instruction sequences that can be made more efficient +/// by using the 'bfe' (bit-field extract) PTX instruction +SDNode *NVPTXDAGToDAGISel::SelectBFE(SDNode *N) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + SDValue Len; + SDValue Start; + SDValue Val; + bool IsSigned = false; + + if (N->getOpcode() == ISD::AND) { + // Canonicalize the operands + // We want 'and %val, %mask' + if (isa<ConstantSDNode>(LHS) && !isa<ConstantSDNode>(RHS)) { + std::swap(LHS, RHS); + } + + ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(RHS); + if (!Mask) { + // We need a constant mask on the RHS of the AND + return NULL; + } + + // Extract the mask bits + uint64_t MaskVal = Mask->getZExtValue(); + if (!isMask_64(MaskVal)) { + // We *could* handle shifted masks here, but doing so would require an + // 'and' operation to fix up the low-order bits so we would trade + // shr+and for bfe+and, which has the same throughput + return NULL; + } + + // How many bits are in our mask? + uint64_t NumBits = CountTrailingOnes_64(MaskVal); + Len = CurDAG->getTargetConstant(NumBits, MVT::i32); + + if (LHS.getOpcode() == ISD::SRL || LHS.getOpcode() == ISD::SRA) { + // We have a 'srl/and' pair, extract the effective start bit and length + Val = LHS.getNode()->getOperand(0); + Start = LHS.getNode()->getOperand(1); + ConstantSDNode *StartConst = dyn_cast<ConstantSDNode>(Start); + if (StartConst) { + uint64_t StartVal = StartConst->getZExtValue(); + // How many "good" bits do we have left? "good" is defined here as bits + // that exist in the original value, not shifted in. + uint64_t GoodBits = Start.getValueType().getSizeInBits() - StartVal; + if (NumBits > GoodBits) { + // Do not handle the case where bits have been shifted in. In theory + // we could handle this, but the cost is likely higher than just + // emitting the srl/and pair. + return NULL; + } + Start = CurDAG->getTargetConstant(StartVal, MVT::i32); + } else { + // Do not handle the case where the shift amount (can be zero if no srl + // was found) is not constant. We could handle this case, but it would + // require run-time logic that would be more expensive than just + // emitting the srl/and pair. + return NULL; + } + } else { + // Do not handle the case where the LHS of the and is not a shift. While + // it would be trivial to handle this case, it would just transform + // 'and' -> 'bfe', but 'and' has higher-throughput. 
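The and-of-shift case above boils down to a single unsigned bit-field extract: the shift amount becomes the start, and the count of trailing ones in the mask becomes the length. A minimal standalone sketch of that equivalence, using my own model of the bfe semantics rather than anything from this patch, and assuming 1 <= len and start + len <= 32 as the GoodBits check enforces:

#include <cassert>
#include <cstdint>

// Reference model of unsigned bit-field extract: take 'len' bits of 'val'
// starting at bit 'start'. Assumes 1 <= len and start + len <= 32, which
// mirrors the GoodBits restriction (no shifted-in bits).
static uint32_t bfe_u32(uint32_t val, uint32_t start, uint32_t len) {
  uint64_t mask = (uint64_t(1) << len) - 1; // 64-bit math so len == 32 is safe
  return uint32_t((val >> start) & mask);
}

int main() {
  uint32_t val = 0xDEADBEEF;
  // The selected pattern: (and (srl val, 8), 0xFFF), i.e. start = 8 and
  // len = CountTrailingOnes(0xFFF) = 12.
  assert(((val >> 8) & 0xFFFu) == bfe_u32(val, 8, 12));
  return 0;
}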
+ return NULL; + } + } else if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) { + if (LHS->getOpcode() == ISD::AND) { + ConstantSDNode *ShiftCnst = dyn_cast<ConstantSDNode>(RHS); + if (!ShiftCnst) { + // Shift amount must be constant + return NULL; + } + + uint64_t ShiftAmt = ShiftCnst->getZExtValue(); + + SDValue AndLHS = LHS->getOperand(0); + SDValue AndRHS = LHS->getOperand(1); + + // Canonicalize the AND to have the mask on the RHS + if (isa<ConstantSDNode>(AndLHS)) { + std::swap(AndLHS, AndRHS); + } + + ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(AndRHS); + if (!MaskCnst) { + // Mask must be constant + return NULL; + } + + uint64_t MaskVal = MaskCnst->getZExtValue(); + uint64_t NumZeros; + uint64_t NumBits; + if (isMask_64(MaskVal)) { + NumZeros = 0; + // The number of bits in the result bitfield will be the number of + // trailing ones (the AND) minus the number of bits we shift off + NumBits = CountTrailingOnes_64(MaskVal) - ShiftAmt; + } else if (isShiftedMask_64(MaskVal)) { + NumZeros = countTrailingZeros(MaskVal); + unsigned NumOnes = CountTrailingOnes_64(MaskVal >> NumZeros); + // The number of bits in the result bitfield will be the number of + // trailing zeros plus the number of set bits in the mask minus the + // number of bits we shift off + NumBits = NumZeros + NumOnes - ShiftAmt; + } else { + // This is not a mask we can handle + return NULL; + } + + if (ShiftAmt < NumZeros) { + // Handling this case would require extra logic that would make this + // transformation non-profitable + return NULL; + } + + Val = AndLHS; + Start = CurDAG->getTargetConstant(ShiftAmt, MVT::i32); + Len = CurDAG->getTargetConstant(NumBits, MVT::i32); + } else if (LHS->getOpcode() == ISD::SHL) { + // Here, we have a pattern like: + // + // (sra (shl val, NN), MM) + // or + // (srl (shl val, NN), MM) + // + // If MM >= NN, we can efficiently optimize this with bfe + Val = LHS->getOperand(0); + + SDValue ShlRHS = LHS->getOperand(1); + ConstantSDNode *ShlCnst = dyn_cast<ConstantSDNode>(ShlRHS); + if (!ShlCnst) { + // Shift amount must be constant + return NULL; + } + uint64_t InnerShiftAmt = ShlCnst->getZExtValue(); + + SDValue ShrRHS = RHS; + ConstantSDNode *ShrCnst = dyn_cast<ConstantSDNode>(ShrRHS); + if (!ShrCnst) { + // Shift amount must be constant + return NULL; + } + uint64_t OuterShiftAmt = ShrCnst->getZExtValue(); + + // To avoid extra codegen and be profitable, we need Outer >= Inner + if (OuterShiftAmt < InnerShiftAmt) { + return NULL; + } + + // If the outer shift is more than the type size, we have no bitfield to + // extract (since we also check that the inner shift is <= the outer shift + // then this also implies that the inner shift is < the type size) + if (OuterShiftAmt >= Val.getValueType().getSizeInBits()) { + return NULL; + } + + Start = + CurDAG->getTargetConstant(OuterShiftAmt - InnerShiftAmt, MVT::i32); + Len = + CurDAG->getTargetConstant(Val.getValueType().getSizeInBits() - + OuterShiftAmt, MVT::i32); + + if (N->getOpcode() == ISD::SRA) { + // If we have a arithmetic right shift, we need to use the signed bfe + // variant + IsSigned = true; + } + } else { + // No can do... + return NULL; + } + } else { + // No can do... + return NULL; + } + + + unsigned Opc; + // For the BFE operations we form here from "and" and "srl", always use the + // unsigned variants. 
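For the shl/srl pair, the start and length computed above satisfy a simple identity: with inner shift NN and outer shift MM (MM >= NN) on a 32-bit value, the extracted field begins at bit MM - NN and is 32 - MM bits wide. A quick self-checking sketch of that identity (my model, not code from this patch):

#include <cassert>
#include <cstdint>

// Check that for MM >= NN on 32-bit values:
//   (val << NN) >> MM  ==  bfe.u(val, start = MM - NN, len = 32 - MM)
static uint32_t bfe_u32(uint32_t val, uint32_t start, uint32_t len) {
  uint64_t mask = (uint64_t(1) << len) - 1;
  return uint32_t((val >> start) & mask);
}

int main() {
  const uint32_t NN = 4, MM = 12; // inner shl, outer srl amounts
  const uint32_t vals[] = { 0u, 1u, 0xDEADBEEFu, 0xFFFFFFFFu };
  for (uint32_t val : vals) {
    uint32_t viaShifts = (val << NN) >> MM;
    uint32_t viaBfe = bfe_u32(val, MM - NN, 32 - MM);
    assert(viaShifts == viaBfe);
  }
  return 0;
}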
+ if (Val.getValueType() == MVT::i32) { + if (IsSigned) { + Opc = NVPTX::BFE_S32rii; + } else { + Opc = NVPTX::BFE_U32rii; + } + } else if (Val.getValueType() == MVT::i64) { + if (IsSigned) { + Opc = NVPTX::BFE_S64rii; + } else { + Opc = NVPTX::BFE_U64rii; + } + } else { + // We cannot handle this type + return NULL; + } + + SDValue Ops[] = { + Val, Start, Len + }; + + SDNode *Ret = + CurDAG->getMachineNode(Opc, SDLoc(N), N->getVTList(), Ops); + + return Ret; +} + // SelectDirectAddr - Match a direct address for DAG. // A direct address could be a globaladdress or externalsymbol. bool NVPTXDAGToDAGISel::SelectDirectAddr(SDValue N, SDValue &Address) { diff --git a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h index 11f92e7..c44ccb2 100644 --- a/lib/Target/NVPTX/NVPTXISelDAGToDAG.h +++ b/lib/Target/NVPTX/NVPTXISelDAGToDAG.h @@ -59,10 +59,11 @@ private: SDNode *Select(SDNode *N) override; SDNode *SelectIntrinsicNoChain(SDNode *N); + SDNode *SelectIntrinsicChain(SDNode *N); SDNode *SelectTexSurfHandle(SDNode *N); SDNode *SelectLoad(SDNode *N); SDNode *SelectLoadVector(SDNode *N); - SDNode *SelectLDGLDUVector(SDNode *N); + SDNode *SelectLDGLDU(SDNode *N); SDNode *SelectStore(SDNode *N); SDNode *SelectStoreVector(SDNode *N); SDNode *SelectLoadParam(SDNode *N); @@ -71,6 +72,7 @@ private: SDNode *SelectAddrSpaceCast(SDNode *N); SDNode *SelectTextureIntrinsic(SDNode *N); SDNode *SelectSurfaceIntrinsic(SDNode *N); + SDNode *SelectBFE(SDNode *N); inline SDValue getI32Imm(unsigned Imm) { return CurDAG->getTargetConstant(Imm, MVT::i32); diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp index b0943be..cb452ff 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.cpp +++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include <sstream> @@ -111,6 +112,7 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) MaxStoresPerMemmove = (unsigned) 0xFFFFFFFF; setBooleanContents(ZeroOrNegativeOneBooleanContent); + setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); // Jump is Expensive. Don't create extra control flow for 'and', 'or' // condition branches. @@ -130,7 +132,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) addRegisterClass(MVT::f64, &NVPTX::Float64RegsRegClass); // Operations not directly supported by NVPTX. 
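The sra form differs only in sign-extending from the top bit of the extracted field, which is what the signed bfe opcodes selected above provide. A sketch of those semantics under the same start/len assumptions; it also assumes an arithmetic right shift on negative int32_t, which mainstream compilers guarantee (implementation-defined before C++20):

#include <cassert>
#include <cstdint>

// Signed bit-field extract: take 'len' bits at 'start', then sign-extend from
// the field's top bit. Assumes 1 <= len and start + len <= 32.
static int32_t bfe_s32(uint32_t val, uint32_t start, uint32_t len) {
  uint32_t field = uint32_t((val >> start) & ((uint64_t(1) << len) - 1));
  uint32_t signBit = uint32_t(1) << (len - 1);
  return int32_t((field ^ signBit) - signBit); // two's-complement sign extend
}

int main() {
  const uint32_t NN = 4, MM = 12; // inner shl, outer sra amounts
  int32_t val = int32_t(0xDEADBEEF);
  // (sra (shl val, NN), MM) isolates the 32 - MM bits starting at MM - NN,
  // with sign extension from the field's most significant bit.
  int32_t viaShifts = int32_t(uint32_t(val) << NN) >> MM;
  assert(viaShifts == bfe_s32(uint32_t(val), MM - NN, 32 - MM));
  return 0;
}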
- setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i1, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i8, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i16, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); setOperationAction(ISD::BR_CC, MVT::f64, Expand); setOperationAction(ISD::BR_CC, MVT::i1, Expand); @@ -146,6 +154,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); + setOperationAction(ISD::SHL_PARTS, MVT::i32 , Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i32 , Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i32 , Custom); + setOperationAction(ISD::SHL_PARTS, MVT::i64 , Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i64 , Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i64 , Custom); + if (nvptxSubtarget.hasROT64()) { setOperationAction(ISD::ROTL, MVT::i64, Legal); setOperationAction(ISD::ROTR, MVT::i64, Legal); @@ -237,6 +252,13 @@ NVPTXTargetLowering::NVPTXTargetLowering(NVPTXTargetMachine &TM) setOperationAction(ISD::CTPOP, MVT::i32, Legal); setOperationAction(ISD::CTPOP, MVT::i64, Legal); + // We have some custom DAG combine patterns for these nodes + setTargetDAGCombine(ISD::ADD); + setTargetDAGCombine(ISD::AND); + setTargetDAGCombine(ISD::FADD); + setTargetDAGCombine(ISD::MUL); + setTargetDAGCombine(ISD::SHL); + // Now deduce the information based on the above mentioned // actions computeRegisterProperties(); @@ -328,6 +350,16 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { return "NVPTXISD::StoreV2"; case NVPTXISD::StoreV4: return "NVPTXISD::StoreV4"; + case NVPTXISD::FUN_SHFL_CLAMP: + return "NVPTXISD::FUN_SHFL_CLAMP"; + case NVPTXISD::FUN_SHFR_CLAMP: + return "NVPTXISD::FUN_SHFR_CLAMP"; + case NVPTXISD::IMAD: + return "NVPTXISD::IMAD"; + case NVPTXISD::MUL_WIDE_SIGNED: + return "NVPTXISD::MUL_WIDE_SIGNED"; + case NVPTXISD::MUL_WIDE_UNSIGNED: + return "NVPTXISD::MUL_WIDE_UNSIGNED"; case NVPTXISD::Tex1DFloatI32: return "NVPTXISD::Tex1DFloatI32"; case NVPTXISD::Tex1DFloatFloat: return "NVPTXISD::Tex1DFloatFloat"; case NVPTXISD::Tex1DFloatFloatLevel: @@ -441,8 +473,12 @@ const char *NVPTXTargetLowering::getTargetNodeName(unsigned Opcode) const { } } -bool NVPTXTargetLowering::shouldSplitVectorType(EVT VT) const { - return VT.getScalarType() == MVT::i1; +TargetLoweringBase::LegalizeTypeAction +NVPTXTargetLowering::getPreferredVectorAction(EVT VT) const { + if (VT.getVectorNumElements() != 1 && VT.getScalarType() == MVT::i1) + return TypeSplitVector; + + return TargetLoweringBase::getPreferredVectorAction(VT); } SDValue @@ -487,26 +523,12 @@ NVPTXTargetLowering::getPrototype(Type *retTy, const ArgListTy &Args, } else if (isa<PointerType>(retTy)) { O << ".param .b" << getPointerTy().getSizeInBits() << " _"; } else { - if ((retTy->getTypeID() == Type::StructTyID) || isa<VectorType>(retTy)) { - SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*this, retTy, vtparts); - unsigned totalsz = 0; - for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { - unsigned elems = 1; - EVT elemtype = vtparts[i]; - if (vtparts[i].isVector()) { - elems = vtparts[i].getVectorNumElements(); - elemtype = 
vtparts[i].getVectorElementType(); - } - // TODO: no need to loop - for (unsigned j = 0, je = elems; j != je; ++j) { - unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) - sz = 8; - totalsz += sz / 8; - } - } - O << ".param .align " << retAlignment << " .b8 _[" << totalsz << "]"; + if((retTy->getTypeID() == Type::StructTyID) || + isa<VectorType>(retTy)) { + O << ".param .align " + << retAlignment + << " .b8 _[" + << getDataLayout()->getTypeAllocSize(retTy) << "]"; } else { assert(false && "Unknown return type"); } @@ -675,7 +697,8 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, if (Ty->isAggregateType()) { // aggregate SmallVector<EVT, 16> vtparts; - ComputeValueVTs(*this, Ty, vtparts); + SmallVector<uint64_t, 16> Offsets; + ComputePTXValueVTs(*this, Ty, vtparts, &Offsets, 0); unsigned align = getArgumentAlignment(Callee, CS, Ty, paramCount + 1); // declare .param .align <align> .b8 .param<n>[<size>]; @@ -687,34 +710,26 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, DeclareParamOps); InFlag = Chain.getValue(1); - unsigned curOffset = 0; for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { - unsigned elems = 1; EVT elemtype = vtparts[j]; - if (vtparts[j].isVector()) { - elems = vtparts[j].getVectorNumElements(); - elemtype = vtparts[j].getVectorElementType(); - } - for (unsigned k = 0, ke = elems; k != ke; ++k) { - unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) - sz = 8; - SDValue StVal = OutVals[OIdx]; - if (elemtype.getSizeInBits() < 16) { - StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); - } - SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CopyParamOps[] = { Chain, - DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(curOffset, MVT::i32), - StVal, InFlag }; - Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, - CopyParamVTs, CopyParamOps, - elemtype, MachinePointerInfo()); - InFlag = Chain.getValue(1); - curOffset += sz / 8; - ++OIdx; + unsigned ArgAlign = GreatestCommonDivisor64(align, Offsets[j]); + if (elemtype.isInteger() && (sz < 8)) + sz = 8; + SDValue StVal = OutVals[OIdx]; + if (elemtype.getSizeInBits() < 16) { + StVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, StVal); } + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, + DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(Offsets[j], MVT::i32), + StVal, InFlag }; + Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, + CopyParamVTs, CopyParamOps, + elemtype, MachinePointerInfo(), + ArgAlign); + InFlag = Chain.getValue(1); + ++OIdx; } if (vtparts.size() > 0) --OIdx; @@ -899,13 +914,15 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } // struct or vector SmallVector<EVT, 16> vtparts; + SmallVector<uint64_t, 16> Offsets; const PointerType *PTy = dyn_cast<PointerType>(Args[i].Ty); assert(PTy && "Type of a byval parameter should be pointer"); - ComputeValueVTs(*this, PTy->getElementType(), vtparts); + ComputePTXValueVTs(*this, PTy->getElementType(), vtparts, &Offsets, 0); // declare .param .align <align> .b8 .param<n>[<size>]; unsigned sz = Outs[OIdx].Flags.getByValSize(); SDVTList DeclareParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + unsigned ArgAlign = Outs[OIdx].Flags.getByValAlign(); // The ByValAlign in the Outs[OIdx].Flags is alway set at this point, // so we don't need to worry about 
natural alignment or not. // See TargetLowering::LowerCallTo(). @@ -917,38 +934,28 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, Chain = DAG.getNode(NVPTXISD::DeclareParam, dl, DeclareParamVTs, DeclareParamOps); InFlag = Chain.getValue(1); - unsigned curOffset = 0; for (unsigned j = 0, je = vtparts.size(); j != je; ++j) { - unsigned elems = 1; EVT elemtype = vtparts[j]; - if (vtparts[j].isVector()) { - elems = vtparts[j].getVectorNumElements(); - elemtype = vtparts[j].getVectorElementType(); + int curOffset = Offsets[j]; + unsigned PartAlign = GreatestCommonDivisor64(ArgAlign, curOffset); + SDValue srcAddr = + DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], + DAG.getConstant(curOffset, getPointerTy())); + SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, + MachinePointerInfo(), false, false, false, + PartAlign); + if (elemtype.getSizeInBits() < 16) { + theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); } - for (unsigned k = 0, ke = elems; k != ke; ++k) { - unsigned sz = elemtype.getSizeInBits(); - if (elemtype.isInteger() && (sz < 8)) - sz = 8; - SDValue srcAddr = - DAG.getNode(ISD::ADD, dl, getPointerTy(), OutVals[OIdx], - DAG.getConstant(curOffset, getPointerTy())); - SDValue theVal = DAG.getLoad(elemtype, dl, tempChain, srcAddr, - MachinePointerInfo(), false, false, false, - 0); - if (elemtype.getSizeInBits() < 16) { - theVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, theVal); - } - SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); - SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), - DAG.getConstant(curOffset, MVT::i32), theVal, - InFlag }; - Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, - CopyParamOps, elemtype, - MachinePointerInfo()); + SDVTList CopyParamVTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue CopyParamOps[] = { Chain, DAG.getConstant(paramCount, MVT::i32), + DAG.getConstant(curOffset, MVT::i32), theVal, + InFlag }; + Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreParam, dl, CopyParamVTs, + CopyParamOps, elemtype, + MachinePointerInfo()); - InFlag = Chain.getValue(1); - curOffset += sz / 8; - } + InFlag = Chain.getValue(1); } ++paramCount; } @@ -1057,7 +1064,6 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Generate loads from param memory/moves from registers for result if (Ins.size() > 0) { - unsigned resoffset = 0; if (retTy && retTy->isVectorTy()) { EVT ObjectVT = getValueType(retTy); unsigned NumElts = ObjectVT.getVectorNumElements(); @@ -1066,14 +1072,15 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, ObjectVT) == NumElts && "Vector was not scalarized"); unsigned sz = EltVT.getSizeInBits(); - bool needTruncate = sz < 16 ? true : false; + bool needTruncate = sz < 8 ? 
true : false; if (NumElts == 1) { // Just a simple load SmallVector<EVT, 4> LoadRetVTs; - if (needTruncate) { - // If loading i1 result, generate - // load i16 + if (EltVT == MVT::i1 || EltVT == MVT::i8) { + // If loading i1/i8 result, generate + // load.b8 i16 + // if i1 // trunc i16 to i1 LoadRetVTs.push_back(MVT::i16); } else @@ -1097,9 +1104,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } else if (NumElts == 2) { // LoadV2 SmallVector<EVT, 4> LoadRetVTs; - if (needTruncate) { - // If loading i1 result, generate - // load i16 + if (EltVT == MVT::i1 || EltVT == MVT::i8) { + // If loading i1/i8 result, generate + // load.b8 i16 + // if i1 // trunc i16 to i1 LoadRetVTs.push_back(MVT::i16); LoadRetVTs.push_back(MVT::i16); @@ -1142,9 +1150,10 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, EVT VecVT = EVT::getVectorVT(F->getContext(), EltVT, VecSize); for (unsigned i = 0; i < NumElts; i += VecSize) { SmallVector<EVT, 8> LoadRetVTs; - if (needTruncate) { - // If loading i1 result, generate - // load i16 + if (EltVT == MVT::i1 || EltVT == MVT::i8) { + // If loading i1/i8 result, generate + // load.b8 i16 + // if i1 // trunc i16 to i1 for (unsigned j = 0; j < VecSize; ++j) LoadRetVTs.push_back(MVT::i16); @@ -1183,10 +1192,13 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } } else { SmallVector<EVT, 16> VTs; - ComputePTXValueVTs(*this, retTy, VTs); + SmallVector<uint64_t, 16> Offsets; + ComputePTXValueVTs(*this, retTy, VTs, &Offsets, 0); assert(VTs.size() == Ins.size() && "Bad value decomposition"); + unsigned RetAlign = getArgumentAlignment(Callee, CS, retTy, 0); for (unsigned i = 0, e = Ins.size(); i != e; ++i) { unsigned sz = VTs[i].getSizeInBits(); + unsigned AlignI = GreatestCommonDivisor64(RetAlign, Offsets[i]); bool needTruncate = sz < 8 ? true : false; if (VTs[i].isInteger() && (sz < 8)) sz = 8; @@ -1212,19 +1224,18 @@ SDValue NVPTXTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVector<SDValue, 4> LoadRetOps; LoadRetOps.push_back(Chain); LoadRetOps.push_back(DAG.getConstant(1, MVT::i32)); - LoadRetOps.push_back(DAG.getConstant(resoffset, MVT::i32)); + LoadRetOps.push_back(DAG.getConstant(Offsets[i], MVT::i32)); LoadRetOps.push_back(InFlag); SDValue retval = DAG.getMemIntrinsicNode( NVPTXISD::LoadParam, dl, DAG.getVTList(LoadRetVTs), LoadRetOps, - TheLoadType, MachinePointerInfo()); + TheLoadType, MachinePointerInfo(), AlignI); Chain = retval.getValue(1); InFlag = retval.getValue(2); SDValue Ret0 = retval.getValue(0); if (needTruncate) Ret0 = DAG.getNode(ISD::TRUNCATE, dl, Ins[i].VT, Ret0); InVals.push_back(Ret0); - resoffset += sz / 8; } } } @@ -1262,6 +1273,127 @@ NVPTXTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), Ops); } +/// LowerShiftRightParts - Lower SRL_PARTS, SRA_PARTS, which +/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift +/// amount, or +/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift +/// amount. 
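The operation being lowered is easy to state outside the DAG: a 64-bit logical right shift carried out on two 32-bit halves. A self-contained plain C++ model of the generic expansion described in the function's comments (a sketch; the amt >= 32 branch zeroes the high word explicitly, where the PTX code can rely on hardware shift clamping):

#include <cassert>
#include <cstdint>

// Generic SRL_PARTS expansion for {hi, lo} halves, valid for 0 < amt < 64:
//   if (amt >= 32)  lo' = hi >> (amt - 32),                 hi' = 0
//   else            lo' = (lo >> amt) | (hi << (32 - amt)), hi' = hi >> amt
static void srlParts(uint32_t hi, uint32_t lo, unsigned amt,
                     uint32_t &hiOut, uint32_t &loOut) {
  if (amt >= 32) {
    loOut = hi >> (amt - 32);
    hiOut = 0; // logical shift: the high word is all zeros
  } else {
    loOut = (lo >> amt) | (hi << (32 - amt));
    hiOut = hi >> amt;
  }
}

int main() {
  uint64_t v = 0x0123456789ABCDEFull;
  for (unsigned amt = 1; amt < 64; ++amt) {
    uint32_t hi, lo;
    srlParts(uint32_t(v >> 32), uint32_t(v), amt, hi, lo);
    assert(((uint64_t(hi) << 32) | lo) == (v >> amt));
  }
  return 0;
}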
+SDValue NVPTXTargetLowering::LowerShiftRightParts(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getNumOperands() == 3 && "Not a double-shift!"); + assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS); + + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + SDLoc dl(Op); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL; + + if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { + + // For 32bit and sm35, we can use the funnel shift 'shf' instruction. + // {dHi, dLo} = {aHi, aLo} >> Amt + // dHi = aHi >> Amt + // dLo = shf.r.clamp aLo, aHi, Amt + + SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + SDValue Lo = DAG.getNode(NVPTXISD::FUN_SHFR_CLAMP, dl, VT, ShOpLo, ShOpHi, + ShAmt); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, dl); + } + else { + + // {dHi, dLo} = {aHi, aLo} >> Amt + // - if (Amt>=size) then + // dLo = aHi >> (Amt-size) + // dHi = aHi >> Amt (this is either all 0 or all 1) + // else + // dLo = (aLo >>logic Amt) | (aHi << (size-Amt)) + // dHi = aHi >> Amt + + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(VTBits, MVT::i32), ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, ShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, + DAG.getConstant(VTBits, MVT::i32)); + SDValue Tmp2 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, RevShAmt); + SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue TrueVal = DAG.getNode(Opc, dl, VT, ShOpHi, ExtraShAmt); + + SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, + DAG.getConstant(VTBits, MVT::i32), ISD::SETGE); + SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt); + SDValue Lo = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, dl); + } +} + +/// LowerShiftLeftParts - Lower SHL_PARTS, which +/// 1) returns two i32 values and take a 2 x i32 value to shift plus a shift +/// amount, or +/// 2) returns two i64 values and take a 2 x i64 value to shift plus a shift +/// amount. +SDValue NVPTXTargetLowering::LowerShiftLeftParts(SDValue Op, + SelectionDAG &DAG) const { + assert(Op.getNumOperands() == 3 && "Not a double-shift!"); + assert(Op.getOpcode() == ISD::SHL_PARTS); + + EVT VT = Op.getValueType(); + unsigned VTBits = VT.getSizeInBits(); + SDLoc dl(Op); + SDValue ShOpLo = Op.getOperand(0); + SDValue ShOpHi = Op.getOperand(1); + SDValue ShAmt = Op.getOperand(2); + + if (VTBits == 32 && nvptxSubtarget.getSmVersion() >= 35) { + + // For 32bit and sm35, we can use the funnel shift 'shf' instruction. 
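On sm_35 and later the whole select-based expansion collapses to one funnel shift plus one ordinary shift. A sketch of the shf.l.clamp semantics as I read them (shift amount saturated at 32; result is the high word of the shifted 64-bit pair), checked against a plain 64-bit shift over the range where the single instruction suffices:

#include <cassert>
#include <cstdint>

// Model of 'shf.l.clamp d, a, b, n' as used for dHi: treat {b:a} = {hi:lo}
// as one 64-bit value, shift left by n saturated at 32, keep the high word.
// My reading of the PTX semantics, not compiler code.
static uint32_t shf_l_clamp(uint32_t lo, uint32_t hi, unsigned amt) {
  unsigned n = amt > 32 ? 32 : amt;       // .clamp saturates the amount
  uint64_t v = (uint64_t(hi) << 32) | lo;
  return uint32_t((v << n) >> 32);        // high word after the shift
}

int main() {
  uint64_t v = 0x0123456789ABCDEFull;
  uint32_t lo = uint32_t(v), hi = uint32_t(v >> 32);
  // For amounts in [0, 32], one funnel shift for the high word plus one
  // ordinary shl for the low word reproduces the full 64-bit shift.
  for (unsigned amt = 0; amt <= 32; ++amt) {
    uint32_t hiOut = shf_l_clamp(lo, hi, amt);
    uint32_t loOut = amt >= 32 ? 0 : lo << amt; // PTX shl clamps; C++ cannot
    assert(((uint64_t(hiOut) << 32) | loOut) == (v << amt));
  }
  return 0;
}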
+ // {dHi, dLo} = {aHi, aLo} << Amt + // dHi = shf.l.clamp aLo, aHi, Amt + // dLo = aLo << Amt + + SDValue Hi = DAG.getNode(NVPTXISD::FUN_SHFL_CLAMP, dl, VT, ShOpLo, ShOpHi, + ShAmt); + SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, dl); + } + else { + + // {dHi, dLo} = {aHi, aLo} << Amt + // - if (Amt>=size) then + // dLo = aLo << Amt (all 0) + // dHi = aLo << (Amt-size) + // else + // dLo = aLo << Amt + // dHi = (aHi << Amt) | (aLo >> (size-Amt)) + + SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, + DAG.getConstant(VTBits, MVT::i32), ShAmt); + SDValue Tmp1 = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt); + SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt, + DAG.getConstant(VTBits, MVT::i32)); + SDValue Tmp2 = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt); + SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2); + SDValue TrueVal = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt); + + SDValue Cmp = DAG.getSetCC(dl, MVT::i1, ShAmt, + DAG.getConstant(VTBits, MVT::i32), ISD::SETGE); + SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt); + SDValue Hi = DAG.getNode(ISD::SELECT, dl, VT, Cmp, TrueVal, FalseVal); + + SDValue Ops[2] = { Lo, Hi }; + return DAG.getMergeValues(Ops, dl); + } +} + SDValue NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { @@ -1282,6 +1414,11 @@ NVPTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return LowerSTORE(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::SHL_PARTS: + return LowerShiftLeftParts(Op, DAG); + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: + return LowerShiftRightParts(Op, DAG); default: llvm_unreachable("Custom lowering not defined for operation"); } @@ -1495,7 +1632,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( const Function *F = MF.getFunction(); const AttributeSet &PAL = F->getAttributes(); - const TargetLowering *TLI = nvTM->getTargetLowering(); + const TargetLowering *TLI = DAG.getTarget().getTargetLowering(); SDValue Root = DAG.getRoot(); std::vector<SDValue> OutChains; @@ -1549,8 +1686,7 @@ SDValue NVPTXTargetLowering::LowerFormalArguments( assert(vtparts.size() > 0 && "empty aggregate type not expected"); for (unsigned parti = 0, parte = vtparts.size(); parti != parte; ++parti) { - EVT partVT = vtparts[parti]; - InVals.push_back(DAG.getNode(ISD::UNDEF, dl, partVT)); + InVals.push_back(DAG.getNode(ISD::UNDEF, dl, Ins[InsIdx].VT)); ++InsIdx; } if (vtparts.size() > 0) @@ -1866,7 +2002,7 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, unsigned Offset = 0; EVT VecVT = - EVT::getVectorVT(F->getContext(), OutVals[0].getValueType(), VecSize); + EVT::getVectorVT(F->getContext(), EltVT, VecSize); unsigned PerStoreOffset = TD->getTypeAllocSize(VecVT.getTypeForEVT(F->getContext())); @@ -1925,12 +2061,10 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, } } else { SmallVector<EVT, 16> ValVTs; - // const_cast is necessary since we are still using an LLVM version from - // before the type system re-write.
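The corrected comment above (for Amt >= size, dHi takes aLo << (Amt - size) and dLo becomes zero) can be checked mechanically. A standalone model of the SHL_PARTS fallback, a sketch in the same style as the right-shift one:

#include <cassert>
#include <cstdint>

// 64-bit left shift on two 32-bit halves, valid for 0 < amt < 64, with the
// amt >= 32 case made explicit where PTX relies on shift clamping.
static void shlParts(uint32_t hi, uint32_t lo, unsigned amt,
                     uint32_t &hiOut, uint32_t &loOut) {
  if (amt >= 32) {
    hiOut = lo << (amt - 32); // dHi = aLo << (Amt - size)
    loOut = 0;                // dLo = aLo << Amt, i.e. all zeros
  } else {
    hiOut = (hi << amt) | (lo >> (32 - amt)); // dHi
    loOut = lo << amt;                        // dLo
  }
}

int main() {
  uint64_t v = 0x0123456789ABCDEFull;
  for (unsigned amt = 1; amt < 64; ++amt) {
    uint32_t hi, lo;
    shlParts(uint32_t(v >> 32), uint32_t(v), amt, hi, lo);
    assert(((uint64_t(hi) << 32) | lo) == (v << amt));
  }
  return 0;
}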
- ComputePTXValueVTs(*this, RetTy, ValVTs); + SmallVector<uint64_t, 16> Offsets; + ComputePTXValueVTs(*this, RetTy, ValVTs, &Offsets, 0); assert(ValVTs.size() == OutVals.size() && "Bad return value decomposition"); - unsigned SizeSoFar = 0; for (unsigned i = 0, e = Outs.size(); i != e; ++i) { SDValue theVal = OutVals[i]; EVT TheValType = theVal.getValueType(); @@ -1954,16 +2088,14 @@ NVPTXTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, else if (TmpVal.getValueType().getSizeInBits() < 16) TmpVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, TmpVal); - SDValue Ops[] = { Chain, DAG.getConstant(SizeSoFar, MVT::i32), TmpVal }; + SDValue Ops[] = { + Chain, + DAG.getConstant(Offsets[i], MVT::i32), + TmpVal }; Chain = DAG.getMemIntrinsicNode(NVPTXISD::StoreRetval, dl, DAG.getVTList(MVT::Other), Ops, TheStoreType, MachinePointerInfo()); - if(TheValType.isVector()) - SizeSoFar += - TheStoreType.getVectorElementType().getStoreSizeInBits() / 8; - else - SizeSoFar += TheStoreType.getStoreSizeInBits()/8; } } } @@ -2220,22 +2352,62 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic( case Intrinsic::nvvm_ldu_global_i: case Intrinsic::nvvm_ldu_global_f: - case Intrinsic::nvvm_ldu_global_p: + case Intrinsic::nvvm_ldu_global_p: { Info.opc = ISD::INTRINSIC_W_CHAIN; if (Intrinsic == Intrinsic::nvvm_ldu_global_i) Info.memVT = getValueType(I.getType()); - else if (Intrinsic == Intrinsic::nvvm_ldu_global_p) + else if(Intrinsic == Intrinsic::nvvm_ldu_global_p) + Info.memVT = getPointerTy(); + else Info.memVT = getValueType(I.getType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.vol = 0; + Info.readMem = true; + Info.writeMem = false; + + // alignment is available as metadata. + // Grab it and set the alignment. + assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata"); + MDNode *AlignMD = I.getMetadata("align"); + assert(AlignMD && "Must have a non-null MDNode"); + assert(AlignMD->getNumOperands() == 1 && "Must have a single operand"); + Value *Align = AlignMD->getOperand(0); + int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue(); + Info.align = Alignment; + + return true; + } + case Intrinsic::nvvm_ldg_global_i: + case Intrinsic::nvvm_ldg_global_f: + case Intrinsic::nvvm_ldg_global_p: { + + Info.opc = ISD::INTRINSIC_W_CHAIN; + if (Intrinsic == Intrinsic::nvvm_ldg_global_i) + Info.memVT = getValueType(I.getType()); + else if(Intrinsic == Intrinsic::nvvm_ldg_global_p) + Info.memVT = getPointerTy(); else - Info.memVT = MVT::f32; + Info.memVT = getValueType(I.getType()); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; Info.vol = 0; Info.readMem = true; Info.writeMem = false; - Info.align = 0; + + // alignment is available as metadata. + // Grab it and set the alignment. 
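The alignment lookup that follows is identical for the ldu and ldg cases, so it reads naturally as one helper. A sketch built from the exact 3.5-era API calls used in these hunks (getMetadata("align") returning an MDNode whose single operand is a ConstantInt); the helper name and the soft-failure return are mine, where the code here asserts instead:

#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
using namespace llvm;

// Recover the pointer alignment that the front end attaches as !align
// metadata on an ldg/ldu intrinsic call; 0 means unknown.
static unsigned getLdgLduAlignment(const CallInst &I) {
  if (MDNode *AlignMD = I.getMetadata("align"))
    if (AlignMD->getNumOperands() == 1)
      if (ConstantInt *CI = dyn_cast<ConstantInt>(AlignMD->getOperand(0)))
        return unsigned(CI->getZExtValue());
  return 0;
}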
+ assert(I.hasMetadataOtherThanDebugLoc() && "Must have alignment metadata"); + MDNode *AlignMD = I.getMetadata("align"); + assert(AlignMD && "Must have a non-null MDNode"); + assert(AlignMD->getNumOperands() == 1 && "Must have a single operand"); + Value *Align = AlignMD->getOperand(0); + int64_t Alignment = cast<ConstantInt>(Align)->getZExtValue(); + Info.align = Alignment; + return true; + } case Intrinsic::nvvm_tex_1d_v4f32_i32: case Intrinsic::nvvm_tex_1d_v4f32_f32: @@ -2427,6 +2599,7 @@ NVPTXTargetLowering::getConstraintType(const std::string &Constraint) const { switch (Constraint[0]) { default: break; + case 'b': case 'r': case 'h': case 'c': @@ -2446,6 +2619,8 @@ NVPTXTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { + case 'b': + return std::make_pair(0U, &NVPTX::Int1RegsRegClass); case 'c': return std::make_pair(0U, &NVPTX::Int16RegsRegClass); case 'h': @@ -2469,6 +2644,406 @@ unsigned NVPTXTargetLowering::getFunctionAlignment(const Function *) const { return 4; } +//===----------------------------------------------------------------------===// +// NVPTX DAG Combining +//===----------------------------------------------------------------------===// + +extern unsigned FMAContractLevel; + +/// PerformADDCombineWithOperands - Try DAG combinations for an ADD with +/// operands N0 and N1. This is a helper for PerformADDCombine that is +/// called with the default operands, and if that fails, with commuted +/// operands. +static SDValue PerformADDCombineWithOperands(SDNode *N, SDValue N0, SDValue N1, + TargetLowering::DAGCombinerInfo &DCI, + const NVPTXSubtarget &Subtarget, + CodeGenOpt::Level OptLevel) { + SelectionDAG &DAG = DCI.DAG; + // Skip non-integer, non-scalar case + EVT VT=N0.getValueType(); + if (VT.isVector()) + return SDValue(); + + // fold (add (mul a, b), c) -> (mad a, b, c) + // + if (N0.getOpcode() == ISD::MUL) { + assert (VT.isInteger()); + // For integer: + // Since integer multiply-add costs the same as integer multiply + // but is more costly than integer add, do the fusion only when + // the mul is only used in the add. + if (OptLevel==CodeGenOpt::None || VT != MVT::i32 || + !N0.getNode()->hasOneUse()) + return SDValue(); + + // Do the folding + return DAG.getNode(NVPTXISD::IMAD, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + else if (N0.getOpcode() == ISD::FMUL) { + if (VT == MVT::f32 || VT == MVT::f64) { + if (FMAContractLevel == 0) + return SDValue(); + + // For floating point: + // Do the fusion only when the mul has less than 5 uses and all + // are add. + // The heuristic is that if a use is not an add, then that use + // cannot be fused into fma, therefore mul is still needed anyway. + // If there are more than 4 uses, even if they are all add, fusing + // them will increase register pressue. + // + int numUses = 0; + int nonAddCount = 0; + for (SDNode::use_iterator UI = N0.getNode()->use_begin(), + UE = N0.getNode()->use_end(); + UI != UE; ++UI) { + numUses++; + SDNode *User = *UI; + if (User->getOpcode() != ISD::FADD) + ++nonAddCount; + } + if (numUses >= 5) + return SDValue(); + if (nonAddCount) { + int orderNo = N->getIROrder(); + int orderNo2 = N0.getNode()->getIROrder(); + // simple heuristics here for considering potential register + // pressure, the logics here is that the differnce are used + // to measure the distance between def and use, the longer distance + // more likely cause register pressure. 
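Separate from the register-pressure heuristic, the reason contraction is gated at all is numeric: an fma rounds once where mul-then-add rounds twice, so fusing can change results. A small demonstration of that, standard floating-point behavior rather than anything NVPTX-specific (compile with contraction disabled, e.g. -ffp-contract=off, so the separate form stays separate):

#include <cmath>
#include <cstdio>

int main() {
  double eps = std::ldexp(1.0, -27);       // 2^-27
  double a = 1.0 + eps, b = 1.0 - eps, c = -1.0;
  double separate = a * b + c;             // a*b rounds to 1.0, so this is 0.0
  double fused = std::fma(a, b, c);        // the exact -2^-54 survives
  std::printf("mul+add = %.17g\nfma     = %.17g\n", separate, fused);
  return 0;
}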
+ if (orderNo - orderNo2 < 500) + return SDValue(); + + // Now, check if at least one of the FMUL's operands is live beyond the node N, + // which guarantees that the FMA will not increase register pressure at node N. + bool opIsLive = false; + const SDNode *left = N0.getOperand(0).getNode(); + const SDNode *right = N0.getOperand(1).getNode(); + + if (dyn_cast<ConstantSDNode>(left) || dyn_cast<ConstantSDNode>(right)) + opIsLive = true; + + if (!opIsLive) + for (SDNode::use_iterator UI = left->use_begin(), UE = left->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + int orderNo3 = User->getIROrder(); + if (orderNo3 > orderNo) { + opIsLive = true; + break; + } + } + + if (!opIsLive) + for (SDNode::use_iterator UI = right->use_begin(), UE = right->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + int orderNo3 = User->getIROrder(); + if (orderNo3 > orderNo) { + opIsLive = true; + break; + } + } + + if (!opIsLive) + return SDValue(); + } + + return DAG.getNode(ISD::FMA, SDLoc(N), VT, + N0.getOperand(0), N0.getOperand(1), N1); + } + } + + return SDValue(); +} + +/// PerformADDCombine - Target-specific dag combine xforms for ISD::ADD. +/// +static SDValue PerformADDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const NVPTXSubtarget &Subtarget, + CodeGenOpt::Level OptLevel) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + + // First try with the default operand order. + SDValue Result = PerformADDCombineWithOperands(N, N0, N1, DCI, Subtarget, + OptLevel); + if (Result.getNode()) + return Result; + + // If that didn't work, try again with the operands commuted. + return PerformADDCombineWithOperands(N, N1, N0, DCI, Subtarget, OptLevel); +} + +static SDValue PerformANDCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + // The type legalizer turns a vector load of i8 values into a zextload to i16 + // registers, optionally ANY_EXTENDs it (if target type is integer), + // and ANDs off the high 8 bits. Since we turn this load into a + // target-specific DAG node, the DAG combiner fails to eliminate these AND + // nodes. Do that here. + SDValue Val = N->getOperand(0); + SDValue Mask = N->getOperand(1); + + if (isa<ConstantSDNode>(Val)) { + std::swap(Val, Mask); + } + + SDValue AExt; + // Generally, we will see zextload -> IMOV16rr -> ANY_EXTEND -> and + if (Val.getOpcode() == ISD::ANY_EXTEND) { + AExt = Val; + Val = Val->getOperand(0); + } + + if (Val->isMachineOpcode() && Val->getMachineOpcode() == NVPTX::IMOV16rr) { + Val = Val->getOperand(0); + } + + if (Val->getOpcode() == NVPTXISD::LoadV2 || + Val->getOpcode() == NVPTXISD::LoadV4) { + ConstantSDNode *MaskCnst = dyn_cast<ConstantSDNode>(Mask); + if (!MaskCnst) { + // Not an AND with a constant + return SDValue(); + } + + uint64_t MaskVal = MaskCnst->getZExtValue(); + if (MaskVal != 0xff) { + // Not an AND that chops off top 8 bits + return SDValue(); + } + + MemSDNode *Mem = dyn_cast<MemSDNode>(Val); + if (!Mem) { + // Not a MemSDNode?!? + return SDValue(); + } + + EVT MemVT = Mem->getMemoryVT(); + if (MemVT != MVT::v2i8 && MemVT != MVT::v4i8) { + // We only handle the i8 case + return SDValue(); + } + + unsigned ExtType = + cast<ConstantSDNode>(Val->getOperand(Val->getNumOperands()-1))-> + getZExtValue(); + if (ExtType == ISD::SEXTLOAD) { + // If for some reason the load is a sextload, the and is needed to zero + // out the high 8 bits + return SDValue(); + } + + bool AddTo = false; + if (AExt.getNode() != 0) { + // Re-insert the ext as a zext. 
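The fact this combine rests on is narrow, and worth restating in scalar terms: after a zero-extending i8 load, an AND with 0xff cannot change the value, while after a sign-extending load it can, which is exactly why the SEXTLOAD case bails out. A toy restatement:

#include <cassert>
#include <cstdint>

int main() {
  uint8_t mem = 0xAB;
  uint16_t zextLoaded = mem;                    // zextload i8 -> i16
  assert((zextLoaded & 0xff) == zextLoaded);    // the AND is redundant
  int16_t sextLoaded = int8_t(mem);             // sextload: 0xFFAB
  assert((uint16_t(sextLoaded) & 0xff) != uint16_t(sextLoaded));
  return 0;
}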
+ Val = DCI.DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), + AExt.getValueType(), Val); + AddTo = true; + } + + // If we get here, the AND is unnecessary. Just replace it with the load + DCI.CombineTo(N, Val, AddTo); + } + + return SDValue(); +} + +enum OperandSignedness { + Signed = 0, + Unsigned, + Unknown +}; + +/// IsMulWideOperandDemotable - Checks if the provided DAG node is an operand +/// that can be demoted to \p OptSize bits without loss of information. The +/// signedness of the operand, if determinable, is placed in \p S. +static bool IsMulWideOperandDemotable(SDValue Op, + unsigned OptSize, + OperandSignedness &S) { + S = Unknown; + + if (Op.getOpcode() == ISD::SIGN_EXTEND || + Op.getOpcode() == ISD::SIGN_EXTEND_INREG) { + EVT OrigVT = Op.getOperand(0).getValueType(); + if (OrigVT.getSizeInBits() == OptSize) { + S = Signed; + return true; + } + } else if (Op.getOpcode() == ISD::ZERO_EXTEND) { + EVT OrigVT = Op.getOperand(0).getValueType(); + if (OrigVT.getSizeInBits() == OptSize) { + S = Unsigned; + return true; + } + } + + return false; +} + +/// AreMulWideOperandsDemotable - Checks if the given LHS and RHS operands can +/// be demoted to \p OptSize bits without loss of information. If the operands +/// contain a constant, it should appear as the RHS operand. The signedness of +/// the operands is placed in \p IsSigned. +static bool AreMulWideOperandsDemotable(SDValue LHS, SDValue RHS, + unsigned OptSize, + bool &IsSigned) { + + OperandSignedness LHSSign; + + // The LHS operand must be a demotable op + if (!IsMulWideOperandDemotable(LHS, OptSize, LHSSign)) + return false; + + // We should have been able to determine the signedness from the LHS + if (LHSSign == Unknown) + return false; + + IsSigned = (LHSSign == Signed); + + // The RHS can be a demotable op or a constant + if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(RHS)) { + APInt Val = CI->getAPIntValue(); + if (LHSSign == Unsigned) { + if (Val.isIntN(OptSize)) { + return true; + } + return false; + } else { + if (Val.isSignedIntN(OptSize)) { + return true; + } + return false; + } + } else { + OperandSignedness RHSSign; + if (!IsMulWideOperandDemotable(RHS, OptSize, RHSSign)) + return false; + + if (LHSSign != RHSSign) + return false; + + return true; + } +} + +/// TryMULWIDECombine - Attempt to replace a multiply of M bits with a multiply +/// of M/2 bits that produces an M-bit result (i.e. mul.wide). This transform +/// works on both multiply DAG nodes and SHL DAG nodes with a constant shift +/// amount. 
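For the constant-operand leg of AreMulWideOperandsDemotable, the check amounts to signed or unsigned representability in half the width. A standalone sketch with hypothetical helper names, mirroring Val.isSignedIntN(OptSize) and Val.isIntN(OptSize):

#include <cassert>
#include <cstdint>

// A 64-bit multiply by a constant can demote to mul.wide.s32 / mul.wide.u32
// only if the constant fits in 32 bits with the matching signedness.
// Assumes 1 <= bits <= 63 for the signed case.
static bool fitsSigned(int64_t c, unsigned bits) {
  return c >= -(int64_t(1) << (bits - 1)) && c < (int64_t(1) << (bits - 1));
}
static bool fitsUnsigned(uint64_t c, unsigned bits) {
  return bits >= 64 || c < (uint64_t(1) << bits);
}

int main() {
  assert(fitsSigned(-2147483648LL, 32) && !fitsSigned(2147483648LL, 32));
  assert(fitsUnsigned(4294967295ULL, 32) && !fitsUnsigned(4294967296ULL, 32));
  return 0;
}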
+static SDValue TryMULWIDECombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + EVT MulType = N->getValueType(0); + if (MulType != MVT::i32 && MulType != MVT::i64) { + return SDValue(); + } + + unsigned OptSize = MulType.getSizeInBits() >> 1; + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + // Canonicalize the multiply so the constant (if any) is on the right + if (N->getOpcode() == ISD::MUL) { + if (isa<ConstantSDNode>(LHS)) { + std::swap(LHS, RHS); + } + } + + // If we have a SHL, determine the actual multiply amount + if (N->getOpcode() == ISD::SHL) { + ConstantSDNode *ShlRHS = dyn_cast<ConstantSDNode>(RHS); + if (!ShlRHS) { + return SDValue(); + } + + APInt ShiftAmt = ShlRHS->getAPIntValue(); + unsigned BitWidth = MulType.getSizeInBits(); + if (ShiftAmt.sge(0) && ShiftAmt.slt(BitWidth)) { + APInt MulVal = APInt(BitWidth, 1) << ShiftAmt; + RHS = DCI.DAG.getConstant(MulVal, MulType); + } else { + return SDValue(); + } + } + + bool Signed; + // Verify that our operands are demotable + if (!AreMulWideOperandsDemotable(LHS, RHS, OptSize, Signed)) { + return SDValue(); + } + + EVT DemotedVT; + if (MulType == MVT::i32) { + DemotedVT = MVT::i16; + } else { + DemotedVT = MVT::i32; + } + + // Truncate the operands to the correct size. Note that these are just for + // type consistency and will (likely) be eliminated in later phases. + SDValue TruncLHS = + DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, LHS); + SDValue TruncRHS = + DCI.DAG.getNode(ISD::TRUNCATE, SDLoc(N), DemotedVT, RHS); + + unsigned Opc; + if (Signed) { + Opc = NVPTXISD::MUL_WIDE_SIGNED; + } else { + Opc = NVPTXISD::MUL_WIDE_UNSIGNED; + } + + return DCI.DAG.getNode(Opc, SDLoc(N), MulType, TruncLHS, TruncRHS); +} + +/// PerformMULCombine - Runs PTX-specific DAG combine patterns on MUL nodes. +static SDValue PerformMULCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + CodeGenOpt::Level OptLevel) { + if (OptLevel > 0) { + // Try mul.wide combining at OptLevel > 0 + SDValue Ret = TryMULWIDECombine(N, DCI); + if (Ret.getNode()) + return Ret; + } + + return SDValue(); +} + +/// PerformSHLCombine - Runs PTX-specific DAG combine patterns on SHL nodes. +static SDValue PerformSHLCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + CodeGenOpt::Level OptLevel) { + if (OptLevel > 0) { + // Try mul.wide combining at OptLevel > 0 + SDValue Ret = TryMULWIDECombine(N, DCI); + if (Ret.getNode()) + return Ret; + } + + return SDValue(); +} + +SDValue NVPTXTargetLowering::PerformDAGCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + // FIXME: Get this from the DAG somehow + CodeGenOpt::Level OptLevel = CodeGenOpt::Aggressive; + switch (N->getOpcode()) { + default: break; + case ISD::ADD: + case ISD::FADD: + return PerformADDCombine(N, DCI, nvptxSubtarget, OptLevel); + case ISD::MUL: + return PerformMULCombine(N, DCI, OptLevel); + case ISD::SHL: + return PerformSHLCombine(N, DCI, OptLevel); + case ISD::AND: + return PerformANDCombine(N, DCI); + } + return SDValue(); +} + /// ReplaceVectorLoad - Convert vector loads into multi-output scalar loads. 
static void ReplaceLoadVector(SDNode *N, SelectionDAG &DAG, SmallVectorImpl<SDValue> &Results) { diff --git a/lib/Target/NVPTX/NVPTXISelLowering.h b/lib/Target/NVPTX/NVPTXISelLowering.h index 7bad8a2..7b4026d 100644 --- a/lib/Target/NVPTX/NVPTXISelLowering.h +++ b/lib/Target/NVPTX/NVPTXISelLowering.h @@ -16,7 +16,6 @@ #define NVPTXISELLOWERING_H #include "NVPTX.h" -#include "NVPTXSubtarget.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" @@ -50,6 +49,11 @@ enum NodeType { CallSeqBegin, CallSeqEnd, CallPrototype, + FUN_SHFL_CLAMP, + FUN_SHFR_CLAMP, + MUL_WIDE_SIGNED, + MUL_WIDE_UNSIGNED, + IMAD, Dummy, LoadV2 = ISD::FIRST_TARGET_MEMORY_OPCODE, @@ -167,6 +171,8 @@ enum NodeType { }; } +class NVPTXSubtarget; + //===--------------------------------------------------------------------===// // TargetLowering Implementation //===--------------------------------------------------------------------===// @@ -196,9 +202,9 @@ public: /// getFunctionAlignment - Return the Log2 alignment of this function. unsigned getFunctionAlignment(const Function *F) const; - EVT getSetCCResultType(LLVMContext &, EVT VT) const override { + EVT getSetCCResultType(LLVMContext &Ctx, EVT VT) const override { if (VT.isVector()) - return MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + return EVT::getVectorVT(Ctx, MVT::i1, VT.getVectorNumElements()); return MVT::i1; } @@ -236,7 +242,8 @@ public: // PTX always uses 32-bit shift amounts MVT getScalarShiftAmountTy(EVT LHSTy) const override { return MVT::i32; } - bool shouldSplitVectorType(EVT VT) const override; + TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const override; private: const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here @@ -255,8 +262,12 @@ private: SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) const; + void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const override; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; unsigned getArgumentAlignment(SDValue Callee, const ImmutableCallSite *CS, Type *Ty, unsigned Idx) const; diff --git a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp index 397f4bc..a98fb37 100644 --- a/lib/Target/NVPTX/NVPTXImageOptimizer.cpp +++ b/lib/Target/NVPTX/NVPTXImageOptimizer.cpp @@ -146,7 +146,7 @@ bool NVPTXImageOptimizer::replaceIsTypePTexture(Instruction &I) { void NVPTXImageOptimizer::replaceWith(Instruction *From, ConstantInt *To) { // We implement "poor man's DCE" here to make sure any code that is no longer // live is actually unreachable and can be trivially eliminated by the - // unreachable block elimiation pass. + // unreachable block elimination pass. for (CallInst::use_iterator UI = From->use_begin(), UE = From->use_end(); UI != UE; ++UI) { if (BranchInst *BI = dyn_cast<BranchInst>(*UI)) { diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.cpp b/lib/Target/NVPTX/NVPTXInstrInfo.cpp index cdc8088..b5b4fbe 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.cpp +++ b/lib/Target/NVPTX/NVPTXInstrInfo.cpp @@ -29,8 +29,8 @@ using namespace llvm; void NVPTXInstrInfo::anchor() {} // FIXME: Add the subtarget support on this constructor. 
-NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm) - : NVPTXGenInstrInfo(), TM(tm), RegInfo(*TM.getSubtargetImpl()) {} +NVPTXInstrInfo::NVPTXInstrInfo(NVPTXSubtarget &STI) + : NVPTXGenInstrInfo(), RegInfo(STI) {} void NVPTXInstrInfo::copyPhysReg( MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.h b/lib/Target/NVPTX/NVPTXInstrInfo.h index 88a9e45..2ac2974 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.h +++ b/lib/Target/NVPTX/NVPTXInstrInfo.h @@ -24,11 +24,10 @@ namespace llvm { class NVPTXInstrInfo : public NVPTXGenInstrInfo { - NVPTXTargetMachine &TM; const NVPTXRegisterInfo RegInfo; virtual void anchor(); public: - explicit NVPTXInstrInfo(NVPTXTargetMachine &TM); + explicit NVPTXInstrInfo(NVPTXSubtarget &STI); const NVPTXRegisterInfo &getRegisterInfo() const { return RegInfo; } diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td index fbcd0e4..d2c0373 100644 --- a/lib/Target/NVPTX/NVPTXInstrInfo.td +++ b/lib/Target/NVPTX/NVPTXInstrInfo.td @@ -158,9 +158,12 @@ def do_SQRTF32_APPROX : Predicate<"!usePrecSqrtF32()">; def do_SQRTF32_RN : Predicate<"usePrecSqrtF32()">; def hasHWROT32 : Predicate<"Subtarget.hasHWROT32()">; +def noHWROT32 : Predicate<"!Subtarget.hasHWROT32()">; def true : Predicate<"1">; +def hasPTX31 : Predicate<"Subtarget.getPTXVersion() >= 31">; + //===----------------------------------------------------------------------===// // Some Common Instruction Class Templates @@ -461,33 +464,45 @@ def SHL2MUL16 : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(temp.shl(v), MVT::i16); }]>; -def MULWIDES64 : NVPTXInst<(outs Int64Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b), +def MULWIDES64 + : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + "mul.wide.s32 \t$dst, $a, $b;", []>; +def MULWIDES64Imm + : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), "mul.wide.s32 \t$dst, $a, $b;", []>; -def MULWIDES64Imm : NVPTXInst<(outs Int64Regs:$dst), - (ins Int32Regs:$a, i64imm:$b), +def MULWIDES64Imm64 + : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), "mul.wide.s32 \t$dst, $a, $b;", []>; -def MULWIDEU64 : NVPTXInst<(outs Int64Regs:$dst), - (ins Int32Regs:$a, Int32Regs:$b), +def MULWIDEU64 + : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b), + "mul.wide.u32 \t$dst, $a, $b;", []>; +def MULWIDEU64Imm + : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b), "mul.wide.u32 \t$dst, $a, $b;", []>; -def MULWIDEU64Imm : NVPTXInst<(outs Int64Regs:$dst), - (ins Int32Regs:$a, i64imm:$b), +def MULWIDEU64Imm64 + : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b), "mul.wide.u32 \t$dst, $a, $b;", []>; -def MULWIDES32 : NVPTXInst<(outs Int32Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b), +def MULWIDES32 + : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b), "mul.wide.s16 \t$dst, $a, $b;", []>; -def MULWIDES32Imm : NVPTXInst<(outs Int32Regs:$dst), - (ins Int16Regs:$a, i32imm:$b), +def MULWIDES32Imm + : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), + "mul.wide.s16 \t$dst, $a, $b;", []>; +def MULWIDES32Imm32 + : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), "mul.wide.s16 \t$dst, $a, $b;", []>; -def MULWIDEU32 : NVPTXInst<(outs Int32Regs:$dst), - (ins Int16Regs:$a, Int16Regs:$b), - "mul.wide.u16 \t$dst, $a, $b;", []>; -def MULWIDEU32Imm : NVPTXInst<(outs Int32Regs:$dst), - (ins Int16Regs:$a, i32imm:$b), +def MULWIDEU32 + : NVPTXInst<(outs Int32Regs:$dst), (ins 
Int16Regs:$a, Int16Regs:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; +def MULWIDEU32Imm + : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b), "mul.wide.u16 \t$dst, $a, $b;", []>; +def MULWIDEU32Imm32 + : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b), + "mul.wide.u16 \t$dst, $a, $b;", []>; def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)), (MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>, @@ -507,25 +522,63 @@ def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)), (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)), - (MULWIDES64Imm Int32Regs:$a, (i64 SInt32Const:$b))>, + (MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>, Requires<[doMulWide]>; def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)), - (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, Requires<[doMulWide]>; + (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, + Requires<[doMulWide]>; def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)), - (MULWIDEU64Imm Int32Regs:$a, (i64 UInt32Const:$b))>, + (MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>, Requires<[doMulWide]>; def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)), - (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; + (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, + Requires<[doMulWide]>; def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)), - (MULWIDES32Imm Int16Regs:$a, (i32 SInt16Const:$b))>, + (MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>, Requires<[doMulWide]>; def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)), - (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, Requires<[doMulWide]>; + (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, + Requires<[doMulWide]>; def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)), - (MULWIDEU32Imm Int16Regs:$a, (i32 UInt16Const:$b))>, + (MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>, + Requires<[doMulWide]>; + + +def SDTMulWide + : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>; +def mul_wide_signed + : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>; +def mul_wide_unsigned + : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>; + +def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)), + (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)), + (MULWIDES32Imm Int16Regs:$a, imm:$b)>, + Requires<[doMulWide]>; +def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)), + (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)), + (MULWIDEU32Imm Int16Regs:$a, imm:$b)>, + Requires<[doMulWide]>; + + +def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)), + (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)), + (MULWIDES64Imm Int32Regs:$a, imm:$b)>, + Requires<[doMulWide]>; +def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)), + (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>, + Requires<[doMulWide]>; +def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)), + (MULWIDEU64Imm Int32Regs:$a, imm:$b)>, Requires<[doMulWide]>; defm MULT : I3<"mul.lo.s", mul>; @@ -541,69 +594,75 @@ defm SREM : I3<"rem.s", srem>; defm UREM : I3<"rem.u", urem>; // The ri version will not be selected as DAGCombiner::visitUREM will lower it. 
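The NVPTXISD::MUL_WIDE_SIGNED/MUL_WIDE_UNSIGNED nodes introduced above pair with the PerformDAGCombine override declared in NVPTXISelLowering.h earlier in this patch; presumably that hook rewrites a multiply of matching sign- or zero-extended operands into one of these nodes so the Pats here can select mul.wide directly. A minimal sketch of such a combine (hypothetical function, simplified; only the 32x32 -> 64 case):

    #include "llvm/CodeGen/SelectionDAG.h"
    using namespace llvm;
    // NVPTXISD::MUL_WIDE_* come from NVPTXISelLowering.h as added above.

    static SDValue tryFormMulWide(SDNode *N, SelectionDAG &DAG) {
      if (N->getOpcode() != ISD::MUL || N->getValueType(0) != MVT::i64)
        return SDValue();
      SDValue L = N->getOperand(0), R = N->getOperand(1);
      unsigned Ext = L.getOpcode();
      if (Ext != ISD::SIGN_EXTEND && Ext != ISD::ZERO_EXTEND)
        return SDValue();
      if (R.getOpcode() != Ext ||   // both operands must extend the same way
          L.getOperand(0).getValueType() != MVT::i32 ||
          R.getOperand(0).getValueType() != MVT::i32)
        return SDValue();
      unsigned Opc = Ext == ISD::SIGN_EXTEND ? NVPTXISD::MUL_WIDE_SIGNED
                                             : NVPTXISD::MUL_WIDE_UNSIGNED;
      // One mul.wide.{s,u}32 replaces two extends plus a 64-bit multiply.
      return DAG.getNode(Opc, SDLoc(N), MVT::i64, L.getOperand(0),
                         R.getOperand(0));
    }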
+def SDTIMAD + : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, + SDTCisInt<2>, SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>; +def imad + : SDNode<"NVPTXISD::IMAD", SDTIMAD>; + def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c), "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (add - (mul Int16Regs:$a, Int16Regs:$b), Int16Regs:$c))]>; + [(set Int16Regs:$dst, + (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>; def MAD16rri : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c), "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (add - (mul Int16Regs:$a, Int16Regs:$b), imm:$c))]>; + [(set Int16Regs:$dst, + (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>; def MAD16rir : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c), "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (add - (mul Int16Regs:$a, imm:$b), Int16Regs:$c))]>; + [(set Int16Regs:$dst, + (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>; def MAD16rii : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b, i16imm:$c), "mad.lo.s16 \t$dst, $a, $b, $c;", - [(set Int16Regs:$dst, (add (mul Int16Regs:$a, imm:$b), - imm:$c))]>; + [(set Int16Regs:$dst, + (imad Int16Regs:$a, imm:$b, imm:$c))]>; def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c), "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, (add - (mul Int32Regs:$a, Int32Regs:$b), Int32Regs:$c))]>; + [(set Int32Regs:$dst, + (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>; def MAD32rri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c), "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, (add - (mul Int32Regs:$a, Int32Regs:$b), imm:$c))]>; + [(set Int32Regs:$dst, + (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>; def MAD32rir : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c), "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, (add - (mul Int32Regs:$a, imm:$b), Int32Regs:$c))]>; + [(set Int32Regs:$dst, + (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>; def MAD32rii : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b, i32imm:$c), "mad.lo.s32 \t$dst, $a, $b, $c;", - [(set Int32Regs:$dst, (add - (mul Int32Regs:$a, imm:$b), imm:$c))]>; + [(set Int32Regs:$dst, + (imad Int32Regs:$a, imm:$b, imm:$c))]>; def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c), "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (add - (mul Int64Regs:$a, Int64Regs:$b), Int64Regs:$c))]>; + [(set Int64Regs:$dst, + (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>; def MAD64rri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c), "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (add - (mul Int64Regs:$a, Int64Regs:$b), imm:$c))]>; + [(set Int64Regs:$dst, + (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>; def MAD64rir : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c), "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (add - (mul Int64Regs:$a, imm:$b), Int64Regs:$c))]>; + [(set Int64Regs:$dst, + (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>; def MAD64rii : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b, i64imm:$c), "mad.lo.s64 \t$dst, $a, $b, $c;", - [(set Int64Regs:$dst, (add - (mul Int64Regs:$a, imm:$b), imm:$c))]>; - + [(set Int64Regs:$dst, + (imad Int64Regs:$a, imm:$b, imm:$c))]>; def INEG16 : NVPTXInst<(outs Int16Regs:$dst), (ins 
Int16Regs:$src), "neg.s16 \t$dst, $src;", @@ -809,36 +868,26 @@ multiclass FPCONTRACT32<string OpcStr, Predicate Pred> { def rrr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float32Regs:$dst, (fadd - (fmul Float32Regs:$a, Float32Regs:$b), - Float32Regs:$c))]>, Requires<[Pred]>; - // This is to WAR a weird bug in Tablegen that does not automatically - // generate the following permutated rule rrr2 from the above rrr. - // So we explicitly add it here. This happens to FMA32 only. - // See the comments at FMAD32 and FMA32 for more information. - def rrr2 : NVPTXInst<(outs Float32Regs:$dst), - (ins Float32Regs:$a, Float32Regs:$b, Float32Regs:$c), - !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float32Regs:$dst, (fadd Float32Regs:$c, - (fmul Float32Regs:$a, Float32Regs:$b)))]>, + [(set Float32Regs:$dst, + (fma Float32Regs:$a, Float32Regs:$b, Float32Regs:$c))]>, Requires<[Pred]>; def rri : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, Float32Regs:$b, f32imm:$c), !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float32Regs:$dst, (fadd - (fmul Float32Regs:$a, Float32Regs:$b), fpimm:$c))]>, + [(set Float32Regs:$dst, + (fma Float32Regs:$a, Float32Regs:$b, fpimm:$c))]>, Requires<[Pred]>; def rir : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b, Float32Regs:$c), !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float32Regs:$dst, (fadd - (fmul Float32Regs:$a, fpimm:$b), Float32Regs:$c))]>, + [(set Float32Regs:$dst, + (fma Float32Regs:$a, fpimm:$b, Float32Regs:$c))]>, Requires<[Pred]>; def rii : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a, f32imm:$b, f32imm:$c), !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float32Regs:$dst, (fadd - (fmul Float32Regs:$a, fpimm:$b), fpimm:$c))]>, + [(set Float32Regs:$dst, + (fma Float32Regs:$a, fpimm:$b, fpimm:$c))]>, Requires<[Pred]>; } @@ -846,73 +895,32 @@ multiclass FPCONTRACT64<string OpcStr, Predicate Pred> { def rrr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c), !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float64Regs:$dst, (fadd - (fmul Float64Regs:$a, Float64Regs:$b), - Float64Regs:$c))]>, Requires<[Pred]>; + [(set Float64Regs:$dst, + (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>, + Requires<[Pred]>; def rri : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c), !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float64Regs:$dst, (fadd (fmul Float64Regs:$a, - Float64Regs:$b), fpimm:$c))]>, Requires<[Pred]>; + [(set Float64Regs:$dst, + (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>, + Requires<[Pred]>; def rir : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c), !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float64Regs:$dst, (fadd - (fmul Float64Regs:$a, fpimm:$b), Float64Regs:$c))]>, + [(set Float64Regs:$dst, + (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>, Requires<[Pred]>; def rii : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a, f64imm:$b, f64imm:$c), !strconcat(OpcStr, " \t$dst, $a, $b, $c;"), - [(set Float64Regs:$dst, (fadd - (fmul Float64Regs:$a, fpimm:$b), fpimm:$c))]>, + [(set Float64Regs:$dst, + (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>, Requires<[Pred]>; } -// Due to a unknown reason (most likely a bug in tablegen), tablegen does not -// automatically generate the rrr2 rule from -// the rrr rule (see FPCONTRACT32) for FMA32, though it does for FMAD32. 
-// If we reverse the order of the following two lines, then rrr2 rule will be -// generated for FMA32, but not for rrr. -// Therefore, we manually write the rrr2 rule in FPCONTRACT32. -defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doFMAF32_ftz>; -defm FMA32 : FPCONTRACT32<"fma.rn.f32", doFMAF32>; -defm FMA64 : FPCONTRACT64<"fma.rn.f64", doFMAF64>; - -// b*c-a => fmad(b, c, -a) -multiclass FPCONTRACT32_SUB_PAT_MAD<NVPTXInst Inst, Predicate Pred> { - def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a), - (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>, - Requires<[Pred]>; -} - -// a-b*c => fmad(-b,c, a) -// - legal because a-b*c <=> a+(-b*c) <=> a+(-b)*c -// b*c-a => fmad(b, c, -a) -// - legal because b*c-a <=> b*c+(-a) -multiclass FPCONTRACT32_SUB_PAT<NVPTXInst Inst, Predicate Pred> { - def : Pat<(fsub Float32Regs:$a, (fmul Float32Regs:$b, Float32Regs:$c)), - (Inst (FNEGf32 Float32Regs:$b), Float32Regs:$c, Float32Regs:$a)>, - Requires<[Pred]>; - def : Pat<(fsub (fmul Float32Regs:$b, Float32Regs:$c), Float32Regs:$a), - (Inst Float32Regs:$b, Float32Regs:$c, (FNEGf32 Float32Regs:$a))>, - Requires<[Pred]>; -} - -// a-b*c => fmad(-b,c, a) -// b*c-a => fmad(b, c, -a) -multiclass FPCONTRACT64_SUB_PAT<NVPTXInst Inst, Predicate Pred> { - def : Pat<(fsub Float64Regs:$a, (fmul Float64Regs:$b, Float64Regs:$c)), - (Inst (FNEGf64 Float64Regs:$b), Float64Regs:$c, Float64Regs:$a)>, - Requires<[Pred]>; - - def : Pat<(fsub (fmul Float64Regs:$b, Float64Regs:$c), Float64Regs:$a), - (Inst Float64Regs:$b, Float64Regs:$c, (FNEGf64 Float64Regs:$a))>, - Requires<[Pred]>; -} - -defm FMAF32ext_ftz : FPCONTRACT32_SUB_PAT<FMA32_ftzrrr, doFMAF32AGG_ftz>; -defm FMAF32ext : FPCONTRACT32_SUB_PAT<FMA32rrr, doFMAF32AGG>; -defm FMAF64ext : FPCONTRACT64_SUB_PAT<FMA64rrr, doFMAF64AGG>; +defm FMA32_ftz : FPCONTRACT32<"fma.rn.ftz.f32", doF32FTZ>; +defm FMA32 : FPCONTRACT32<"fma.rn.f32", doNoF32FTZ>; +defm FMA64 : FPCONTRACT64<"fma.rn.f64", doNoF32FTZ>; def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src), "sin.approx.f32 \t$dst, $src;", @@ -1083,6 +1091,43 @@ multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> { defm SRA : RSHIFT_FORMAT<"shr.s", sra>; defm SRL : RSHIFT_FORMAT<"shr.u", srl>; +// +// Rotate: use ptx shf instruction if available. 
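Before the rotate definitions that follow, a note on the FPCONTRACT rewrite above: it replaces the (fadd (fmul ...)) patterns, and with them the rrr2 duplicate and the FPCONTRACT*_SUB_PAT multiclasses, by matching the target-independent fma node directly, mirroring the integer move to imad. Whether a mul/add pair may be contracted is now decided in the DAG combiner, not by isel patterns. At the IR level, an explicit llvm.fma call is what these patterns now select; a small sketch using the era's IRBuilder API (helper name is illustrative, not from the patch):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    // Builds a call to llvm.fma.f32; the patterns above select it to
    // fma.rn.f32, or fma.rn.ftz.f32 when the doF32FTZ predicate holds.
    // A separate fmul + fadd only becomes FMA if contraction is permitted.
    static llvm::Value *emitFMA(llvm::IRBuilder<> &B, llvm::Value *A,
                                llvm::Value *X, llvm::Value *C) {
      llvm::Module *M = B.GetInsertBlock()->getParent()->getParent();
      llvm::Function *FMA = llvm::Intrinsic::getDeclaration(
          M, llvm::Intrinsic::fma, A->getType());
      return B.CreateCall3(FMA, A, X, C);
    }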
+// + +// 32 bit r2 = rotl r1, n +// => +// r2 = shf.l r1, r1, n +def ROTL32imm_hw : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>, + Requires<[hasHWROT32]> ; + +def ROTL32reg_hw : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[hasHWROT32]>; + +// 32 bit r2 = rotr r1, n +// => +// r2 = shf.r r1, r1, n +def ROTR32imm_hw : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt), + "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>, + Requires<[hasHWROT32]>; + +def ROTR32reg_hw : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, Int32Regs:$amt), + "shf.r.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[hasHWROT32]>; + +// +// Rotate: if ptx shf instruction is not available, then use shift+add +// // 32bit def ROT32imm_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2), @@ -1100,9 +1145,11 @@ def SUB_FRM_32 : SDNodeXForm<imm, [{ }]>; def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)), - (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>; + (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, + Requires<[noHWROT32]>; def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)), - (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>; + (ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>, + Requires<[noHWROT32]>; def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), @@ -1115,7 +1162,8 @@ def ROTL32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, !strconcat("shr.b32 \t%rhs, $src, %amt2;\n\t", !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", !strconcat("}}", ""))))))))), - [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>; + [(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[noHWROT32]>; def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt), @@ -1128,7 +1176,8 @@ def ROTR32reg_sw : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, !strconcat("shl.b32 \t%rhs, $src, %amt2;\n\t", !strconcat("add.u32 \t$dst, %lhs, %rhs;\n\t", !strconcat("}}", ""))))))))), - [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>; + [(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[noHWROT32]>; // 64bit def ROT64imm_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, @@ -1177,6 +1226,29 @@ def ROTR64reg_sw : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, !strconcat("}}", ""))))))))), [(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>; +// BFE - bit-field extract + +multiclass BFE<string TyStr, RegisterClass RC> { + // BFE supports both 32-bit and 64-bit values, but the start and length + // operands are always 32-bit + def rrr + : NVPTXInst<(outs RC:$d), + (ins RC:$a, Int32Regs:$b, Int32Regs:$c), + !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; + def rri + : NVPTXInst<(outs RC:$d), + (ins RC:$a, Int32Regs:$b, i32imm:$c), + !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; + def rii + : NVPTXInst<(outs RC:$d), + (ins RC:$a, i32imm:$b, i32imm:$c), + !strconcat("bfe.", TyStr, " \t$d, $a, $b, $c;"), []>; +} + +defm BFE_S32 : BFE<"s32", Int32Regs>; 
+defm BFE_U32 : BFE<"u32", Int32Regs>; +defm BFE_S64 : BFE<"s64", Int64Regs>; +defm BFE_U64 : BFE<"u64", Int64Regs>; //----------------------------------- // General Comparison //----------------------------------- @@ -1292,6 +1364,32 @@ def : Pat<(i1 (select Int1Regs:$p, Int1Regs:$a, Int1Regs:$b)), (ORb1rr (ANDb1rr Int1Regs:$p, Int1Regs:$a), (ANDb1rr (NOT1 Int1Regs:$p), Int1Regs:$b))>; +// +// Funnel shift in clamp mode +// +// - SDNodes are created so they can be used in the DAG code, +// e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts) +// +def SDTIntShiftDOp: SDTypeProfile<1, 3, + [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisInt<0>, SDTCisInt<3>]>; +def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>; +def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>; + +def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;", + [(set Int32Regs:$dst, + (FUN_SHFL_CLAMP Int32Regs:$lo, + Int32Regs:$hi, Int32Regs:$amt))]>; + +def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;", + [(set Int32Regs:$dst, + (FUN_SHFR_CLAMP Int32Regs:$lo, + Int32Regs:$hi, Int32Regs:$amt))]>; + //----------------------------------- // Data Movement (Load / Store, Move) //----------------------------------- diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 5e228fc..0ad3dfa 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1057,12 +1057,24 @@ def atomic_load_max_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>; def atomic_load_max_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_max_32 node:$a, node:$b)>; +def atomic_load_max_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_max_64 node:$a, node:$b)>; +def atomic_load_max_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_max_64 node:$a, node:$b)>; +def atomic_load_max_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_max_64 node:$a, node:$b)>; def atomic_load_umax_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>; def atomic_load_umax_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>; def atomic_load_umax_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_umax_32 node:$a, node:$b)>; +def atomic_load_umax_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_umax_64 node:$a, node:$b)>; +def atomic_load_umax_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_umax_64 node:$a, node:$b)>; +def atomic_load_umax_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_umax_64 node:$a, node:$b)>; defm INT_PTX_ATOM_LOAD_MAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max", atomic_load_max_32_g, i32imm, imm, hasAtomRedG32>; @@ -1072,6 +1084,14 @@ defm INT_PTX_ATOM_LOAD_MAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, hasAtomRedGen32>; defm INT_PTX_ATOM_LOAD_MAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".max", atomic_load_max_32_gen, i32imm, imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_LOAD_MAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64", + ".max", atomic_load_max_64_g, i64imm, imm, hasAtomRedG64>; +defm INT_PTX_ATOM_LOAD_MAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64", + ".max",
atomic_load_max_64_s, i64imm, imm, hasAtomRedS64>; +defm INT_PTX_ATOM_LOAD_MAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".max", + atomic_load_max_64_gen, i64imm, imm, hasAtomRedGen64>; +defm INT_PTX_ATOM_LOAD_MAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", + ".s64", ".max", atomic_load_max_64_gen, i64imm, imm, useAtomRedG64forGen64>; defm INT_PTX_ATOM_LOAD_UMAX_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max", atomic_load_umax_32_g, i32imm, imm, hasAtomRedG32>; defm INT_PTX_ATOM_LOAD_UMAX_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", @@ -1080,6 +1100,14 @@ defm INT_PTX_ATOM_LOAD_UMAX_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, hasAtomRedGen32>; defm INT_PTX_ATOM_LOAD_UMAX_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".max", atomic_load_umax_32_gen, i32imm, imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_LOAD_UMAX_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", + ".max", atomic_load_umax_64_g, i64imm, imm, hasAtomRedG64>; +defm INT_PTX_ATOM_LOAD_UMAX_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", + ".max", atomic_load_umax_64_s, i64imm, imm, hasAtomRedS64>; +defm INT_PTX_ATOM_LOAD_UMAX_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".max", + atomic_load_umax_64_gen, i64imm, imm, hasAtomRedGen64>; +defm INT_PTX_ATOM_LOAD_UMAX_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", + ".u64", ".max", atomic_load_umax_64_gen, i64imm, imm, useAtomRedG64forGen64>; // atom_min @@ -1089,12 +1117,24 @@ def atomic_load_min_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>; def atomic_load_min_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_min_32 node:$a, node:$b)>; +def atomic_load_min_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_min_64 node:$a, node:$b)>; +def atomic_load_min_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_min_64 node:$a, node:$b)>; +def atomic_load_min_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_min_64 node:$a, node:$b)>; def atomic_load_umin_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>; def atomic_load_umin_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>; def atomic_load_umin_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_umin_32 node:$a, node:$b)>; +def atomic_load_umin_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_umin_64 node:$a, node:$b)>; +def atomic_load_umin_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_umin_64 node:$a, node:$b)>; +def atomic_load_umin_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_umin_64 node:$a, node:$b)>; defm INT_PTX_ATOM_LOAD_MIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min", atomic_load_min_32_g, i32imm, imm, hasAtomRedG32>; @@ -1104,6 +1144,14 @@ defm INT_PTX_ATOM_LOAD_MIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, hasAtomRedGen32>; defm INT_PTX_ATOM_LOAD_MIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".s32", ".min", atomic_load_min_32_gen, i32imm, imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_LOAD_MIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".s64", + ".min", atomic_load_min_64_g, i64imm, imm, hasAtomRedG64>; +defm INT_PTX_ATOM_LOAD_MIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".s64", + ".min", atomic_load_min_64_s, i64imm, imm, hasAtomRedS64>; +defm INT_PTX_ATOM_LOAD_MIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".s64", ".min", + atomic_load_min_64_gen, i64imm, imm, 
hasAtomRedGen64>; +defm INT_PTX_ATOM_LOAD_MIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", + ".s64", ".min", atomic_load_min_64_gen, i64imm, imm, useAtomRedG64forGen64>; defm INT_PTX_ATOM_LOAD_UMIN_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min", atomic_load_umin_32_g, i32imm, imm, hasAtomRedG32>; defm INT_PTX_ATOM_LOAD_UMIN_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".u32", @@ -1112,6 +1160,14 @@ defm INT_PTX_ATOM_LOAD_UMIN_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, hasAtomRedGen32>; defm INT_PTX_ATOM_LOAD_UMIN_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".min", atomic_load_umin_32_gen, i32imm, imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_LOAD_UMIN_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".u64", + ".min", atomic_load_umin_64_g, i64imm, imm, hasAtomRedG64>; +defm INT_PTX_ATOM_LOAD_UMIN_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".u64", + ".min", atomic_load_umin_64_s, i64imm, imm, hasAtomRedS64>; +defm INT_PTX_ATOM_LOAD_UMIN_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".u64", ".min", + atomic_load_umin_64_gen, i64imm, imm, hasAtomRedGen64>; +defm INT_PTX_ATOM_LOAD_UMIN_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", + ".u64", ".min", atomic_load_umin_64_gen, i64imm, imm, useAtomRedG64forGen64>; // atom_inc atom_dec @@ -1153,6 +1209,12 @@ def atomic_load_and_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>; def atomic_load_and_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_and_32 node:$a, node:$b)>; +def atomic_load_and_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_and_64 node:$a, node:$b)>; +def atomic_load_and_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_and_64 node:$a, node:$b)>; +def atomic_load_and_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_and_64 node:$a, node:$b)>; defm INT_PTX_ATOM_AND_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and", atomic_load_and_32_g, i32imm, imm, hasAtomRedG32>; @@ -1162,6 +1224,14 @@ defm INT_PTX_ATOM_AND_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".and", atomic_load_and_32_gen, i32imm, imm, hasAtomRedGen32>; defm INT_PTX_ATOM_AND_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".and", atomic_load_and_32_gen, i32imm, imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_AND_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".and", + atomic_load_and_64_g, i64imm, imm, hasAtomRedG64>; +defm INT_PTX_ATOM_AND_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".and", + atomic_load_and_64_s, i64imm, imm, hasAtomRedS64>; +defm INT_PTX_ATOM_AND_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".and", + atomic_load_and_64_gen, i64imm, imm, hasAtomRedGen64>; +defm INT_PTX_ATOM_AND_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", + ".and", atomic_load_and_64_gen, i64imm, imm, useAtomRedG64forGen64>; // atom_or @@ -1171,6 +1241,12 @@ def atomic_load_or_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>; def atomic_load_or_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_or_32 node:$a, node:$b)>; +def atomic_load_or_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_or_64 node:$a, node:$b)>; +def atomic_load_or_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_or_64 node:$a, node:$b)>; +def atomic_load_or_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_or_64 node:$a, node:$b)>; defm INT_PTX_ATOM_OR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or", atomic_load_or_32_g, i32imm, imm, hasAtomRedG32>; @@ 
-1180,6 +1256,14 @@ defm INT_PTX_ATOM_OR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".or", atomic_load_or_32_gen, i32imm, imm, useAtomRedG32forGen32>; defm INT_PTX_ATOM_OR_S_32 : F_ATOMIC_2<Int32Regs, ".shared", ".b32", ".or", atomic_load_or_32_s, i32imm, imm, hasAtomRedS32>; +defm INT_PTX_ATOM_OR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".or", + atomic_load_or_64_g, i64imm, imm, hasAtomRedG64>; +defm INT_PTX_ATOM_OR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".or", + atomic_load_or_64_gen, i64imm, imm, hasAtomRedGen64>; +defm INT_PTX_ATOM_OR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", + ".or", atomic_load_or_64_gen, i64imm, imm, useAtomRedG64forGen64>; +defm INT_PTX_ATOM_OR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".or", + atomic_load_or_64_s, i64imm, imm, hasAtomRedS64>; // atom_xor @@ -1189,6 +1273,12 @@ def atomic_load_xor_32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>; def atomic_load_xor_32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), (atomic_load_xor_32 node:$a, node:$b)>; +def atomic_load_xor_64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b), + (atomic_load_xor_64 node:$a, node:$b)>; +def atomic_load_xor_64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b), + (atomic_load_xor_64 node:$a, node:$b)>; +def atomic_load_xor_64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b), + (atomic_load_xor_64 node:$a, node:$b)>; defm INT_PTX_ATOM_XOR_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor", atomic_load_xor_32_g, i32imm, imm, hasAtomRedG32>; @@ -1198,6 +1288,14 @@ defm INT_PTX_ATOM_XOR_GEN_32 : F_ATOMIC_2<Int32Regs, "", ".b32", ".xor", atomic_load_xor_32_gen, i32imm, imm, hasAtomRedGen32>; defm INT_PTX_ATOM_XOR_GEN_32_USE_G : F_ATOMIC_2<Int32Regs, ".global", ".b32", ".xor", atomic_load_xor_32_gen, i32imm, imm, useAtomRedG32forGen32>; +defm INT_PTX_ATOM_XOR_G_64 : F_ATOMIC_2<Int64Regs, ".global", ".b64", ".xor", + atomic_load_xor_64_g, i64imm, imm, hasAtomRedG64>; +defm INT_PTX_ATOM_XOR_S_64 : F_ATOMIC_2<Int64Regs, ".shared", ".b64", ".xor", + atomic_load_xor_64_s, i64imm, imm, hasAtomRedS64>; +defm INT_PTX_ATOM_XOR_GEN_64 : F_ATOMIC_2<Int64Regs, "", ".b64", ".xor", + atomic_load_xor_64_gen, i64imm, imm, hasAtomRedGen64>; +defm INT_PTX_ATOM_XOR_GEN_64_USE_G : F_ATOMIC_2<Int64Regs, ".global", ".b64", + ".xor", atomic_load_xor_64_gen, i64imm, imm, useAtomRedG64forGen64>; // atom_cas @@ -1276,67 +1374,33 @@ def INT_PTX_SREG_WARPSIZE : F_SREG<"mov.u32 \t$dst, WARP_SZ;", Int32Regs, // Support for ldu on sm_20 or later //----------------------------------- -def ldu_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldu_global_i node:$ptr), [{ - MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); - return M->getMemoryVT() == MVT::i8; -}]>; - // Scalar -// @TODO: Revisit this, Changed imemAny to imem -multiclass LDU_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> { +multiclass LDU_G<string TyStr, NVPTXRegClass regclass> { def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>; + []>, Requires<[hasLDU]>; def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>; - def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), + []>, Requires<[hasLDU]>; + def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src), !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp (Wrapper 
tglobaladdr:$src)))]>, - Requires<[hasLDU]>; + []>, Requires<[hasLDU]>; def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>; + []>, Requires<[hasLDU]>; def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>; + []>, Requires<[hasLDU]>; } -multiclass LDU_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> { - def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), - !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDU]>; - def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), - !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDU]>; - def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), - !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, - Requires<[hasLDU]>; - def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), - !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDU]>; - def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), - !strconcat("ldu.global.", TyStr), - [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDU]>; -} - -defm INT_PTX_LDU_GLOBAL_i8 : LDU_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, - ldu_i8>; -defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs, -int_nvvm_ldu_global_i>; -defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs, -int_nvvm_ldu_global_i>; -defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs, -int_nvvm_ldu_global_i>; -defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs, -int_nvvm_ldu_global_f>; -defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs, -int_nvvm_ldu_global_f>; -defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs, -int_nvvm_ldu_global_p>; -defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs, -int_nvvm_ldu_global_p>; +defm INT_PTX_LDU_GLOBAL_i8 : LDU_G<"u8 \t$result, [$src];", Int16Regs>; +defm INT_PTX_LDU_GLOBAL_i16 : LDU_G<"u16 \t$result, [$src];", Int16Regs>; +defm INT_PTX_LDU_GLOBAL_i32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>; +defm INT_PTX_LDU_GLOBAL_i64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>; +defm INT_PTX_LDU_GLOBAL_f32 : LDU_G<"f32 \t$result, [$src];", Float32Regs>; +defm INT_PTX_LDU_GLOBAL_f64 : LDU_G<"f64 \t$result, [$src];", Float64Regs>; +defm INT_PTX_LDU_GLOBAL_p32 : LDU_G<"u32 \t$result, [$src];", Int32Regs>; +defm INT_PTX_LDU_GLOBAL_p64 : LDU_G<"u64 \t$result, [$src];", Int64Regs>; // vector @@ -1406,65 +1470,40 @@ defm INT_PTX_LDU_G_v4f32_ELE // Support for ldg on sm_35 or later //----------------------------------- -def ldg_i8 : PatFrag<(ops node:$ptr), (int_nvvm_ldg_global_i node:$ptr), [{ - MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N); - return M->getMemoryVT() == MVT::i8; -}]>; - -multiclass LDG_G<string TyStr, NVPTXRegClass regclass, Intrinsic IntOp> { +multiclass LDG_G<string TyStr, NVPTXRegClass regclass> { def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), !strconcat("ld.global.nc.", TyStr), - [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>; + []>, Requires<[hasLDG]>; def areg64: NVPTXInst<(outs regclass:$result), (ins 
Int64Regs:$src), !strconcat("ld.global.nc.", TyStr), - [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>; - def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), + []>, Requires<[hasLDG]>; + def avar: NVPTXInst<(outs regclass:$result), (ins imemAny:$src), !strconcat("ld.global.nc.", TyStr), - [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, - Requires<[hasLDG]>; + []>, Requires<[hasLDG]>; def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), !strconcat("ld.global.nc.", TyStr), - [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>; + []>, Requires<[hasLDG]>; def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), !strconcat("ld.global.nc.", TyStr), - [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>; -} - -multiclass LDG_G_NOINTRIN<string TyStr, NVPTXRegClass regclass, PatFrag IntOp> { - def areg: NVPTXInst<(outs regclass:$result), (ins Int32Regs:$src), - !strconcat("ld.global.nc.", TyStr), - [(set regclass:$result, (IntOp Int32Regs:$src))]>, Requires<[hasLDG]>; - def areg64: NVPTXInst<(outs regclass:$result), (ins Int64Regs:$src), - !strconcat("ld.global.nc.", TyStr), - [(set regclass:$result, (IntOp Int64Regs:$src))]>, Requires<[hasLDG]>; - def avar: NVPTXInst<(outs regclass:$result), (ins imem:$src), - !strconcat("ld.global.nc.", TyStr), - [(set regclass:$result, (IntOp (Wrapper tglobaladdr:$src)))]>, - Requires<[hasLDG]>; - def ari : NVPTXInst<(outs regclass:$result), (ins MEMri:$src), - !strconcat("ld.global.nc.", TyStr), - [(set regclass:$result, (IntOp ADDRri:$src))]>, Requires<[hasLDG]>; - def ari64 : NVPTXInst<(outs regclass:$result), (ins MEMri64:$src), - !strconcat("ld.global.nc.", TyStr), - [(set regclass:$result, (IntOp ADDRri64:$src))]>, Requires<[hasLDG]>; + []>, Requires<[hasLDG]>; } defm INT_PTX_LDG_GLOBAL_i8 - : LDG_G_NOINTRIN<"u8 \t$result, [$src];", Int16Regs, ldg_i8>; + : LDG_G<"u8 \t$result, [$src];", Int16Regs>; defm INT_PTX_LDG_GLOBAL_i16 - : LDG_G<"u16 \t$result, [$src];", Int16Regs, int_nvvm_ldg_global_i>; + : LDG_G<"u16 \t$result, [$src];", Int16Regs>; defm INT_PTX_LDG_GLOBAL_i32 - : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_i>; + : LDG_G<"u32 \t$result, [$src];", Int32Regs>; defm INT_PTX_LDG_GLOBAL_i64 - : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_i>; + : LDG_G<"u64 \t$result, [$src];", Int64Regs>; defm INT_PTX_LDG_GLOBAL_f32 - : LDG_G<"f32 \t$result, [$src];", Float32Regs, int_nvvm_ldg_global_f>; + : LDG_G<"f32 \t$result, [$src];", Float32Regs>; defm INT_PTX_LDG_GLOBAL_f64 - : LDG_G<"f64 \t$result, [$src];", Float64Regs, int_nvvm_ldg_global_f>; + : LDG_G<"f64 \t$result, [$src];", Float64Regs>; defm INT_PTX_LDG_GLOBAL_p32 - : LDG_G<"u32 \t$result, [$src];", Int32Regs, int_nvvm_ldg_global_p>; + : LDG_G<"u32 \t$result, [$src];", Int32Regs>; defm INT_PTX_LDG_GLOBAL_p64 - : LDG_G<"u64 \t$result, [$src];", Int64Regs, int_nvvm_ldg_global_p>; + : LDG_G<"u64 \t$result, [$src];", Int64Regs>; // vector @@ -1689,6 +1728,207 @@ def INT_NVVM_COMPILER_ERROR_64 : NVPTXInst<(outs), (ins Int64Regs:$a), [(int_nvvm_compiler_error Int64Regs:$a)]>; +// isspacep + +def ISSPACEP_CONST_32 + : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), + "isspacep.const \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_isspacep_const Int32Regs:$a))]>, + Requires<[hasPTX31]>; +def ISSPACEP_CONST_64 + : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + "isspacep.const \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_isspacep_const Int64Regs:$a))]>, + Requires<[hasPTX31]>; 
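Of the isspacep definitions here, only ISSPACEP_CONST above carries the new hasPTX31 predicate, since isspacep.const first appeared in PTX ISA 3.1; the global, local, and shared variants that follow need no guard. A usage-level sketch (hypothetical helper, not from the patch) of emitting the matching intrinsic from a front end:

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"

    // Emits i1 llvm.nvvm.isspacep.global(i8*); the ISSPACEP_GLOBAL_32/64
    // patterns below then select a single isspacep.global instruction.
    static llvm::Value *emitIsGlobal(llvm::IRBuilder<> &B, llvm::Value *Ptr) {
      llvm::Module *M = B.GetInsertBlock()->getParent()->getParent();
      llvm::Type *I8Ptr = llvm::Type::getInt8PtrTy(M->getContext());
      llvm::Function *F = llvm::Intrinsic::getDeclaration(
          M, llvm::Intrinsic::nvvm_isspacep_global);
      // The intrinsic expects a generic (address space 0) i8 pointer.
      return B.CreateCall(F, B.CreateBitCast(Ptr, I8Ptr));
    }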
+def ISSPACEP_GLOBAL_32 + : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), + "isspacep.global \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_isspacep_global Int32Regs:$a))]>; +def ISSPACEP_GLOBAL_64 + : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + "isspacep.global \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_isspacep_global Int64Regs:$a))]>; +def ISSPACEP_LOCAL_32 + : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), + "isspacep.local \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_isspacep_local Int32Regs:$a))]>; +def ISSPACEP_LOCAL_64 + : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + "isspacep.local \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_isspacep_local Int64Regs:$a))]>; +def ISSPACEP_SHARED_32 + : NVPTXInst<(outs Int1Regs:$d), (ins Int32Regs:$a), + "isspacep.shared \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int32Regs:$a))]>; +def ISSPACEP_SHARED_64 + : NVPTXInst<(outs Int1Regs:$d), (ins Int64Regs:$a), + "isspacep.shared \t$d, $a;", + [(set Int1Regs:$d, (int_nvvm_isspacep_shared Int64Regs:$a))]>; + + +// Special register reads +def MOV_SPECIAL : NVPTXInst<(outs Int32Regs:$d), + (ins SpecialRegs:$r), + "mov.b32\t$d, $r;", []>; + +def : Pat<(int_nvvm_read_ptx_sreg_envreg0), (MOV_SPECIAL ENVREG0)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg1), (MOV_SPECIAL ENVREG1)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg2), (MOV_SPECIAL ENVREG2)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg3), (MOV_SPECIAL ENVREG3)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg4), (MOV_SPECIAL ENVREG4)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg5), (MOV_SPECIAL ENVREG5)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg6), (MOV_SPECIAL ENVREG6)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg7), (MOV_SPECIAL ENVREG7)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg8), (MOV_SPECIAL ENVREG8)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg9), (MOV_SPECIAL ENVREG9)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg10), (MOV_SPECIAL ENVREG10)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg11), (MOV_SPECIAL ENVREG11)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg12), (MOV_SPECIAL ENVREG12)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg13), (MOV_SPECIAL ENVREG13)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg14), (MOV_SPECIAL ENVREG14)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg15), (MOV_SPECIAL ENVREG15)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg16), (MOV_SPECIAL ENVREG16)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg17), (MOV_SPECIAL ENVREG17)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg18), (MOV_SPECIAL ENVREG18)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg19), (MOV_SPECIAL ENVREG19)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg20), (MOV_SPECIAL ENVREG20)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg21), (MOV_SPECIAL ENVREG21)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg22), (MOV_SPECIAL ENVREG22)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg23), (MOV_SPECIAL ENVREG23)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg24), (MOV_SPECIAL ENVREG24)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg25), (MOV_SPECIAL ENVREG25)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg26), (MOV_SPECIAL ENVREG26)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg27), (MOV_SPECIAL ENVREG27)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg28), (MOV_SPECIAL ENVREG28)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg29), (MOV_SPECIAL ENVREG29)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg30), (MOV_SPECIAL ENVREG30)>; +def : Pat<(int_nvvm_read_ptx_sreg_envreg31), (MOV_SPECIAL ENVREG31)>; + + +// rotate builtin support + +def ROTATE_B32_HW_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, i32imm:$amt), 
+ "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, + (int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)))]>, + Requires<[hasHWROT32]> ; + +def ROTATE_B32_HW_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$src, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $src, $src, $amt;", + [(set Int32Regs:$dst, + (int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt))]>, + Requires<[hasHWROT32]> ; + +def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, (i32 imm:$amt)), + (ROT32imm_sw Int32Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, + Requires<[noHWROT32]> ; + +def : Pat<(int_nvvm_rotate_b32 Int32Regs:$src, Int32Regs:$amt), + (ROTL32reg_sw Int32Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]> ; + +def GET_LO_INT64 + : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), + !strconcat("{{\n\t", + !strconcat(".reg .b32 %dummy;\n\t", + !strconcat("mov.b64 \t{$dst,%dummy}, $src;\n\t", + !strconcat("}}", "")))), + []> ; + +def GET_HI_INT64 + : NVPTXInst<(outs Int32Regs:$dst), (ins Int64Regs:$src), + !strconcat("{{\n\t", + !strconcat(".reg .b32 %dummy;\n\t", + !strconcat("mov.b64 \t{%dummy,$dst}, $src;\n\t", + !strconcat("}}", "")))), + []> ; + +def PACK_TWO_INT32 + : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$lo, Int32Regs:$hi), + "mov.b64 \t$dst, {{$lo, $hi}};", []> ; + +def : Pat<(int_nvvm_swap_lo_hi_b64 Int64Regs:$src), + (PACK_TWO_INT32 (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src))> ; + +// funnel shift, requires >= sm_32 +def SHF_L_WRAP_B32_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), + "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + +def SHF_L_WRAP_B32_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.l.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + +def SHF_R_WRAP_B32_IMM + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, i32imm:$amt), + "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + +def SHF_R_WRAP_B32_REG + : NVPTXInst<(outs Int32Regs:$dst), + (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt), + "shf.r.wrap.b32 \t$dst, $lo, $hi, $amt;",[]>, + Requires<[hasHWROT32]>; + +// HW version of rotate 64 +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), + (PACK_TWO_INT32 + (SHF_L_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), imm:$amt), + (SHF_L_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), imm:$amt))>, + Requires<[hasHWROT32]>; + +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), + (PACK_TWO_INT32 + (SHF_L_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt), + (SHF_L_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt))>, + Requires<[hasHWROT32]>; + + +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), + (PACK_TWO_INT32 + (SHF_R_WRAP_B32_IMM (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), imm:$amt), + (SHF_R_WRAP_B32_IMM (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), imm:$amt))>, + Requires<[hasHWROT32]>; + +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), + (PACK_TWO_INT32 + (SHF_R_WRAP_B32_REG (GET_LO_INT64 Int64Regs:$src), + (GET_HI_INT64 Int64Regs:$src), Int32Regs:$amt), + (SHF_R_WRAP_B32_REG (GET_HI_INT64 Int64Regs:$src), + (GET_LO_INT64 Int64Regs:$src), Int32Regs:$amt))>, + Requires<[hasHWROT32]>; + +// SW version of 
rotate 64 +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, imm:$amt, (SUB_FRM_32 node:$amt))>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_b64 Int64Regs:$src, Int32Regs:$amt), + (ROTL64reg_sw Int64Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, (i32 imm:$amt)), + (ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>, + Requires<[noHWROT32]>; +def : Pat<(int_nvvm_rotate_right_b64 Int64Regs:$src, Int32Regs:$amt), + (ROTR64reg_sw Int64Regs:$src, Int32Regs:$amt)>, + Requires<[noHWROT32]>; + + //----------------------------------- // Texture Intrinsics //----------------------------------- diff --git a/lib/Target/NVPTX/NVPTXMCExpr.h b/lib/Target/NVPTX/NVPTXMCExpr.h index 0ee018c..5547649 100644 --- a/lib/Target/NVPTX/NVPTXMCExpr.h +++ b/lib/Target/NVPTX/NVPTXMCExpr.h @@ -66,7 +66,7 @@ public: const MCAsmLayout *Layout) const override { return false; } - void AddValueSymbols(MCAssembler *) const override {}; + void visitUsedExpr(MCStreamer &Streamer) const override {}; const MCSection *FindAssociatedSection() const override { return nullptr; } diff --git a/lib/Target/NVPTX/NVPTXRegisterInfo.td b/lib/Target/NVPTX/NVPTXRegisterInfo.td index 7a38a66..3482248 100644 --- a/lib/Target/NVPTX/NVPTXRegisterInfo.td +++ b/lib/Target/NVPTX/NVPTXRegisterInfo.td @@ -46,6 +46,10 @@ foreach i = 0-4 in { def da#i : NVPTXReg<"%da"#i>; } +foreach i = 0-31 in { + def ENVREG#i : NVPTXReg<"%envreg"#i>; +} + //===----------------------------------------------------------------------===// // Register classes //===----------------------------------------------------------------------===// @@ -61,4 +65,5 @@ def Float32ArgRegs : NVPTXRegClass<[f32], 32, (add (sequence "fa%u", 0, 4))>; def Float64ArgRegs : NVPTXRegClass<[f64], 64, (add (sequence "da%u", 0, 4))>; // Read NVPTXRegisterInfo.cpp to see how VRFrame and VRDepot are used. -def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot)>; +def SpecialRegs : NVPTXRegClass<[i32], 32, (add VRFrame, VRDepot, + (sequence "ENVREG%u", 0, 31))>; diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index 8c7df52..d5cded2 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -25,10 +25,41 @@ using namespace llvm; // Pin the vtable to this file. void NVPTXSubtarget::anchor() {} +static std::string computeDataLayout(bool is64Bit) { + std::string Ret = "e"; + + if (!is64Bit) + Ret += "-p:32:32"; + + Ret += "-i64:64-v16:16-v32:32-n16:32:64"; + + return Ret; +} + +NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, + StringRef FS) { + // Provide the default CPU if we don't have one. + if (CPU.empty() && FS.size()) + llvm_unreachable("we are not using FeatureStr"); + TargetName = CPU.empty() ? 
"sm_20" : CPU; + + ParseSubtargetFeatures(TargetName, FS); + + // Set default to PTX 3.2 (CUDA 5.5) + if (PTXVersion == 0) { + PTXVersion = 32; + } + + return *this; +} + NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit) + const std::string &FS, const TargetMachine &TM, + bool is64Bit) : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0), - SmVersion(20) { + SmVersion(20), DL(computeDataLayout(is64Bit)), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), + TLInfo((NVPTXTargetMachine &)TM), TSInfo(&DL), FrameLowering(*this) { Triple T(TT); @@ -36,26 +67,4 @@ NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, drvInterface = NVPTX::NVCL; else drvInterface = NVPTX::CUDA; - - // Provide the default CPU if none - std::string defCPU = "sm_20"; - - ParseSubtargetFeatures((CPU.empty() ? defCPU : CPU), FS); - - // Get the TargetName from the FS if available - if (FS.empty() && CPU.empty()) - TargetName = defCPU; - else if (!CPU.empty()) - TargetName = CPU; - else - llvm_unreachable("we are not using FeatureStr"); - - // We default to PTX 3.1, but we cannot just default to it in the initializer - // since the attribute parser checks if the given option is >= the default. - // So if we set ptx31 as the default, the ptx30 attribute would never match. - // Instead, we use 0 as the default and manually set 31 if the default is - // used. - if (PTXVersion == 0) { - PTXVersion = 31; - } } diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index 581e5ed..3ed5747 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -15,6 +15,12 @@ #define NVPTXSUBTARGET_H #include "NVPTX.h" +#include "NVPTXFrameLowering.h" +#include "NVPTXISelLowering.h" +#include "NVPTXInstrInfo.h" +#include "NVPTXRegisterInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -35,12 +41,30 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31 unsigned int SmVersion; + const DataLayout DL; // Calculates type size & alignment + NVPTXInstrInfo InstrInfo; + NVPTXTargetLowering TLInfo; + TargetSelectionDAGInfo TSInfo; + + // NVPTX does not have any call stack frame, but need a NVPTX specific + // FrameLowering class because TargetFrameLowering is abstract. + NVPTXFrameLowering FrameLowering; + public: /// This constructor initializes the data members to match that /// of the specified module. 
/// NVPTXSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit); + const std::string &FS, const TargetMachine &TM, bool is64Bit); + + const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } + const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; } + const DataLayout *getDataLayout() const { return &DL; } + const NVPTXRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + const NVPTXTargetLowering *getTargetLowering() const { return &TLInfo; } + const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } bool hasBrkPt() const { return SmVersion >= 11; } bool hasAtomRedG32() const { return SmVersion >= 11; } @@ -57,10 +81,12 @@ public: bool hasFMAF32() const { return SmVersion >= 20; } bool hasFMAF64() const { return SmVersion >= 13; } bool hasLDG() const { return SmVersion >= 32; } - bool hasLDU() const { return SmVersion >= 20; } + bool hasLDU() const { return ((SmVersion >= 20) && (SmVersion < 30)); } bool hasGenericLdSt() const { return SmVersion >= 20; } - inline bool hasHWROT32() const { return false; } - inline bool hasSWROT32() const { return true; } + inline bool hasHWROT32() const { return SmVersion >= 32; } + inline bool hasSWROT32() const { + return ((SmVersion >= 20) && (SmVersion < 32)); + } inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); } inline bool hasROT64() const { return SmVersion >= 20; } @@ -76,6 +102,7 @@ public: unsigned getPTXVersion() const { return PTXVersion; } + NVPTXSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); void ParseSubtargetFeatures(StringRef CPU, StringRef FS); }; diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 26a4f84..069a1b9 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -66,26 +66,13 @@ extern "C" void LLVMInitializeNVPTXTarget() { *PassRegistry::getPassRegistry()); } -static std::string computeDataLayout(const NVPTXSubtarget &ST) { - std::string Ret = "e"; - - if (!ST.is64Bit()) - Ret += "-p:32:32"; - - Ret += "-i64:64-v16:16-v32:32-n16:32:64"; - - return Ret; -} - -NVPTXTargetMachine::NVPTXTargetMachine( - const Target &T, StringRef TT, StringRef CPU, StringRef FS, - const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, bool is64bit) +NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, + StringRef CPU, StringRef FS, + const TargetOptions &Options, + Reloc::Model RM, CodeModel::Model CM, + CodeGenOpt::Level OL, bool is64bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, is64bit), DL(computeDataLayout(Subtarget)), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering( - *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { + Subtarget(TT, CPU, FS, *this, is64bit) { initAsmInfo(); } @@ -119,6 +106,7 @@ public: bool addInstSelector() override; bool addPreRegAlloc() override; bool addPostRegAlloc() override; + void addMachineSSAOptimization() override; FunctionPass *createTargetRegisterAllocator(bool) override; void addFastRegAlloc(FunctionPass *RegAllocPass) override; @@ -220,3 +208,43 @@ void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { printAndVerify("After StackSlotColoring"); } + +void NVPTXPassConfig::addMachineSSAOptimization() { + // Pre-ra tail duplication. 
+ if (addPass(&EarlyTailDuplicateID)) + printAndVerify("After Pre-RegAlloc TailDuplicate"); + + // Optimize PHIs before DCE: removing dead PHI cycles may make more + // instructions dead. + addPass(&OptimizePHIsID); + + // This pass merges large allocas. StackSlotColoring is a different pass + // which merges spill slots. + addPass(&StackColoringID); + + // If the target requests it, assign local variables to stack slots relative + // to one another and simplify frame index references where possible. + addPass(&LocalStackSlotAllocationID); + + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + addPass(&DeadMachineInstructionElimID); + printAndVerify("After codegen DCE pass"); + + // Allow targets to insert passes that improve instruction level parallelism, + // like if-conversion. Such passes will typically need dominator trees and + // loop info, just like LICM and CSE below. + if (addILPOpts()) + printAndVerify("After ILP optimizations"); + + addPass(&MachineLICMID); + addPass(&MachineCSEID); + + addPass(&MachineSinkingID); + printAndVerify("After Machine LICM, CSE and Sinking passes"); + + addPass(&PeepholeOptimizerID); + printAndVerify("After codegen peephole optimization pass"); +} diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 2db7c18..a7a1c8f 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -14,13 +14,8 @@ #ifndef NVPTX_TARGETMACHINE_H #define NVPTX_TARGETMACHINE_H -#include "ManagedStringPool.h" -#include "NVPTXFrameLowering.h" -#include "NVPTXISelLowering.h" -#include "NVPTXInstrInfo.h" -#include "NVPTXRegisterInfo.h" #include "NVPTXSubtarget.h" -#include "llvm/IR/DataLayout.h" +#include "ManagedStringPool.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSelectionDAGInfo.h" @@ -31,50 +26,37 @@ namespace llvm { /// class NVPTXTargetMachine : public LLVMTargetMachine { NVPTXSubtarget Subtarget; - const DataLayout DL; // Calculates type size & alignment - NVPTXInstrInfo InstrInfo; - NVPTXTargetLowering TLInfo; - TargetSelectionDAGInfo TSInfo; - - // NVPTX does not have any call stack frame, but need a NVPTX specific - // FrameLowering class because TargetFrameLowering is abstract. 
- NVPTXFrameLowering FrameLowering; // Hold Strings that can be free'd all together with NVPTXTargetMachine ManagedStringPool ManagedStrPool; - //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level, - // bool DisableVerify, MCContext *&OutCtx); - public: NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit); const TargetFrameLowering *getFrameLowering() const override { - return &FrameLowering; + return getSubtargetImpl()->getFrameLowering(); + } + const NVPTXInstrInfo *getInstrInfo() const override { + return getSubtargetImpl()->getInstrInfo(); + } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); } - const NVPTXInstrInfo *getInstrInfo() const override { return &InstrInfo; } - const DataLayout *getDataLayout() const override { return &DL; } const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; } - const NVPTXRegisterInfo *getRegisterInfo() const override { - return &(InstrInfo.getRegisterInfo()); + return getSubtargetImpl()->getRegisterInfo(); } - NVPTXTargetLowering *getTargetLowering() const override { - return const_cast<NVPTXTargetLowering *>(&TLInfo); + const NVPTXTargetLowering *getTargetLowering() const override { + return getSubtargetImpl()->getTargetLowering(); } const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; + return getSubtargetImpl()->getSelectionDAGInfo(); } - //virtual bool addInstSelector(PassManagerBase &PM, - // CodeGenOpt::Level OptLevel); - - //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level); - ManagedStringPool *getManagedStrPool() const { return const_cast<ManagedStringPool *>(&ManagedStrPool); } diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp index cb8bd72..a8d6b95 100644 --- a/lib/Target/NVPTX/NVVMReflect.cpp +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" #include "llvm/IR/Type.h" #include "llvm/Pass.h" @@ -47,17 +48,16 @@ class NVVMReflect : public ModulePass { private: StringMap<int> VarMap; typedef DenseMap<std::string, int>::iterator VarMapIter; - Function *ReflectFunction; public: static char ID; - NVVMReflect() : ModulePass(ID), ReflectFunction(nullptr) { + NVVMReflect() : ModulePass(ID) { initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); VarMap.clear(); } NVVMReflect(const StringMap<int> &Mapping) - : ModulePass(ID), ReflectFunction(nullptr) { + : ModulePass(ID) { initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); for (StringMap<int>::const_iterator I = Mapping.begin(), E = Mapping.end(); I != E; ++I) { @@ -70,6 +70,8 @@ public: } bool runOnModule(Module &) override; +private: + bool handleFunction(Function *ReflectFunction); void setVarMap(); }; } @@ -120,19 +122,7 @@ void NVVMReflect::setVarMap() { } } -bool NVVMReflect::runOnModule(Module &M) { - if (!NVVMReflectEnabled) - return false; - - setVarMap(); - - ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION); - - // If reflect function is not used, then there will be - // no entry in the module. 
- if (!ReflectFunction) - return false; - +bool NVVMReflect::handleFunction(Function *ReflectFunction) { // Validate _reflect function assert(ReflectFunction->isDeclaration() && "_reflect function should not have a body"); @@ -155,13 +145,15 @@ bool NVVMReflect::runOnModule(Module &M) { "Only one operand expect for _reflect function"); // In cuda, we will have an extra constant-to-generic conversion of // the string. - const Value *conv = Reflect->getArgOperand(0); - assert(isa<CallInst>(conv) && "Expected a const-to-gen conversion"); - const CallInst *ConvCall = cast<CallInst>(conv); - const Value *str = ConvCall->getArgOperand(0); - assert(isa<ConstantExpr>(str) && + const Value *Str = Reflect->getArgOperand(0); + if (isa<CallInst>(Str)) { + // CUDA path + const CallInst *ConvCall = cast<CallInst>(Str); + Str = ConvCall->getArgOperand(0); + } + assert(isa<ConstantExpr>(Str) && "Format of _reflect function not recognized"); - const ConstantExpr *GEP = cast<ConstantExpr>(str); + const ConstantExpr *GEP = cast<ConstantExpr>(Str); const Value *Sym = GEP->getOperand(0); assert(isa<Constant>(Sym) && "Format of _reflect function not recognized"); @@ -195,3 +187,36 @@ bool NVVMReflect::runOnModule(Module &M) { ToRemove[i]->eraseFromParent(); return true; } + +bool NVVMReflect::runOnModule(Module &M) { + if (!NVVMReflectEnabled) + return false; + + setVarMap(); + + + bool Res = false; + std::string Name; + Type *Tys[1]; + Type *I8Ty = Type::getInt8Ty(M.getContext()); + Function *ReflectFunction; + + // Check for standard overloaded versions of llvm.nvvm.reflect + + for (unsigned i = 0; i != 5; ++i) { + Tys[0] = PointerType::get(I8Ty, i); + Name = Intrinsic::getName(Intrinsic::nvvm_reflect, Tys); + ReflectFunction = M.getFunction(Name); + if(ReflectFunction != 0) { + Res |= handleFunction(ReflectFunction); + } + } + + ReflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION); + // If reflect function is not used, then there will be + // no entry in the module. + if (ReflectFunction != 0) + Res |= handleFunction(ReflectFunction); + + return Res; +} diff --git a/lib/Target/NVPTX/cl_common_defines.h b/lib/Target/NVPTX/cl_common_defines.h index 45cc0b8..02c5a94 100644 --- a/lib/Target/NVPTX/cl_common_defines.h +++ b/lib/Target/NVPTX/cl_common_defines.h @@ -1,5 +1,5 @@ -#ifndef __CL_COMMON_DEFINES_H__ -#define __CL_COMMON_DEFINES_H__ +#ifndef CL_COMMON_DEFINES_H +#define CL_COMMON_DEFINES_H // This file includes defines that are common to both kernel code and // the NVPTX back-end. 
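
For reference on the new NVVMReflect::runOnModule above: the loop probes one overloaded intrinsic name per address space, relying on the usual pointer-overload mangling. A sketch of the names it looks up:

    // Intrinsic::getName(Intrinsic::nvvm_reflect, { PointerType::get(i8, AS) })
    // yields, for AS = 0..4:
    //   llvm.nvvm.reflect.p0i8, llvm.nvvm.reflect.p1i8, llvm.nvvm.reflect.p2i8,
    //   llvm.nvvm.reflect.p3i8, llvm.nvvm.reflect.p4i8
    // The plain "_reflect" entry point (NVVM_REFLECT_FUNCTION) is then
    // checked separately, and each hit is routed through handleFunction().
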
@@ -119,4 +119,4 @@ typedef enum clk_sampler_type { #define CLK_LOCAL_MEM_FENCE (1 << 0) #define CLK_GLOBAL_MEM_FENCE (1 << 1) -#endif // __CL_COMMON_DEFINES_H__ +#endif // CL_COMMON_DEFINES_H diff --git a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 3ac037d..2f562ca 100644 --- a/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -238,7 +238,7 @@ class PPCAsmParser : public MCTargetAsmParser { bool ParseExpression(const MCExpr *&EVal); bool ParseDarwinExpression(const MCExpr *&EVal); - bool ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + bool ParseOperand(OperandVector &Operands); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveTC(unsigned Size, SMLoc L); @@ -246,12 +246,11 @@ class PPCAsmParser : public MCTargetAsmParser { bool ParseDarwinDirectiveMachine(SMLoc L); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, + OperandVector &Operands, MCStreamer &Out, + unsigned &ErrorInfo, bool MatchingInlineAsm) override; - void ProcessInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops); + void ProcessInstruction(MCInst &Inst, const OperandVector &Ops); /// @name Auto-generated Match Functions /// { @@ -276,13 +275,12 @@ public: setAvailableFeatures(ComputeAvailableFeatures(STI.getFeatureBits())); } - bool ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) override; + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; bool ParseDirective(AsmToken DirectiveID) override; - unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override; const MCExpr *applyModifierToExpr(const MCExpr *E, @@ -548,8 +546,9 @@ public: void print(raw_ostream &OS) const override; - static PPCOperand *CreateToken(StringRef Str, SMLoc S, bool IsPPC64) { - PPCOperand *Op = new PPCOperand(Token); + static std::unique_ptr<PPCOperand> CreateToken(StringRef Str, SMLoc S, + bool IsPPC64) { + auto Op = make_unique<PPCOperand>(Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -558,22 +557,27 @@ public: return Op; } - static PPCOperand *CreateTokenWithStringCopy(StringRef Str, SMLoc S, - bool IsPPC64) { + static std::unique_ptr<PPCOperand> + CreateTokenWithStringCopy(StringRef Str, SMLoc S, bool IsPPC64) { // Allocate extra memory for the string and copy it. + // FIXME: This is incorrect, Operands are owned by unique_ptr with a default + // deleter which will destroy them by simply using "delete", not correctly + // calling operator delete on this extra memory after calling the dtor + // explicitly. 
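
One way to discharge the FIXME above, sketched with an illustrative deleter. This is not a drop-in fix: the surrounding OperandVector stores unique_ptrs with the default deleter, so adopting it would ripple through the operand type; it only shows the placement-new/explicit-destroy idiom the comment is asking for:

    struct TrailingStorageDeleter {
      void operator()(PPCOperand *Op) const {
        Op->~PPCOperand();      // destroy the operand explicitly...
        ::operator delete(Op);  // ...then free the over-allocated block
      }
    };
    using OwnedToken = std::unique_ptr<PPCOperand, TrailingStorageDeleter>;
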
void *Mem = ::operator new(sizeof(PPCOperand) + Str.size()); - PPCOperand *Op = new (Mem) PPCOperand(Token); - Op->Tok.Data = (const char *)(Op + 1); + std::unique_ptr<PPCOperand> Op(new (Mem) PPCOperand(Token)); + Op->Tok.Data = (const char *)(Op.get() + 1); Op->Tok.Length = Str.size(); - std::memcpy((char *)(Op + 1), Str.data(), Str.size()); + std::memcpy((void *)Op->Tok.Data, Str.data(), Str.size()); Op->StartLoc = S; Op->EndLoc = S; Op->IsPPC64 = IsPPC64; return Op; } - static PPCOperand *CreateImm(int64_t Val, SMLoc S, SMLoc E, bool IsPPC64) { - PPCOperand *Op = new PPCOperand(Immediate); + static std::unique_ptr<PPCOperand> CreateImm(int64_t Val, SMLoc S, SMLoc E, + bool IsPPC64) { + auto Op = make_unique<PPCOperand>(Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; @@ -581,9 +585,9 @@ public: return Op; } - static PPCOperand *CreateExpr(const MCExpr *Val, - SMLoc S, SMLoc E, bool IsPPC64) { - PPCOperand *Op = new PPCOperand(Expression); + static std::unique_ptr<PPCOperand> CreateExpr(const MCExpr *Val, SMLoc S, + SMLoc E, bool IsPPC64) { + auto Op = make_unique<PPCOperand>(Expression); Op->Expr.Val = Val; Op->Expr.CRVal = EvaluateCRExpr(Val); Op->StartLoc = S; @@ -592,9 +596,9 @@ public: return Op; } - static PPCOperand *CreateTLSReg(const MCSymbolRefExpr *Sym, - SMLoc S, SMLoc E, bool IsPPC64) { - PPCOperand *Op = new PPCOperand(TLSRegister); + static std::unique_ptr<PPCOperand> + CreateTLSReg(const MCSymbolRefExpr *Sym, SMLoc S, SMLoc E, bool IsPPC64) { + auto Op = make_unique<PPCOperand>(TLSRegister); Op->TLSReg.Sym = Sym; Op->StartLoc = S; Op->EndLoc = E; @@ -602,8 +606,8 @@ public: return Op; } - static PPCOperand *CreateFromMCExpr(const MCExpr *Val, - SMLoc S, SMLoc E, bool IsPPC64) { + static std::unique_ptr<PPCOperand> + CreateFromMCExpr(const MCExpr *Val, SMLoc S, SMLoc E, bool IsPPC64) { if (const MCConstantExpr *CE = dyn_cast<MCConstantExpr>(Val)) return CreateImm(CE->getValue(), S, E, IsPPC64); @@ -634,10 +638,8 @@ void PPCOperand::print(raw_ostream &OS) const { } } - -void PPCAsmParser:: -ProcessInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +void PPCAsmParser::ProcessInstruction(MCInst &Inst, + const OperandVector &Operands) { int Opcode = Inst.getOpcode(); switch (Opcode) { case PPC::LAx: { @@ -917,11 +919,10 @@ ProcessInstruction(MCInst &Inst, } } -bool PPCAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) { +bool PPCAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; switch (MatchInstructionImpl(Operands, Inst, ErrorInfo, MatchingInlineAsm)) { @@ -942,7 +943,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((PPCOperand*)Operands[ErrorInfo])->getStartLoc(); + ErrorLoc = ((PPCOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } @@ -1216,12 +1217,10 @@ ParseDarwinExpression(const MCExpr *&EVal) { /// ParseOperand /// This handles registers in the form 'NN', '%rNN' for ELF platforms and /// rNN for MachO. 
-bool PPCAsmParser:: -ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool PPCAsmParser::ParseOperand(OperandVector &Operands) { SMLoc S = Parser.getTok().getLoc(); SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); const MCExpr *EVal; - PPCOperand *Op; // Attempt to parse the next token as an immediate switch (getLexer().getKind()) { @@ -1233,8 +1232,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { int64_t IntVal; if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { Parser.Lex(); // Eat the identifier token. - Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); - Operands.push_back(Op); + Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64())); return false; } return Error(S, "invalid register name"); @@ -1249,8 +1247,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { int64_t IntVal; if (!MatchRegisterName(Parser.getTok(), RegNo, IntVal)) { Parser.Lex(); // Eat the identifier token. - Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); - Operands.push_back(Op); + Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64())); return false; } } @@ -1272,8 +1269,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { } // Push the parsed operand into the list of operands - Op = PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64()); - Operands.push_back(Op); + Operands.push_back(PPCOperand::CreateFromMCExpr(EVal, S, E, isPPC64())); // Check whether this is a TLS call expression bool TLSCall = false; @@ -1292,8 +1288,7 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { E = Parser.getTok().getLoc(); Parser.Lex(); // Eat the ')'. - Op = PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64()); - Operands.push_back(Op); + Operands.push_back(PPCOperand::CreateFromMCExpr(TLSSym, S, E, isPPC64())); } // Otherwise, check for D-form memory operands @@ -1340,17 +1335,15 @@ ParseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { E = Parser.getTok().getLoc(); Parser.Lex(); // Eat the ')'. - Op = PPCOperand::CreateImm(IntVal, S, E, isPPC64()); - Operands.push_back(Op); + Operands.push_back(PPCOperand::CreateImm(IntVal, S, E, isPPC64())); } return false; } /// Parse an instruction mnemonic followed by its operands. -bool PPCAsmParser:: -ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool PPCAsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) { // The first operand is the token for the instruction name. // If the next character is a '+' or '-', we need to add it to the // instruction name, to match what TableGen is doing. @@ -1554,7 +1547,7 @@ extern "C" void LLVMInitializePowerPCAsmParser() { // Define this matcher function after the auto-generated include so we // have the match class enum definitions. -unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, +unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand &AsmOp, unsigned Kind) { // If the kind is a token for a literal immediate, check if our asm // operand matches. 
This is for InstAliases which have a fixed-value @@ -1568,8 +1561,8 @@ unsigned PPCAsmParser::validateTargetOperandClass(MCParsedAsmOperand *AsmOp, default: return Match_InvalidOperand; } - PPCOperand *Op = static_cast<PPCOperand*>(AsmOp); - if (Op->isImm() && Op->getImm() == ImmVal) + PPCOperand &Op = static_cast<PPCOperand &>(AsmOp); + if (Op.isImm() && Op.getImm() == ImmVal) return Match_Success; return Match_InvalidOperand; diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index a4983ad..435a93f 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -102,17 +102,45 @@ public: // Output the constant in big/little endian byte order. unsigned Size = Desc.getSize(); - if (IsLittleEndian) { - for (unsigned i = 0; i != Size; ++i) { - OS << (char)Bits; - Bits >>= 8; + switch (Size) { + case 4: + if (IsLittleEndian) { + OS << (char)(Bits); + OS << (char)(Bits >> 8); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 24); + } else { + OS << (char)(Bits >> 24); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 8); + OS << (char)(Bits); } - } else { - int ShiftValue = (Size * 8) - 8; - for (unsigned i = 0; i != Size; ++i) { - OS << (char)(Bits >> ShiftValue); - Bits <<= 8; + break; + case 8: + // If we emit a pair of instructions, the first one is + // always in the top 32 bits, even on little-endian. + if (IsLittleEndian) { + OS << (char)(Bits >> 32); + OS << (char)(Bits >> 40); + OS << (char)(Bits >> 48); + OS << (char)(Bits >> 56); + OS << (char)(Bits); + OS << (char)(Bits >> 8); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 24); + } else { + OS << (char)(Bits >> 56); + OS << (char)(Bits >> 48); + OS << (char)(Bits >> 40); + OS << (char)(Bits >> 32); + OS << (char)(Bits >> 24); + OS << (char)(Bits >> 16); + OS << (char)(Bits >> 8); + OS << (char)(Bits); } + break; + default: + llvm_unreachable ("Invalid instruction size"); } ++MCNumEmitted; // Keep track of the # of mi's emitted. diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index 10d068d..3ac0aca 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -11,6 +11,7 @@ #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCObjectStreamer.h" using namespace llvm; @@ -127,33 +128,6 @@ PPCMCExpr::EvaluateAsRelocatableImpl(MCValue &Res, return true; } -// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps -// that method should be made public? 
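
For the PPCMCCodeEmitter byte-emission switch earlier in this diff, the unrolled cases all follow one rule; a compact sketch of the same ordering, assuming a raw_ostream sink:

    static void emitWord(raw_ostream &OS, uint32_t Word, bool IsLittleEndian) {
      for (unsigned i = 0; i != 4; ++i) {
        unsigned Shift = IsLittleEndian ? 8 * i        // LSB first
                                        : 8 * (3 - i); // MSB first
        OS << (char)(Word >> Shift);
      }
    }
    // An 8-byte encoding is two such words, and the first instruction of
    // the pair stays in the high 32 bits even on little-endian targets,
    // which is why the case 8 path emits Bits >> 32 before Bits >> 0.
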
-static void AddValueSymbols_(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbols_(BE->getLHS(), Asm); - AddValueSymbols_(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbols_(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); - break; - } -} - -void PPCMCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbols_(getSubExpr(), Asm); +void PPCMCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); } diff --git a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h index 3421b91..bca4085 100644 --- a/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h +++ b/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h @@ -79,7 +79,7 @@ public: void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const override; - void AddValueSymbols(MCAssembler *) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } diff --git a/lib/Target/PowerPC/PPC.td b/lib/Target/PowerPC/PPC.td index bd58539..a9842b2 100644 --- a/lib/Target/PowerPC/PPC.td +++ b/lib/Target/PowerPC/PPC.td @@ -46,6 +46,7 @@ def DirectivePwr5x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR5X", "" def DirectivePwr6: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6", "">; def DirectivePwr6x: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR6X", "">; def DirectivePwr7: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR7", "">; +def DirectivePwr8: SubtargetFeature<"", "DarwinDirective", "PPC::DIR_PWR8", "">; def Feature64Bit : SubtargetFeature<"64bit","Has64BitSupport", "true", "Enable 64-bit instructions">; @@ -285,6 +286,15 @@ def : ProcessorModel<"pwr7", P7Model, FeaturePOPCNTD, FeatureLDBRX, Feature64Bit /*, Feature64BitRegs */, DeprecatedMFTB, DeprecatedDST]>; +def : ProcessorModel<"pwr8", P7Model /* FIXME: Update to P8Model when available */, + [DirectivePwr8, FeatureAltivec, + FeatureMFOCRF, FeatureFCPSGN, FeatureFSqrt, FeatureFRE, + FeatureFRES, FeatureFRSQRTE, FeatureFRSQRTES, + FeatureRecipPrec, FeatureSTFIWX, FeatureLFIWAX, + FeatureFPRND, FeatureFPCVT, FeatureISEL, + FeaturePOPCNTD, FeatureLDBRX, + Feature64Bit /*, Feature64BitRegs */, + DeprecatedMFTB, DeprecatedDST]>; def : Processor<"ppc", G3Itineraries, [Directive32]>; def : ProcessorModel<"ppc64", G5Model, [Directive64, FeatureAltivec, diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index e89fb2d..fd044d9 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -365,8 +365,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { // Transform %Xd = ADDIStocHA %X2, <ga:@sym> LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin()); - // Change the opcode to ADDIS8. If the global address is external, - // has common linkage, is a function address, or is a jump table + // Change the opcode to ADDIS8. 
If the global address is external, has + // common linkage, is a non-local function address, or is a jump table // address, then generate a TOC entry and reference that. Otherwise // reference the symbol directly. TmpInst.setOpcode(PPC::ADDIS8); @@ -375,7 +375,7 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { "Invalid operand for ADDIStocHA!"); MCSymbol *MOSymbol = nullptr; bool IsExternal = false; - bool IsFunction = false; + bool IsNonLocalFunction = false; bool IsCommon = false; bool IsAvailExt = false; @@ -384,15 +384,16 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { MOSymbol = getSymbol(GV); IsExternal = GV->isDeclaration(); IsCommon = GV->hasCommonLinkage(); - IsFunction = GV->getType()->getElementType()->isFunctionTy(); + IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && + (GV->isDeclaration() || GV->isWeakForLinker()); IsAvailExt = GV->hasAvailableExternallyLinkage(); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); else if (MO.isJTI()) MOSymbol = GetJTISymbol(MO.getIndex()); - if (IsExternal || IsFunction || IsCommon || IsAvailExt || MO.isJTI() || - TM.getCodeModel() == CodeModel::Large) + if (IsExternal || IsNonLocalFunction || IsCommon || IsAvailExt || + MO.isJTI() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = @@ -425,7 +426,8 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { else if (MO.isGlobal()) { const GlobalValue *GValue = MO.getGlobal(); MOSymbol = getSymbol(GValue); - if (GValue->isDeclaration() || GValue->hasCommonLinkage() || + if (GValue->getType()->getElementType()->isFunctionTy() || + GValue->isDeclaration() || GValue->hasCommonLinkage() || GValue->hasAvailableExternallyLinkage() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); @@ -450,17 +452,19 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) { assert((MO.isGlobal() || MO.isCPI()) && "Invalid operand for ADDItocL"); MCSymbol *MOSymbol = nullptr; bool IsExternal = false; - bool IsFunction = false; + bool IsNonLocalFunction = false; if (MO.isGlobal()) { const GlobalValue *GV = MO.getGlobal(); MOSymbol = getSymbol(GV); IsExternal = GV->isDeclaration(); - IsFunction = GV->getType()->getElementType()->isFunctionTy(); + IsNonLocalFunction = GV->getType()->getElementType()->isFunctionTy() && + (GV->isDeclaration() || GV->isWeakForLinker()); } else if (MO.isCPI()) MOSymbol = GetCPISymbol(MO.getIndex()); - if (IsFunction || IsExternal || TM.getCodeModel() == CodeModel::Large) + if (IsNonLocalFunction || IsExternal || + TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); const MCExpr *Exp = diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp index ed3cb4d..92a0ec1 100644 --- a/lib/Target/PowerPC/PPCFastISel.cpp +++ b/lib/Target/PowerPC/PPCFastISel.cpp @@ -1030,6 +1030,10 @@ bool PPCFastISel::SelectFPToI(const Instruction *I, bool IsSigned) { if (DstVT != MVT::i32 && DstVT != MVT::i64) return false; + // If we don't have FCTIDUZ and we need it, punt to SelectionDAG. 
+ if (DstVT == MVT::i64 && !IsSigned && !PPCSubTarget->hasFPCVT()) + return false; + Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); if (!isTypeLegal(SrcTy, SrcVT)) @@ -1197,6 +1201,11 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, bool IsVarArg) { SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CC, IsVarArg, *FuncInfo.MF, TM, ArgLocs, *Context); + + // Reserve space for the linkage area on the stack. + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false); + CCInfo.AllocateStack(LinkageSize, 8); + CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_PPC64_ELF_FIS); // Bail out if we can't handle any of the arguments. @@ -1218,6 +1227,13 @@ bool PPCFastISel::processCallArgs(SmallVectorImpl<Value*> &Args, // Get a count of how many bytes are to be pushed onto the stack. NumBytes = CCInfo.getNextStackOffset(); + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + NumBytes = std::max(NumBytes, LinkageSize + 64); + // Issue CALLSEQ_START. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(TII.getCallFrameSetupOpcode())) @@ -1858,16 +1874,9 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { // FIXME: Jump tables are not yet required because fast-isel doesn't // handle switches; if that changes, we need them as well. For now, // what follows assumes everything's a generic (or TLS) global address. - const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV); - if (!GVar) { - // If GV is an alias, use the aliasee for determining thread-locality. - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - GVar = dyn_cast_or_null<GlobalVariable>(GA->getAliasee()); - } // FIXME: We don't yet handle the complexity of TLS. - bool IsTLS = GVar && GVar->isThreadLocal(); - if (IsTLS) + if (GV->isThreadLocal()) return 0; // For small code model, generate a simple TOC load. @@ -1877,8 +1886,8 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { .addGlobalAddress(GV) .addReg(PPC::X2); else { - // If the address is an externally defined symbol, a symbol with - // common or externally available linkage, a function address, or a + // If the address is an externally defined symbol, a symbol with common + // or externally available linkage, a non-local function address, or a // jump table address (not yet needed), or if we are generating code // for large code model, we generate: // LDtocL(GV, ADDIStocHA(%X2, GV)) @@ -1889,12 +1898,13 @@ unsigned PPCFastISel::PPCMaterializeGV(const GlobalValue *GV, MVT VT) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::ADDIStocHA), HighPartReg).addReg(PPC::X2).addGlobalAddress(GV); - // !GVar implies a function address. An external variable is one - // without an initializer. // If/when switches are implemented, jump tables should be handled // on the "if" path here. 
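
Worked numbers for the clamp added to processCallArgs above, under the 64-bit SVR4 (ELFv1) ABI this fast-isel path targets:

    // getLinkageSize(true, false) == 48: six doublewords (back chain, CR
    // save, LR save, two reserved doublewords, TOC save). The callee may
    // dump all eight GPR argument registers: 8 * 8 = 64 bytes. Hence:
    //   NumBytes = std::max(NumBytes, 48 + 64);  // never below 112 bytes
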
- if (CModel == CodeModel::Large || !GVar || !GVar->hasInitializer() || - GVar->hasCommonLinkage() || GVar->hasAvailableExternallyLinkage()) + if (CModel == CodeModel::Large || + (GV->getType()->getElementType()->isFunctionTy() && + (GV->isDeclaration() || GV->isWeakForLinker())) || + GV->isDeclaration() || GV->hasCommonLinkage() || + GV->hasAvailableExternallyLinkage()) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(PPC::LDtocL), DestReg).addGlobalAddress(GV).addReg(HighPartReg); else diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index e294156..65e9cf2 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -15,6 +15,7 @@ #include "PPCInstrBuilder.h" #include "PPCInstrInfo.h" #include "PPCMachineFunctionInfo.h" +#include "PPCSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -35,6 +36,167 @@ static const uint16_t VRRegNo[] = { PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 }; +PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, + (STI.hasQPX() || STI.isBGQ()) ? 32 : 16, 0), + Subtarget(STI) {} + +// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. +const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( + unsigned &NumEntries) const { + if (Subtarget.isDarwinABI()) { + NumEntries = 1; + if (Subtarget.isPPC64()) { + static const SpillSlot darwin64Offsets = {PPC::X31, -8}; + return &darwin64Offsets; + } else { + static const SpillSlot darwinOffsets = {PPC::R31, -4}; + return &darwinOffsets; + } + } + + // Early exit if not using the SVR4 ABI. + if (!Subtarget.isSVR4ABI()) { + NumEntries = 0; + return nullptr; + } + + // Note that the offsets here overlap, but this is fixed up in + // processFunctionBeforeFrameFinalized. + + static const SpillSlot Offsets[] = { + // Floating-point register save area offsets. + {PPC::F31, -8}, + {PPC::F30, -16}, + {PPC::F29, -24}, + {PPC::F28, -32}, + {PPC::F27, -40}, + {PPC::F26, -48}, + {PPC::F25, -56}, + {PPC::F24, -64}, + {PPC::F23, -72}, + {PPC::F22, -80}, + {PPC::F21, -88}, + {PPC::F20, -96}, + {PPC::F19, -104}, + {PPC::F18, -112}, + {PPC::F17, -120}, + {PPC::F16, -128}, + {PPC::F15, -136}, + {PPC::F14, -144}, + + // General register save area offsets. + {PPC::R31, -4}, + {PPC::R30, -8}, + {PPC::R29, -12}, + {PPC::R28, -16}, + {PPC::R27, -20}, + {PPC::R26, -24}, + {PPC::R25, -28}, + {PPC::R24, -32}, + {PPC::R23, -36}, + {PPC::R22, -40}, + {PPC::R21, -44}, + {PPC::R20, -48}, + {PPC::R19, -52}, + {PPC::R18, -56}, + {PPC::R17, -60}, + {PPC::R16, -64}, + {PPC::R15, -68}, + {PPC::R14, -72}, + + // CR save area offset. We map each of the nonvolatile CR fields + // to the slot for CR2, which is the first of the nonvolatile CR + // fields to be assigned, so that we only allocate one save slot. + // See PPCRegisterInfo::hasReservedSpillSlot() for more information. + {PPC::CR2, -4}, + + // VRSAVE save area offset. + {PPC::VRSAVE, -4}, + + // Vector register save area + {PPC::V31, -16}, + {PPC::V30, -32}, + {PPC::V29, -48}, + {PPC::V28, -64}, + {PPC::V27, -80}, + {PPC::V26, -96}, + {PPC::V25, -112}, + {PPC::V24, -128}, + {PPC::V23, -144}, + {PPC::V22, -160}, + {PPC::V21, -176}, + {PPC::V20, -192}}; + + static const SpillSlot Offsets64[] = { + // Floating-point register save area offsets. 
+ {PPC::F31, -8}, + {PPC::F30, -16}, + {PPC::F29, -24}, + {PPC::F28, -32}, + {PPC::F27, -40}, + {PPC::F26, -48}, + {PPC::F25, -56}, + {PPC::F24, -64}, + {PPC::F23, -72}, + {PPC::F22, -80}, + {PPC::F21, -88}, + {PPC::F20, -96}, + {PPC::F19, -104}, + {PPC::F18, -112}, + {PPC::F17, -120}, + {PPC::F16, -128}, + {PPC::F15, -136}, + {PPC::F14, -144}, + + // General register save area offsets. + {PPC::X31, -8}, + {PPC::X30, -16}, + {PPC::X29, -24}, + {PPC::X28, -32}, + {PPC::X27, -40}, + {PPC::X26, -48}, + {PPC::X25, -56}, + {PPC::X24, -64}, + {PPC::X23, -72}, + {PPC::X22, -80}, + {PPC::X21, -88}, + {PPC::X20, -96}, + {PPC::X19, -104}, + {PPC::X18, -112}, + {PPC::X17, -120}, + {PPC::X16, -128}, + {PPC::X15, -136}, + {PPC::X14, -144}, + + // VRSAVE save area offset. + {PPC::VRSAVE, -4}, + + // Vector register save area + {PPC::V31, -16}, + {PPC::V30, -32}, + {PPC::V29, -48}, + {PPC::V28, -64}, + {PPC::V27, -80}, + {PPC::V26, -96}, + {PPC::V25, -112}, + {PPC::V24, -128}, + {PPC::V23, -144}, + {PPC::V22, -160}, + {PPC::V21, -176}, + {PPC::V20, -192}}; + + if (Subtarget.isPPC64()) { + NumEntries = array_lengthof(Offsets64); + + return Offsets64; + } else { + NumEntries = array_lengthof(Offsets); + + return Offsets; + } +} + /// RemoveVRSaveCode - We have found that this function does not need any code /// to manipulate the VRSAVE register, even though it uses vector registers. /// This can happen when the only registers used are known to be live in or out @@ -236,9 +398,9 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, // Get the maximum call frame size of all the calls. unsigned maxCallFrameSize = MFI->getMaxCallFrameSize(); - // Maximum call frame needs to be at least big enough for linkage and 8 args. - unsigned minCallFrameSize = getMinCallFrameSize(Subtarget.isPPC64(), - Subtarget.isDarwinABI()); + // Maximum call frame needs to be at least big enough for linkage area. + unsigned minCallFrameSize = getLinkageSize(Subtarget.isPPC64(), + Subtarget.isDarwinABI()); maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); // If we have dynamic alloca then maxCallFrameSize needs to be aligned so diff --git a/lib/Target/PowerPC/PPCFrameLowering.h b/lib/Target/PowerPC/PPCFrameLowering.h index 94e9b67..7a226f7 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.h +++ b/lib/Target/PowerPC/PPCFrameLowering.h @@ -14,23 +14,18 @@ #define POWERPC_FRAMEINFO_H #include "PPC.h" -#include "PPCSubtarget.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { - class PPCSubtarget; +class PPCSubtarget; class PPCFrameLowering: public TargetFrameLowering { const PPCSubtarget &Subtarget; public: - PPCFrameLowering(const PPCSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, - (sti.hasQPX() || sti.isBGQ()) ? 32 : 16, 0), - Subtarget(sti) { - } + PPCFrameLowering(const PPCSubtarget &STI); unsigned determineFrameLayout(MachineFunction &MF, bool UpdateMF = true, @@ -79,6 +74,12 @@ public: return isPPC64 ? 16 : 4; } + /// getTOCSaveOffset - Return the previous frame offset to save the + /// TOC register -- 64-bit SVR4 ABI only. + static unsigned getTOCSaveOffset(void) { + return 40; + } + /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) { @@ -114,190 +115,9 @@ public: return 8; } - /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI - /// argument area. 
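
The fixed offset in the new getTOCSaveOffset() above assumes the ELFv1 linkage-area layout; spelled out relative to the stack pointer:

    //   SP +  0  back chain
    //   SP +  8  CR save
    //   SP + 16  LR save
    //   SP + 24  reserved (compiler)
    //   SP + 32  reserved (linker)
    //   SP + 40  TOC save   <- getTOCSaveOffset()
    // Six doublewords in total, matching the 48-byte linkage size used in
    // determineFrameLayout above.
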
- static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) { - // For the Darwin ABI / 64-bit SVR4 ABI: - // The prolog code of the callee may store up to 8 GPR argument registers to - // the stack, allowing va_start to index over them in memory if its varargs. - // Because we cannot tell if this is needed on the caller side, we have to - // conservatively assume that it is needed. As such, make sure we have at - // least enough stack space for the caller to store the 8 GPRs. - if (isDarwinABI || isPPC64) - return 8 * (isPPC64 ? 8 : 4); - - // 32-bit SVR4 ABI: - // There is no default stack allocated for the 8 first GPR arguments. - return 0; - } - - /// getMinCallFrameSize - Return the minimum size a call frame can be using - /// the PowerPC ABI. - static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) { - // The call frame needs to be at least big enough for linkage and 8 args. - return getLinkageSize(isPPC64, isDarwinABI) + - getMinCallArgumentsSize(isPPC64, isDarwinABI); - } - - // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. const SpillSlot * - getCalleeSavedSpillSlots(unsigned &NumEntries) const override { - if (Subtarget.isDarwinABI()) { - NumEntries = 1; - if (Subtarget.isPPC64()) { - static const SpillSlot darwin64Offsets = {PPC::X31, -8}; - return &darwin64Offsets; - } else { - static const SpillSlot darwinOffsets = {PPC::R31, -4}; - return &darwinOffsets; - } - } - - // Early exit if not using the SVR4 ABI. - if (!Subtarget.isSVR4ABI()) { - NumEntries = 0; - return nullptr; - } - - // Note that the offsets here overlap, but this is fixed up in - // processFunctionBeforeFrameFinalized. - - static const SpillSlot Offsets[] = { - // Floating-point register save area offsets. - {PPC::F31, -8}, - {PPC::F30, -16}, - {PPC::F29, -24}, - {PPC::F28, -32}, - {PPC::F27, -40}, - {PPC::F26, -48}, - {PPC::F25, -56}, - {PPC::F24, -64}, - {PPC::F23, -72}, - {PPC::F22, -80}, - {PPC::F21, -88}, - {PPC::F20, -96}, - {PPC::F19, -104}, - {PPC::F18, -112}, - {PPC::F17, -120}, - {PPC::F16, -128}, - {PPC::F15, -136}, - {PPC::F14, -144}, - - // General register save area offsets. - {PPC::R31, -4}, - {PPC::R30, -8}, - {PPC::R29, -12}, - {PPC::R28, -16}, - {PPC::R27, -20}, - {PPC::R26, -24}, - {PPC::R25, -28}, - {PPC::R24, -32}, - {PPC::R23, -36}, - {PPC::R22, -40}, - {PPC::R21, -44}, - {PPC::R20, -48}, - {PPC::R19, -52}, - {PPC::R18, -56}, - {PPC::R17, -60}, - {PPC::R16, -64}, - {PPC::R15, -68}, - {PPC::R14, -72}, - - // CR save area offset. We map each of the nonvolatile CR fields - // to the slot for CR2, which is the first of the nonvolatile CR - // fields to be assigned, so that we only allocate one save slot. - // See PPCRegisterInfo::hasReservedSpillSlot() for more information. - {PPC::CR2, -4}, - - // VRSAVE save area offset. - {PPC::VRSAVE, -4}, - - // Vector register save area - {PPC::V31, -16}, - {PPC::V30, -32}, - {PPC::V29, -48}, - {PPC::V28, -64}, - {PPC::V27, -80}, - {PPC::V26, -96}, - {PPC::V25, -112}, - {PPC::V24, -128}, - {PPC::V23, -144}, - {PPC::V22, -160}, - {PPC::V21, -176}, - {PPC::V20, -192} - }; - - static const SpillSlot Offsets64[] = { - // Floating-point register save area offsets. 
- {PPC::F31, -8}, - {PPC::F30, -16}, - {PPC::F29, -24}, - {PPC::F28, -32}, - {PPC::F27, -40}, - {PPC::F26, -48}, - {PPC::F25, -56}, - {PPC::F24, -64}, - {PPC::F23, -72}, - {PPC::F22, -80}, - {PPC::F21, -88}, - {PPC::F20, -96}, - {PPC::F19, -104}, - {PPC::F18, -112}, - {PPC::F17, -120}, - {PPC::F16, -128}, - {PPC::F15, -136}, - {PPC::F14, -144}, - - // General register save area offsets. - {PPC::X31, -8}, - {PPC::X30, -16}, - {PPC::X29, -24}, - {PPC::X28, -32}, - {PPC::X27, -40}, - {PPC::X26, -48}, - {PPC::X25, -56}, - {PPC::X24, -64}, - {PPC::X23, -72}, - {PPC::X22, -80}, - {PPC::X21, -88}, - {PPC::X20, -96}, - {PPC::X19, -104}, - {PPC::X18, -112}, - {PPC::X17, -120}, - {PPC::X16, -128}, - {PPC::X15, -136}, - {PPC::X14, -144}, - - // VRSAVE save area offset. - {PPC::VRSAVE, -4}, - - // Vector register save area - {PPC::V31, -16}, - {PPC::V30, -32}, - {PPC::V29, -48}, - {PPC::V28, -64}, - {PPC::V27, -80}, - {PPC::V26, -96}, - {PPC::V25, -112}, - {PPC::V24, -128}, - {PPC::V23, -144}, - {PPC::V22, -160}, - {PPC::V21, -176}, - {PPC::V20, -192} - }; - - if (Subtarget.isPPC64()) { - NumEntries = array_lengthof(Offsets64); - - return Offsets64; - } else { - NumEntries = array_lengthof(Offsets); - - return Offsets; - } - } + getCalleeSavedSpillSlots(unsigned &NumEntries) const override; }; - } // End llvm namespace #endif diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index 7ca706b..d9b242c 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -162,7 +162,8 @@ unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) { unsigned Directive = DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); // If we're using a special group-terminating nop, then we need only one. - if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7) + if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || + Directive == PPC::DIR_PWR8 ) return 1; return 5 - CurSlots; @@ -223,7 +224,7 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { // If the group has now filled all of its slots, or if we're using a special // group-terminating nop, the group is complete. if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 || - CurSlots == 6) { + Directive == PPC::DIR_PWR8 || CurSlots == 6) { CurGroup.clear(); CurSlots = CurBranches = 0; } else { @@ -258,8 +259,8 @@ void PPCDispatchGroupSBHazardRecognizer::EmitNoop() { // 3. Handling of the esoteric cases in "Resource-based Instruction Grouping". // -PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetMachine &TM) - : TM(TM) { +PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG) + : DAG(DAG) { EndDispatchGroup(); } @@ -278,7 +279,7 @@ PPCHazardRecognizer970::GetInstrType(unsigned Opcode, bool &isFirst, bool &isSingle, bool &isCracked, bool &isLoad, bool &isStore) { - const MCInstrDesc &MCID = TM.getInstrInfo()->get(Opcode); + const MCInstrDesc &MCID = DAG.TII->get(Opcode); isLoad = MCID.mayLoad(); isStore = MCID.mayStore(); diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index cf4332c..23f76c1 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -54,7 +54,7 @@ public: /// setting the CTR register then branching through it within a dispatch group), /// or storing then loading from the same address within a dispatch group. 
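
Distilling the PWR8 cases added to PPCDispatchGroupSBHazardRecognizer above (a sketch; CurSlots counts instructions already dispatched into the current group):

    unsigned noopsNeeded(unsigned Directive, unsigned CurSlots) {
      // POWER6/7/8 provide a group-terminating nop, so one is enough.
      if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||
          Directive == PPC::DIR_PWR8)
        return 1;
      return 5 - CurSlots;  // otherwise pad out the five-slot dispatch group
    }
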
class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { - const TargetMachine &TM; + const ScheduleDAG &DAG; unsigned NumIssued; // Number of insts issued, including advanced cycles. @@ -75,7 +75,7 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { unsigned NumStores; public: - PPCHazardRecognizer970(const TargetMachine &TM); + PPCHazardRecognizer970(const ScheduleDAG &DAG); virtual HazardType getHazardType(SUnit *SU, int Stalls) override; virtual void EmitInstruction(SUnit *SU) override; virtual void AdvanceCycle() override; diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 251e8b6..4881b3f 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -1454,10 +1454,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (CModel != CodeModel::Medium && CModel != CodeModel::Large) break; - // The first source operand is a TargetGlobalAddress or a - // TargetJumpTable. If it is an externally defined symbol, a symbol - // with common linkage, a function address, or a jump table address, - // or if we are generating code for large code model, we generate: + // The first source operand is a TargetGlobalAddress or a TargetJumpTable. + // If it is an externally defined symbol, a symbol with common linkage, + // a non-local function address, or a jump table address, or if we are + // generating code for large code model, we generate: // LDtocL(<ga:@sym>, ADDIStocHA(%X2, <ga:@sym>)) // Otherwise we generate: // ADDItocL(ADDIStocHA(%X2, <ga:@sym>), <ga:@sym>) @@ -1472,8 +1472,10 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) { if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA)) { const GlobalValue *GValue = G->getGlobal(); - if (GValue->isDeclaration() || GValue->hasCommonLinkage() || - GValue->hasAvailableExternallyLinkage()) + if ((GValue->getType()->getElementType()->isFunctionTy() && + (GValue->isDeclaration() || GValue->isWeakForLinker())) || + GValue->isDeclaration() || GValue->hasCommonLinkage() || + GValue->hasAvailableExternallyLinkage()) return CurDAG->getMachineNode(PPC::LDtocL, dl, MVT::i64, GA, SDValue(Tmp, 0)); } diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index cf4c9e6..bc057bf 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -19,6 +19,7 @@ #include "PPCTargetObjectFile.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -50,20 +51,18 @@ cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden); // FIXME: Remove this once the bug has been fixed! extern cl::opt<bool> ANDIGlueBug; -static TargetLoweringObjectFile *CreateTLOF(const PPCTargetMachine &TM) { - if (TM.getSubtargetImpl()->isDarwin()) +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + // If it isn't a Mach-O file then it's going to be a linux ELF + // object file. 
+ if (TT.isOSDarwin()) return new TargetLoweringObjectFileMachO(); - if (TM.getSubtargetImpl()->isSVR4ABI()) - return new PPC64LinuxTargetObjectFile(); - - return new TargetLoweringObjectFileELF(); + return new PPC64LinuxTargetObjectFile(); } PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) - : TargetLowering(TM, CreateTLOF(TM)), PPCSubTarget(*TM.getSubtargetImpl()) { - const PPCSubtarget *Subtarget = &TM.getSubtarget<PPCSubtarget>(); - + : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))), + Subtarget(*TM.getSubtargetImpl()) { setPow2DivIsCheap(); // Use _setjmp/_longjmp instead of setjmp/longjmp. @@ -72,7 +71,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all // arguments are at least 4/8 bytes aligned. - bool isPPC64 = Subtarget->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); setMinStackArgumentAlignment(isPPC64 ? 8:4); // Set up the register classes. @@ -98,10 +97,10 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setIndexedStoreAction(ISD::PRE_INC, MVT::i32, Legal); setIndexedStoreAction(ISD::PRE_INC, MVT::i64, Legal); - if (Subtarget->useCRBits()) { + if (Subtarget.useCRBits()) { setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - if (isPPC64 || Subtarget->hasFPCVT()) { + if (isPPC64 || Subtarget.hasFPCVT()) { setOperationAction(ISD::SINT_TO_FP, MVT::i1, Promote); AddPromotedToType (ISD::SINT_TO_FP, MVT::i1, isPPC64 ? MVT::i64 : MVT::i32); @@ -176,17 +175,17 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FLT_ROUNDS_, MVT::i32, Custom); // If we're enabling GP optimizations, use hardware square root - if (!Subtarget->hasFSQRT() && + if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && - Subtarget->hasFRSQRTE() && Subtarget->hasFRE())) + Subtarget.hasFRSQRTE() && Subtarget.hasFRE())) setOperationAction(ISD::FSQRT, MVT::f64, Expand); - if (!Subtarget->hasFSQRT() && + if (!Subtarget.hasFSQRT() && !(TM.Options.UnsafeFPMath && - Subtarget->hasFRSQRTES() && Subtarget->hasFRES())) + Subtarget.hasFRSQRTES() && Subtarget.hasFRES())) setOperationAction(ISD::FSQRT, MVT::f32, Expand); - if (Subtarget->hasFCPSGN()) { + if (Subtarget.hasFCPSGN()) { setOperationAction(ISD::FCOPYSIGN, MVT::f64, Legal); setOperationAction(ISD::FCOPYSIGN, MVT::f32, Legal); } else { @@ -194,7 +193,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); } - if (Subtarget->hasFPRND()) { + if (Subtarget.hasFPRND()) { setOperationAction(ISD::FFLOOR, MVT::f64, Legal); setOperationAction(ISD::FCEIL, MVT::f64, Legal); setOperationAction(ISD::FTRUNC, MVT::f64, Legal); @@ -216,7 +215,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Expand); - if (Subtarget->hasPOPCNTD()) { + if (Subtarget.hasPOPCNTD()) { setOperationAction(ISD::CTPOP, MVT::i32 , Legal); setOperationAction(ISD::CTPOP, MVT::i64 , Legal); } else { @@ -228,7 +227,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::ROTR, MVT::i32 , Expand); setOperationAction(ISD::ROTR, MVT::i64 , Expand); - if (!Subtarget->useCRBits()) { + if (!Subtarget.useCRBits()) { // PowerPC does not have Select setOperationAction(ISD::SELECT, MVT::i32, Expand); setOperationAction(ISD::SELECT, MVT::i64, Expand); @@ -241,11 +240,11 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) 
setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); // PowerPC wants to optimize integer setcc a bit - if (!Subtarget->useCRBits()) + if (!Subtarget.useCRBits()) setOperationAction(ISD::SETCC, MVT::i32, Custom); // PowerPC does not have BRCOND which requires SetCC - if (!Subtarget->useCRBits()) + if (!Subtarget.useCRBits()) setOperationAction(ISD::BRCOND, MVT::Other, Expand); setOperationAction(ISD::BR_JT, MVT::Other, Expand); @@ -297,7 +296,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - if (Subtarget->isSVR4ABI()) { + if (Subtarget.isSVR4ABI()) { if (isPPC64) { // VAARG always uses double-word chunks, so promote anything smaller. setOperationAction(ISD::VAARG, MVT::i1, Promote); @@ -317,7 +316,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } else setOperationAction(ISD::VAARG, MVT::Other, Expand); - if (Subtarget->isSVR4ABI() && !isPPC64) + if (Subtarget.isSVR4ABI() && !isPPC64) // VACOPY is custom lowered with the 32-bit SVR4 ABI. setOperationAction(ISD::VACOPY , MVT::Other, Custom); else @@ -350,7 +349,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETONE, MVT::f32, Expand); setCondCodeAction(ISD::SETONE, MVT::f64, Expand); - if (Subtarget->has64BitSupport()) { + if (Subtarget.has64BitSupport()) { // They also have instructions for converting between i64 and fp. setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); @@ -360,7 +359,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // We cannot do this with Promote because i64 is not a legal type. setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); - if (PPCSubTarget.hasLFIWAX() || Subtarget->isPPC64()) + if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); } else { // PowerPC does not have FP_TO_UINT on 32-bit implementations. @@ -368,8 +367,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } // With the instructions enabled under FPCVT, we can do everything. - if (PPCSubTarget.hasFPCVT()) { - if (Subtarget->has64BitSupport()) { + if (Subtarget.hasFPCVT()) { + if (Subtarget.has64BitSupport()) { setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); @@ -382,7 +381,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); } - if (Subtarget->use64BitRegs()) { + if (Subtarget.use64BitRegs()) { // 64-bit PowerPC implementations can support i64 types directly addRegisterClass(MVT::i64, &PPC::G8RCRegClass); // BUILD_PAIR can't be handled natively, and should be expanded to shl/or @@ -398,7 +397,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); } - if (Subtarget->hasAltivec()) { + if (Subtarget.hasAltivec()) { // First set operation action for all vector types to expand. Then we // will selectively turn on ones that can be effectively codegen'd. for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -488,7 +487,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::XOR , MVT::v4i32, Legal); setOperationAction(ISD::LOAD , MVT::v4i32, Legal); setOperationAction(ISD::SELECT, MVT::v4i32, - Subtarget->useCRBits() ? 
Legal : Expand); + Subtarget.useCRBits() ? Legal : Expand); setOperationAction(ISD::STORE , MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Legal); @@ -507,7 +506,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::MUL, MVT::v4f32, Legal); setOperationAction(ISD::FMA, MVT::v4f32, Legal); - if (TM.Options.UnsafeFPMath || Subtarget->hasVSX()) { + if (TM.Options.UnsafeFPMath || Subtarget.hasVSX()) { setOperationAction(ISD::FDIV, MVT::v4f32, Legal); setOperationAction(ISD::FSQRT, MVT::v4f32, Legal); } @@ -535,7 +534,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setCondCodeAction(ISD::SETO, MVT::v4f32, Expand); setCondCodeAction(ISD::SETONE, MVT::v4f32, Expand); - if (Subtarget->hasVSX()) { + if (Subtarget.hasVSX()) { setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Legal); @@ -613,7 +612,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } } - if (Subtarget->has64BitSupport()) { + if (Subtarget.has64BitSupport()) { setOperationAction(ISD::PREFETCH, MVT::Other, Legal); setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Legal); } @@ -642,7 +641,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setTargetDAGCombine(ISD::LOAD); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::BR_CC); - if (Subtarget->useCRBits()) + if (Subtarget.useCRBits()) setTargetDAGCombine(ISD::BRCOND); setTargetDAGCombine(ISD::BSWAP); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); @@ -651,7 +650,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); - if (Subtarget->useCRBits()) { + if (Subtarget.useCRBits()) { setTargetDAGCombine(ISD::TRUNCATE); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::SELECT_CC); @@ -664,7 +663,7 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) } // Darwin long double math library functions have $LDBL128 appended. - if (Subtarget->isDarwin()) { + if (Subtarget.isDarwin()) { setLibcallName(RTLIB::COS_PPCF128, "cosl$LDBL128"); setLibcallName(RTLIB::POW_PPCF128, "powl$LDBL128"); setLibcallName(RTLIB::REM_PPCF128, "fmodl$LDBL128"); @@ -679,21 +678,21 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // With 32 condition bits, we don't need to sink (and duplicate) compares // aggressively in CodeGenPrep. - if (Subtarget->useCRBits()) + if (Subtarget.useCRBits()) setHasMultipleConditionRegisters(); setMinFunctionAlignment(2); - if (PPCSubTarget.isDarwin()) + if (Subtarget.isDarwin()) setPrefFunctionAlignment(4); - if (isPPC64 && Subtarget->isJITCodeModel()) + if (isPPC64 && Subtarget.isJITCodeModel()) // Temporary workaround for the inability of PPC64 JIT to handle jump // tables. setSupportJumpTables(false); setInsertFencesForAtomic(true); - if (Subtarget->enableMachineScheduler()) + if (Subtarget.enableMachineScheduler()) setSchedulingPreference(Sched::Source); else setSchedulingPreference(Sched::Hybrid); @@ -702,8 +701,8 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // The Freescale cores does better with aggressive inlining of memcpy and // friends. Gcc uses same threshold of 128 bytes (= 32 word stores). 
- if (Subtarget->getDarwinDirective() == PPC::DIR_E500mc || - Subtarget->getDarwinDirective() == PPC::DIR_E5500) { + if (Subtarget.getDarwinDirective() == PPC::DIR_E500mc || + Subtarget.getDarwinDirective() == PPC::DIR_E5500) { MaxStoresPerMemset = 32; MaxStoresPerMemsetOptSize = 16; MaxStoresPerMemcpy = 32; @@ -747,14 +746,14 @@ static void getMaxByValAlign(Type *Ty, unsigned &MaxAlign, /// function arguments in the caller parameter area. unsigned PPCTargetLowering::getByValTypeAlignment(Type *Ty) const { // Darwin passes everything on 4 byte boundary. - if (PPCSubTarget.isDarwin()) + if (Subtarget.isDarwin()) return 4; // 16byte and wider vectors are passed on 16byte boundary. // The rest is 8 on PPC64 and 4 on PPC32 boundary. - unsigned Align = PPCSubTarget.isPPC64() ? 8 : 4; - if (PPCSubTarget.hasAltivec() || PPCSubTarget.hasQPX()) - getMaxByValAlign(Ty, Align, PPCSubTarget.hasQPX() ? 32 : 16); + unsigned Align = Subtarget.isPPC64() ? 8 : 4; + if (Subtarget.hasAltivec() || Subtarget.hasQPX()) + getMaxByValAlign(Ty, Align, Subtarget.hasQPX() ? 32 : 16); return Align; } @@ -774,7 +773,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; - case PPCISD::TOC_RESTORE: return "PPCISD::TOC_RESTORE"; case PPCISD::LOAD: return "PPCISD::LOAD"; case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; @@ -826,7 +824,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { EVT PPCTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) - return PPCSubTarget.useCRBits() ? MVT::i1 : MVT::i32; + return Subtarget.useCRBits() ? MVT::i1 : MVT::i32; return VT.changeVectorElementTypeToInteger(); } @@ -855,15 +853,17 @@ static bool isConstantOrUndef(int Op, int Val) { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. -bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { +bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, + SelectionDAG &DAG) { + unsigned j = DAG.getTarget().getDataLayout()->isLittleEndian() ? 0 : 1; if (!isUnary) { for (unsigned i = 0; i != 16; ++i) - if (!isConstantOrUndef(N->getMaskElt(i), i*2+1)) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+j)) return false; } else { for (unsigned i = 0; i != 8; ++i) - if (!isConstantOrUndef(N->getMaskElt(i), i*2+1) || - !isConstantOrUndef(N->getMaskElt(i+8), i*2+1)) + if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+j)) return false; } return true; @@ -871,18 +871,27 @@ bool PPC::isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. 
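
Concretely, for the endian-aware isVPKUHUMShuffleMask just above: vpkuhum keeps the low-order byte of each halfword of the 32-byte input pair, and which byte that is depends on element numbering:

    // Big-endian numbering:    bytes 1, 3, 5, ..., 31  (j == 1)
    // Little-endian numbering: bytes 0, 2, 4, ..., 30  (j == 0)
    // i.e. mask element i must equal i*2 + j for i = 0..15, which is
    // exactly the isConstantOrUndef check in the loop.
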
-bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary) { +bool PPC::isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, + SelectionDAG &DAG) { + unsigned j, k; + if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + j = 0; + k = 1; + } else { + j = 2; + k = 3; + } if (!isUnary) { for (unsigned i = 0; i != 16; i += 2) - if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || - !isConstantOrUndef(N->getMaskElt(i+1), i*2+3)) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+k)) return false; } else { for (unsigned i = 0; i != 8; i += 2) - if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) || - !isConstantOrUndef(N->getMaskElt(i+1), i*2+3) || - !isConstantOrUndef(N->getMaskElt(i+8), i*2+2) || - !isConstantOrUndef(N->getMaskElt(i+9), i*2+3)) + if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+1), i*2+k) || + !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) || + !isConstantOrUndef(N->getMaskElt(i+9), i*2+k)) return false; } return true; @@ -909,27 +918,39 @@ static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, } /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for -/// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). +/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes). bool PPC::isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary) { - if (!isUnary) - return isVMerge(N, UnitSize, 8, 24); - return isVMerge(N, UnitSize, 8, 8); + bool isUnary, SelectionDAG &DAG) { + if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (!isUnary) + return isVMerge(N, UnitSize, 0, 16); + return isVMerge(N, UnitSize, 0, 0); + } else { + if (!isUnary) + return isVMerge(N, UnitSize, 8, 24); + return isVMerge(N, UnitSize, 8, 8); + } } /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for -/// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). +/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes). bool PPC::isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary) { - if (!isUnary) - return isVMerge(N, UnitSize, 0, 16); - return isVMerge(N, UnitSize, 0, 0); + bool isUnary, SelectionDAG &DAG) { + if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + if (!isUnary) + return isVMerge(N, UnitSize, 8, 24); + return isVMerge(N, UnitSize, 8, 8); + } else { + if (!isUnary) + return isVMerge(N, UnitSize, 0, 16); + return isVMerge(N, UnitSize, 0, 0); + } } /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. -int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { +int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG) { if (N->getValueType(0) != MVT::v16i8) return -1; @@ -946,18 +967,38 @@ int PPC::isVSLDOIShuffleMask(SDNode *N, bool isUnary) { // numbered from this value. unsigned ShiftAmt = SVOp->getMaskElt(i); if (ShiftAmt < i) return -1; - ShiftAmt -= i; - if (!isUnary) { - // Check the rest of the elements to see if they are consecutive. - for (++i; i != 16; ++i) - if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) - return -1; - } else { - // Check the rest of the elements to see if they are consecutive. 
- for (++i; i != 16; ++i) - if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) - return -1; + if (DAG.getTarget().getDataLayout()->isLittleEndian()) { + + ShiftAmt += i; + + if (!isUnary) { + // Check the rest of the elements to see if they are consecutive. + for (++i; i != 16; ++i) + if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt - i)) + return -1; + } else { + // Check the rest of the elements to see if they are consecutive. + for (++i; i != 16; ++i) + if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt - i) & 15)) + return -1; + } + + } else { // Big Endian + + ShiftAmt -= i; + + if (!isUnary) { + // Check the rest of the elements to see if they are consecutive. + for (++i; i != 16; ++i) + if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i)) + return -1; + } else { + // Check the rest of the elements to see if they are consecutive. + for (++i; i != 16; ++i) + if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15)) + return -1; + } } return ShiftAmt; } @@ -1010,10 +1051,14 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) { /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. -unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize) { +unsigned PPC::getVSPLTImmediate(SDNode *N, unsigned EltSize, + SelectionDAG &DAG) { ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); assert(isSplatShuffleMask(SVOp, EltSize)); - return SVOp->getMaskElt(0) / EltSize; + if (DAG.getTarget().getDataLayout()->isLittleEndian()) + return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize); + else + return SVOp->getMaskElt(0) / EltSize; } /// get_VSPLTI_elt - If this is a build_vector of constants which can be formed @@ -1299,7 +1344,7 @@ bool PPCTargetLowering::SelectAddressRegImm(SDValue N, SDValue &Disp, short Imm; if (isIntS16Immediate(CN, Imm) && (!Aligned || (Imm & 3) == 0)) { Disp = DAG.getTargetConstant(Imm, CN->getValueType(0)); - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, + Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, CN->getValueType(0)); return true; } @@ -1350,7 +1395,7 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, } // Otherwise, do it the hard way, using R0 as the base register. - Base = DAG.getRegister(PPCSubTarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, + Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO, N.getValueType()); Index = N; return true; @@ -1497,7 +1542,7 @@ SDValue PPCTargetLowering::LowerConstantPool(SDValue Op, // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. - if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlignment(), 0); return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(CP), MVT::i64, GA, DAG.getRegister(PPC::X2, MVT::i64)); @@ -1518,7 +1563,7 @@ SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
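An aside on the getVSPLTImmediate change above: the vsplt* immediate counts lanes from the most-significant end of the register, so on a little-endian target the lane named by the shuffle mask must be mirrored across the 16-byte register. A minimal standalone sketch of the arithmetic (illustrative names, not part of the patch):

    #include <cassert>

    // Mirrors the adjustment in PPC::getVSPLTImmediate: the first mask
    // element is a byte index into the 16-byte vector; dividing by the
    // element size gives the lane, and little endian reflects it.
    static unsigned vspltImmediate(unsigned FirstMaskByte, unsigned EltSize,
                                   bool IsLittleEndian) {
      unsigned NumElts = 16 / EltSize;
      unsigned Lane = FirstMaskByte / EltSize;
      return IsLittleEndian ? NumElts - 1 - Lane : Lane;
    }

    int main() {
      // Splat of halfword lane 1: mask bytes start at 2, EltSize == 2.
      assert(vspltImmediate(2, 2, false) == 1); // big endian: lane 1
      assert(vspltImmediate(2, 2, true) == 6);  // little endian: 8 - 1 - 1
    }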
- if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT); return DAG.getNode(PPCISD::TOC_ENTRY, SDLoc(JT), MVT::i64, GA, DAG.getRegister(PPC::X2, MVT::i64)); @@ -1555,7 +1600,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op, SDLoc dl(GA); const GlobalValue *GV = GA->getGlobal(); EVT PtrVT = getPointerTy(); - bool is64bit = PPCSubTarget.isPPC64(); + bool is64bit = Subtarget.isPPC64(); TLSModel::Model Model = getTargetMachine().getTLSModel(GV); @@ -1646,7 +1691,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, // 64-bit SVR4 ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. - if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + if (Subtarget.isSVR4ABI() && Subtarget.isPPC64()) { SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); return DAG.getNode(PPCISD::TOC_ENTRY, DL, MVT::i64, GA, DAG.getRegister(PPC::X2, MVT::i64)); @@ -1891,7 +1936,8 @@ SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol("__trampoline_setup", PtrVT), &Args, 0); + DAG.getExternalSymbol("__trampoline_setup", PtrVT), + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.second; @@ -2086,6 +2132,43 @@ static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, return ArgSize; } +/// CalculateStackSlotAlignment - Calculates the alignment of this argument +/// on the stack. +static unsigned CalculateStackSlotAlignment(EVT ArgVT, ISD::ArgFlagsTy Flags, + unsigned PtrByteSize) { + unsigned Align = PtrByteSize; + + // Altivec parameters are padded to a 16 byte boundary. + if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || + ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || + ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) + Align = 16; + + // ByVal parameters are aligned as requested. + if (Flags.isByVal()) { + unsigned BVAlign = Flags.getByValAlign(); + if (BVAlign > PtrByteSize) { + if (BVAlign % PtrByteSize != 0) + llvm_unreachable( + "ByVal alignment is not a multiple of the pointer size"); + + Align = BVAlign; + } + } + + return Align; +} + +/// EnsureStackAlignment - Round stack frame size up from NumBytes to +/// ensure minimum alignment required for target. +static unsigned EnsureStackAlignment(const TargetMachine &Target, + unsigned NumBytes) { + unsigned TargetAlign = Target.getFrameLowering()->getStackAlignment(); + unsigned AlignMask = TargetAlign - 1; + NumBytes = (NumBytes + AlignMask) & ~AlignMask; + return NumBytes; +} + SDValue PPCTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -2094,8 +2177,8 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { - if (PPCSubTarget.isSVR4ABI()) { - if (PPCSubTarget.isPPC64()) + if (Subtarget.isSVR4ABI()) { + if (Subtarget.isPPC64()) return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); else @@ -2161,7 +2244,8 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( getTargetMachine(), ArgLocs, *DAG.getContext()); // Reserve space for the linkage area on the stack. 
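The two helpers introduced above, CalculateStackSlotAlignment and EnsureStackAlignment, both reduce to round-up-to-alignment arithmetic. A small self-contained check with illustrative values:

    #include <cassert>

    // EnsureStackAlignment rounds with a power-of-two mask; the argument
    // layout code rounds with divide/multiply, which also tolerates
    // alignments that are not powers of two.
    static unsigned roundUpPow2(unsigned Bytes, unsigned Align) {
      unsigned Mask = Align - 1;
      return (Bytes + Mask) & ~Mask;
    }
    static unsigned roundUp(unsigned Offset, unsigned Align) {
      return ((Offset + Align - 1) / Align) * Align;
    }

    int main() {
      assert(roundUpPow2(52, 16) == 64); // frame size up to stack alignment
      assert(roundUp(40, 16) == 48);     // Altivec argument up to 16 bytes
      assert(roundUp(48, 16) == 48);     // already aligned: unchanged
    }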
- CCInfo.AllocateStack(PPCFrameLowering::getLinkageSize(false, false), PtrByteSize); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(false, false); + CCInfo.AllocateStack(LinkageSize, PtrByteSize); CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4); @@ -2184,7 +2268,7 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( RC = &PPC::F4RCRegClass; break; case MVT::f64: - if (PPCSubTarget.hasVSX()) + if (Subtarget.hasVSX()) RC = &PPC::VSFRCRegClass; else RC = &PPC::F8RCRegClass; @@ -2240,23 +2324,14 @@ PPCTargetLowering::LowerFormalArguments_32SVR4( // Area that is at least reserved in the caller of this function. unsigned MinReservedArea = CCByValInfo.getNextStackOffset(); + MinReservedArea = std::max(MinReservedArea, LinkageSize); // Set the size that is at least reserved in caller of this function. Tail // call optimized function's reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. - PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); - - MinReservedArea = - std::max(MinReservedArea, - PPCFrameLowering::getMinCallFrameSize(false, false)); - - unsigned TargetAlign = DAG.getMachineFunction().getTarget().getFrameLowering()-> - getStackAlignment(); - unsigned AlignMask = TargetAlign-1; - MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; - - FI->setMinReservedArea(MinReservedArea); + MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea); + FuncInfo->setMinReservedArea(MinReservedArea); SmallVector<SDValue, 8> MemOps; @@ -2352,32 +2427,6 @@ PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal); } -// Set the size that is at least reserved in caller of this function. Tail -// call optimized functions' reserved stack space needs to be aligned so that -// taking the difference between two stack areas will result in an aligned -// stack. -void -PPCTargetLowering::setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, - unsigned nAltivecParamsAtEnd, - unsigned MinReservedArea, - bool isPPC64) const { - PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); - // Add the Altivec parameters at the end, if needed. - if (nAltivecParamsAtEnd) { - MinReservedArea = ((MinReservedArea+15)/16)*16; - MinReservedArea += 16*nAltivecParamsAtEnd; - } - MinReservedArea = - std::max(MinReservedArea, - PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); - unsigned TargetAlign - = DAG.getMachineFunction().getTarget().getFrameLowering()-> - getStackAlignment(); - unsigned AlignMask = TargetAlign-1; - MinReservedArea = (MinReservedArea + AlignMask) & ~AlignMask; - FI->setMinReservedArea(MinReservedArea); -} - SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( SDValue Chain, @@ -2388,6 +2437,7 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( SmallVectorImpl<SDValue> &InVals) const { // TODO: add description of PPC stack frame format, or at least some docs. // + bool isLittleEndian = Subtarget.isLittleEndian(); MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); @@ -2398,9 +2448,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( (CallConv == CallingConv::Fast)); unsigned PtrByteSize = 8; - unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); - // Area that is at least reserved in caller of this function. 
- unsigned MinReservedArea = ArgOffset; + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false); + unsigned ArgOffset = LinkageSize; static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, @@ -2422,14 +2471,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( const unsigned Num_FPR_Regs = 13; const unsigned Num_VR_Regs = array_lengthof(VR); - unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; // Add DAG nodes to load the arguments or copy them out of registers. On // entry to a function on PPC, the arguments start after the linkage area, // although the first ones are often in registers. SmallVector<SDValue, 8> MemOps; - unsigned nAltivecParamsAtEnd = 0; Function::const_arg_iterator FuncArg = MF.getFunction()->arg_begin(); unsigned CurArgIdx = 0; for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { @@ -2442,24 +2490,15 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( std::advance(FuncArg, Ins[ArgNo].OrigArgIndex - CurArgIdx); CurArgIdx = Ins[ArgNo].OrigArgIndex; + /* Respect alignment of argument on the stack. */ + unsigned Align = + CalculateStackSlotAlignment(ObjectVT, Flags, PtrByteSize); + ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; unsigned CurArgOffset = ArgOffset; - // Varargs or 64 bit Altivec parameters are padded to a 16 byte boundary. - if (ObjectVT==MVT::v4f32 || ObjectVT==MVT::v4i32 || - ObjectVT==MVT::v8i16 || ObjectVT==MVT::v16i8 || - ObjectVT==MVT::v2f64 || ObjectVT==MVT::v2i64) { - if (isVarArg) { - MinReservedArea = ((MinReservedArea+15)/16)*16; - MinReservedArea += CalculateStackSlotSize(ObjectVT, - Flags, - PtrByteSize); - } else - nAltivecParamsAtEnd++; - } else - // Calculate min reserved area. - MinReservedArea += CalculateStackSlotSize(Ins[ArgNo].VT, - Flags, - PtrByteSize); + /* Compute GPR index associated with argument offset. */ + GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; + GPR_idx = std::min(GPR_idx, Num_GPR_Regs); // FIXME the codegen can be much improved in some cases. // We do not have to keep everything in memory. @@ -2481,14 +2520,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( continue; } - unsigned BVAlign = Flags.getByValAlign(); - if (BVAlign > 8) { - ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; - CurArgOffset = ArgOffset; - } - // All aggregates smaller than 8 bytes must be passed right-justified. - if (ObjSize < PtrByteSize) + if (ObjSize < PtrByteSize && !isLittleEndian) CurArgOffset = CurArgOffset + (PtrByteSize - ObjSize); // The value of the object is its address. int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true); @@ -2522,7 +2555,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( } MemOps.push_back(Store); - ++GPR_idx; } // Whether we copied from a register or not, advance the offset // into the parameter save area by a full doubleword. @@ -2567,8 +2599,6 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // PPC64 passes i8, i16, and i32 values in i64 registers. Promote // value to MVT::i64 and then truncate to the correct register size. ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl); - - ++GPR_idx; } else { needsLoad = true; ArgSize = PtrByteSize; @@ -2578,18 +2608,13 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::f32: case MVT::f64: - // Every 8 bytes of argument space consumes one of the GPRs available for - // argument passing. 
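Note the bookkeeping change running through LowerFormalArguments_64SVR4 above: instead of incrementing GPR_idx once per argument, the patch derives it from the argument's offset in the parameter save area, so register assignment and stack layout cannot drift apart. A sketch using the 64-bit SVR4 constants from the hunk (8-byte slots, eight GPRs X3..X10):

    #include <algorithm>
    #include <cassert>

    // Reproduces the new GPR_idx computation; a result equal to NumGPRs
    // means the argument no longer fits in a register.
    static unsigned gprIndexForOffset(unsigned ArgOffset, unsigned LinkageSize) {
      const unsigned PtrByteSize = 8;
      const unsigned NumGPRs = 8;
      unsigned Idx = (ArgOffset - LinkageSize) / PtrByteSize;
      return std::min(Idx, NumGPRs);
    }

    int main() {
      const unsigned LinkageSize = 48;
      assert(gprIndexForOffset(48, LinkageSize) == 0);  // first doubleword: X3
      assert(gprIndexForOffset(64, LinkageSize) == 2);  // third doubleword: X5
      assert(gprIndexForOffset(48 + 9 * 8, LinkageSize) == 8); // clamped: stack
    }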
- if (GPR_idx != Num_GPR_Regs) { - ++GPR_idx; - } if (FPR_idx != Num_FPR_Regs) { unsigned VReg; if (ObjectVT == MVT::f32) VReg = MF.addLiveIn(FPR[FPR_idx], &PPC::F4RCRegClass); else - VReg = MF.addLiveIn(FPR[FPR_idx], PPCSubTarget.hasVSX() ? + VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX() ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass); @@ -2608,39 +2633,25 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: - // Note that vector arguments in registers don't reserve stack space, - // except in varargs functions. if (VR_idx != Num_VR_Regs) { unsigned VReg = (ObjectVT == MVT::v2f64 || ObjectVT == MVT::v2i64) ? MF.addLiveIn(VSRH[VR_idx], &PPC::VSHRCRegClass) : MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass); ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT); - if (isVarArg) { - while ((ArgOffset % 16) != 0) { - ArgOffset += PtrByteSize; - if (GPR_idx != Num_GPR_Regs) - GPR_idx++; - } - ArgOffset += 16; - GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? - } ++VR_idx; } else { - // Vectors are aligned. - ArgOffset = ((ArgOffset+15)/16)*16; - CurArgOffset = ArgOffset; - ArgOffset += 16; needsLoad = true; } + ArgOffset += 16; break; } // We need to load the argument to a virtual register if we determined // above that we ran out of physical registers of the appropriate type. if (needsLoad) { - int FI = MFI->CreateFixedObject(ObjSize, - CurArgOffset + (ArgSize - ObjSize), - isImmutable); + if (ObjSize < ArgSize && !isLittleEndian) + CurArgOffset += ArgSize - ObjSize; + int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), false, false, false, 0); @@ -2649,11 +2660,16 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( InVals.push_back(ArgVal); } + // Area that is at least reserved in the caller of this function. + unsigned MinReservedArea; + MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize); + // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. - setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, true); + MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea); + FuncInfo->setMinReservedArea(MinReservedArea); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. @@ -2667,7 +2683,8 @@ PPCTargetLowering::LowerFormalArguments_64SVR4( // If this function is vararg, store any remaining integer argument regs // to their spots on the stack so that they may be loaded by deferencing the // result of va_next. - for (; GPR_idx != Num_GPR_Regs; ++GPR_idx) { + for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; + GPR_idx < Num_GPR_Regs; ++GPR_idx) { unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass); SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT); SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, @@ -2706,7 +2723,8 @@ PPCTargetLowering::LowerFormalArguments_Darwin( (CallConv == CallingConv::Fast)); unsigned PtrByteSize = isPPC64 ? 
8 : 4; - unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true); + unsigned ArgOffset = LinkageSize; // Area that is at least reserved in caller of this function. unsigned MinReservedArea = ArgOffset; @@ -2997,11 +3015,21 @@ PPCTargetLowering::LowerFormalArguments_Darwin( InVals.push_back(ArgVal); } + // Allow for Altivec parameters at the end, if needed. + if (nAltivecParamsAtEnd) { + MinReservedArea = ((MinReservedArea+15)/16)*16; + MinReservedArea += 16*nAltivecParamsAtEnd; + } + + // Area that is at least reserved in the caller of this function. + MinReservedArea = std::max(MinReservedArea, LinkageSize + 8 * PtrByteSize); + // Set the size that is at least reserved in caller of this function. Tail // call optimized functions' reserved stack space needs to be aligned so that // taking the difference between two stack areas will result in an aligned // stack. - setMinReservedArea(MF, DAG, nAltivecParamsAtEnd, MinReservedArea, isPPC64); + MinReservedArea = EnsureStackAlignment(MF.getTarget(), MinReservedArea); + FuncInfo->setMinReservedArea(MinReservedArea); // If the function takes variable number of arguments, make a frame index for // the start of the first vararg value... for expansion of llvm.va_start. @@ -3040,75 +3068,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin( return Chain; } -/// CalculateParameterAndLinkageAreaSize - Get the size of the parameter plus -/// linkage area for the Darwin ABI, or the 64-bit SVR4 ABI. -static unsigned -CalculateParameterAndLinkageAreaSize(SelectionDAG &DAG, - bool isPPC64, - bool isVarArg, - unsigned CC, - const SmallVectorImpl<ISD::OutputArg> - &Outs, - const SmallVectorImpl<SDValue> &OutVals, - unsigned &nAltivecParamsAtEnd) { - // Count how many bytes are to be pushed on the stack, including the linkage - // area, and parameter passing area. We start with 24/48 bytes, which is - // prereserved space for [SP][CR][LR][3 x unused]. - unsigned NumBytes = PPCFrameLowering::getLinkageSize(isPPC64, true); - unsigned NumOps = Outs.size(); - unsigned PtrByteSize = isPPC64 ? 8 : 4; - - // Add up all the space actually used. - // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually - // they all go in registers, but we must reserve stack space for them for - // possible use by the caller. In varargs or 64-bit calls, parameters are - // assigned stack space in order, with padding so Altivec parameters are - // 16-byte aligned. - nAltivecParamsAtEnd = 0; - for (unsigned i = 0; i != NumOps; ++i) { - ISD::ArgFlagsTy Flags = Outs[i].Flags; - EVT ArgVT = Outs[i].VT; - // Varargs Altivec parameters are padded to a 16 byte boundary. - if (ArgVT==MVT::v4f32 || ArgVT==MVT::v4i32 || - ArgVT==MVT::v8i16 || ArgVT==MVT::v16i8 || - ArgVT==MVT::v2f64 || ArgVT==MVT::v2i64) { - if (!isVarArg && !isPPC64) { - // Non-varargs Altivec parameters go after all the non-Altivec - // parameters; handle those later so we know how much padding we need. - nAltivecParamsAtEnd++; - continue; - } - // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. - NumBytes = ((NumBytes+15)/16)*16; - } - NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); - } - - // Allow for Altivec parameters at the end, if needed. 
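The Darwin formal-argument hunk above now folds the trailing-Altivec adjustment in directly: the running reservation is padded to 16 bytes, then grows 16 bytes per deferred vector. A worked example with hypothetical sizes:

    #include <cassert>

    static unsigned addTrailingAltivec(unsigned MinReservedArea,
                                       unsigned nAltivecParamsAtEnd) {
      if (nAltivecParamsAtEnd) {
        MinReservedArea = ((MinReservedArea + 15) / 16) * 16;
        MinReservedArea += 16 * nAltivecParamsAtEnd;
      }
      return MinReservedArea;
    }

    int main() {
      assert(addTrailingAltivec(56, 2) == 96); // 56 pads to 64, plus 2 * 16
      assert(addTrailingAltivec(56, 0) == 56); // nothing deferred: unchanged
    }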
- if (nAltivecParamsAtEnd) { - NumBytes = ((NumBytes+15)/16)*16; - NumBytes += 16*nAltivecParamsAtEnd; - } - - // The prolog code of the callee may store up to 8 GPR argument registers to - // the stack, allowing va_start to index over them in memory if its varargs. - // Because we cannot tell if this is needed on the caller side, we have to - // conservatively assume that it is needed. As such, make sure we have at - // least enough stack space for the caller to store the 8 GPRs. - NumBytes = std::max(NumBytes, - PPCFrameLowering::getMinCallFrameSize(isPPC64, true)); - - // Tail call needs the stack to be aligned. - if (CC == CallingConv::Fast && DAG.getTarget().Options.GuaranteedTailCallOpt){ - unsigned TargetAlign = DAG.getMachineFunction().getTarget(). - getFrameLowering()->getStackAlignment(); - unsigned AlignMask = TargetAlign-1; - NumBytes = (NumBytes + AlignMask) & ~AlignMask; - } - - return NumBytes; -} - /// CalculateTailCallSPDiff - Get the amount the stack pointer has to be /// adjusted to accommodate the arguments for the tailcall. static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall, @@ -3280,7 +3239,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, SDLoc dl) const { if (SPDiff) { // Load the LR and FP stack slot for later adjusting. - EVT VT = PPCSubTarget.isPPC64() ? MVT::i64 : MVT::i32; + EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), false, false, false, 0); @@ -3373,10 +3332,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall, SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass, SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys, - const PPCSubtarget &PPCSubTarget) { + const PPCSubtarget &Subtarget) { - bool isPPC64 = PPCSubTarget.isPPC64(); - bool isSVR4ABI = PPCSubTarget.isSVR4ABI(); + bool isPPC64 = Subtarget.isPPC64(); + bool isSVR4ABI = Subtarget.isSVR4ABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); NodeTys.push_back(MVT::Other); // Returns a chain @@ -3385,11 +3344,12 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, unsigned CallOpc = PPCISD::CALL; bool needIndirectCall = true; - if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { - // If this is an absolute destination address, use the munged value. - Callee = SDValue(Dest, 0); - needIndirectCall = false; - } + if (!isSVR4ABI || !isPPC64) + if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG)) { + // If this is an absolute destination address, use the munged value. 
+ Callee = SDValue(Dest, 0); + needIndirectCall = false; + } if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) { // XXX Work around for http://llvm.org/bugs/show_bug.cgi?id=5201 @@ -3398,8 +3358,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, if (!DAG.getTarget().getSubtarget<PPCSubtarget>().isJITCodeModel()) { unsigned OpFlags = 0; if (DAG.getTarget().getRelocationModel() != Reloc::Static && - (PPCSubTarget.getTargetTriple().isMacOSX() && - PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && + (Subtarget.getTargetTriple().isMacOSX() && + Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5)) && (G->getGlobal()->isDeclaration() || G->getGlobal()->isWeakForLinker())) { // PC-relative references to external symbols should go through $stub, @@ -3422,8 +3382,8 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, unsigned char OpFlags = 0; if (DAG.getTarget().getRelocationModel() != Reloc::Static && - (PPCSubTarget.getTargetTriple().isMacOSX() && - PPCSubTarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { + (Subtarget.getTargetTriple().isMacOSX() && + Subtarget.getTargetTriple().isMacOSXVersionLT(10, 5))) { // PC-relative references to external symbols should go through $stub, // unless we're building with the leopard linker or later, which // automatically synthesizes these stubs. @@ -3497,8 +3457,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag, // additional register being allocated and an unnecessary move instruction // being generated. VTs = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue TOCOff = DAG.getIntPtrConstant(8); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff); SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, - Callee, InFlag); + AddTOC, InFlag); Chain = LoadTOCPtr.getValue(0); InFlag = LoadTOCPtr.getValue(1); @@ -3613,10 +3575,10 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, SmallVector<SDValue, 8> Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, isTailCall, RegsToPass, Ops, NodeTys, - PPCSubTarget); + Subtarget); // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls - if (isVarArg && PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) + if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64()) Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32)); // When performing tail call optimization the callee pops its arguments off @@ -3657,7 +3619,7 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // same TOC), the NOP will remain unchanged. bool needsTOCRestore = false; - if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { + if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) { if (CallOpc == PPCISD::BCTRL) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. 
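For context on the AddTOC node introduced above (Callee plus 8): under the 64-bit SVR4 (ELFv1) ABI an indirect callee is the address of a function descriptor rather than of code, and the callee's TOC pointer is the descriptor's second doubleword. A layout sketch, not the compiler's actual data structure:

    #include <cstddef>
    #include <cstdint>

    struct FunctionDescriptor {
      uint64_t EntryPoint; // +0:  address of the first instruction
      uint64_t TOCBase;    // +8:  callee's TOC pointer, destined for r2
      uint64_t EnvPointer; // +16: environment pointer, unused by C
    };

    static_assert(offsetof(FunctionDescriptor, TOCBase) == 8,
                  "PPCISD::LOAD_TOC now reads descriptor + 8");

    int main() {}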
@@ -3682,7 +3644,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, if (needsTOCRestore) { SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); + unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(); + SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); + Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag); InFlag = Chain.getValue(1); } @@ -3718,8 +3685,8 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, report_fatal_error("failed to perform tail call elimination on a call " "site marked musttail"); - if (PPCSubTarget.isSVR4ABI()) { - if (PPCSubTarget.isPPC64()) + if (Subtarget.isSVR4ABI()) { + if (Subtarget.isPPC64()) return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, OutVals, Ins, dl, DAG, InVals); @@ -3981,6 +3948,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDLoc dl, SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { + bool isLittleEndian = Subtarget.isLittleEndian(); unsigned NumOps = Outs.size(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -3997,16 +3965,37 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, CallConv == CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); - unsigned nAltivecParamsAtEnd = 0; - // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with at least 48 bytes, which // is reserved space for [SP][CR][LR][3 x unused]. - // NOTE: For PPC64, nAltivecParamsAtEnd always remains zero as a result - // of this call. - unsigned NumBytes = - CalculateParameterAndLinkageAreaSize(DAG, true, isVarArg, CallConv, - Outs, OutVals, nAltivecParamsAtEnd); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(true, false); + unsigned NumBytes = LinkageSize; + + // Add up all the space actually used. + for (unsigned i = 0; i != NumOps; ++i) { + ISD::ArgFlagsTy Flags = Outs[i].Flags; + EVT ArgVT = Outs[i].VT; + + /* Respect alignment of argument on the stack. */ + unsigned Align = CalculateStackSlotAlignment(ArgVT, Flags, PtrByteSize); + NumBytes = ((NumBytes + Align - 1) / Align) * Align; + + NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); + } + + unsigned NumBytesActuallyUsed = NumBytes; + + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); + + // Tail call needs the stack to be aligned. + if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv == CallingConv::Fast) + NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. @@ -4038,8 +4027,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // memory. 
Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. - unsigned ArgOffset = PPCFrameLowering::getLinkageSize(true, true); - unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; + unsigned ArgOffset = LinkageSize; + unsigned GPR_idx, FPR_idx = 0, VR_idx = 0; static const MCPhysReg GPR[] = { PPC::X3, PPC::X4, PPC::X5, PPC::X6, @@ -4068,6 +4057,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, SDValue Arg = OutVals[i]; ISD::ArgFlagsTy Flags = Outs[i].Flags; + /* Respect alignment of argument on the stack. */ + unsigned Align = + CalculateStackSlotAlignment(Outs[i].VT, Flags, PtrByteSize); + ArgOffset = ((ArgOffset + Align - 1) / Align) * Align; + + /* Compute GPR index associated with argument offset. */ + GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize; + GPR_idx = std::min(GPR_idx, NumGPRs); + // PtrOff will be used to store the current argument to the stack if a // register cannot be found for it. SDValue PtrOff; @@ -4099,15 +4097,6 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, if (Size == 0) continue; - unsigned BVAlign = Flags.getByValAlign(); - if (BVAlign > 8) { - if (BVAlign % PtrByteSize != 0) - llvm_unreachable( - "ByVal alignment is not a multiple of the pointer size"); - - ArgOffset = ((ArgOffset+BVAlign-1)/BVAlign)*BVAlign; - } - // All aggregates smaller than 8 bytes must be passed right-justified. if (Size==1 || Size==2 || Size==4) { EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32); @@ -4116,7 +4105,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, MachinePointerInfo(), VT, false, false, 0); MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); ArgOffset += PtrByteSize; continue; @@ -4124,9 +4113,12 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, } if (GPR_idx == NumGPRs && Size < 8) { - SDValue Const = DAG.getConstant(PtrByteSize - Size, - PtrOff.getValueType()); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + SDValue AddPtr = PtrOff; + if (!isLittleEndian) { + SDValue Const = DAG.getConstant(PtrByteSize - Size, + PtrOff.getValueType()); + AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); @@ -4161,8 +4153,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // small aggregates, particularly for packed ones. // FIXME: It would be preferable to use the slot in the // parameter save area instead of a new local variable. - SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); - SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + SDValue AddPtr = PtrOff; + if (!isLittleEndian) { + SDValue Const = DAG.getConstant(8 - Size, PtrOff.getValueType()); + AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const); + } Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr, CallSeqStart, Flags, DAG, dl); @@ -4172,7 +4167,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, MachinePointerInfo(), false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); // Done with this argument. 
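The byval changes above are one of the places where the two byte orders visibly diverge: aggregates smaller than a doubleword are right-justified within their slot only on big-endian targets. A sketch of the offset computation:

    #include <cassert>

    // On big endian a small aggregate occupies the high-order bytes of
    // its doubleword, so the copy address is bumped by (8 - Size); on
    // little endian it already starts at the slot base.
    static unsigned byvalSlotOffset(unsigned Size, bool IsLittleEndian) {
      const unsigned PtrByteSize = 8;
      if (Size < PtrByteSize && !IsLittleEndian)
        return PtrByteSize - Size;
      return 0;
    }

    int main() {
      assert(byvalSlotOffset(3, false) == 5); // big endian: bytes 5..7
      assert(byvalSlotOffset(3, true) == 0);  // little endian: bytes 0..2
      assert(byvalSlotOffset(8, false) == 0); // full slot either way
    }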
ArgOffset += PtrByteSize; @@ -4205,7 +4200,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, case MVT::i32: case MVT::i64: if (GPR_idx != NumGPRs) { - RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Arg)); } else { LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, false, MemOpChains, @@ -4223,7 +4218,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // must be passed right-justified in the stack doubleword, and // in the GPR, if one is available. SDValue StoreOff; - if (Arg.getSimpleValueType().SimpleTy == MVT::f32) { + if (Arg.getSimpleValueType().SimpleTy == MVT::f32 && + !isLittleEndian) { SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); StoreOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } else @@ -4239,15 +4235,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, MachinePointerInfo(), false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); - RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); + RegsToPass.push_back(std::make_pair(GPR[GPR_idx], Load)); } - } else if (GPR_idx != NumGPRs) - // If we have any FPRs remaining, we may also have GPRs remaining. - ++GPR_idx; + } } else { // Single-precision floating-point values are mapped to the // second (rightmost) word of the stack doubleword. - if (Arg.getValueType() == MVT::f32) { + if (Arg.getValueType() == MVT::f32 && !isLittleEndian) { SDValue ConstFour = DAG.getConstant(4, PtrOff.getValueType()); PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); } @@ -4264,21 +4258,13 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, case MVT::v16i8: case MVT::v2f64: case MVT::v2i64: + // For a varargs call, named arguments go into VRs or on the stack as + // usual; unnamed arguments always go to the stack or the corresponding + // GPRs when within range. For now, we always put the value in both + // locations (or even all three). if (isVarArg) { - // These go aligned on the stack, or in the corresponding R registers - // when within range. The Darwin PPC ABI doc claims they also go in - // V registers; in fact gcc does this only for arguments that are - // prototyped, not for those that match the ... We do it for all - // arguments, seems to work. - while (ArgOffset % 16 !=0) { - ArgOffset += PtrByteSize; - if (GPR_idx != NumGPRs) - GPR_idx++; - } // We could elide this store in the case where the object fits // entirely in R registers. Maybe later. - PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, - DAG.getConstant(ArgOffset, PtrVT)); SDValue Store = DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo(), false, false, 0); MemOpChains.push_back(Store); @@ -4309,10 +4295,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, break; } - // Non-varargs Altivec params generally go in registers, but have - // stack space allocated at the end. + // Non-varargs Altivec params go into VRs or on the stack. if (VR_idx != NumVRs) { - // Doesn't have GPR space allocated. unsigned VReg = (Arg.getSimpleValueType() == MVT::v2f64 || Arg.getSimpleValueType() == MVT::v2i64) ? 
VSRH[VR_idx] : VR[VR_idx]; @@ -4323,12 +4307,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset, true, isTailCall, true, MemOpChains, TailCallArguments, dl); - ArgOffset += 16; } + ArgOffset += 16; break; } } + assert(NumBytesActuallyUsed == ArgOffset); + (void)NumBytesActuallyUsed; + if (!MemOpChains.empty()) Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); @@ -4337,19 +4324,15 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee, // pointers in the 64-bit SVR4 ABI. if (!isTailCall && !dyn_cast<GlobalAddressSDNode>(Callee) && - !dyn_cast<ExternalSymbolSDNode>(Callee) && - !isBLACompatibleAddress(Callee, DAG)) { + !dyn_cast<ExternalSymbolSDNode>(Callee)) { // Load r2 into a virtual register and store it to the TOC save area. SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64); // TOC save area offset. - SDValue PtrOff = DAG.getIntPtrConstant(40); + unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(); + SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset); SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff); Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(), false, false, 0); - // R12 must contain the address of an indirect callee. This does not - // mean the MTCTR instruction must use R12; it's easier to model this - // as an extra parameter, so do that. - RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee)); } // Build a sequence of copy-to-reg nodes chained together with token chain @@ -4397,15 +4380,55 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, CallConv == CallingConv::Fast) MF.getInfo<PPCFunctionInfo>()->setHasFastCall(); - unsigned nAltivecParamsAtEnd = 0; - // Count how many bytes are to be pushed on the stack, including the linkage // area, and parameter passing area. We start with 24/48 bytes, which is // prereserved space for [SP][CR][LR][3 x unused]. - unsigned NumBytes = - CalculateParameterAndLinkageAreaSize(DAG, isPPC64, isVarArg, CallConv, - Outs, OutVals, - nAltivecParamsAtEnd); + unsigned LinkageSize = PPCFrameLowering::getLinkageSize(isPPC64, true); + unsigned NumBytes = LinkageSize; + + // Add up all the space actually used. + // In 32-bit non-varargs calls, Altivec parameters all go at the end; usually + // they all go in registers, but we must reserve stack space for them for + // possible use by the caller. In varargs or 64-bit calls, parameters are + // assigned stack space in order, with padding so Altivec parameters are + // 16-byte aligned. + unsigned nAltivecParamsAtEnd = 0; + for (unsigned i = 0; i != NumOps; ++i) { + ISD::ArgFlagsTy Flags = Outs[i].Flags; + EVT ArgVT = Outs[i].VT; + // Varargs Altivec parameters are padded to a 16 byte boundary. + if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 || + ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 || + ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64) { + if (!isVarArg && !isPPC64) { + // Non-varargs Altivec parameters go after all the non-Altivec + // parameters; handle those later so we know how much padding we need. + nAltivecParamsAtEnd++; + continue; + } + // Varargs and 64-bit Altivec parameters are padded to 16 byte boundary. + NumBytes = ((NumBytes+15)/16)*16; + } + NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize); + } + + // Allow for Altivec parameters at the end, if needed. 
+ if (nAltivecParamsAtEnd) { + NumBytes = ((NumBytes+15)/16)*16; + NumBytes += 16*nAltivecParamsAtEnd; + } + + // The prolog code of the callee may store up to 8 GPR argument registers to + // the stack, allowing va_start to index over them in memory if its varargs. + // Because we cannot tell if this is needed on the caller side, we have to + // conservatively assume that it is needed. As such, make sure we have at + // least enough stack space for the caller to store the 8 GPRs. + NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize); + + // Tail call needs the stack to be aligned. + if (getTargetMachine().Options.GuaranteedTailCallOpt && + CallConv == CallingConv::Fast) + NumBytes = EnsureStackAlignment(MF.getTarget(), NumBytes); // Calculate by how many bytes the stack has to be adjusted in case of tail // call optimization. @@ -4441,7 +4464,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // memory. Also, if this is a vararg function, floating point operations // must be stored to our stack, and loaded into integer regs as well, if // any integer regs are available for argument passing. - unsigned ArgOffset = PPCFrameLowering::getLinkageSize(isPPC64, true); + unsigned ArgOffset = LinkageSize; unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0; static const MCPhysReg GPR_32[] = { // 32-bit registers. @@ -4818,8 +4841,8 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - bool isPPC64 = PPCSubTarget.isPPC64(); - bool isDarwinABI = PPCSubTarget.isDarwinABI(); + bool isPPC64 = Subtarget.isPPC64(); + bool isDarwinABI = Subtarget.isDarwinABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be @@ -4842,8 +4865,8 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const { SDValue PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const { MachineFunction &MF = DAG.getMachineFunction(); - bool isPPC64 = PPCSubTarget.isPPC64(); - bool isDarwinABI = PPCSubTarget.isDarwinABI(); + bool isPPC64 = Subtarget.isPPC64(); + bool isDarwinABI = Subtarget.isDarwinABI(); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); // Get current frame pointer save index. The users of this index will be @@ -5063,12 +5086,12 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!"); case MVT::i32: Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIWZ : - (PPCSubTarget.hasFPCVT() ? PPCISD::FCTIWUZ : + (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ), dl, MVT::f64, Src); break; case MVT::i64: - assert((Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()) && + assert((Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()) && "i64 FP_TO_UINT is supported only with FPCVT"); Tmp = DAG.getNode(Op.getOpcode()==ISD::FP_TO_SINT ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ, @@ -5077,8 +5100,8 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, } // Convert the FP value to an int value through memory. 
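Both call-lowering paths above now apply the same floor before the tail-call alignment step: because the callee's prolog may spill all eight GPR argument registers for va_start, the caller must reserve at least that much beyond the linkage area even for a call that passes nothing. An illustrative check:

    #include <algorithm>
    #include <cassert>

    static unsigned parameterAreaFloor(unsigned NumBytes, unsigned LinkageSize) {
      const unsigned PtrByteSize = 8; // 64-bit values, as in the hunks above
      return std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
    }

    int main() {
      assert(parameterAreaFloor(48, 48) == 112);  // no arguments: 48 + 64
      assert(parameterAreaFloor(160, 48) == 160); // large call: unchanged
    }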
- bool i32Stack = Op.getValueType() == MVT::i32 && PPCSubTarget.hasSTFIWX() && - (Op.getOpcode() == ISD::FP_TO_SINT || PPCSubTarget.hasFPCVT()); + bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() && + (Op.getOpcode() == ISD::FP_TO_SINT || Subtarget.hasFPCVT()); SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64); int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex(); MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(FI); @@ -5120,17 +5143,17 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, DAG.getConstantFP(1.0, Op.getValueType()), DAG.getConstantFP(0.0, Op.getValueType())); - assert((Op.getOpcode() == ISD::SINT_TO_FP || PPCSubTarget.hasFPCVT()) && + assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) && "UINT_TO_FP is supported only with FPCVT"); // If we have FCFIDS, then use it when converting to single-precision. // Otherwise, convert to double-precision and then round. - unsigned FCFOp = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS : PPCISD::FCFIDS) : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU : PPCISD::FCFID); - MVT FCFTy = (PPCSubTarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? + MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32) ? MVT::f32 : MVT::f64; if (Op.getOperand(0).getValueType() == MVT::i64) { @@ -5146,7 +5169,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // However, if -enable-unsafe-fp-math is in effect, accept double // rounding to avoid the extra overhead. if (Op.getValueType() == MVT::f32 && - !PPCSubTarget.hasFPCVT() && + !Subtarget.hasFPCVT() && !DAG.getTarget().Options.UnsafeFPMath) { // Twiddle input to make sure the low 11 bits are zero. (If this @@ -5184,7 +5207,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, SDValue Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT); SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Bits); - if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) + if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; @@ -5201,7 +5224,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue Ld; - if (PPCSubTarget.hasLFIWAX() || PPCSubTarget.hasFPCVT()) { + if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) { int FrameIdx = FrameInfo->CreateStackObject(4, 4, false); SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT); @@ -5220,7 +5243,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, dl, DAG.getVTList(MVT::f64, MVT::Other), Ops, MVT::i32, MMO); } else { - assert(PPCSubTarget.isPPC64() && + assert(Subtarget.isPPC64() && "i32->FP without LFIWAX supported only on PPC64"); int FrameIdx = FrameInfo->CreateStackObject(8, 8, false); @@ -5242,7 +5265,7 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, // FCFID it and return it. 
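A compact restatement of the opcode choice running through LowerINT_TO_FP above: the single-precision converts exist only with FPCVT, and without them the conversion produces f64, with any f32 result rounded afterwards. Sketch only; the enum names are illustrative:

    #include <cassert>

    enum ConvOp { FCFID, FCFIDU, FCFIDS, FCFIDUS };

    static ConvOp pickIntToFP(bool HasFPCVT, bool DstIsF32, bool IsUnsigned) {
      if (HasFPCVT && DstIsF32)
        return IsUnsigned ? FCFIDUS : FCFIDS;
      return IsUnsigned ? FCFIDU : FCFID;
    }

    int main() {
      assert(pickIntToFP(true, true, false) == FCFIDS);
      assert(pickIntToFP(false, true, false) == FCFID); // plus FP_ROUND after
      assert(pickIntToFP(true, false, true) == FCFIDU);
    }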
SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Ld); - if (Op.getValueType() == MVT::f32 && !PPCSubTarget.hasFPCVT()) + if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP, DAG.getIntPtrConstant(0)); return FP; } @@ -5557,6 +5580,22 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } + // The remaining cases assume either big endian element order or + // a splat-size that equates to the element size of the vector + // to be built. An example that doesn't work for little endian is + // {0, -1, 0, -1, 0, -1, 0, -1} which has a splat size of 32 bits + // and a vector element size of 16 bits. The code below will + // produce the vector in big endian element order, which for little + // endian is {-1, 0, -1, 0, -1, 0, -1, 0}. + + // For now, just avoid these optimizations in that case. + // FIXME: Develop correct optimizations for LE with mismatched + // splat and element sizes. + + if (Subtarget.isLittleEndian() && + SplatSize != Op.getValueType().getVectorElementType().getSizeInBits()) + return SDValue(); + // Check to see if this is a wide variety of vsplti*, binop self cases. static const signed char SplatCsts[] = { -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7, @@ -5725,6 +5764,7 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SDValue V2 = Op.getOperand(1); ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); EVT VT = Op.getValueType(); + bool isLittleEndian = Subtarget.isLittleEndian(); // Cases that are handled by instructions that take permute immediates // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be @@ -5733,15 +5773,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, if (PPC::isSplatShuffleMask(SVOp, 1) || PPC::isSplatShuffleMask(SVOp, 2) || PPC::isSplatShuffleMask(SVOp, 4) || - PPC::isVPKUWUMShuffleMask(SVOp, true) || - PPC::isVPKUHUMShuffleMask(SVOp, true) || - PPC::isVSLDOIShuffleMask(SVOp, true) != -1 || - PPC::isVMRGLShuffleMask(SVOp, 1, true) || - PPC::isVMRGLShuffleMask(SVOp, 2, true) || - PPC::isVMRGLShuffleMask(SVOp, 4, true) || - PPC::isVMRGHShuffleMask(SVOp, 1, true) || - PPC::isVMRGHShuffleMask(SVOp, 2, true) || - PPC::isVMRGHShuffleMask(SVOp, 4, true)) { + PPC::isVPKUWUMShuffleMask(SVOp, true, DAG) || + PPC::isVPKUHUMShuffleMask(SVOp, true, DAG) || + PPC::isVSLDOIShuffleMask(SVOp, true, DAG) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, true, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 2, true, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 4, true, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 1, true, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 2, true, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 4, true, DAG)) { return Op; } } @@ -5749,15 +5789,15 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // Altivec has a variety of "shuffle immediates" that take two vector inputs // and produce a fixed permutation. If any of these match, do not lower to // VPERM. 
- if (PPC::isVPKUWUMShuffleMask(SVOp, false) || - PPC::isVPKUHUMShuffleMask(SVOp, false) || - PPC::isVSLDOIShuffleMask(SVOp, false) != -1 || - PPC::isVMRGLShuffleMask(SVOp, 1, false) || - PPC::isVMRGLShuffleMask(SVOp, 2, false) || - PPC::isVMRGLShuffleMask(SVOp, 4, false) || - PPC::isVMRGHShuffleMask(SVOp, 1, false) || - PPC::isVMRGHShuffleMask(SVOp, 2, false) || - PPC::isVMRGHShuffleMask(SVOp, 4, false)) + if (PPC::isVPKUWUMShuffleMask(SVOp, false, DAG) || + PPC::isVPKUHUMShuffleMask(SVOp, false, DAG) || + PPC::isVSLDOIShuffleMask(SVOp, false, DAG) != -1 || + PPC::isVMRGLShuffleMask(SVOp, 1, false, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 2, false, DAG) || + PPC::isVMRGLShuffleMask(SVOp, 4, false, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 1, false, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 2, false, DAG) || + PPC::isVMRGHShuffleMask(SVOp, 4, false, DAG)) return Op; // Check to see if this is a shuffle of 4-byte values. If so, we can use our @@ -5791,7 +5831,9 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // If this shuffle can be expressed as a shuffle of 4-byte elements, use the // perfect shuffle vector to determine if it is cost effective to do this as // discrete instructions, or whether we should use a vperm. - if (isFourElementShuffle) { + // For now, we skip this for little endian until such time as we have a + // little-endian perfect shuffle table. + if (isFourElementShuffle && !isLittleEndian) { // Compute the index in the perfect shuffle table. unsigned PFTableIndex = PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3]; @@ -5820,6 +5862,11 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except // that it is in input element units, not in bytes. Convert now. + + // For little endian, the order of the input vectors is reversed, and + // the permutation mask is complemented with respect to 31. This is + // necessary to produce proper semantics with the big-endian-biased vperm + // instruction. EVT EltVT = V1.getValueType().getVectorElementType(); unsigned BytesPerElement = EltVT.getSizeInBits()/8; @@ -5828,13 +5875,22 @@ SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i]; for (unsigned j = 0; j != BytesPerElement; ++j) - ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, - MVT::i32)); + if (isLittleEndian) + ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement+j), + MVT::i32)); + else + ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement+j, + MVT::i32)); } SDValue VPermMask = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v16i8, ResultMask); - return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), V1, V2, VPermMask); + if (isLittleEndian) + return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), + V2, V1, VPermMask); + else + return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(), + V1, V2, VPermMask); } /// getAltivecCompareInfo - Given an intrinsic, return false if it is not an @@ -6027,6 +6083,7 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { LHS, RHS, Zero, DAG, dl); } else if (Op.getValueType() == MVT::v16i8) { SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1); + bool isLittleEndian = Subtarget.isLittleEndian(); // Multiply the even 8-bit parts, producing 16-bit sums. 
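Backing up to the LowerVECTOR_SHUFFLE change above, where little-endian code swaps the vperm operands and rewrites each control byte b to 31 - b: an index-level check that the rewrite keeps selecting the same data. This is a model, not the lowering itself:

    #include <cassert>

    // A big-endian control byte c selects byte j of the first operand
    // (c < 16) or of the second (c >= 16). Swapping the operands and
    // using 31 - c keeps the source register and mirrors the byte index
    // to 15 - j, compensating for the reversed lane numbering.
    struct Pick { int FromV2; int Byte; };

    static Pick decode(int C, bool Swapped) {
      int FromSecond = C >= 16;
      return Pick{Swapped ? !FromSecond : FromSecond, C & 15};
    }

    int main() {
      for (int C = 0; C != 32; ++C) {
        Pick BE = decode(C, false);      // vperm(V1, V2, C)
        Pick LE = decode(31 - C, true);  // vperm(V2, V1, 31 - C)
        assert(BE.FromV2 == LE.FromV2);  // same source register
        assert(LE.Byte == 15 - BE.Byte); // mirrored lane within it
      }
    }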
SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub, @@ -6038,13 +6095,24 @@ SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { LHS, RHS, DAG, dl, MVT::v8i16); OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts); - // Merge the results together. + // Merge the results together. Because vmuleub and vmuloub are + // instructions with a big-endian bias, we must reverse the + // element numbering and reverse the meaning of "odd" and "even" + // when generating little endian code. int Ops[16]; for (unsigned i = 0; i != 8; ++i) { - Ops[i*2 ] = 2*i+1; - Ops[i*2+1] = 2*i+1+16; + if (isLittleEndian) { + Ops[i*2 ] = 2*i; + Ops[i*2+1] = 2*i+16; + } else { + Ops[i*2 ] = 2*i+1; + Ops[i*2+1] = 2*i+1+16; + } } - return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); + if (isLittleEndian) + return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops); + else + return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops); } else { llvm_unreachable("Unknown mul to lower!"); } @@ -6064,17 +6132,17 @@ SDValue PPCTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); case ISD::VASTART: - return LowerVASTART(Op, DAG, PPCSubTarget); + return LowerVASTART(Op, DAG, Subtarget); case ISD::VAARG: - return LowerVAARG(Op, DAG, PPCSubTarget); + return LowerVAARG(Op, DAG, Subtarget); case ISD::VACOPY: - return LowerVACOPY(Op, DAG, PPCSubTarget); + return LowerVACOPY(Op, DAG, Subtarget); - case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, PPCSubTarget); + case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG, Subtarget); case ISD::DYNAMIC_STACKALLOC: - return LowerDYNAMIC_STACKALLOC(Op, DAG, PPCSubTarget); + return LowerDYNAMIC_STACKALLOC(Op, DAG, Subtarget); case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); @@ -6144,7 +6212,7 @@ void PPCTargetLowering::ReplaceNodeResults(SDNode *N, EVT VT = N->getValueType(0); if (VT == MVT::i64) { - SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, PPCSubTarget); + SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG, Subtarget); Results.push_back(NewNode); Results.push_back(NewNode.getValue(1)); @@ -6255,7 +6323,7 @@ PPCTargetLowering::EmitPartwordAtomicBinary(MachineInstr *MI, // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address // registers without caring whether they're 32 or 64, but here we're // doing actual arithmetic on the addresses. - bool is64bit = PPCSubTarget.isPPC64(); + bool is64bit = Subtarget.isPPC64(); unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO; const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -6450,7 +6518,7 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, unsigned LabelReg = MRI.createVirtualRegister(PtrRC); unsigned BufReg = MI->getOperand(1).getReg(); - if (PPCSubTarget.isPPC64() && PPCSubTarget.isSVR4ABI()) { + if (Subtarget.isPPC64() && Subtarget.isSVR4ABI()) { MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD)) .addReg(PPC::X2) .addImm(TOCOffset) @@ -6463,12 +6531,12 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, unsigned BaseReg; if (MF->getFunction()->getAttributes().hasAttribute( AttributeSet::FunctionIndex, Attribute::Naked)) - BaseReg = PPCSubTarget.isPPC64() ? PPC::X1 : PPC::R1; + BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1; else - BaseReg = PPCSubTarget.isPPC64() ? 
PPC::BP8 : PPC::BP; + BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP; MIB = BuildMI(*thisMBB, MI, DL, - TII->get(PPCSubTarget.isPPC64() ? PPC::STD : PPC::STW)) + TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW)) .addReg(BaseReg) .addImm(BPOffset) .addReg(BufReg); @@ -6492,10 +6560,10 @@ PPCTargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, // mainMBB: // mainDstReg = 0 MIB = BuildMI(mainMBB, DL, - TII->get(PPCSubTarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); + TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg); // Store IP - if (PPCSubTarget.isPPC64()) { + if (Subtarget.isPPC64()) { MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD)) .addReg(LabelReg) .addImm(LabelOffset) @@ -6607,7 +6675,7 @@ PPCTargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MIB.setMemRefs(MMOBegin, MMOEnd); // Reload TOC - if (PVT == MVT::i64 && PPCSubTarget.isSVR4ABI()) { + if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) { MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2) .addImm(TOCOffset) .addReg(BufReg); @@ -6645,7 +6713,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, MachineFunction *F = BB->getParent(); - if (PPCSubTarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || + if (Subtarget.hasISEL() && (MI->getOpcode() == PPC::SELECT_CC_I4 || MI->getOpcode() == PPC::SELECT_CC_I8 || MI->getOpcode() == PPC::SELECT_I4 || MI->getOpcode() == PPC::SELECT_I8)) { @@ -6765,13 +6833,13 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, BB = EmitAtomicBinary(MI, BB, true, PPC::XOR8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I8) - BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ANDC); + BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I16) - BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ANDC); + BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I32) - BB = EmitAtomicBinary(MI, BB, false, PPC::ANDC); + BB = EmitAtomicBinary(MI, BB, false, PPC::NAND); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_NAND_I64) - BB = EmitAtomicBinary(MI, BB, true, PPC::ANDC8); + BB = EmitAtomicBinary(MI, BB, true, PPC::NAND8); else if (MI->getOpcode() == PPC::ATOMIC_LOAD_SUB_I8) BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF); @@ -6862,7 +6930,7 @@ PPCTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, // We must use 64-bit registers for addresses when targeting 64-bit, // since we're actually doing arithmetic on them. Other registers // can be 32-bit. - bool is64bit = PPCSubTarget.isPPC64(); + bool is64bit = Subtarget.isPPC64(); bool is8bit = MI->getOpcode() == PPC::ATOMIC_CMP_SWAP_I8; unsigned dest = MI->getOperand(0).getReg(); @@ -7070,10 +7138,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, EVT VT = Op.getValueType(); - if ((VT == MVT::f32 && PPCSubTarget.hasFRES()) || - (VT == MVT::f64 && PPCSubTarget.hasFRE()) || - (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) || - (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) { + if ((VT == MVT::f32 && Subtarget.hasFRES()) || + (VT == MVT::f64 && Subtarget.hasFRE()) || + (VT == MVT::v4f32 && Subtarget.hasAltivec()) || + (VT == MVT::v2f64 && Subtarget.hasVSX())) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal, we need to find the zero of the function: @@ -7086,7 +7154,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecip(SDValue Op, // correct after every iteration. The minimum architected relative // accuracy is 2^-5. 
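The iteration counts above follow from Newton's method on F(X) = 1/X - A, whose update X = X * (2 - A * X) roughly doubles the number of correct bits per step: a 2^-5 estimate needs three steps for float's 24-bit significand and four for double, while a 2^-14 estimate (hasRecipPrec) needs one and two. A numeric sketch, not the DAG combine itself:

    #include <cassert>
    #include <cmath>

    static double refineRecip(double A, double X, int Iterations) {
      for (int I = 0; I != Iterations; ++I)
        X = X * (2.0 - A * X); // error roughly squares each step
      return X;
    }

    int main() {
      double Est = 0.34; // about 5 correct bits for 1/3, like fres
      double R = refineRecip(3.0, Est, 3);
      assert(std::fabs(R - 1.0 / 3.0) < 1e-7); // float-accurate after 3 steps
    }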
When hasRecipPrec(), this is 2^-14. IEEE float has // 23 digits and double has 52 digits. - int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3; + int Iterations = Subtarget.hasRecipPrec() ? 1 : 3; if (VT.getScalarType() == MVT::f64) ++Iterations; @@ -7133,10 +7201,10 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, EVT VT = Op.getValueType(); - if ((VT == MVT::f32 && PPCSubTarget.hasFRSQRTES()) || - (VT == MVT::f64 && PPCSubTarget.hasFRSQRTE()) || - (VT == MVT::v4f32 && PPCSubTarget.hasAltivec()) || - (VT == MVT::v2f64 && PPCSubTarget.hasVSX())) { + if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) || + (VT == MVT::f64 && Subtarget.hasFRSQRTE()) || + (VT == MVT::v4f32 && Subtarget.hasAltivec()) || + (VT == MVT::v2f64 && Subtarget.hasVSX())) { // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i) // For the reciprocal sqrt, we need to find the zero of the function: @@ -7149,7 +7217,7 @@ SDValue PPCTargetLowering::DAGCombineFastRecipFSQRT(SDValue Op, // correct after every iteration. The minimum architected relative // accuracy is 2^-5. When hasRecipPrec(), this is 2^-14. IEEE float has // 23 digits and double has 52 digits. - int Iterations = PPCSubTarget.hasRecipPrec() ? 1 : 3; + int Iterations = Subtarget.hasRecipPrec() ? 1 : 3; if (VT.getScalarType() == MVT::f64) ++Iterations; @@ -7266,10 +7334,9 @@ static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG) { if (!Visited.count(ChainLD->getChain().getNode())) Queue.push_back(ChainLD->getChain().getNode()); } else if (ChainNext->getOpcode() == ISD::TokenFactor) { - for (SDNode::op_iterator O = ChainNext->op_begin(), - OE = ChainNext->op_end(); O != OE; ++O) - if (!Visited.count(O->getNode())) - Queue.push_back(O->getNode()); + for (const SDUse &O : ChainNext->ops()) + if (!Visited.count(O.getNode())) + Queue.push_back(O.getNode()); } else LoadRoots.insert(ChainNext); } @@ -7312,7 +7379,7 @@ SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N, SelectionDAG &DAG = DCI.DAG; SDLoc dl(N); - assert(PPCSubTarget.useCRBits() && + assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits"); // If we're tracking CR bits, we need to be careful that we don't have: // trunc(binary-ops(zext(x), zext(y))) @@ -7610,9 +7677,9 @@ SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N, return SDValue(); if (!((N->getOperand(0).getValueType() == MVT::i1 && - PPCSubTarget.useCRBits()) || + Subtarget.useCRBits()) || (N->getOperand(0).getValueType() == MVT::i32 && - PPCSubTarget.isPPC64()))) + Subtarget.isPPC64()))) return SDValue(); if (N->getOperand(0).getOpcode() != ISD::AND && @@ -7930,8 +7997,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, DCI.AddToWorklist(RV.getNode()); RV = DAGCombineFastRecip(RV, DCI); if (RV.getNode()) { - // Unfortunately, RV is now NaN if the input was exactly 0. Select out - // this case and force the answer to 0. + // Unfortunately, RV is now NaN if the input was exactly 0. Select out + // this case and force the answer to 0. EVT VT = RV.getValueType(); @@ -8051,6 +8118,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // This is a type-legal unaligned Altivec load. SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); + bool isLittleEndian = Subtarget.isLittleEndian(); // This implements the loading of unaligned vectors as described in // the venerable Apple Velocity Engine overview. 
Specifically: @@ -8058,25 +8126,28 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html // // The general idea is to expand a sequence of one or more unaligned - // loads into a alignment-based permutation-control instruction (lvsl), - // a series of regular vector loads (which always truncate their - // input address to an aligned address), and a series of permutations. - // The results of these permutations are the requested loaded values. - // The trick is that the last "extra" load is not taken from the address - // you might suspect (sizeof(vector) bytes after the last requested - // load), but rather sizeof(vector) - 1 bytes after the last - // requested vector. The point of this is to avoid a page fault if the - // base address happened to be aligned. This works because if the base - // address is aligned, then adding less than a full vector length will - // cause the last vector in the sequence to be (re)loaded. Otherwise, - // the next vector will be fetched as you might suspect was necessary. + // loads into an alignment-based permutation-control instruction (lvsl + // or lvsr), a series of regular vector loads (which always truncate + // their input address to an aligned address), and a series of + // permutations. The results of these permutations are the requested + // loaded values. The trick is that the last "extra" load is not taken + // from the address you might suspect (sizeof(vector) bytes after the + // last requested load), but rather sizeof(vector) - 1 bytes after the + // last requested vector. The point of this is to avoid a page fault if + // the base address happened to be aligned. This works because if the + // base address is aligned, then adding less than a full vector length + // will cause the last vector in the sequence to be (re)loaded. + // Otherwise, the next vector will be fetched as you might suspect was + // necessary. // We might be able to reuse the permutation generation from // a different base address offset from this one by an aligned amount. // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this // optimization later. - SDValue PermCntl = BuildIntrinsicOp(Intrinsic::ppc_altivec_lvsl, Ptr, - DAG, dl, MVT::v16i8); + Intrinsic::ID Intr = (isLittleEndian ? + Intrinsic::ppc_altivec_lvsr : + Intrinsic::ppc_altivec_lvsl); + SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, MVT::v16i8); // Refine the alignment of the original load (a "new" load created here // which was identical to the first except for the alignment would be @@ -8125,8 +8196,18 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, if (ExtraLoad.getValueType() != MVT::v4i32) ExtraLoad = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, ExtraLoad); - SDValue Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, - BaseLoad, ExtraLoad, PermCntl, DAG, dl); + // Because vperm has a big-endian bias, we must reverse the order + // of the input vectors and complement the permute control vector + // when generating little endian code. We have already handled the + // latter by using lvsr instead of lvsl, so just reverse BaseLoad + // and ExtraLoad here. 
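To make the recipe in the comment above concrete, here is a minimal user-level sketch of the big-endian sequence using the <altivec.h> intrinsics; on little-endian targets the patch uses lvsr for the permute control and swaps the vperm inputs, exactly as the code that follows does. Illustrative only, not the DAG combine itself:

  #include <altivec.h>

  // Load 16 bytes from a possibly unaligned pointer (big-endian recipe).
  vector unsigned char LoadUnalignedBE(const unsigned char *Ptr) {
    vector unsigned char Cntl = vec_lvsl(0, Ptr); // permute control from Ptr's low bits
    vector unsigned char Lo   = vec_ld(0, Ptr);   // aligned load covering the first bytes
    vector unsigned char Hi   = vec_ld(15, Ptr);  // the "extra" load, 15 (not 16) bytes on,
                                                  // so an already-aligned Ptr reloads Lo
                                                  // instead of touching the next page
    return vec_perm(Lo, Hi, Cntl);                // select the 16 requested bytes
  }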
+ SDValue Perm; + if (isLittleEndian) + Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, + ExtraLoad, BaseLoad, PermCntl, DAG, dl); + else + Perm = BuildIntrinsicOp(Intrinsic::ppc_altivec_vperm, + BaseLoad, ExtraLoad, PermCntl, DAG, dl); if (VT != MVT::v4i32) Perm = DAG.getNode(ISD::BITCAST, dl, VT, Perm); @@ -8151,12 +8232,11 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, ++UI; SmallVector<SDValue, 8> Ops; - for (SDNode::op_iterator O = User->op_begin(), - OE = User->op_end(); O != OE; ++O) { - if (*O == Use) + for (const SDUse &O : User->ops()) { + if (O == Use) Ops.push_back(To); else - Ops.push_back(*O); + Ops.push_back(O); } DAG.UpdateNodeOperands(User, Ops); @@ -8166,9 +8246,12 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } break; - case ISD::INTRINSIC_WO_CHAIN: - if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == - Intrinsic::ppc_altivec_lvsl && + case ISD::INTRINSIC_WO_CHAIN: { + bool isLittleEndian = Subtarget.isLittleEndian(); + Intrinsic::ID Intr = (isLittleEndian ? + Intrinsic::ppc_altivec_lvsr : + Intrinsic::ppc_altivec_lvsl); + if (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue() == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) { SDValue Add = N->getOperand(1); @@ -8180,8 +8263,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, UE = BasePtr->use_end(); UI != UE; ++UI) { if (UI->getOpcode() == ISD::INTRINSIC_WO_CHAIN && cast<ConstantSDNode>(UI->getOperand(0))->getZExtValue() == - Intrinsic::ppc_altivec_lvsl) { - // We've found another LVSL, and this address if an aligned + Intr) { + // We've found another LVSL/LVSR, and this address is an aligned // multiple of that one. The results will be the same, so use the // one we've just found instead. @@ -8190,6 +8273,7 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N, } } } + } break; case ISD::BSWAP: @@ -8537,11 +8621,11 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // GCC RS6000 Constraint Letters switch (Constraint[0]) { case 'b': // R1-R31 - if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RC_NOX0RegClass); return std::make_pair(0U, &PPC::GPRC_NOR0RegClass); case 'r': // R0-R31 - if (VT == MVT::i64 && PPCSubTarget.isPPC64()) + if (VT == MVT::i64 && Subtarget.isPPC64()) return std::make_pair(0U, &PPC::G8RCRegClass); return std::make_pair(0U, &PPC::GPRCRegClass); case 'f': @@ -8573,7 +8657,7 @@ PPCTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, // register. // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use // the AsmName field from *RegisterInfo.td, then this would not be necessary. - if (R.first && VT == MVT::i64 && PPCSubTarget.isPPC64() && + if (R.first && VT == MVT::i64 && Subtarget.isPPC64() && PPC::GPRCRegClass.contains(R.first)) { const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); return std::make_pair(TRI->getMatchingSuperReg(R.first, @@ -8707,8 +8791,8 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, // the stack. 
PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); FuncInfo->setLRStoreRequired(); - bool isPPC64 = PPCSubTarget.isPPC64(); - bool isDarwinABI = PPCSubTarget.isDarwinABI(); + bool isPPC64 = Subtarget.isPPC64(); + bool isDarwinABI = Subtarget.isDarwinABI(); if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); @@ -8762,8 +8846,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, // this table could be generated automatically from RegInfo. unsigned PPCTargetLowering::getRegisterByName(const char* RegName, EVT VT) const { - bool isPPC64 = PPCSubTarget.isPPC64(); - bool isDarwinABI = PPCSubTarget.isDarwinABI(); + bool isPPC64 = Subtarget.isPPC64(); + bool isDarwinABI = Subtarget.isDarwinABI(); if ((isPPC64 && VT != MVT::i64 && VT != MVT::i32) || (!isPPC64 && VT != MVT::i32)) @@ -8804,7 +8888,7 @@ EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc, MachineFunction &MF) const { - if (this->PPCSubTarget.isPPC64()) { + if (Subtarget.isPPC64()) { return MVT::i64; } else { return MVT::i32; @@ -8863,7 +8947,7 @@ bool PPCTargetLowering::allowsUnalignedMemoryAccesses(EVT VT, return false; if (VT.getSimpleVT().isVector()) { - if (PPCSubTarget.hasVSX()) { + if (Subtarget.hasVSX()) { if (VT != MVT::v2f64 && VT != MVT::v2i64) return false; } else { @@ -8907,7 +8991,7 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles( } Sched::Preference PPCTargetLowering::getSchedulingPreference(SDNode *N) const { - if (DisableILPPref || PPCSubTarget.enableMachineScheduler()) + if (DisableILPPref || Subtarget.enableMachineScheduler()) return TargetLowering::getSchedulingPreference(N); return Sched::ILP; diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 080ef5d..df05aa5 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -18,7 +18,6 @@ #include "PPC.h" #include "PPCInstrInfo.h" #include "PPCRegisterInfo.h" -#include "PPCSubtarget.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" @@ -71,19 +70,14 @@ namespace llvm { TOC_ENTRY, - /// The following three target-specific nodes are used for calls through + /// The following two target-specific nodes are used for calls through /// function pointers in the 64-bit SVR4 ABI. - /// Restore the TOC from the TOC save area of the current stack frame. - /// This is basically a hard coded load instruction which additionally - /// takes/produces a flag. - TOC_RESTORE, - /// Like a regular LOAD but additionally taking/producing a flag. LOAD, - /// LOAD into r2 (also taking/producing a flag). Like TOC_RESTORE, this is - /// a hard coded load instruction. + /// Like LOAD (taking/producing a flag), but using r2 as hard-coded + /// destination. LOAD_TOC, /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) @@ -303,25 +297,27 @@ namespace llvm { namespace PPC { /// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUHUM instruction. - bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); + bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, + SelectionDAG &DAG); /// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a /// VPKUWUM instruction. 
- bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary); + bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, bool isUnary, + SelectionDAG &DAG); /// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGL* instruction with the specified unit size (1,2 or 4 bytes). bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary); + bool isUnary, SelectionDAG &DAG); /// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for /// a VRGH* instruction with the specified unit size (1,2 or 4 bytes). bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, - bool isUnary); + bool isUnary, SelectionDAG &DAG); /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift /// amount, otherwise return -1. - int isVSLDOIShuffleMask(SDNode *N, bool isUnary); + int isVSLDOIShuffleMask(SDNode *N, bool isUnary, SelectionDAG &DAG); /// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand /// specifies a splat of a single element that is suitable for input to @@ -334,7 +330,7 @@ namespace llvm { /// getVSPLTImmediate - Return the appropriate VSPLT* immediate to splat the /// specified isSplatShuffleMask VECTOR_SHUFFLE mask. - unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize); + unsigned getVSPLTImmediate(SDNode *N, unsigned EltSize, SelectionDAG &DAG); /// get_VSPLTI_elt - If this is a build_vector of constants which can be /// formed by using a vspltis[bhw] instruction of the specified element @@ -343,8 +339,9 @@ namespace llvm { SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG); } + class PPCSubtarget; class PPCTargetLowering : public TargetLowering { - const PPCSubtarget &PPCSubTarget; + const PPCSubtarget &Subtarget; public: explicit PPCTargetLowering(PPCTargetMachine &TM); @@ -613,11 +610,6 @@ namespace llvm { extendArgForPPC64(ISD::ArgFlagsTy Flags, EVT ObjectVT, SelectionDAG &DAG, SDValue ArgVal, SDLoc dl) const; - void - setMinReservedArea(MachineFunction &MF, SelectionDAG &DAG, - unsigned nAltivecParamsAtEnd, - unsigned MinReservedArea, bool isPPC64) const; - SDValue LowerFormalArguments_Darwin(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index b71c09e..9318f70 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -802,17 +802,11 @@ def LDtocCPT: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg), [(set i64:$rD, (PPCtoc_entry tconstpool:$disp, i64:$reg))]>, isPPC64; -let hasSideEffects = 1, isCodeGenOnly = 1 in { -let RST = 2, DS = 2 in -def LDinto_toc: DSForm_1a<58, 0, (outs), (ins g8rc:$reg), - "ld 2, 8($reg)", IIC_LdStLD, - [(PPCload_toc i64:$reg)]>, isPPC64; - -let RST = 2, DS = 10, RA = 1 in -def LDtoc_restore : DSForm_1a<58, 0, (outs), (ins), - "ld 2, 40(1)", IIC_LdStLD, - [(PPCtoc_restore)]>, isPPC64; -} +let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2 in +def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src), + "ld 2, $src", IIC_LdStLD, + [(PPCload_toc ixaddr:$src)]>, isPPC64; + def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src), "ldx $rD, $src", IIC_LdStLD, [(set i64:$rD, (load xaddr:$src))]>, isPPC64; diff --git a/lib/Target/PowerPC/PPCInstrAltivec.td b/lib/Target/PowerPC/PPCInstrAltivec.td index f3c2eab..dce46d8 100644 --- a/lib/Target/PowerPC/PPCInstrAltivec.td +++ b/lib/Target/PowerPC/PPCInstrAltivec.td @@ -22,111 +22,127 @@ def vnot_ppc : PatFrag<(ops node:$in), def vpkuhum_shuffle : 
PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false); + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), false, + *CurDAG); }]>; def vpkuwum_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false); + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), false, + *CurDAG); }]>; def vpkuhum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true); + return PPC::isVPKUHUMShuffleMask(cast<ShuffleVectorSDNode>(N), true, + *CurDAG); }]>; def vpkuwum_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true); + return PPC::isVPKUWUMShuffleMask(cast<ShuffleVectorSDNode>(N), true, + *CurDAG); }]>; def vmrglb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false, + *CurDAG); }]>; def vmrglh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false, + *CurDAG); }]>; def vmrglw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false, + *CurDAG); }]>; def vmrghb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, false, + *CurDAG); }]>; def vmrghh_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, false, + *CurDAG); }]>; def vmrghw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, false, + *CurDAG); }]>; def vmrglb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle (v16i8 node:$lhs), node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true, + *CurDAG); }]>; def vmrglh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true, + *CurDAG); }]>; def vmrglw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true); + return PPC::isVMRGLShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true, + *CurDAG); }]>; def vmrghb_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle 
node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 1, true, + *CurDAG); }]>; def vmrghh_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 2, true, + *CurDAG); }]>; def vmrghw_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true); + return PPC::isVMRGHShuffleMask(cast<ShuffleVectorSDNode>(N), 4, true, + *CurDAG); }]>; def VSLDOI_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::isVSLDOIShuffleMask(N, false)); + return getI32Imm(PPC::isVSLDOIShuffleMask(N, false, *CurDAG)); }]>; def vsldoi_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVSLDOIShuffleMask(N, false) != -1; + return PPC::isVSLDOIShuffleMask(N, false, *CurDAG) != -1; }], VSLDOI_get_imm>; /// VSLDOI_unary* - These are used to match vsldoi(X,X), which is turned into /// vector_shuffle(X,undef,mask) by the dag combiner. def VSLDOI_unary_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::isVSLDOIShuffleMask(N, true)); + return getI32Imm(PPC::isVSLDOIShuffleMask(N, true, *CurDAG)); }]>; def vsldoi_unary_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ - return PPC::isVSLDOIShuffleMask(N, true) != -1; + return PPC::isVSLDOIShuffleMask(N, true, *CurDAG) != -1; }], VSLDOI_unary_get_imm>; // VSPLT*_get_imm xform function: convert vector_shuffle mask to VSPLT* imm. def VSPLTB_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::getVSPLTImmediate(N, 1)); + return getI32Imm(PPC::getVSPLTImmediate(N, 1, *CurDAG)); }]>; def vspltb_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 1); }], VSPLTB_get_imm>; def VSPLTH_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::getVSPLTImmediate(N, 2)); + return getI32Imm(PPC::getVSPLTImmediate(N, 2, *CurDAG)); }]>; def vsplth_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ return PPC::isSplatShuffleMask(cast<ShuffleVectorSDNode>(N), 2); }], VSPLTH_get_imm>; def VSPLTW_get_imm : SDNodeXForm<vector_shuffle, [{ - return getI32Imm(PPC::getVSPLTImmediate(N, 4)); + return getI32Imm(PPC::getVSPLTImmediate(N, 4, *CurDAG)); }]>; def vspltw_shuffle : PatFrag<(ops node:$lhs, node:$rhs), (vector_shuffle node:$lhs, node:$rhs), [{ diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 7fed2c6..1e4396c 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -360,20 +360,6 @@ class DSForm_1<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, let Inst{30-31} = xo; } -class DSForm_1a<bits<6> opcode, bits<2> xo, dag OOL, dag IOL, string asmstr, - InstrItinClass itin, list<dag> pattern> - : I<opcode, OOL, IOL, asmstr, itin> { - bits<5> RST; - bits<14> DS; - bits<5> RA; - - let Pattern = pattern; - - let Inst{6-10} = RST; - let Inst{11-15} = RA; - let Inst{16-29} = DS; - let Inst{30-31} = xo; -} // 1.7.6 X-Form class XForm_base_r3xo<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp 
b/lib/Target/PowerPC/PPCInstrInfo.cpp index fd72384..9bac91d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -27,6 +27,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/CommandLine.h" @@ -60,23 +61,25 @@ cl::Hidden); // Pin the vtable to this file. void PPCInstrInfo::anchor() {} -PPCInstrInfo::PPCInstrInfo(PPCTargetMachine &tm) - : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), - TM(tm), RI(*TM.getSubtargetImpl()) {} +PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI) + : PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, PPC::ADJCALLSTACKUP), + Subtarget(STI), RI(STI) {} /// CreateTargetHazardRecognizer - Return the hazard recognizer to use for /// this target when scheduling the DAG. -ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( - const TargetMachine *TM, - const ScheduleDAG *DAG) const { - unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); +ScheduleHazardRecognizer * +PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, + const ScheduleDAG *DAG) const { + unsigned Directive = + static_cast<const PPCSubtarget *>(STI)->getDarwinDirective(); if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 || Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) { - const InstrItineraryData *II = TM->getInstrItineraryData(); + const InstrItineraryData *II = + &static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG); } - return TargetInstrInfo::CreateTargetHazardRecognizer(TM, DAG); + return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG); } /// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer @@ -84,17 +87,18 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetPostRAHazardRecognizer( const InstrItineraryData *II, const ScheduleDAG *DAG) const { - unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); + unsigned Directive = + DAG->TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); - if (Directive == PPC::DIR_PWR7) + if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8) return new PPCDispatchGroupSBHazardRecognizer(II, DAG); // Most subtargets use a PPC970 recognizer. if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 && Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) { - assert(TM.getInstrInfo() && "No InstrInfo?"); + assert(DAG->TII && "No InstrInfo?"); - return new PPCHazardRecognizer970(TM); + return new PPCHazardRecognizer970(*DAG); } return new ScoreboardHazardRecognizer(II, DAG); @@ -129,7 +133,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, // On some cores, there is an additional delay between writing to a condition // register, and using it from a branch. 
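For context on the latency hunk below: on the cores listed in the switch that follows, the modeled latency of a condition-register write feeding a branch is raised by two cycles, which nudges the scheduler to place independent work between a compare and its conditional branch. Schematically (the guard here is a simplified stand-in for the real operand checks in this function):

  // Schematic only: model the extra CR-write-to-branch delay.
  if (UseMI->isBranch() && DefWritesCRField) // DefWritesCRField: placeholder predicate
    Latency += 2;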
- unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); + unsigned Directive = Subtarget.getDarwinDirective(); switch (Directive) { default: break; case PPC::DIR_7400: @@ -142,6 +146,7 @@ int PPCInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case PPC::DIR_PWR6: case PPC::DIR_PWR6X: case PPC::DIR_PWR7: + case PPC::DIR_PWR8: Latency += 2; break; } @@ -313,12 +318,13 @@ void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const { // This function is used for scheduling, and the nop wanted here is the type // that terminates dispatch groups on the POWER cores. - unsigned Directive = TM.getSubtarget<PPCSubtarget>().getDarwinDirective(); + unsigned Directive = Subtarget.getDarwinDirective(); unsigned Opcode; switch (Directive) { default: Opcode = PPC::NOP; break; case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break; case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break; + case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */ } DebugLoc DL; @@ -332,7 +338,7 @@ bool PPCInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const { - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); // If the block has no terminators, it just falls into the block after it. MachineBasicBlock::iterator I = MBB.end(); @@ -538,7 +544,7 @@ PPCInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, assert((Cond.size() == 2 || Cond.size() == 0) && "PPC branch conditions have two components!"); - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); // One-way branch. if (!FBB) { @@ -579,7 +585,7 @@ bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB, const SmallVectorImpl<MachineOperand> &Cond, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { - if (!TM.getSubtargetImpl()->hasISEL()) + if (!Subtarget.hasISEL()) return false; if (Cond.size() != 2) @@ -623,7 +629,7 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB, assert(Cond.size() == 2 && "PPC branch conditions have two components!"); - assert(TM.getSubtargetImpl()->hasISEL() && + assert(Subtarget.hasISEL() && "Cannot insert select on target without ISEL support"); // Get the register classes. @@ -826,7 +832,7 @@ PPCInstrInfo::StoreRegToStackSlot(MachineFunction &MF, FrameIdx)); NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { - assert(TM.getSubtargetImpl()->isDarwin() && + assert(Subtarget.isDarwin() && "VRSAVE only needs spill/restore on Darwin"); NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::SPILL_VRSAVE)) .addReg(SrcReg, @@ -921,7 +927,7 @@ PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, DebugLoc DL, FrameIdx)); NonRI = true; } else if (PPC::VRSAVERCRegClass.hasSubClassEq(RC)) { - assert(TM.getSubtargetImpl()->isDarwin() && + assert(Subtarget.isDarwin() && "VRSAVE only needs spill/restore on Darwin"); NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(PPC::RESTORE_VRSAVE), @@ -1035,7 +1041,7 @@ bool PPCInstrInfo::FoldImmediate(MachineInstr *UseMI, MachineInstr *DefMI, unsigned ZeroReg; if (UseInfo->isLookupPtrRegClass()) { - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO; } else { ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ? 
@@ -1102,7 +1108,7 @@ bool PPCInstrInfo::PredicateInstruction( unsigned OpC = MI->getOpcode(); if (OpC == PPC::BLR) { if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); MI->setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR) : (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR))); @@ -1124,7 +1130,7 @@ bool PPCInstrInfo::PredicateInstruction( return true; } else if (OpC == PPC::B) { if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) { - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); MI->setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) : (isPPC64 ? PPC::BDZ8 : PPC::BDZ))); @@ -1162,7 +1168,7 @@ bool PPCInstrInfo::PredicateInstruction( llvm_unreachable("Cannot predicate bctr[l] on the ctr register"); bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8; - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); if (Pred[0].getImm() == PPC::PRED_BIT_SET) { MI->setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8) : @@ -1323,7 +1329,7 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, // for equality checks (as those don't depend on the sign). On PPC64, // we are restricted to equality for unsigned 64-bit comparisons and for // signed 32-bit comparisons the applicability is more restricted. - bool isPPC64 = TM.getSubtargetImpl()->isPPC64(); + bool isPPC64 = Subtarget.isPPC64(); bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW; bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW; bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD; diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h index d9db3e1..83f14c6 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.h +++ b/lib/Target/PowerPC/PPCInstrInfo.h @@ -65,7 +65,7 @@ enum PPC970_Unit { class PPCInstrInfo : public PPCGenInstrInfo { - PPCTargetMachine &TM; + PPCSubtarget &Subtarget; const PPCRegisterInfo RI; bool StoreRegToStackSlot(MachineFunction &MF, @@ -80,7 +80,7 @@ class PPCInstrInfo : public PPCGenInstrInfo { bool &NonRI, bool &SpillsVRS) const; virtual void anchor(); public: - explicit PPCInstrInfo(PPCTargetMachine &TM); + explicit PPCInstrInfo(PPCSubtarget &STI); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. 
As /// such, whenever a client has an instance of instruction info, it should @@ -89,7 +89,7 @@ public: const PPCRegisterInfo &getRegisterInfo() const { return RI; } ScheduleHazardRecognizer * - CreateTargetHazardRecognizer(const TargetMachine *TM, + CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI, const ScheduleDAG *DAG) const override; ScheduleHazardRecognizer * CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index e421f8e..c2e3382 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -141,9 +141,6 @@ def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>, [SDNPHasChain, SDNPSideEffect, SDNPInGlue, SDNPOutGlue]>; -def PPCtoc_restore : SDNode<"PPCISD::TOC_RESTORE", SDTypeProfile<0, 0, []>, - [SDNPHasChain, SDNPSideEffect, - SDNPInGlue, SDNPOutGlue]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, diff --git a/lib/Target/PowerPC/PPCJITInfo.cpp b/lib/Target/PowerPC/PPCJITInfo.cpp index 7bbc71b..e5f113a 100644 --- a/lib/Target/PowerPC/PPCJITInfo.cpp +++ b/lib/Target/PowerPC/PPCJITInfo.cpp @@ -13,7 +13,7 @@ #include "PPCJITInfo.h" #include "PPCRelocations.h" -#include "PPCTargetMachine.h" +#include "PPCSubtarget.h" #include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -25,6 +25,11 @@ using namespace llvm; static TargetJITInfo::JITCompilerFn JITCompilerFunction; +PPCJITInfo::PPCJITInfo(PPCSubtarget &STI) + : Subtarget(STI), is64Bit(STI.isPPC64()) { + useGOT = 0; +} + #define BUILD_ADDIS(RD,RS,IMM16) \ ((15 << 26) | ((RD) << 21) | ((RS) << 16) | ((IMM16) & 65535)) #define BUILD_ORI(RD,RS,UIMM16) \ @@ -393,7 +398,7 @@ void *PPCJITInfo::emitFunctionStub(const Function* F, void *Fn, JCE.emitWordBE(0xf821ffb1); // stdu r1,-80(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 JCE.emitWordBE(0xf9610060); // std r11, 96(r1) - } else if (TM.getSubtargetImpl()->isDarwinABI()){ + } else if (Subtarget.isDarwinABI()){ JCE.emitWordBE(0x9421ffe0); // stwu r1,-32(r1) JCE.emitWordBE(0x7d6802a6); // mflr r11 JCE.emitWordBE(0x91610028); // stw r11, 40(r1) diff --git a/lib/Target/PowerPC/PPCJITInfo.h b/lib/Target/PowerPC/PPCJITInfo.h index 0693e3e..b6b37ff 100644 --- a/lib/Target/PowerPC/PPCJITInfo.h +++ b/lib/Target/PowerPC/PPCJITInfo.h @@ -18,32 +18,29 @@ #include "llvm/Target/TargetJITInfo.h" namespace llvm { - class PPCTargetMachine; +class PPCSubtarget; +class PPCJITInfo : public TargetJITInfo { +protected: + PPCSubtarget &Subtarget; + bool is64Bit; - class PPCJITInfo : public TargetJITInfo { - protected: - PPCTargetMachine &TM; - bool is64Bit; - public: - PPCJITInfo(PPCTargetMachine &tm, bool tmIs64Bit) : TM(tm) { - useGOT = 0; - is64Bit = tmIs64Bit; - } +public: + PPCJITInfo(PPCSubtarget &STI); - StubLayout getStubLayout() override; - void *emitFunctionStub(const Function* F, void *Fn, - JITCodeEmitter &JCE) override; - LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; - void relocate(void *Function, MachineRelocation *MR, - unsigned NumRelocs, unsigned char* GOTBase) override; + StubLayout getStubLayout() override; + void *emitFunctionStub(const Function *F, void *Fn, + JITCodeEmitter &JCE) override; + LazyResolverFn getLazyResolverFunction(JITCompilerFn) override; + void relocate(void *Function, MachineRelocation *MR, unsigned 
NumRelocs, + unsigned char *GOTBase) override; - /// replaceMachineCodeForFunction - Make it so that calling the function - /// whose machine code is at OLD turns into a call to NEW, perhaps by - /// overwriting OLD with a branch to NEW. This is used for self-modifying - /// code. - /// - void replaceMachineCodeForFunction(void *Old, void *New) override; - }; + /// replaceMachineCodeForFunction - Make it so that calling the function + /// whose machine code is at OLD turns into a call to NEW, perhaps by + /// overwriting OLD with a branch to NEW. This is used for self-modifying + /// code. + /// + void replaceMachineCodeForFunction(void *Old, void *New) override; +}; } #endif diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index e333b51..eca774e 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -973,6 +973,14 @@ void PPCRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, unsigned OffsetOperandNo = getOffsetONFromFION(MI, FIOperandNum); Offset += MI.getOperand(OffsetOperandNo).getImm(); MI.getOperand(OffsetOperandNo).ChangeToImmediate(Offset); + + MachineBasicBlock &MBB = *MI.getParent(); + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const MCInstrDesc &MCID = MI.getDesc(); + MachineRegisterInfo &MRI = MF.getRegInfo(); + MRI.constrainRegClass(BaseReg, + TII.getRegClass(MCID, FIOperandNum, this, MF)); } bool PPCRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI, diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp index f742f72..dc16742 100644 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.cpp @@ -16,9 +16,7 @@ using namespace llvm; #define DEBUG_TYPE "powerpc-selectiondag-info" -PPCSelectionDAGInfo::PPCSelectionDAGInfo(const PPCTargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} +PPCSelectionDAGInfo::PPCSelectionDAGInfo(const DataLayout *DL) + : TargetSelectionDAGInfo(DL) {} -PPCSelectionDAGInfo::~PPCSelectionDAGInfo() { -} +PPCSelectionDAGInfo::~PPCSelectionDAGInfo() {} diff --git a/lib/Target/PowerPC/PPCSelectionDAGInfo.h b/lib/Target/PowerPC/PPCSelectionDAGInfo.h index 341b69c..b2e7f3b 100644 --- a/lib/Target/PowerPC/PPCSelectionDAGInfo.h +++ b/lib/Target/PowerPC/PPCSelectionDAGInfo.h @@ -22,7 +22,7 @@ class PPCTargetMachine; class PPCSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit PPCSelectionDAGInfo(const PPCTargetMachine &TM); + explicit PPCSelectionDAGInfo(const DataLayout *DL); ~PPCSelectionDAGInfo(); }; diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp index ea9daee..2e1b74a 100644 --- a/lib/Target/PowerPC/PPCSubtarget.cpp +++ b/lib/Target/PowerPC/PPCSubtarget.cpp @@ -32,15 +32,57 @@ using namespace llvm; #define GET_SUBTARGETINFO_CTOR #include "PPCGenSubtargetInfo.inc" -PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit, - CodeGenOpt::Level OptLevel) - : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT), - OptLevel(OptLevel) { +/// Return the datalayout string of a subtarget. +static std::string getDataLayoutString(const PPCSubtarget &ST) { + const Triple &T = ST.getTargetTriple(); + + std::string Ret; + + // Most PPC* platforms are big endian, PPC64LE is little endian. 
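For concreteness, the strings this relocated builder produces can be traced through the branches that follow. Assuming the ELF mangling component is "-m:e" (Mach-O would be "-m:o"), typical results would be:

  // Hypothetical traces of getDataLayoutString:
  //   powerpc64le-unknown-linux-gnu : "e-m:e-i64:64-n32:64"
  //   powerpc64-unknown-linux-gnu   : "E-m:e-i64:64-n32:64"
  //   powerpc-unknown-linux-gnu     : "E-m:e-p:32:32-i64:64-n32"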
+ if (ST.isLittleEndian()) + Ret = "e"; + else + Ret = "E"; + + Ret += DataLayout::getManglingComponent(T); + + // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit + // pointers. + if (!ST.isPPC64() || T.getOS() == Triple::Lv2) + Ret += "-p:32:32"; + + // Note, the alignment values for f64 and i64 on ppc64 in Darwin + // documentation are wrong; these are correct (i.e. "what gcc does"). + if (ST.isPPC64() || ST.isSVR4ABI()) + Ret += "-i64:64"; + else + Ret += "-f64:32:64"; + + // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. + if (ST.isPPC64()) + Ret += "-n32:64"; + else + Ret += "-n32"; + + return Ret; +} + +PPCSubtarget &PPCSubtarget::initializeSubtargetDependencies(StringRef CPU, + StringRef FS) { initializeEnvironment(); resetSubtargetFeatures(CPU, FS); + return *this; } +PPCSubtarget::PPCSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, PPCTargetMachine &TM, + bool is64Bit, CodeGenOpt::Level OptLevel) + : PPCGenSubtargetInfo(TT, CPU, FS), IsPPC64(is64Bit), TargetTriple(TT), + OptLevel(OptLevel), + FrameLowering(initializeSubtargetDependencies(CPU, FS)), + DL(getDataLayoutString(*this)), InstrInfo(*this), JITInfo(*this), + TLInfo(TM), TSInfo(&DL) {} + /// SetJITMode - This is called to inform the subtarget info that we are /// producing code for the JIT. void PPCSubtarget::SetJITMode() { @@ -156,6 +198,11 @@ void PPCSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) { // Determine endianness. IsLittleEndian = (TargetTriple.getArch() == Triple::ppc64le); + + // FIXME: For now, we disable VSX in little-endian mode until endian + // issues in those instructions can be addressed. + if (IsLittleEndian) + HasVSX = false; } /// hasLazyResolverStub - Return true if accesses to the specified global have @@ -200,6 +247,7 @@ static bool needsAggressiveScheduling(unsigned Directive) { case PPC::DIR_E500mc: case PPC::DIR_E5500: case PPC::DIR_PWR7: + case PPC::DIR_PWR8: return true; } } diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index ee43fd5..2a16699 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -14,7 +14,13 @@ #ifndef POWERPCSUBTARGET_H #define POWERPCSUBTARGET_H +#include "PPCFrameLowering.h" +#include "PPCInstrInfo.h" +#include "PPCISelLowering.h" +#include "PPCJITInfo.h" +#include "PPCSelectionDAGInfo.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -50,6 +56,7 @@ namespace PPC { DIR_PWR6, DIR_PWR6X, DIR_PWR7, + DIR_PWR8, DIR_64 }; } @@ -102,12 +109,19 @@ protected: /// OptLevel - What default optimization level we're emitting code for. CodeGenOpt::Level OptLevel; + PPCFrameLowering FrameLowering; + const DataLayout DL; + PPCInstrInfo InstrInfo; + PPCJITInfo JITInfo; + PPCTargetLowering TLInfo; + PPCSelectionDAGInfo TSInfo; + public: /// This constructor initializes the data members to match that /// of the specified triple. 
/// PPCSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit, + const std::string &FS, PPCTargetMachine &TM, bool is64Bit, CodeGenOpt::Level OptLevel); /// ParseSubtargetFeatures - Parses features string setting specified @@ -127,10 +141,21 @@ public: /// unsigned getDarwinDirective() const { return DarwinDirective; } - /// getInstrItins - Return the instruction itineraies based on subtarget + /// getInstrItins - Return the instruction itineraries based on subtarget /// selection. const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + const PPCFrameLowering *getFrameLowering() const { return &FrameLowering; } + const DataLayout *getDataLayout() const { return &DL; } + const PPCInstrInfo *getInstrInfo() const { return &InstrInfo; } + PPCJITInfo *getJITInfo() { return &JITInfo; } + const PPCTargetLowering *getTargetLowering() const { return &TLInfo; } + const PPCSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + + /// initializeSubtargetDependencies - Initializes using a CPU and feature string + /// so that we can use initializer lists for subtarget initialization. + PPCSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); + /// \brief Reset the features for the PowerPC target. void resetSubtargetFeatures(const MachineFunction *MF) override; private: diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 2323add..9563b90 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -37,53 +37,12 @@ extern "C" void LLVMInitializePowerPCTarget() { RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget); } -/// Return the datalayout string of a subtarget. -static std::string getDataLayoutString(const PPCSubtarget &ST) { - const Triple &T = ST.getTargetTriple(); - - std::string Ret; - - // Most PPC* platforms are big endian, PPC64LE is little endian. - if (ST.isLittleEndian()) - Ret = "e"; - else - Ret = "E"; - - Ret += DataLayout::getManglingComponent(T); - - // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit - // pointers. - if (!ST.isPPC64() || T.getOS() == Triple::Lv2) - Ret += "-p:32:32"; - - // Note, the alignment values for f64 and i64 on ppc64 in Darwin - // documentation are wrong; these are correct (i.e. "what gcc does"). - if (ST.isPPC64() || ST.isSVR4ABI()) - Ret += "-i64:64"; - else - Ret += "-f64:32:64"; - - // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. 
- if (ST.isPPC64()) - Ret += "-n32:64"; - else - Ret += "-n32"; - - return Ret; -} - -PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, +PPCTargetMachine::PPCTargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, - CodeGenOpt::Level OL, - bool is64Bit) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, is64Bit, OL), - DL(getDataLayoutString(Subtarget)), InstrInfo(*this), - FrameLowering(Subtarget), JITInfo(*this, is64Bit), - TLInfo(*this), TSInfo(*this), - InstrItins(Subtarget.getInstrItineraryData()) { + CodeGenOpt::Level OL, bool is64Bit) + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, *this, is64Bit, OL) { initAsmInfo(); } diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 9e92494..4c7029c 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -14,11 +14,7 @@ #ifndef PPC_TARGETMACHINE_H #define PPC_TARGETMACHINE_H -#include "PPCFrameLowering.h" -#include "PPCISelLowering.h" #include "PPCInstrInfo.h" -#include "PPCJITInfo.h" -#include "PPCSelectionDAGInfo.h" #include "PPCSubtarget.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" @@ -29,13 +25,6 @@ namespace llvm { /// class PPCTargetMachine : public LLVMTargetMachine { PPCSubtarget Subtarget; - const DataLayout DL; // Calculates type size & alignment - PPCInstrInfo InstrInfo; - PPCFrameLowering FrameLowering; - PPCJITInfo JITInfo; - PPCTargetLowering TLInfo; - PPCSelectionDAGInfo TSInfo; - InstrItineraryData InstrItins; public: PPCTargetMachine(const Target &T, StringRef TT, @@ -43,25 +32,29 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64Bit); - const PPCInstrInfo *getInstrInfo() const override { return &InstrInfo; } - const PPCFrameLowering *getFrameLowering() const override { - return &FrameLowering; + const PPCInstrInfo *getInstrInfo() const override { + return getSubtargetImpl()->getInstrInfo(); } - PPCJITInfo *getJITInfo() override { return &JITInfo; } + const PPCFrameLowering *getFrameLowering() const override { + return getSubtargetImpl()->getFrameLowering(); + } + PPCJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } const PPCTargetLowering *getTargetLowering() const override { - return &TLInfo; + return getSubtargetImpl()->getTargetLowering(); } const PPCSelectionDAGInfo* getSelectionDAGInfo() const override { - return &TSInfo; + return getSubtargetImpl()->getSelectionDAGInfo(); } - const PPCRegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); + const PPCRegisterInfo *getRegisterInfo() const override { + return &getInstrInfo()->getRegisterInfo(); } - const DataLayout *getDataLayout() const override { return &DL; } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); + } const PPCSubtarget *getSubtargetImpl() const override { return &Subtarget; } const InstrItineraryData *getInstrItineraryData() const override { - return &InstrItins; + return &getSubtargetImpl()->getInstrItineraryData(); } // Pass Pipeline Configuration diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h index 949fdfb..713fc4b 100644 --- a/lib/Target/R600/AMDGPU.h +++ b/lib/Target/R600/AMDGPU.h @@ -17,6 +17,7 @@ namespace llvm { class AMDGPUInstrPrinter; +class AMDGPUSubtarget; 
class AMDGPUTargetMachine; class FunctionPass; class MCAsmInfo; @@ -40,6 +41,7 @@ FunctionPass *createSIAnnotateControlFlowPass(); FunctionPass *createSILowerI1CopiesPass(); FunctionPass *createSILowerControlFlowPass(TargetMachine &tm); FunctionPass *createSIFixSGPRCopiesPass(TargetMachine &tm); +FunctionPass *createSIFixSGPRLiveRangesPass(); FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS); FunctionPass *createSIInsertWaits(TargetMachine &tm); @@ -47,14 +49,18 @@ void initializeSILowerI1CopiesPass(PassRegistry &); extern char &SILowerI1CopiesID; // Passes common to R600 and SI +FunctionPass *createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST); Pass *createAMDGPUStructurizeCFGPass(); -FunctionPass *createAMDGPUConvertToISAPass(TargetMachine &tm); FunctionPass *createAMDGPUISelDag(TargetMachine &tm); /// \brief Creates an AMDGPU-specific Target Transformation Info pass. ImmutablePass * createAMDGPUTargetTransformInfoPass(const AMDGPUTargetMachine *TM); +void initializeSIFixSGPRLiveRangesPass(PassRegistry&); +extern char &SIFixSGPRLiveRangesID; + + extern Target TheAMDGPUTarget; } // End namespace llvm diff --git a/lib/Target/R600/AMDGPU.td b/lib/Target/R600/AMDGPU.td index 2edc115..6ff9ab7 100644 --- a/lib/Target/R600/AMDGPU.td +++ b/lib/Target/R600/AMDGPU.td @@ -7,8 +7,7 @@ // //==-----------------------------------------------------------------------===// -// Include AMDIL TD files -include "AMDILBase.td" +include "llvm/Target/Target.td" //===----------------------------------------------------------------------===// // Subtarget Features @@ -33,30 +32,25 @@ def FeatureIfCvt : SubtargetFeature <"disable-ifcvt", "false", "Disable the if conversion pass">; -def FeatureFP64 : SubtargetFeature<"fp64", +def FeatureFP64 : SubtargetFeature<"fp64", "FP64", "true", - "Enable 64bit double precision operations">; + "Enable double precision operations">; def Feature64BitPtr : SubtargetFeature<"64BitPtr", "Is64bit", "true", - "Specify if 64bit addressing should be used.">; - -def Feature32on64BitPtr : SubtargetFeature<"64on32BitPtr", - "Is32on64bit", - "false", - "Specify if 64bit sized pointers with 32bit addressing should be used.">; + "Specify if 64-bit addressing should be used">; def FeatureR600ALUInst : SubtargetFeature<"R600ALUInst", "R600ALUInst", "false", - "Older version of ALU instructions encoding.">; + "Older version of ALU instructions encoding">; def FeatureVertexCache : SubtargetFeature<"HasVertexCache", "HasVertexCache", "true", - "Specify use of dedicated vertex cache.">; + "Specify use of dedicated vertex cache">; def FeatureCaymanISA : SubtargetFeature<"caymanISA", "CaymanISA", @@ -87,28 +81,40 @@ def FeatureWavefrontSize16 : SubtargetFeatureWavefrontSize<16>; def FeatureWavefrontSize32 : SubtargetFeatureWavefrontSize<32>; def FeatureWavefrontSize64 : SubtargetFeatureWavefrontSize<64>; +class SubtargetFeatureLocalMemorySize <int Value> : SubtargetFeature< + "localmemorysize"#Value, + "LocalMemorySize", + !cast<string>(Value), + "The size of local memory in bytes">; + class SubtargetFeatureGeneration <string Value, list<SubtargetFeature> Implies> : SubtargetFeature <Value, "Gen", "AMDGPUSubtarget::"#Value, Value#" GPU generation", Implies>; +def FeatureLocalMemorySize0 : SubtargetFeatureLocalMemorySize<0>; +def FeatureLocalMemorySize32768 : SubtargetFeatureLocalMemorySize<32768>; +def FeatureLocalMemorySize65536 : SubtargetFeatureLocalMemorySize<65536>; + def FeatureR600 : SubtargetFeatureGeneration<"R600", - [FeatureR600ALUInst, FeatureFetchLimit8]>; + 
[FeatureR600ALUInst, FeatureFetchLimit8, FeatureLocalMemorySize0]>; def FeatureR700 : SubtargetFeatureGeneration<"R700", - [FeatureFetchLimit16]>; + [FeatureFetchLimit16, FeatureLocalMemorySize0]>; def FeatureEvergreen : SubtargetFeatureGeneration<"EVERGREEN", - [FeatureFetchLimit16]>; + [FeatureFetchLimit16, FeatureLocalMemorySize32768]>; def FeatureNorthernIslands : SubtargetFeatureGeneration<"NORTHERN_ISLANDS", - [FeatureFetchLimit16, FeatureWavefrontSize64]>; + [FeatureFetchLimit16, FeatureWavefrontSize64, + FeatureLocalMemorySize32768] +>; def FeatureSouthernIslands : SubtargetFeatureGeneration<"SOUTHERN_ISLANDS", - [Feature64BitPtr, FeatureFP64]>; + [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize32768]>; def FeatureSeaIslands : SubtargetFeatureGeneration<"SEA_ISLANDS", - [Feature64BitPtr, FeatureFP64]>; + [Feature64BitPtr, FeatureFP64, FeatureLocalMemorySize65536]>; //===----------------------------------------------------------------------===// def AMDGPUInstrInfo : InstrInfo { @@ -120,6 +126,10 @@ def AMDGPU : Target { let InstructionSet = AMDGPUInstrInfo; } +// Dummy Instruction itineraries for pseudo instructions +def ALU_NULL : FuncUnit; +def NullALU : InstrItinClass; + //===----------------------------------------------------------------------===// // Predicate helper class //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/AMDGPUAsmPrinter.cpp b/lib/Target/R600/AMDGPUAsmPrinter.cpp index 170f479..a6e217b 100644 --- a/lib/Target/R600/AMDGPUAsmPrinter.cpp +++ b/lib/Target/R600/AMDGPUAsmPrinter.cpp @@ -19,6 +19,7 @@ #include "AMDGPUAsmPrinter.h" #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600MachineFunctionInfo.h" #include "R600RegisterInfo.h" @@ -35,6 +36,24 @@ using namespace llvm; +// TODO: This should get the default rounding mode from the kernel. We just set +// the default here, but this could change if the OpenCL rounding mode pragmas +// are used. +// +// The denormal mode here should match what is reported by the OpenCL runtime +// for the CL_FP_DENORM bit from CL_DEVICE_{HALF|SINGLE|DOUBLE}_FP_CONFIG, but +// can also be overridden to flush with the -cl-denorms-are-zero compiler flag. +// +// AMD OpenCL only sets flush none and reports CL_FP_DENORM for double +// precision, and leaves single precision to flush all and does not report +// CL_FP_DENORM for CL_DEVICE_SINGLE_FP_CONFIG. Mesa's OpenCL currently reports +// CL_FP_DENORM for both.
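A schematic of the packing that getFPMode (defined next) performs, assuming the usual SIDefines.h encodings with the round mode in bits 3:0 and the denormal mode in bits 7:4 of the MODE register:

  // FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) -> bits 1:0 = 0
  // FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) -> bits 3:2 = 0
  // FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE)     -> bits 5:4 = 3
  // FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE)     -> bits 7:6 = 3
  // Under those assumptions the default FloatMode works out to 0xF0:
  // round-to-nearest everywhere, no denormal flushing.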
+static uint32_t getFPMode(MachineFunction &) { + return FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) | + FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_NEAREST) | + FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) | + FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE); +} static AsmPrinter *createAMDGPUAsmPrinterPass(TargetMachine &tm, MCStreamer &Streamer) { @@ -92,6 +111,10 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { false); OutStreamer.emitRawComment(" NumVgprs: " + Twine(KernelInfo.NumVGPR), false); + OutStreamer.emitRawComment(" FloatMode: " + Twine(KernelInfo.FloatMode), + false); + OutStreamer.emitRawComment(" IeeeMode: " + Twine(KernelInfo.IEEEMode), + false); } else { R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); OutStreamer.emitRawComment( @@ -279,16 +302,27 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, if (VCCUsed) MaxSGPR += 2; - ProgInfo.CodeLen = CodeSize; - ProgInfo.NumSGPR = MaxSGPR; ProgInfo.NumVGPR = MaxVGPR; + ProgInfo.NumSGPR = MaxSGPR; + + // Set the value to initialize FP_ROUND and FP_DENORM parts of the mode + // register. + ProgInfo.FloatMode = getFPMode(MF); + + // XXX: Not quite sure what this does, but sc seems to unset this. + ProgInfo.IEEEMode = 0; + + // Do not clamp NAN to 0. + ProgInfo.DX10Clamp = 0; + + ProgInfo.CodeLen = CodeSize; } void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF, const SIProgramInfo &KernelInfo) { const AMDGPUSubtarget &STM = TM.getSubtarget<AMDGPUSubtarget>(); - SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); + unsigned RsrcReg; switch (MFI->ShaderType) { default: // Fall through @@ -298,25 +332,41 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(MachineFunction &MF, case ShaderType::VERTEX: RsrcReg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; break; } - OutStreamer.EmitIntValue(RsrcReg, 4); - OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) | - S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4); - unsigned LDSAlignShift; if (STM.getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) { - // LDS is allocated in 64 dword blocks + // LDS is allocated in 64 dword blocks. LDSAlignShift = 8; } else { - // LDS is allocated in 128 dword blocks + // LDS is allocated in 128 dword blocks. 
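A quick worked example of the block rounding that the code just below completes (an LDSAlignShift of 8 means 256-byte blocks before Sea Islands; 9 means 512-byte blocks from Sea Islands on). The LDS size here is a made-up figure for illustration:

  // With MFI->LDSSize = 9000 bytes:
  //   pre-SEA_ISLANDS: RoundUpToAlignment(9000, 256) = 9216; 9216 >> 8 = 36 blocks
  //   SEA_ISLANDS+:    RoundUpToAlignment(9000, 512) = 9216; 9216 >> 9 = 18 blocks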
LDSAlignShift = 9;
}
+
unsigned LDSBlocks =
- RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
+ RoundUpToAlignment(MFI->LDSSize, 1 << LDSAlignShift) >> LDSAlignShift;
if (MFI->ShaderType == ShaderType::COMPUTE) {
+ OutStreamer.EmitIntValue(R_00B848_COMPUTE_PGM_RSRC1, 4);
+
+ const uint32_t ComputePGMRSrc1 =
+ S_00B848_VGPRS(KernelInfo.NumVGPR / 4) |
+ S_00B848_SGPRS(KernelInfo.NumSGPR / 8) |
+ S_00B848_PRIORITY(KernelInfo.Priority) |
+ S_00B848_FLOAT_MODE(KernelInfo.FloatMode) |
+ S_00B848_PRIV(KernelInfo.Priv) |
+ S_00B848_DX10_CLAMP(KernelInfo.DX10Clamp) |
+ S_00B848_DEBUG_MODE(KernelInfo.DebugMode) |
+ S_00B848_IEEE_MODE(KernelInfo.IEEEMode);
+
+ OutStreamer.EmitIntValue(ComputePGMRSrc1, 4);
+
OutStreamer.EmitIntValue(R_00B84C_COMPUTE_PGM_RSRC2, 4);
OutStreamer.EmitIntValue(S_00B84C_LDS_SIZE(LDSBlocks), 4);
+ } else {
+ OutStreamer.EmitIntValue(RsrcReg, 4);
+ OutStreamer.EmitIntValue(S_00B028_VGPRS(KernelInfo.NumVGPR / 4) |
+ S_00B028_SGPRS(KernelInfo.NumSGPR / 8), 4);
}
+
if (MFI->ShaderType == ShaderType::PIXEL) {
OutStreamer.EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
OutStreamer.EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(LDSBlocks), 4);
diff --git a/lib/Target/R600/AMDGPUAsmPrinter.h b/lib/Target/R600/AMDGPUAsmPrinter.h
index 71adc9a..c1acb6e 100644
--- a/lib/Target/R600/AMDGPUAsmPrinter.h
+++ b/lib/Target/R600/AMDGPUAsmPrinter.h
@@ -25,13 +25,28 @@ class AMDGPUAsmPrinter : public AsmPrinter {
private:
struct SIProgramInfo {
SIProgramInfo() :
- CodeLen(0),
+ NumVGPR(0),
NumSGPR(0),
- NumVGPR(0) {}
+ Priority(0),
+ FloatMode(0),
+ Priv(0),
+ DX10Clamp(0),
+ DebugMode(0),
+ IEEEMode(0),
+ CodeLen(0) {}
+
+ // Fields set in PGM_RSRC1 pm4 packet.
+ uint32_t NumVGPR;
+ uint32_t NumSGPR;
+ uint32_t Priority;
+ uint32_t FloatMode;
+ uint32_t Priv;
+ uint32_t DX10Clamp;
+ uint32_t DebugMode;
+ uint32_t IEEEMode;
+
+ // Bonus information for debugging.
uint64_t CodeLen;
- unsigned NumSGPR;
- unsigned NumVGPR;
};
void getSIProgramInfo(SIProgramInfo &Out, MachineFunction &MF) const;
diff --git a/lib/Target/R600/AMDGPUConvertToISA.cpp b/lib/Target/R600/AMDGPUConvertToISA.cpp
deleted file mode 100644
index 91aeee2..0000000
--- a/lib/Target/R600/AMDGPUConvertToISA.cpp
+++ /dev/null
@@ -1,62 +0,0 @@
-//===-- AMDGPUConvertToISA.cpp - Lower AMDIL to HW ISA --------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-/// \file
-/// \brief This pass lowers AMDIL machine instructions to the appropriate
-/// hardware instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#include "AMDGPU.h"
-#include "AMDGPUInstrInfo.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-
-using namespace llvm;
-
-namespace {
-
-class AMDGPUConvertToISAPass : public MachineFunctionPass {
-
-private:
- static char ID;
- TargetMachine &TM;
-
-public:
- AMDGPUConvertToISAPass(TargetMachine &tm) :
- MachineFunctionPass(ID), TM(tm) { }
-
- bool runOnMachineFunction(MachineFunction &MF) override;
-
- const char *getPassName() const override {return "AMDGPU Convert to ISA";}
-
-};
-
-} // End anonymous namespace
-
-char AMDGPUConvertToISAPass::ID = 0;
-
-FunctionPass *llvm::createAMDGPUConvertToISAPass(TargetMachine &tm) {
- return new AMDGPUConvertToISAPass(tm);
-}
-
-bool AMDGPUConvertToISAPass::runOnMachineFunction(MachineFunction &MF) {
- const AMDGPUInstrInfo * TII =
- static_cast<const AMDGPUInstrInfo*>(TM.getInstrInfo());
-
- for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
- BB != BB_E; ++BB) {
- MachineBasicBlock &MBB = *BB;
- for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
- I != E; ++I) {
- MachineInstr &MI = *I;
- TII->convertToISA(MI, MF, MBB.findDebugLoc(I));
- }
- }
- return false;
-}
diff --git a/lib/Target/R600/AMDGPUFrameLowering.cpp b/lib/Target/R600/AMDGPUFrameLowering.cpp
index e7e90d3..9e8302e 100644
--- a/lib/Target/R600/AMDGPUFrameLowering.cpp
+++ b/lib/Target/R600/AMDGPUFrameLowering.cpp
@@ -83,7 +83,7 @@ int AMDGPUFrameLowering::getFrameIndexOffset(const MachineFunction &MF,
for (int i = MFI->getObjectIndexBegin(); i < UpperBound; ++i) {
OffsetBytes = RoundUpToAlignment(OffsetBytes, MFI->getObjectAlignment(i));
OffsetBytes += MFI->getObjectSize(i);
- // Each regiter holds 4 bytes, so we must always align the offset to at
+ // Each register holds 4 bytes, so we must always align the offset to at
// least 4 bytes, so that 2 frame objects won't share the same register.
OffsetBytes = RoundUpToAlignment(OffsetBytes, 4);
}
diff --git a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
index f1f0bfa..b4d79e5 100644
--- a/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
+++ b/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
@@ -14,6 +14,7 @@
#include "AMDGPUInstrInfo.h"
#include "AMDGPUISelLowering.h" // For AMDGPUISD
#include "AMDGPURegisterInfo.h"
+#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "SIISelLowering.h"
#include "llvm/CodeGen/FunctionLoweringInfo.h"
@@ -83,6 +84,11 @@ private:
SDValue& Offset);
bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);
+ bool SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr, SDValue &Offset,
+ SDValue &ImmOffset) const;
+
+ SDNode *SelectADD_SUB_I64(SDNode *N);
+ SDNode *SelectDIV_SCALE(SDNode *N);
// Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
@@ -211,51 +217,16 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) {
// We are selecting i64 ADD here instead of custom lowering it during
// DAG legalization, so we can fold some i64 ADDs used for address
// calculation into the LOAD and STORE instructions.
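The fold relies on the selector splitting a 64-bit add into two 32-bit halves linked by a carry, which SelectADD_SUB_I64 further below now does for SUB as well. A minimal plain-integer sketch of that decomposition (illustration of the arithmetic only, not the SelectionDAG API):

    #include <cstdint>

    // 64-bit add built from two 32-bit adds plus a carry, mirroring the
    // lo/hi EXTRACT_SUBREG, add / add-with-carry, REG_SEQUENCE pattern.
    static uint64_t add64ViaHalves(uint64_t A, uint64_t B) {
      uint32_t Lo = uint32_t(A) + uint32_t(B);
      uint32_t Carry = Lo < uint32_t(A) ? 1 : 0;  // carry out of the low add
      uint32_t Hi = uint32_t(A >> 32) + uint32_t(B >> 32) + Carry;
      return (uint64_t(Hi) << 32) | Lo;           // reassemble the halves
    }

Subtraction works the same way, with a borrow in place of the carry.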
- case ISD::ADD: { + case ISD::ADD: + case ISD::SUB: { if (N->getValueType(0) != MVT::i64 || ST.getGeneration() < AMDGPUSubtarget::SOUTHERN_ISLANDS) break; - SDLoc DL(N); - SDValue LHS = N->getOperand(0); - SDValue RHS = N->getOperand(1); - - SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); - SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); - - SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - DL, MVT::i32, LHS, Sub0); - SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - DL, MVT::i32, LHS, Sub1); - - SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - DL, MVT::i32, RHS, Sub0); - SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - DL, MVT::i32, RHS, Sub1); - - SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); - - SmallVector<SDValue, 8> AddLoArgs; - AddLoArgs.push_back(SDValue(Lo0, 0)); - AddLoArgs.push_back(SDValue(Lo1, 0)); - - SDNode *AddLo = CurDAG->getMachineNode( - isCFDepth0() ? AMDGPU::S_ADD_I32 : AMDGPU::V_ADD_I32_e32, - DL, VTList, AddLoArgs); - SDValue Carry = SDValue(AddLo, 1); - SDNode *AddHi = CurDAG->getMachineNode( - isCFDepth0() ? AMDGPU::S_ADDC_U32 : AMDGPU::V_ADDC_U32_e32, - DL, MVT::i32, SDValue(Hi0, 0), SDValue(Hi1, 0), Carry); - - SDValue Args[5] = { - CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32), - SDValue(AddLo,0), - Sub0, - SDValue(AddHi,0), - Sub1, - }; - return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args); + return SelectADD_SUB_I64(N); } + case ISD::SCALAR_TO_VECTOR: + case AMDGPUISD::BUILD_VERTICAL_VECTOR: case ISD::BUILD_VECTOR: { unsigned RegClassID; const AMDGPURegisterInfo *TRI = @@ -264,7 +235,8 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { static_cast<const SIRegisterInfo*>(TM.getRegisterInfo()); EVT VT = N->getValueType(0); unsigned NumVectorElts = VT.getVectorNumElements(); - assert(VT.getVectorElementType().bitsEq(MVT::i32)); + EVT EltVT = VT.getVectorElementType(); + assert(EltVT.bitsEq(MVT::i32)); if (ST.getGeneration() >= AMDGPUSubtarget::SOUTHERN_ISLANDS) { bool UseVReg = true; for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); @@ -305,7 +277,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { // can't be bundled by our scheduler. 
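// For orientation before the register-class selection below: a REG_SEQUENCE
// node takes a register-class ID followed by one (value, sub-register index)
// pair per lane, which is where the "2 operands per element" accounting
// further down comes from. A hypothetical two-lane build (RC, Elt0 and Elt1
// are placeholders) would be laid out as:
//
//   SDValue Args[] = {
//     CurDAG->getTargetConstant(RC, MVT::i32), // register class of result
//     Elt0, CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32),
//     Elt1, CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32),
//   };
//   CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::v2i32, Args);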
switch(NumVectorElts) { case 2: RegClassID = AMDGPU::R600_Reg64RegClassID; break; - case 4: RegClassID = AMDGPU::R600_Reg128RegClassID; break; + case 4: + if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) + RegClassID = AMDGPU::R600_Reg128VerticalRegClassID; + else + RegClassID = AMDGPU::R600_Reg128RegClassID; + break; default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR"); } } @@ -313,8 +290,7 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { SDValue RegClass = CurDAG->getTargetConstant(RegClassID, MVT::i32); if (NumVectorElts == 1) { - return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, - VT.getVectorElementType(), + return CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0), RegClass); } @@ -323,11 +299,12 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { // 16 = Max Num Vector Elements // 2 = 2 REG_SEQUENCE operands per element (value, subreg index) // 1 = Vector Register Class - SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(N->getNumOperands() * 2 + 1); + SmallVector<SDValue, 16 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1); RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, MVT::i32); bool IsRegSeq = true; - for (unsigned i = 0; i < N->getNumOperands(); i++) { + unsigned NOps = N->getNumOperands(); + for (unsigned i = 0; i < NOps; i++) { // XXX: Why is this here? if (dyn_cast<RegisterSDNode>(N->getOperand(i))) { IsRegSeq = false; @@ -337,6 +314,20 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32); } + + if (NOps != NumVectorElts) { + // Fill in the missing undef elements if this was a scalar_to_vector. + assert(Opc == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts); + + MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, + SDLoc(N), EltVT); + for (unsigned i = NOps; i < NumVectorElts; ++i) { + RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0); + RegSeqArgs[1 + (2 * i) + 1] = + CurDAG->getTargetConstant(TRI->getSubRegFromChannel(i), MVT::i32); + } + } + if (!IsRegSeq) break; return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), @@ -466,6 +457,9 @@ SDNode *AMDGPUDAGToDAGISel::Select(SDNode *N) { PackedOffsetWidth); } + case AMDGPUISD::DIV_SCALE: { + return SelectDIV_SCALE(N); + } } return SelectCode(N); } @@ -659,6 +653,129 @@ bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, return true; } +SDNode *AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) { + SDLoc DL(N); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + + bool IsAdd = (N->getOpcode() == ISD::ADD); + + SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, MVT::i32); + SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, MVT::i32); + + SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + DL, MVT::i32, LHS, Sub0); + SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + DL, MVT::i32, LHS, Sub1); + + SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + DL, MVT::i32, RHS, Sub0); + SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, + DL, MVT::i32, RHS, Sub1); + + SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue); + SDValue AddLoArgs[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) }; + + + unsigned Opc = IsAdd ? AMDGPU::S_ADD_I32 : AMDGPU::S_SUB_I32; + unsigned CarryOpc = IsAdd ? AMDGPU::S_ADDC_U32 : AMDGPU::S_SUBB_U32; + + if (!isCFDepth0()) { + Opc = IsAdd ? AMDGPU::V_ADD_I32_e32 : AMDGPU::V_SUB_I32_e32; + CarryOpc = IsAdd ? 
AMDGPU::V_ADDC_U32_e32 : AMDGPU::V_SUBB_U32_e32;
+ }
+
+ SDNode *AddLo = CurDAG->getMachineNode(Opc, DL, VTList, AddLoArgs);
+ SDValue Carry(AddLo, 1);
+ SDNode *AddHi
+ = CurDAG->getMachineNode(CarryOpc, DL, MVT::i32,
+ SDValue(Hi0, 0), SDValue(Hi1, 0), Carry);
+
+ SDValue Args[5] = {
+ CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, MVT::i32),
+ SDValue(AddLo,0),
+ Sub0,
+ SDValue(AddHi,0),
+ Sub1,
+ };
+ return CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, MVT::i64, Args);
+}
+
+SDNode *AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
+ SDLoc SL(N);
+ EVT VT = N->getValueType(0);
+
+ assert(VT == MVT::f32 || VT == MVT::f64);
+
+ unsigned Opc
+ = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64 : AMDGPU::V_DIV_SCALE_F32;
+
+ const SDValue Zero = CurDAG->getTargetConstant(0, MVT::i32);
+
+ SDValue Ops[] = {
+ N->getOperand(0),
+ N->getOperand(1),
+ N->getOperand(2),
+ Zero,
+ Zero,
+ Zero,
+ Zero
+ };
+
+ return CurDAG->SelectNodeTo(N, Opc, VT, MVT::i1, Ops);
+}
+
+static SDValue wrapAddr64Rsrc(SelectionDAG *DAG, SDLoc DL, SDValue Ptr) {
+ return SDValue(DAG->getMachineNode(AMDGPU::SI_ADDR64_RSRC, DL, MVT::v4i32,
+ Ptr), 0);
+}
+
+bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &Ptr,
+ SDValue &Offset,
+ SDValue &ImmOffset) const {
+ SDLoc DL(Addr);
+
+ if (CurDAG->isBaseWithConstantOffset(Addr)) {
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
+
+ if (isUInt<12>(C1->getZExtValue())) {
+
+ if (N0.getOpcode() == ISD::ADD) {
+ // (add (add N2, N3), C1)
+ SDValue N2 = N0.getOperand(0);
+ SDValue N3 = N0.getOperand(1);
+ Ptr = wrapAddr64Rsrc(CurDAG, DL, N2);
+ Offset = N3;
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ return true;
+ }
+
+ // (add N0, C1)
+ Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getTargetConstant(0, MVT::i64));
+ Offset = N0;
+ ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), MVT::i16);
+ return true;
+ }
+ }
+ if (Addr.getOpcode() == ISD::ADD) {
+ // (add N0, N1)
+ SDValue N0 = Addr.getOperand(0);
+ SDValue N1 = Addr.getOperand(1);
+ Ptr = wrapAddr64Rsrc(CurDAG, DL, N0);
+ Offset = N1;
+ ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
+ return true;
+ }
+
+ // Default case: the whole address is the variable offset.
+ Ptr = wrapAddr64Rsrc(CurDAG, DL, CurDAG->getConstant(0, MVT::i64));
+ Offset = Addr;
+ ImmOffset = CurDAG->getTargetConstant(0, MVT::i16);
+ return true;
+}
+
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
const AMDGPUTargetLowering& Lowering =
*static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
diff --git a/lib/Target/R600/AMDGPUISelLowering.cpp b/lib/Target/R600/AMDGPUISelLowering.cpp
index 6c443ea..0ada7a3 100644
--- a/lib/Target/R600/AMDGPUISelLowering.cpp
+++ b/lib/Target/R600/AMDGPUISelLowering.cpp
@@ -16,9 +16,9 @@
#include "AMDGPUISelLowering.h"
#include "AMDGPU.h"
#include "AMDGPUFrameLowering.h"
+#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUSubtarget.h"
-#include "AMDILIntrinsicInfo.h"
#include "R600MachineFunctionInfo.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
@@ -84,13 +84,37 @@ static bool allocateStack(unsigned ValNo, MVT ValVT, MVT LocVT,
#include "AMDGPUGenCallingConv.inc"
+// Find a larger type to do a load / store of a vector with.
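// In other words, only the store size matters. For illustration (examples
// derived from the rule implemented below, not an exhaustive list):
//   v4i8  (32 bits)  -> i32
//   v2i16 (32 bits)  -> i32
//   v4f32 (128 bits) -> v4i32
//   v2i64 (128 bits) -> v4i32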
+EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) {
+ unsigned StoreSize = VT.getStoreSizeInBits();
+ if (StoreSize <= 32)
+ return EVT::getIntegerVT(Ctx, StoreSize);
+
+ assert(StoreSize % 32 == 0 && "Store size not a multiple of 32");
+ return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
+}
+
+// Type for the register a vector of the given type will be loaded into.
+EVT AMDGPUTargetLowering::getEquivalentLoadRegType(LLVMContext &Ctx, EVT VT) {
+ unsigned StoreSize = VT.getStoreSizeInBits();
+ if (StoreSize <= 32)
+ return EVT::getIntegerVT(Ctx, 32);
+
+ return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32);
+}
+
AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
TargetLowering(TM, new TargetLoweringObjectFileELF()) {
Subtarget = &TM.getSubtarget<AMDGPUSubtarget>();
- // Initialize target lowering borrowed from AMDIL
- InitAMDILLowering();
+ setOperationAction(ISD::Constant, MVT::i32, Legal);
+ setOperationAction(ISD::Constant, MVT::i64, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
+
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRIND, MVT::Other, Expand);
// We need to custom lower some of the intrinsics
setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
@@ -107,9 +131,6 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::FROUND, MVT::f32, Legal);
setOperationAction(ISD::FTRUNC, MVT::f32, Legal);
- // The hardware supports ROTR, but not ROTL
- setOperationAction(ISD::ROTL, MVT::i32, Expand);
-
// Lower floating point store/load to integer store/load to reduce the number
// of patterns in tablegen.
setOperationAction(ISD::STORE, MVT::f32, Promote);
@@ -118,6 +139,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::STORE, MVT::v2f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32);
+ setOperationAction(ISD::STORE, MVT::i64, Promote);
+ AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32);
+
setOperationAction(ISD::STORE, MVT::v4f32, Promote);
AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32);
@@ -161,6 +185,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::LOAD, MVT::v2f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32);
+ setOperationAction(ISD::LOAD, MVT::i64, Promote);
+ AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32);
+
setOperationAction(ISD::LOAD, MVT::v4f32, Promote);
AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32);
@@ -202,29 +229,63 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
setOperationAction(ISD::BR_CC, MVT::i1, Expand);
- setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ if (Subtarget->getGeneration() < AMDGPUSubtarget::SEA_ISLANDS) {
+ setOperationAction(ISD::FCEIL, MVT::f64, Custom);
+ setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
+ setOperationAction(ISD::FRINT, MVT::f64, Custom);
+ setOperationAction(ISD::FFLOOR, MVT::f64, Custom);
+ }
- setOperationAction(ISD::FNEG, MVT::v2f32, Expand);
- setOperationAction(ISD::FNEG, MVT::v4f32, Expand);
+ if (!Subtarget->hasBFI()) {
+ // fcopysign can be done in a single instruction with BFI.
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand); + setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); + } - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; + for (MVT VT : ScalarIntVTs) { + setOperationAction(ISD::SREM, VT, Expand); + setOperationAction(ISD::SDIV, VT, Expand); - setOperationAction(ISD::MUL, MVT::i64, Expand); - setOperationAction(ISD::SUB, MVT::i64, Expand); + // GPU does not have divrem function for signed or unsigned. + setOperationAction(ISD::SDIVREM, VT, Custom); + setOperationAction(ISD::UDIVREM, VT, Custom); + + // GPU does not have [S|U]MUL_LOHI functions as a single instruction. + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + setOperationAction(ISD::BSWAP, VT, Expand); + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTLZ, VT, Expand); + } + + if (!Subtarget->hasBCNT(32)) + setOperationAction(ISD::CTPOP, MVT::i32, Expand); + + if (!Subtarget->hasBCNT(64)) + setOperationAction(ISD::CTPOP, MVT::i64, Expand); + + // The hardware supports 32-bit ROTR, but not ROTL. + setOperationAction(ISD::ROTL, MVT::i32, Expand); + setOperationAction(ISD::ROTL, MVT::i64, Expand); + setOperationAction(ISD::ROTR, MVT::i64, Expand); + + setOperationAction(ISD::FP_TO_SINT, MVT::i64, Expand); + setOperationAction(ISD::MUL, MVT::i64, Expand); + setOperationAction(ISD::MULHU, MVT::i64, Expand); + setOperationAction(ISD::MULHS, MVT::i64, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Custom); - setOperationAction(ISD::UDIVREM, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::VSELECT, MVT::v2f32, Expand); - setOperationAction(ISD::VSELECT, MVT::v4f32, Expand); + setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); - static const MVT::SimpleValueType IntTypes[] = { + static const MVT::SimpleValueType VectorIntTypes[] = { MVT::v2i32, MVT::v4i32 }; - for (MVT VT : IntTypes) { - //Expand the following operations for the current type by default + for (MVT VT : VectorIntTypes) { + // Expand the following operations for the current type by default. setOperationAction(ISD::ADD, VT, Expand); setOperationAction(ISD::AND, VT, Expand); setOperationAction(ISD::FP_TO_SINT, VT, Expand); @@ -232,40 +293,93 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) : setOperationAction(ISD::MUL, VT, Expand); setOperationAction(ISD::OR, VT, Expand); setOperationAction(ISD::SHL, VT, Expand); - setOperationAction(ISD::SINT_TO_FP, VT, Expand); - setOperationAction(ISD::SRL, VT, Expand); setOperationAction(ISD::SRA, VT, Expand); + setOperationAction(ISD::SRL, VT, Expand); + setOperationAction(ISD::ROTL, VT, Expand); + setOperationAction(ISD::ROTR, VT, Expand); setOperationAction(ISD::SUB, VT, Expand); - setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::SINT_TO_FP, VT, Expand); setOperationAction(ISD::UINT_TO_FP, VT, Expand); + // TODO: Implement custom UREM / SREM routines. 
+ setOperationAction(ISD::SDIV, VT, Expand); + setOperationAction(ISD::UDIV, VT, Expand); + setOperationAction(ISD::SREM, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); + setOperationAction(ISD::SMUL_LOHI, VT, Expand); + setOperationAction(ISD::UMUL_LOHI, VT, Expand); + setOperationAction(ISD::SDIVREM, VT, Custom); + setOperationAction(ISD::UDIVREM, VT, Custom); + setOperationAction(ISD::ADDC, VT, Expand); + setOperationAction(ISD::SUBC, VT, Expand); + setOperationAction(ISD::ADDE, VT, Expand); + setOperationAction(ISD::SUBE, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); setOperationAction(ISD::XOR, VT, Expand); + setOperationAction(ISD::BSWAP, VT, Expand); + setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::CTLZ, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); } - static const MVT::SimpleValueType FloatTypes[] = { + static const MVT::SimpleValueType FloatVectorTypes[] = { MVT::v2f32, MVT::v4f32 }; - for (MVT VT : FloatTypes) { + for (MVT VT : FloatVectorTypes) { setOperationAction(ISD::FABS, VT, Expand); setOperationAction(ISD::FADD, VT, Expand); + setOperationAction(ISD::FCEIL, VT, Expand); setOperationAction(ISD::FCOS, VT, Expand); setOperationAction(ISD::FDIV, VT, Expand); + setOperationAction(ISD::FEXP2, VT, Expand); + setOperationAction(ISD::FLOG2, VT, Expand); setOperationAction(ISD::FPOW, VT, Expand); setOperationAction(ISD::FFLOOR, VT, Expand); setOperationAction(ISD::FTRUNC, VT, Expand); setOperationAction(ISD::FMUL, VT, Expand); + setOperationAction(ISD::FMA, VT, Expand); setOperationAction(ISD::FRINT, VT, Expand); + setOperationAction(ISD::FNEARBYINT, VT, Expand); setOperationAction(ISD::FSQRT, VT, Expand); setOperationAction(ISD::FSIN, VT, Expand); setOperationAction(ISD::FSUB, VT, Expand); + setOperationAction(ISD::FNEG, VT, Expand); setOperationAction(ISD::SELECT, VT, Expand); + setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); + setOperationAction(ISD::FCOPYSIGN, VT, Expand); + setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); } + setOperationAction(ISD::FNEARBYINT, MVT::f32, Custom); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Custom); + setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::SELECT_CC); + + setSchedulingPreference(Sched::RegPressure); + setJumpIsExpensive(true); + + setSelectIsExpensive(false); + PredictableSelectIsExpensive = false; + + // There are no integer divide instructions, and these expand to a pretty + // large sequence of instructions. + setIntDivIsCheap(false); + setPow2DivIsCheap(false); + + // TODO: Investigate this when 64-bit divides are implemented. + addBypassSlowDiv(64, 32); + + // FIXME: Need to really handle these. + MaxStoresPerMemcpy = 4096; + MaxStoresPerMemmove = 4096; + MaxStoresPerMemset = 4096; } //===----------------------------------------------------------------------===// @@ -276,6 +390,23 @@ MVT AMDGPUTargetLowering::getVectorIdxTy() const { return MVT::i32; } +bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const { + return true; +} + +// The backend supports 32 and 64 bit floating point immediates. +// FIXME: Why are we reporting vectors of FP immediates as legal? 
+bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ EVT ScalarVT = VT.getScalarType();
+ return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64);
+}
+
+// We don't want to shrink f64 / f32 constants.
+bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const {
+ EVT ScalarVT = VT.getScalarType();
+ return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64);
+}
+
bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
EVT CastTy) const {
if (LoadTy.getSizeInBits() != CastTy.getSizeInBits())
@@ -330,6 +461,10 @@ bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const {
return Src == MVT::i32 && Dest == MVT::i64;
}
+bool AMDGPUTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
+ return isZExtFree(Val.getValueType(), VT2);
+}
+
bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const {
// There aren't really 64-bit registers, but pairs of 32-bit ones and only a
// limited number of native 64-bit operations. Shrinking an operation to fit
@@ -383,25 +518,28 @@ SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI,
return SDValue();
}
-SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG)
- const {
+SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
+ SelectionDAG &DAG) const {
switch (Op.getOpcode()) {
default:
Op.getNode()->dump();
llvm_unreachable("Custom lowering code for this "
"instruction is not implemented yet!");
break;
- // AMDIL DAG lowering
- case ISD::SDIV: return LowerSDIV(Op, DAG);
- case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG);
- case ISD::BRCOND: return LowerBRCOND(Op, DAG);
- // AMDGPU DAG lowering
case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG);
case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG);
case ISD::FrameIndex: return LowerFrameIndex(Op, DAG);
case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
+ case ISD::SDIV: return LowerSDIV(Op, DAG);
+ case ISD::SREM: return LowerSREM(Op, DAG);
case ISD::UDIVREM: return LowerUDIVREM(Op, DAG);
+ case ISD::SDIVREM: return LowerSDIVREM(Op, DAG);
+ case ISD::FCEIL: return LowerFCEIL(Op, DAG);
+ case ISD::FTRUNC: return LowerFTRUNC(Op, DAG);
+ case ISD::FRINT: return LowerFRINT(Op, DAG);
+ case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG);
+ case ISD::FFLOOR: return LowerFFLOOR(Op, DAG);
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
}
return Op;
}
@@ -419,95 +557,23 @@ void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N,
// ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do
// nothing here and let the illegal result integer be handled normally.
return; - case ISD::UDIV: { - SDValue Op = SDValue(N, 0); - SDLoc DL(Op); - EVT VT = Op.getValueType(); - SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), - N->getOperand(0), N->getOperand(1)); - Results.push_back(UDIVREM); - break; - } - case ISD::UREM: { - SDValue Op = SDValue(N, 0); - SDLoc DL(Op); - EVT VT = Op.getValueType(); - SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), - N->getOperand(0), N->getOperand(1)); - Results.push_back(UDIVREM.getValue(1)); - break; - } - case ISD::UDIVREM: { - SDValue Op = SDValue(N, 0); - SDLoc DL(Op); - EVT VT = Op.getValueType(); - EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); - - SDValue one = DAG.getConstant(1, HalfVT); - SDValue zero = DAG.getConstant(0, HalfVT); - - //HiLo split - SDValue LHS = N->getOperand(0); - SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero); - SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one); - - SDValue RHS = N->getOperand(1); - SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero); - SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one); - - // Get Speculative values - SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo); - SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo); - - SDValue REM_Hi = zero; - SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ); - - SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ); - SDValue DIV_Lo = zero; - - const unsigned halfBitWidth = HalfVT.getSizeInBits(); - - for (unsigned i = 0; i < halfBitWidth; ++i) { - SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT); - // Get Value of high bit - SDValue HBit; - if (halfBitWidth == 32 && Subtarget->hasBFE()) { - HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one); - } else { - HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); - HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); - } - - SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo, - DAG.getConstant(halfBitWidth - 1, HalfVT)); - REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one); - REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry); - - REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one); - REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit); - - - SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi); - - SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT); - SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE); - - DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT); - - // Update REM - - SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS); - - REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE); - REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero); - REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one); - } + case ISD::LOAD: { + SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); + if (!Node) + return; - SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi); - SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi); - Results.push_back(DIV); - Results.push_back(REM); - break; + Results.push_back(SDValue(Node, 0)); + Results.push_back(SDValue(Node, 1)); + // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode + // function + DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1)); + return; + } + case ISD::STORE: { + SDValue Lowered = 
LowerSTORE(SDValue(N, 0), DAG); + if (Lowered.getNode()) + Results.push_back(Lowered); + return; } default: return; @@ -531,12 +597,14 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, SelectionDAG &DAG) const { const DataLayout *TD = getTargetMachine().getDataLayout(); SDLoc DL(InitPtr); + Type *InitTy = Init->getType(); + if (const ConstantInt *CI = dyn_cast<ConstantInt>(Init)) { - EVT VT = EVT::getEVT(CI->getType()); - PointerType *PtrTy = PointerType::get(CI->getType(), 0); - return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr, - MachinePointerInfo(UndefValue::get(PtrTy)), false, false, - TD->getPrefTypeAlignment(CI->getType())); + EVT VT = EVT::getEVT(InitTy); + PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS); + return DAG.getStore(Chain, DL, DAG.getConstant(*CI, VT), InitPtr, + MachinePointerInfo(UndefValue::get(PtrTy)), false, false, + TD->getPrefTypeAlignment(InitTy)); } if (const ConstantFP *CFP = dyn_cast<ConstantFP>(Init)) { @@ -547,7 +615,6 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, TD->getPrefTypeAlignment(CFP->getType())); } - Type *InitTy = Init->getType(); if (StructType *ST = dyn_cast<StructType>(InitTy)) { const StructLayout *SL = TD->getStructLayout(ST); @@ -589,6 +656,14 @@ SDValue AMDGPUTargetLowering::LowerConstantInitializer(const Constant* Init, return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); } + if (isa<UndefValue>(Init)) { + EVT VT = EVT::getEVT(InitTy); + PointerType *PtrTy = PointerType::get(InitTy, AMDGPUAS::PRIVATE_ADDRESS); + return DAG.getStore(Chain, DL, DAG.getUNDEF(VT), InitPtr, + MachinePointerInfo(UndefValue::get(PtrTy)), false, false, + TD->getPrefTypeAlignment(InitTy)); + } + Init->dump(); llvm_unreachable("Unhandled constant initializer"); } @@ -628,11 +703,19 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, unsigned Size = TD->getTypeAllocSize(EltType); unsigned Alignment = TD->getPrefTypeAlignment(EltType); + MVT PrivPtrVT = getPointerTy(AMDGPUAS::PRIVATE_ADDRESS); + MVT ConstPtrVT = getPointerTy(AMDGPUAS::CONSTANT_ADDRESS); + + int FI = FrameInfo->CreateStackObject(Size, Alignment, false); + SDValue InitPtr = DAG.getFrameIndex(FI, PrivPtrVT); + const GlobalVariable *Var = cast<GlobalVariable>(GV); + if (!Var->hasInitializer()) { + // This has no use, but bugpoint will hit it. 
+ return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
+ }
+
const Constant *Init = Var->getInitializer();
- int FI = FrameInfo->CreateStackObject(Size, Alignment, false);
- SDValue InitPtr = DAG.getFrameIndex(FI,
- getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
SmallVector<SDNode*, 8> WorkList;
for (SDNode::use_iterator I = DAG.getEntryNode()->use_begin(),
@@ -651,8 +734,7 @@ SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI,
}
DAG.UpdateNodeOperands(*I, Ops);
}
- return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op),
- getPointerTy(AMDGPUAS::CONSTANT_ADDRESS));
+ return DAG.getZExtOrTrunc(InitPtr, SDLoc(Op), ConstPtrVT);
}
}
}
@@ -688,8 +770,7 @@ SDValue AMDGPUTargetLowering::LowerFrameIndex(SDValue Op,
const AMDGPUFrameLowering *TFL =
static_cast<const AMDGPUFrameLowering*>(getTargetMachine().getFrameLowering());
- FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Op);
- assert(FIN);
+ FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
unsigned FrameIndex = FIN->getIndex();
unsigned Offset = TFL->getFrameIndexOffset(MF, FrameIndex);
@@ -705,26 +786,66 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntrinsicID) {
default: return Op;
- case AMDGPUIntrinsic::AMDIL_abs:
+ case AMDGPUIntrinsic::AMDGPU_abs:
+ case AMDGPUIntrinsic::AMDIL_abs: // Legacy name.
return LowerIntrinsicIABS(Op, DAG);
- case AMDGPUIntrinsic::AMDIL_exp:
- return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1));
case AMDGPUIntrinsic::AMDGPU_lrp:
return LowerIntrinsicLRP(Op, DAG);
- case AMDGPUIntrinsic::AMDIL_fraction:
+ case AMDGPUIntrinsic::AMDGPU_fract:
+ case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
- case AMDGPUIntrinsic::AMDIL_max:
- return DAG.getNode(AMDGPUISD::FMAX, DL, VT, Op.getOperand(1),
- Op.getOperand(2));
+
+ case AMDGPUIntrinsic::AMDGPU_clamp:
+ case AMDGPUIntrinsic::AMDIL_clamp: // Legacy name.
+ return DAG.getNode(AMDGPUISD::CLAMP, DL, VT,
+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
+
+ case Intrinsic::AMDGPU_div_scale: {
+ // The third parameter is required to be a constant.
+ const ConstantSDNode *Param = dyn_cast<ConstantSDNode>(Op.getOperand(3));
+ if (!Param)
+ return DAG.getUNDEF(VT);
+
+ // Translate to the operands expected by the machine instruction. The
+ // constant selects whether the first source operand is the numerator or
+ // the denominator.
+ SDValue Numerator = Op.getOperand(1);
+ SDValue Denominator = Op.getOperand(2);
+ SDValue Src0 = Param->isAllOnesValue() ?
Numerator : Denominator; + + return DAG.getNode(AMDGPUISD::DIV_SCALE, DL, VT, + Src0, Denominator, Numerator); + } + + case Intrinsic::AMDGPU_div_fmas: + return DAG.getNode(AMDGPUISD::DIV_FMAS, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::AMDGPU_div_fixup: + return DAG.getNode(AMDGPUISD::DIV_FIXUP, DL, VT, + Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + + case Intrinsic::AMDGPU_trig_preop: + return DAG.getNode(AMDGPUISD::TRIG_PREOP, DL, VT, + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::AMDGPU_rcp: + return DAG.getNode(AMDGPUISD::RCP, DL, VT, Op.getOperand(1)); + + case Intrinsic::AMDGPU_rsq: + return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1)); + + case AMDGPUIntrinsic::AMDGPU_legacy_rsq: + return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1)); + + case Intrinsic::AMDGPU_rsq_clamped: + return DAG.getNode(AMDGPUISD::RSQ_CLAMPED, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDGPU_imax: return DAG.getNode(AMDGPUISD::SMAX, DL, VT, Op.getOperand(1), Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_umax: return DAG.getNode(AMDGPUISD::UMAX, DL, VT, Op.getOperand(1), Op.getOperand(2)); - case AMDGPUIntrinsic::AMDIL_min: - return DAG.getNode(AMDGPUISD::FMIN, DL, VT, Op.getOperand(1), - Op.getOperand(2)); case AMDGPUIntrinsic::AMDGPU_imin: return DAG.getNode(AMDGPUISD::SMIN, DL, VT, Op.getOperand(1), Op.getOperand(2)); @@ -748,6 +869,18 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, return DAG.getNode(AMDGPUISD::MAD_I24, DL, VT, Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); + case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte0: + return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Op.getOperand(1)); + + case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte1: + return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE1, DL, VT, Op.getOperand(1)); + + case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte2: + return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE2, DL, VT, Op.getOperand(1)); + + case AMDGPUIntrinsic::AMDGPU_cvt_f32_ubyte3: + return DAG.getNode(AMDGPUISD::CVT_F32_UBYTE3, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDGPU_bfe_i32: return DAG.getNode(AMDGPUISD::BFE_I32, DL, VT, Op.getOperand(1), @@ -771,8 +904,16 @@ SDValue AMDGPUTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, Op.getOperand(1), Op.getOperand(2)); - case AMDGPUIntrinsic::AMDIL_round_nearest: + case AMDGPUIntrinsic::AMDGPU_brev: + return DAG.getNode(AMDGPUISD::BREV, DL, VT, Op.getOperand(1)); + + case AMDGPUIntrinsic::AMDIL_exp: // Legacy name. + return DAG.getNode(ISD::FEXP2, DL, VT, Op.getOperand(1)); + + case AMDGPUIntrinsic::AMDIL_round_nearest: // Legacy name. return DAG.getNode(ISD::FRINT, DL, VT, Op.getOperand(1)); + case AMDGPUIntrinsic::AMDGPU_trunc: // Legacy name. 
+ return DAG.getNode(ISD::FTRUNC, DL, VT, Op.getOperand(1));
}
}
@@ -863,27 +1004,41 @@ SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue &Op,
SelectionDAG &DAG) const {
LoadSDNode *Load = cast<LoadSDNode>(Op);
EVT MemEltVT = Load->getMemoryVT().getVectorElementType();
+ EVT LoadVT = Op.getValueType();
EVT EltVT = Op.getValueType().getVectorElementType();
EVT PtrVT = Load->getBasePtr().getValueType();
+
unsigned NumElts = Load->getMemoryVT().getVectorNumElements();
SmallVector<SDValue, 8> Loads;
+ SmallVector<SDValue, 8> Chains;
+
SDLoc SL(Op);
for (unsigned i = 0, e = NumElts; i != e; ++i) {
SDValue Ptr = DAG.getNode(ISD::ADD, SL, PtrVT, Load->getBasePtr(),
DAG.getConstant(i * (MemEltVT.getSizeInBits() / 8), PtrVT));
-
Loads.push_back(DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
- Load->getChain(), Ptr,
- MachinePointerInfo(Load->getMemOperand()->getValue()),
- MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
- Load->getAlignment()));
+
+ SDValue NewLoad
+ = DAG.getExtLoad(Load->getExtensionType(), SL, EltVT,
+ Load->getChain(), Ptr,
+ MachinePointerInfo(Load->getMemOperand()->getValue()),
+ MemEltVT, Load->isVolatile(), Load->isNonTemporal(),
+ Load->getAlignment());
+ Loads.push_back(NewLoad.getValue(0));
+ Chains.push_back(NewLoad.getValue(1));
}
- return DAG.getNode(ISD::BUILD_VECTOR, SL, Op.getValueType(), Loads);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::BUILD_VECTOR, SL, LoadVT, Loads),
+ DAG.getNode(ISD::TokenFactor, SL, MVT::Other, Chains)
+ };
+
+ return DAG.getMergeValues(Ops, SL);
}
SDValue AMDGPUTargetLowering::MergeVectorStore(const SDValue &Op,
SelectionDAG &DAG) const {
- StoreSDNode *Store = dyn_cast<StoreSDNode>(Op);
+ StoreSDNode *Store = cast<StoreSDNode>(Op);
EVT MemVT = Store->getMemoryVT();
unsigned MemBits = MemVT.getSizeInBits();
@@ -981,7 +1136,13 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
Load->getBasePtr(),
MemVT,
Load->getMemOperand());
- return DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::getExtForLoadExtType(ExtType), DL, VT, ExtLoad32),
+ ExtLoad32.getValue(1)
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
if (ExtType == ISD::NON_EXTLOAD && VT.getSizeInBits() < 32) {
@@ -995,7 +1156,13 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
SDValue NewLD = DAG.getExtLoad(ISD::EXTLOAD, DL, MVT::i32, Chain,
BasePtr, MVT::i8, MMO);
- return DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::TRUNCATE, DL, VT, NewLD),
+ NewLD.getValue(1)
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
// Lower constant address space loads of global variables.
@@ -1003,11 +1170,12 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
if (Load->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
isa<GlobalVariable>(
GetUnderlyingObject(Load->getMemOperand()->getValue()))) {
+
SDValue Ptr = DAG.getZExtOrTrunc(Load->getBasePtr(), DL,
getPointerTy(AMDGPUAS::PRIVATE_ADDRESS));
Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
DAG.getConstant(2, MVT::i32));
- return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+ return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
Load->getChain(), Ptr,
DAG.getTargetConstant(0, MVT::i32), Op.getOperand(2));
}
@@ -1034,10 +1202,21 @@ SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
EVT MemEltVT = MemVT.getScalarType();
if (ExtType == ISD::SEXTLOAD) {
SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
- return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL,
MVT::i32, Ret, MemEltVTNode);
+
+ SDValue Ops[] = {
+ DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode),
+ Load->getChain()
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
- return DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
+ SDValue Ops[] = {
+ DAG.getZeroExtendInReg(Ret, DL, MemEltVT),
+ Load->getChain()
+ };
+
+ return DAG.getMergeValues(Ops, DL);
}
SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
@@ -1097,6 +1276,251 @@ SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
return SDValue();
}
+SDValue AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ MVT INTTY;
+ MVT FLTTY;
+ if (!OVT.isVector()) {
+ INTTY = MVT::i32;
+ FLTTY = MVT::f32;
+ } else if (OVT.getVectorNumElements() == 2) {
+ INTTY = MVT::v2i32;
+ FLTTY = MVT::v2f32;
+ } else if (OVT.getVectorNumElements() == 4) {
+ INTTY = MVT::v4i32;
+ FLTTY = MVT::v4f32;
+ }
+ unsigned bitsize = OVT.getScalarType().getSizeInBits();
+ // char|short jq = ia ^ ib;
+ SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS);
+
+ // jq = jq >> (bitsize - 2)
+ jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT));
+
+ // jq = jq | 0x1
+ jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT));
+
+ // jq = (int)jq
+ jq = DAG.getSExtOrTrunc(jq, DL, INTTY);
+
+ // int ia = (int)LHS;
+ SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY);
+
+ // int ib = (int)RHS;
+ SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY);
+
+ // float fa = (float)ia;
+ SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia);
+
+ // float fb = (float)ib;
+ SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib);
+
+ // float fq = native_divide(fa, fb);
+ SDValue fq = DAG.getNode(ISD::FMUL, DL, FLTTY,
+ fa, DAG.getNode(AMDGPUISD::RCP, DL, FLTTY, fb));
+
+ // fq = trunc(fq);
+ fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq);
+
+ // float fqneg = -fq;
+ SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq);
+
+ // float fr = mad(fqneg, fb, fa);
+ SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY,
+ DAG.getNode(ISD::FMUL, DL, FLTTY, fqneg, fb), fa);
+
+ // int iq = (int)fq;
+ SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq);
+
+ // fr = fabs(fr);
+ fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr);
+
+ // fb = fabs(fb);
+ fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb);
+
+ // int cv = fr >= fb;
+ SDValue cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE);
+
+ // jq = (cv ? jq : 0);
+ jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq,
+ DAG.getConstant(0, OVT));
+
+ // dst = iq + jq;
+ iq = DAG.getSExtOrTrunc(iq, DL, OVT);
+ iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq);
+ return iq;
+}
+
+SDValue AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSDIV32 function generates code equivalent to the following IL.
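For reference, the sign-fixup sequence spelled out in the IL below amounts to this scalar computation (a minimal C++ sketch assuming 32-bit two's-complement arithmetic; illustration only, not part of the lowering itself):

    #include <cstdint>

    // Signed division via unsigned division: strip the signs, divide,
    // then put the quotient's sign back.
    static int32_t sdiv32Model(int32_t a, int32_t b) {
      uint32_t sa = uint32_t(a >> 31);        // all-ones if a < 0 (r10)
      uint32_t sb = uint32_t(b >> 31);        // all-ones if b < 0 (r11)
      uint32_t ua = (uint32_t(a) + sa) ^ sa;  // |a| via iadd + ixor
      uint32_t ub = (uint32_t(b) + sb) ^ sb;  // |b| via iadd + ixor
      uint32_t q = ua / ub;                   // udiv
      uint32_t s = sa ^ sb;                   // sign of the quotient
      return int32_t((q + s) ^ s);            // reapply the sign
    }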
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r0, r0, r1
+ // ixor r10, r10, r11
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSelectCC(DL,
+ r0, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, OVT),
+ DAG.getConstant(0, OVT),
+ ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSelectCC(DL,
+ r1, DAG.getConstant(0, OVT),
+ DAG.getConstant(-1, OVT),
+ DAG.getConstant(0, OVT),
+ ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r0, r0, r1
+ r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // ixor r10, r10, r11
+ r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType().getScalarType();
+
+ if (OVT == MVT::i64)
+ return LowerSDIV64(Op, DAG);
+
+ if (OVT.getScalarType() == MVT::i32)
+ return LowerSDIV32(Op, DAG);
+
+ if (OVT == MVT::i16 || OVT == MVT::i8) {
+ // FIXME: We should be checking for the masked bits. This isn't reached
+ // because i8 and i16 are not legal types.
+ return LowerSDIV24(Op, DAG);
+ }
+
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT OVT = Op.getValueType();
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ // The LowerSREM32 function generates code equivalent to the following IL.
+ // mov r0, LHS
+ // mov r1, RHS
+ // ilt r10, r0, 0
+ // ilt r11, r1, 0
+ // iadd r0, r0, r10
+ // iadd r1, r1, r11
+ // ixor r0, r0, r10
+ // ixor r1, r1, r11
+ // udiv r20, r0, r1
+ // umul r20, r20, r1
+ // sub r0, r0, r20
+ // iadd r0, r0, r10
+ // ixor DST, r0, r10
+
+ // mov r0, LHS
+ SDValue r0 = LHS;
+
+ // mov r1, RHS
+ SDValue r1 = RHS;
+
+ // ilt r10, r0, 0
+ SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // ilt r11, r1, 0
+ SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // iadd r1, r1, r11
+ r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11);
+
+ // ixor r0, r0, r10
+ r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+
+ // ixor r1, r1, r11
+ r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11);
+
+ // udiv r20, r0, r1
+ SDValue r20 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1);
+
+ // umul r20, r20, r1
+ r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1);
+
+ // sub r0, r0, r20
+ r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20);
+
+ // iadd r0, r0, r10
+ r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10);
+
+ // ixor DST, r0, r10
+ SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10);
+ return DST;
+}
+
+SDValue AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const {
+ return SDValue(Op.getNode(), 0);
+}
+
+SDValue AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const {
+ EVT OVT = Op.getValueType();
+
+ if (OVT.getScalarType() == MVT::i64)
+ return LowerSREM64(Op, DAG);
+
+ if (OVT.getScalarType() == MVT::i32)
+ return LowerSREM32(Op, DAG);
+
+ return SDValue(Op.getNode(), 0);
+}
+
SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
SelectionDAG &DAG) const {
SDLoc DL(Op);
@@ -1201,6 +1625,177 @@ SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op,
return DAG.getMergeValues(Ops, DL);
}
+SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op,
+ SelectionDAG &DAG) const {
+ SDLoc DL(Op);
+ EVT VT = Op.getValueType();
+
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDValue NegOne = DAG.getConstant(-1, VT);
+
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+
+ SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT);
+ SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT);
+ SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign);
+ SDValue RSign = LHSign; // Remainder sign is the same as LHS
+
+ LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign);
+ RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign);
+
+ LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign);
+ RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign);
+
+ SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS);
+ SDValue Rem = Div.getValue(1);
+
+ Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign);
+ Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign);
+
+ Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign);
+ Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign);
+
+ SDValue Res[2] = {
+ Div,
+ Rem
+ };
+ return DAG.getMergeValues(Res, DL);
+}
+
+SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ // result = trunc(src)
+ // if (src > 0.0 && src != result)
+ // result += 1.0
+
+ SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src);
+
+ const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64);
+ const SDValue One = DAG.getConstantFP(1.0, MVT::f64);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+
+ SDValue Gt0 =
DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT);
+ SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE);
+ SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Gt0, NeTrunc);
+
+ SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero);
+ return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
+}
+
+SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ assert(Op.getValueType() == MVT::f64);
+
+ const SDValue Zero = DAG.getConstant(0, MVT::i32);
+ const SDValue One = DAG.getConstant(1, MVT::i32);
+
+ SDValue VecSrc = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
+
+ // Extract the upper half, since this is where we will find the sign and
+ // exponent.
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, VecSrc, One);
+
+ const unsigned FractBits = 52;
+ const unsigned ExpBits = 11;
+
+ // Extract the exponent.
+ SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_I32, SL, MVT::i32,
+ Hi,
+ DAG.getConstant(FractBits - 32, MVT::i32),
+ DAG.getConstant(ExpBits, MVT::i32));
+ SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart,
+ DAG.getConstant(1023, MVT::i32));
+
+ // Extract the sign bit.
+ const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, MVT::i32);
+ SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask);
+
+ // Extend back to 64 bits.
+ SDValue SignBit64 = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32,
+ Zero, SignBit);
+ SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64);
+
+ SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src);
+ const SDValue FractMask
+ = DAG.getConstant((UINT64_C(1) << FractBits) - 1, MVT::i64);
+
+ SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp);
+ SDValue Not = DAG.getNOT(SL, Shr, MVT::i64);
+ SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::i32);
+
+ const SDValue FiftyOne = DAG.getConstant(FractBits - 1, MVT::i32);
+
+ SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT);
+ SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT);
+
+ SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0);
+ SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1);
+
+ return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2);
+}
+
+SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+
+ assert(Op.getValueType() == MVT::f64);
+
+ APFloat C1Val(APFloat::IEEEdouble, "0x1.0p+52");
+ SDValue C1 = DAG.getConstantFP(C1Val, MVT::f64);
+ SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src);
+
+ SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign);
+ SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign);
+
+ SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src);
+
+ APFloat C2Val(APFloat::IEEEdouble, "0x1.fffffffffffffp+51");
+ SDValue C2 = DAG.getConstantFP(C2Val, MVT::f64);
+
+ EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64);
+ SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT);
+
+ return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2);
+}
+
+SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const {
+ // FNEARBYINT and FRINT are the same, except in their handling of FP
+ // exceptions.
Those aren't really meaningful for us, and OpenCL only has + // rint, so just treat them as equivalent. + return DAG.getNode(ISD::FRINT, SDLoc(Op), Op.getValueType(), Op.getOperand(0)); +} + +SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { + SDLoc SL(Op); + SDValue Src = Op.getOperand(0); + + // result = trunc(src); + // if (src < 0.0 && src != result) + // result += -1.0. + + SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); + + const SDValue Zero = DAG.getConstantFP(0.0, MVT::f64); + const SDValue NegOne = DAG.getConstantFP(-1.0, MVT::f64); + + EVT SetCCVT = getSetCCResultType(*DAG.getContext(), MVT::f64); + + SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT); + SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); + SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); + + SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero); + return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); +} + SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const { SDValue S0 = Op.getOperand(0); @@ -1218,7 +1813,6 @@ SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi, DAG.getConstantFP(4294967296.0f, MVT::f32)); // 2^32 return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi); - } SDValue AMDGPUTargetLowering::ExpandSIGN_EXTEND_INREG(SDValue Op, @@ -1303,6 +1897,37 @@ static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, return DAG.getConstant(Src0 >> Offset, MVT::i32); } +SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N, + DAGCombinerInfo &DCI) const { + EVT VT = N->getValueType(0); + + if (VT.isVector() || VT.getSizeInBits() > 32) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); + + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue Mul; + + if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { + N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); + N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); + Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1); + } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { + N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); + N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); + Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1); + } else { + return SDValue(); + } + + // We need to use sext even for MUL_U24, because MUL_U24 is used + // for signed multiply of 8 and 16-bit types. + return DAG.getSExtOrTrunc(Mul, DL, VT); +} + SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -1310,34 +1935,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, switch(N->getOpcode()) { default: break; - case ISD::MUL: { - EVT VT = N->getValueType(0); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDValue Mul; - - // FIXME: Add support for 24-bit multiply with 64-bit output on SI. 
- if (VT.isVector() || VT.getSizeInBits() > 32) - break; - - if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { - N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); - N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); - Mul = DAG.getNode(AMDGPUISD::MUL_U24, DL, MVT::i32, N0, N1); - } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { - N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); - N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); - Mul = DAG.getNode(AMDGPUISD::MUL_I24, DL, MVT::i32, N0, N1); - } else { - break; - } - - // We need to use sext even for MUL_U24, because MUL_U24 is used - // for signed multiply of 8 and 16-bit types. - SDValue Reg = DAG.getSExtOrTrunc(Mul, DL, VT); - - return Reg; - } + case ISD::MUL: + return performMulCombine(N, DCI); case AMDGPUISD::MUL_I24: case AMDGPUISD::MUL_U24: { SDValue N0 = N->getOperand(0); @@ -1511,29 +2110,38 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { // AMDIL DAG nodes NODE_NAME_CASE(CALL); NODE_NAME_CASE(UMUL); - NODE_NAME_CASE(DIV_INF); NODE_NAME_CASE(RET_FLAG); NODE_NAME_CASE(BRANCH_COND); // AMDGPU DAG nodes NODE_NAME_CASE(DWORDADDR) NODE_NAME_CASE(FRACT) + NODE_NAME_CASE(CLAMP) NODE_NAME_CASE(FMAX) NODE_NAME_CASE(SMAX) NODE_NAME_CASE(UMAX) NODE_NAME_CASE(FMIN) NODE_NAME_CASE(SMIN) NODE_NAME_CASE(UMIN) + NODE_NAME_CASE(URECIP) + NODE_NAME_CASE(DIV_SCALE) + NODE_NAME_CASE(DIV_FMAS) + NODE_NAME_CASE(DIV_FIXUP) + NODE_NAME_CASE(TRIG_PREOP) + NODE_NAME_CASE(RCP) + NODE_NAME_CASE(RSQ) + NODE_NAME_CASE(RSQ_LEGACY) + NODE_NAME_CASE(RSQ_CLAMPED) + NODE_NAME_CASE(DOT4) NODE_NAME_CASE(BFE_U32) NODE_NAME_CASE(BFE_I32) NODE_NAME_CASE(BFI) NODE_NAME_CASE(BFM) + NODE_NAME_CASE(BREV) NODE_NAME_CASE(MUL_U24) NODE_NAME_CASE(MUL_I24) NODE_NAME_CASE(MAD_U24) NODE_NAME_CASE(MAD_I24) - NODE_NAME_CASE(URECIP) - NODE_NAME_CASE(DOT4) NODE_NAME_CASE(EXPORT) NODE_NAME_CASE(CONST_ADDRESS) NODE_NAME_CASE(REGISTER_LOAD) @@ -1544,6 +2152,11 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { NODE_NAME_CASE(SAMPLEB) NODE_NAME_CASE(SAMPLED) NODE_NAME_CASE(SAMPLEL) + NODE_NAME_CASE(CVT_F32_UBYTE0) + NODE_NAME_CASE(CVT_F32_UBYTE1) + NODE_NAME_CASE(CVT_F32_UBYTE2) + NODE_NAME_CASE(CVT_F32_UBYTE3) + NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) NODE_NAME_CASE(STORE_MSKOR) NODE_NAME_CASE(TBUFFER_STORE_FORMAT) } diff --git a/lib/Target/R600/AMDGPUISelLowering.h b/lib/Target/R600/AMDGPUISelLowering.h index d5d821d..98a92ad 100644 --- a/lib/Target/R600/AMDGPUISelLowering.h +++ b/lib/Target/R600/AMDGPUISelLowering.h @@ -42,10 +42,33 @@ private: SDValue MergeVectorStore(const SDValue &Op, SelectionDAG &DAG) const; /// \brief Split a vector store into multiple scalar stores. /// \returns The resulting chain. 
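The performMulCombine hunk above only fires when both operands provably fit in 24 bits, and the result is sign-extended back even for MUL_U24 because the node is reused for signed multiplies of 8- and 16-bit types. A rough numeric model of the contract being assumed (illustrative only, not backend code):

#include <cstdint>

// MUL_I24 sketch: the top 8 bits of each operand are ignored, the low
// 24 bits are sign-extended, and a 32-bit product is produced.
static int32_t refMulI24(int32_t A, int32_t B) {
  int32_t A24 = (A << 8) >> 8;  // keep and sign-extend bits [23:0]
  int32_t B24 = (B << 8) >> 8;
  return static_cast<int32_t>(static_cast<int64_t>(A24) * B24);
}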
+ + SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFCEIL(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFNEARBYINT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const; + SDValue ExpandSIGN_EXTEND_INREG(SDValue Op, + unsigned BitsDiff, + SelectionDAG &DAG) const; + SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; + + SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const; + protected: + static EVT getEquivalentMemType(LLVMContext &Context, EVT VT); + static EVT getEquivalentLoadRegType(LLVMContext &Context, EVT VT); /// \brief Helper function that adds Reg to the LiveIn list of the DAG's /// MachineFunction. @@ -61,6 +84,7 @@ protected: SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const; bool isHWTrueValue(SDValue Op) const; bool isHWFalseValue(SDValue Op) const; @@ -87,10 +111,16 @@ public: bool isZExtFree(Type *Src, Type *Dest) const override; bool isZExtFree(EVT Src, EVT Dest) const override; + bool isZExtFree(SDValue Val, EVT VT2) const override; bool isNarrowingProfitable(EVT VT1, EVT VT2) const override; MVT getVectorIdxTy() const override; + bool isSelectSupported(SelectSupportKind) const override; + + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; + bool ShouldShrinkFPConstant(EVT VT) const override; + bool isLoadBitCastBeneficial(EVT, EVT) const override; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, @@ -101,6 +131,7 @@ public: SmallVectorImpl<SDValue> &InVals) const override; SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override; + SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; void ReplaceNodeResults(SDNode * N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const override; @@ -128,38 +159,6 @@ public: SDValue Op, const SelectionDAG &DAG, unsigned Depth = 0) const override; - -// Functions defined in AMDILISelLowering.cpp -public: - bool getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, unsigned Intrinsic) const override; - - /// We want to mark f32/f64 floating point values as legal. - bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; - - /// We don't want to shrink f64/f32 constants. 
- bool ShouldShrinkFPConstant(EVT VT) const override; - - SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; - -private: - void InitAMDILLowering(); - SDValue LowerSREM(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSREM8(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSREM16(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSREM32(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSREM64(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSDIV24(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSDIV32(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSDIV64(SDValue Op, SelectionDAG &DAG) const; - - SDValue ExpandSIGN_EXTEND_INREG(SDValue Op, - unsigned BitsDiff, - SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) const; - EVT genIntType(uint32_t size = 32, uint32_t numEle = 1) const; - SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; }; namespace AMDGPUISD { @@ -169,12 +168,15 @@ enum { FIRST_NUMBER = ISD::BUILTIN_OP_END, CALL, // Function call based on a single integer UMUL, // 32bit unsigned multiplication - DIV_INF, // Divide with infinity returned on zero divisor RET_FLAG, BRANCH_COND, // End AMDIL ISD Opcodes DWORDADDR, FRACT, + CLAMP, + + // SIN_HW, COS_HW - f32 for SI, 1 ULP max error, valid from -100 pi to 100 pi. + // Denormals handled on some parts. COS_HW, SIN_HW, FMAX, @@ -184,11 +186,23 @@ enum { SMIN, UMIN, URECIP, + DIV_SCALE, + DIV_FMAS, + DIV_FIXUP, + TRIG_PREOP, // 1 ULP max error for f64 + + // RCP, RSQ - For f32, 1 ULP max error, no denormal handling. + // For f64, max error 2^29 ULP, handles denormals. + RCP, + RSQ, + RSQ_LEGACY, + RSQ_CLAMPED, DOT4, BFE_U32, // Extract range of bits with zero extension to 32-bits. BFE_I32, // Extract range of bits with sign extension to 32-bits. BFI, // (src0 & src1) | (~src0 & src2) BFM, // Insert a range of bits into a 32-bit word. + BREV, // Reverse bits. MUL_U24, MUL_I24, MAD_U24, @@ -203,6 +217,21 @@ enum { SAMPLEB, SAMPLED, SAMPLEL, + + // These cvt_f32_ubyte* nodes need to remain consecutive and in order. + CVT_F32_UBYTE0, + CVT_F32_UBYTE1, + CVT_F32_UBYTE2, + CVT_F32_UBYTE3, + /// This node is for VLIW targets and it is used to represent a vector + /// that is stored in consecutive registers with the same channel. + /// For example: + /// |X |Y|Z|W| + /// T0|v.x| | | | + /// T1|v.y| | | | + /// T2|v.z| | | | + /// T3|v.w| | | | + BUILD_VERTICAL_VECTOR, FIRST_MEM_OPCODE_NUMBER = ISD::FIRST_TARGET_MEMORY_OPCODE, STORE_MSKOR, LOAD_CONSTANT, diff --git a/lib/Target/R600/AMDGPUInstrInfo.cpp b/lib/Target/R600/AMDGPUInstrInfo.cpp index 1c3361a..fef5b8c 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.cpp +++ b/lib/Target/R600/AMDGPUInstrInfo.cpp @@ -30,8 +30,8 @@ using namespace llvm; // Pin the vtable to this file. 
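For the bitfield nodes added to the AMDGPUISD enum above, reference semantics per their inline comments (an illustrative model; the &31 masking merely avoids undefined C++ shifts and is an assumption, not a claim about hardware edge-case behavior):

#include <cstdint>

static uint32_t refBFE_U32(uint32_t Src, uint32_t Off, uint32_t W) {
  return (Src >> (Off & 31)) & ((1u << (W & 31)) - 1);  // zero-extending extract
}
static uint32_t refBFI(uint32_t X, uint32_t Y, uint32_t Z) {
  return (X & Y) | (~X & Z);                            // the documented (src0 & src1) | (~src0 & src2)
}
static uint32_t refBFM(uint32_t W, uint32_t Off) {
  return ((1u << (W & 31)) - 1) << (Off & 31);          // W-bit mask placed at offset Off
}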
void AMDGPUInstrInfo::anchor() {} -AMDGPUInstrInfo::AMDGPUInstrInfo(TargetMachine &tm) - : AMDGPUGenInstrInfo(-1,-1), RI(tm), TM(tm) { } +AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st) + : AMDGPUGenInstrInfo(-1,-1), RI(st), ST(st) { } const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const { return RI; @@ -320,33 +320,11 @@ int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const { return -1; } - Offset = TM.getFrameLowering()->getFrameIndexOffset(MF, -1); + Offset = MF.getTarget().getFrameLowering()->getFrameIndexOffset(MF, -1); return getIndirectIndexBegin(MF) + Offset; } - -void AMDGPUInstrInfo::convertToISA(MachineInstr & MI, MachineFunction &MF, - DebugLoc DL) const { - MachineRegisterInfo &MRI = MF.getRegInfo(); - const AMDGPURegisterInfo & RI = getRegisterInfo(); - - for (unsigned i = 0; i < MI.getNumOperands(); i++) { - MachineOperand &MO = MI.getOperand(i); - // Convert dst regclass to one that is supported by the ISA - if (MO.isReg() && MO.isDef()) { - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - const TargetRegisterClass * oldRegClass = MRI.getRegClass(MO.getReg()); - const TargetRegisterClass * newRegClass = RI.getISARegClass(oldRegClass); - - assert(newRegClass); - - MRI.setRegClass(MO.getReg(), newRegClass); - } - } - } -} - int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const { switch (Channels) { default: return Opcode; diff --git a/lib/Target/R600/AMDGPUInstrInfo.h b/lib/Target/R600/AMDGPUInstrInfo.h index 74baf6b..95dc8c1 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.h +++ b/lib/Target/R600/AMDGPUInstrInfo.h @@ -33,7 +33,7 @@ namespace llvm { -class AMDGPUTargetMachine; +class AMDGPUSubtarget; class MachineFunction; class MachineInstr; class MachineInstrBuilder; @@ -45,9 +45,9 @@ private: MachineBasicBlock &MBB) const; virtual void anchor(); protected: - TargetMachine &TM; + const AMDGPUSubtarget &ST; public: - explicit AMDGPUInstrInfo(TargetMachine &tm); + explicit AMDGPUInstrInfo(const AMDGPUSubtarget &st); virtual const AMDGPURegisterInfo &getRegisterInfo() const = 0; @@ -137,14 +137,6 @@ public: bool isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const override; // Helper functions that check the opcode for status information - bool isLoadInst(llvm::MachineInstr *MI) const; - bool isExtLoadInst(llvm::MachineInstr *MI) const; - bool isSWSExtLoadInst(llvm::MachineInstr *MI) const; - bool isSExtLoadInst(llvm::MachineInstr *MI) const; - bool isZExtLoadInst(llvm::MachineInstr *MI) const; - bool isAExtLoadInst(llvm::MachineInstr *MI) const; - bool isStoreInst(llvm::MachineInstr *MI) const; - bool isTruncStoreInst(llvm::MachineInstr *MI) const; bool isRegisterStore(const MachineInstr &MI) const; bool isRegisterLoad(const MachineInstr &MI) const; @@ -185,11 +177,6 @@ public: unsigned ValueReg, unsigned Address, unsigned OffsetReg) const = 0; - - /// \brief Convert the AMDIL MachineInstr to a supported ISA - /// MachineInstr - void convertToISA(MachineInstr & MI, MachineFunction &MF, DebugLoc DL) const; - /// \brief Build a MOV instruction. 
virtual MachineInstr *buildMovInstr(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, diff --git a/lib/Target/R600/AMDGPUInstrInfo.td b/lib/Target/R600/AMDGPUInstrInfo.td index f96dbb4..934d59d 100644 --- a/lib/Target/R600/AMDGPUInstrInfo.td +++ b/lib/Target/R600/AMDGPUInstrInfo.td @@ -19,6 +19,14 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [ SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisInt<0>, SDTCisInt<3> ]>; +def AMDGPUTrigPreOp : SDTypeProfile<1, 2, + [SDTCisSameAs<0, 1>, SDTCisFP<0>, SDTCisInt<2>] +>; + +def AMDGPUDivScaleOp : SDTypeProfile<2, 3, + [SDTCisFP<0>, SDTCisInt<1>, SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, SDTCisSameAs<0, 4>] +>; + //===----------------------------------------------------------------------===// // AMDGPU DAG Nodes // @@ -29,11 +37,25 @@ def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>; // out = a - floor(a) def AMDGPUfract : SDNode<"AMDGPUISD::FRACT", SDTFPUnaryOp>; +// out = 1.0 / a +def AMDGPUrcp : SDNode<"AMDGPUISD::RCP", SDTFPUnaryOp>; + +// out = 1.0 / sqrt(a) +def AMDGPUrsq : SDNode<"AMDGPUISD::RSQ", SDTFPUnaryOp>; + +// out = 1.0 / sqrt(a) +def AMDGPUrsq_legacy : SDNode<"AMDGPUISD::RSQ_LEGACY", SDTFPUnaryOp>; + +// out = 1.0 / sqrt(a), result clamped to +/- max_float. +def AMDGPUrsq_clamped : SDNode<"AMDGPUISD::RSQ_CLAMPED", SDTFPUnaryOp>; + // out = max(a, b) a and b are floats def AMDGPUfmax : SDNode<"AMDGPUISD::FMAX", SDTFPBinOp, [SDNPCommutative, SDNPAssociative] >; +def AMDGPUclamp : SDNode<"AMDGPUISD::CLAMP", SDTFPTernaryOp, []>; + // out = max(a, b) a and b are signed ints def AMDGPUsmax : SDNode<"AMDGPUISD::SMAX", SDTIntBinOp, [SDNPCommutative, SDNPAssociative] @@ -59,12 +81,38 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp, [SDNPCommutative, SDNPAssociative] >; + +def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0", + SDTIntToFPOp, []>; +def AMDGPUcvt_f32_ubyte1 : SDNode<"AMDGPUISD::CVT_F32_UBYTE1", + SDTIntToFPOp, []>; +def AMDGPUcvt_f32_ubyte2 : SDNode<"AMDGPUISD::CVT_F32_UBYTE2", + SDTIntToFPOp, []>; +def AMDGPUcvt_f32_ubyte3 : SDNode<"AMDGPUISD::CVT_F32_UBYTE3", + SDTIntToFPOp, []>; + + // urecip - This operation is a helper for integer division; it returns the // result of 1 / a as a fractional unsigned integer. // out = (2^32 / a) + e // e is rounding error def AMDGPUurecip : SDNode<"AMDGPUISD::URECIP", SDTIntUnaryOp>; +// Special case divide preop and flags. +def AMDGPUdiv_scale : SDNode<"AMDGPUISD::DIV_SCALE", AMDGPUDivScaleOp>; + +// Special case divide FMA with scale and flags (src0 = Quotient, +// src1 = Denominator, src2 = Numerator). +def AMDGPUdiv_fmas : SDNode<"AMDGPUISD::DIV_FMAS", SDTFPTernaryOp>; + +// Single or double precision division fixup. +// Special case divide fixup and flags (src0 = Quotient, src1 = +// Denominator, src2 = Numerator). +def AMDGPUdiv_fixup : SDNode<"AMDGPUISD::DIV_FIXUP", SDTFPTernaryOp>; + +// Look up 2.0 / pi src0 with segment select src1[4:0] +def AMDGPUtrig_preop : SDNode<"AMDGPUISD::TRIG_PREOP", AMDGPUTrigPreOp>; + def AMDGPUregister_load : SDNode<"AMDGPUISD::REGISTER_LOAD", SDTypeProfile<1, 2, [SDTCisPtrTy<1>, SDTCisInt<2>]>, [SDNPHasChain, SDNPMayLoad]>; @@ -92,6 +140,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>; def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>; def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>; +def AMDGPUbrev : SDNode<"AMDGPUISD::BREV", SDTIntUnaryOp>; + // Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when +// performing the multiply. 
The result is a 32-bit value. def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp, @@ -107,3 +157,22 @@ def AMDGPUmad_u24 : SDNode<"AMDGPUISD::MAD_U24", AMDGPUDTIntTernaryOp, def AMDGPUmad_i24 : SDNode<"AMDGPUISD::MAD_I24", AMDGPUDTIntTernaryOp, [] >; + +//===----------------------------------------------------------------------===// +// Flow Control Profile Types +//===----------------------------------------------------------------------===// +// Branch instruction where second and third are basic blocks +def SDTIL_BRCond : SDTypeProfile<0, 2, [ + SDTCisVT<0, OtherVT> + ]>; + +//===----------------------------------------------------------------------===// +// Flow Control DAG Nodes +//===----------------------------------------------------------------------===// +def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>; + +//===----------------------------------------------------------------------===// +// Call/Return DAG Nodes +//===----------------------------------------------------------------------===// +def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, + [SDNPHasChain, SDNPOptInGlue]>; diff --git a/lib/Target/R600/AMDGPUInstructions.td b/lib/Target/R600/AMDGPUInstructions.td index 80bdf5b..b86b781 100644 --- a/lib/Target/R600/AMDGPUInstructions.td +++ b/lib/Target/R600/AMDGPUInstructions.td @@ -49,6 +49,11 @@ def u8imm : Operand<i8> { let PrintMethod = "printU8ImmOperand"; } +//===--------------------------------------------------------------------===// +// Custom Operands +//===--------------------------------------------------------------------===// +def brtarget : Operand<OtherVT>; + //===----------------------------------------------------------------------===// // PatLeafs for floating-point comparisons //===----------------------------------------------------------------------===// @@ -127,6 +132,21 @@ def COND_NULL : PatLeaf < // Load/Store Pattern Fragments //===----------------------------------------------------------------------===// +def global_store : PatFrag<(ops node:$val, node:$ptr), + (store node:$val, node:$ptr), [{ + return isGlobalStore(dyn_cast<StoreSDNode>(N)); +}]>; + +// Global address space loads +def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isGlobalLoad(dyn_cast<LoadSDNode>(N)); +}]>; + +// Constant address space loads +def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ + return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); +}]>; + def az_extload : PatFrag<(ops node:$ptr), (unindexedload node:$ptr), [{ LoadSDNode *L = cast<LoadSDNode>(N); return L->getExtensionType() == ISD::ZEXTLOAD || @@ -232,26 +252,55 @@ def local_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ return isLocalLoad(dyn_cast<LoadSDNode>(N)); }]>; -def atomic_load_add_local : PatFrag<(ops node:$ptr, node:$value), - (atomic_load_add node:$ptr, node:$value), [{ - return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; -}]>; -def atomic_load_sub_local : PatFrag<(ops node:$ptr, node:$value), - (atomic_load_sub node:$ptr, node:$value), [{ - return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +class local_binary_atomic_op<SDNode atomic_op> : + PatFrag<(ops node:$ptr, node:$value), + (atomic_op node:$ptr, node:$value), [{ + return cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; }]>; + +def atomic_swap_local : local_binary_atomic_op<atomic_swap>; +def atomic_load_add_local : local_binary_atomic_op<atomic_load_add>; +def atomic_load_sub_local : 
local_binary_atomic_op<atomic_load_sub>; +def atomic_load_and_local : local_binary_atomic_op<atomic_load_and>; +def atomic_load_or_local : local_binary_atomic_op<atomic_load_or>; +def atomic_load_xor_local : local_binary_atomic_op<atomic_load_xor>; +def atomic_load_nand_local : local_binary_atomic_op<atomic_load_nand>; +def atomic_load_min_local : local_binary_atomic_op<atomic_load_min>; +def atomic_load_max_local : local_binary_atomic_op<atomic_load_max>; +def atomic_load_umin_local : local_binary_atomic_op<atomic_load_umin>; +def atomic_load_umax_local : local_binary_atomic_op<atomic_load_umax>; + def mskor_global : PatFrag<(ops node:$val, node:$ptr), (AMDGPUstore_mskor node:$val, node:$ptr), [{ return dyn_cast<MemSDNode>(N)->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS; }]>; +def atomic_cmp_swap_32_local : + PatFrag<(ops node:$ptr, node:$cmp, node:$swap), + (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{ + AtomicSDNode *AN = cast<AtomicSDNode>(N); + return AN->getMemoryVT() == MVT::i32 && + AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + +def atomic_cmp_swap_64_local : + PatFrag<(ops node:$ptr, node:$cmp, node:$swap), + (atomic_cmp_swap node:$ptr, node:$cmp, node:$swap), [{ + AtomicSDNode *AN = cast<AtomicSDNode>(N); + return AN->getMemoryVT() == MVT::i64 && + AN->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS; +}]>; + + class Constants { int TWO_PI = 0x40c90fdb; int PI = 0x40490fdb; int TWO_PI_INV = 0x3e22f983; int FP_UINT_MAX_PLUS_1 = 0x4f800000; // 1 << 32 in floating point encoding +int FP32_NEG_ONE = 0xbf800000; +int FP32_ONE = 0x3f800000; } def CONST : Constants; @@ -273,7 +322,7 @@ class CLAMP <RegisterClass rc> : AMDGPUShaderInst < (outs rc:$dst), (ins rc:$src0), "CLAMP $dst, $src0", - [(set f32:$dst, (int_AMDIL_clamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] + [(set f32:$dst, (AMDGPUclamp f32:$src0, (f32 FP_ZERO), (f32 FP_ONE)))] >; class FABS <RegisterClass rc> : AMDGPUShaderInst < @@ -363,7 +412,7 @@ class DwordAddrPat<ValueType vt, RegisterClass rc> : Pat < // BFI_INT patterns -multiclass BFIPatterns <Instruction BFI_INT> { +multiclass BFIPatterns <Instruction BFI_INT, Instruction LoadImm32> { // Definition from ISA doc: // (y & x) | (z & ~x) @@ -379,6 +428,19 @@ multiclass BFIPatterns <Instruction BFI_INT> { (BFI_INT $x, $y, $z) >; + def : Pat < + (fcopysign f32:$src0, f32:$src1), + (BFI_INT (LoadImm32 0x7fffffff), $src0, $src1) + >; + + def : Pat < + (f64 (fcopysign f64:$src0, f64:$src1)), + (INSERT_SUBREG (INSERT_SUBREG (f64 (IMPLICIT_DEF)), + (i32 (EXTRACT_SUBREG $src0, sub0)), sub0), + (BFI_INT (LoadImm32 0x7fffffff), + (i32 (EXTRACT_SUBREG $src0, sub1)), + (i32 (EXTRACT_SUBREG $src1, sub1))), sub1) + >; } // SHA-256 Ma patterns @@ -457,6 +519,23 @@ multiclass Expand24UBitOps<Instruction MulInst, Instruction AddInst> { >; } +class RcpPat<Instruction RcpInst, ValueType vt> : Pat < + (fdiv FP_ONE, vt:$src), + (RcpInst $src) +>; + +multiclass RsqPat<Instruction RsqInst, ValueType vt> { + def : Pat < + (fdiv FP_ONE, (fsqrt vt:$src)), + (RsqInst $src) + >; + + def : Pat < + (AMDGPUrcp (fsqrt vt:$src)), + (RsqInst $src) + >; +} + include "R600Instructions.td" include "R700Instructions.td" include "EvergreenInstructions.td" diff --git a/lib/Target/R600/AMDILIntrinsicInfo.cpp b/lib/Target/R600/AMDGPUIntrinsicInfo.cpp index fab4a3b..58916a9 100644 --- a/lib/Target/R600/AMDILIntrinsicInfo.cpp +++ b/lib/Target/R600/AMDGPUIntrinsicInfo.cpp @@ -1,4 +1,4 @@ -//===- AMDILIntrinsicInfo.cpp - AMDGPU Intrinsic Information ------*- C++ -*-===// +//===- 
AMDGPUIntrinsicInfo.cpp - AMDGPU Intrinsic Information ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -12,7 +12,7 @@ // //===-----------------------------------------------------------------------===// -#include "AMDILIntrinsicInfo.h" +#include "AMDGPUIntrinsicInfo.h" #include "AMDGPUSubtarget.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Intrinsics.h" @@ -24,14 +24,12 @@ using namespace llvm; #include "AMDGPUGenIntrinsics.inc" #undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN -AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm) - : TargetIntrinsicInfo() { -} +AMDGPUIntrinsicInfo::AMDGPUIntrinsicInfo(TargetMachine *tm) + : TargetIntrinsicInfo() {} -std::string -AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys, - unsigned int numTys) const { - static const char* const names[] = { +std::string AMDGPUIntrinsicInfo::getName(unsigned IntrID, Type **Tys, + unsigned numTys) const { + static const char *const names[] = { #define GET_INTRINSIC_NAME_TABLE #include "AMDGPUGenIntrinsics.inc" #undef GET_INTRINSIC_NAME_TABLE @@ -40,23 +38,23 @@ AMDGPUIntrinsicInfo::getName(unsigned int IntrID, Type **Tys, if (IntrID < Intrinsic::num_intrinsics) { return nullptr; } - assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics - && "Invalid intrinsic ID"); + assert(IntrID < AMDGPUIntrinsic::num_AMDGPU_intrinsics && + "Invalid intrinsic ID"); std::string Result(names[IntrID - Intrinsic::num_intrinsics]); return Result; } -unsigned int -AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const { +unsigned AMDGPUIntrinsicInfo::lookupName(const char *Name, + unsigned Len) const { if (!StringRef(Name, Len).startswith("llvm.")) return 0; // All intrinsics start with 'llvm.' #define GET_FUNCTION_RECOGNIZER #include "AMDGPUGenIntrinsics.inc" #undef GET_FUNCTION_RECOGNIZER - AMDGPUIntrinsic::ID IntrinsicID - = (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic; + AMDGPUIntrinsic::ID IntrinsicID = + (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic; IntrinsicID = getIntrinsicForGCCBuiltin("AMDGPU", Name); if (IntrinsicID != (AMDGPUIntrinsic::ID)Intrinsic::not_intrinsic) { @@ -65,17 +63,15 @@ AMDGPUIntrinsicInfo::lookupName(const char *Name, unsigned int Len) const { return 0; } -bool -AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const { - // Overload Table +bool AMDGPUIntrinsicInfo::isOverloaded(unsigned id) const { +// Overload Table #define GET_INTRINSIC_OVERLOAD_TABLE #include "AMDGPUGenIntrinsics.inc" #undef GET_INTRINSIC_OVERLOAD_TABLE } -Function* -AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, - Type **Tys, - unsigned numTys) const { +Function *AMDGPUIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, + Type **Tys, + unsigned numTys) const { llvm_unreachable("Not implemented"); } diff --git a/lib/Target/R600/AMDILIntrinsicInfo.h b/lib/Target/R600/AMDGPUIntrinsicInfo.h index 924275a..5be68a2 100644 --- a/lib/Target/R600/AMDILIntrinsicInfo.h +++ b/lib/Target/R600/AMDGPUIntrinsicInfo.h @@ -1,4 +1,4 @@ -//===- AMDILIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===// +//===- AMDGPUIntrinsicInfo.h - AMDGPU Intrinsic Information ------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -11,8 +11,8 @@ /// \brief Interface for the AMDGPU Implementation of the Intrinsic Info class. 
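Because target intrinsics are numbered after the core LLVM ones, getName() above indexes its table with IntrID - Intrinsic::num_intrinsics, and lookupName() only recognizes names that begin with "llvm.". A hypothetical call site (the snippet and its surrounding context are editorial, not from the patch):

#include <cstring>

static unsigned lookupAMDGPUIntrinsic(const llvm::TargetMachine &TM) {
  const llvm::TargetIntrinsicInfo *II = TM.getIntrinsicInfo();
  return II->lookupName("llvm.AMDGPU.bfi", strlen("llvm.AMDGPU.bfi")); // 0 if unknown
}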
// //===-----------------------------------------------------------------------===// -#ifndef AMDIL_INTRINSICS_H -#define AMDIL_INTRINSICS_H +#ifndef AMDGPU_INTRINSICINFO_H +#define AMDGPU_INTRINSICINFO_H #include "llvm/IR/Intrinsics.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -34,16 +34,15 @@ enum ID { class AMDGPUIntrinsicInfo : public TargetIntrinsicInfo { public: AMDGPUIntrinsicInfo(TargetMachine *tm); - std::string getName(unsigned int IntrId, Type **Tys = nullptr, - unsigned int numTys = 0) const override; - unsigned int lookupName(const char *Name, unsigned int Len) const override; - bool isOverloaded(unsigned int IID) const override; - Function *getDeclaration(Module *M, unsigned int ID, + std::string getName(unsigned IntrId, Type **Tys = nullptr, + unsigned numTys = 0) const override; + unsigned lookupName(const char *Name, unsigned Len) const override; + bool isOverloaded(unsigned IID) const override; + Function *getDeclaration(Module *M, unsigned ID, Type **Tys = nullptr, - unsigned int numTys = 0) const override; + unsigned numTys = 0) const override; }; } // end namespace llvm -#endif // AMDIL_INTRINSICS_H - +#endif // AMDGPU_INTRINSICINFO_H diff --git a/lib/Target/R600/AMDGPUIntrinsics.td b/lib/Target/R600/AMDGPUIntrinsics.td index 9ad5e72..d934676 100644 --- a/lib/Target/R600/AMDGPUIntrinsics.td +++ b/lib/Target/R600/AMDGPUIntrinsics.td @@ -18,18 +18,26 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_reserve_reg : Intrinsic<[], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_store_output : Intrinsic<[], [llvm_float_ty, llvm_i32_ty], []>; def int_AMDGPU_swizzle : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_i32_ty], [IntrNoMem]>; - + def int_AMDGPU_abs : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_arl : Intrinsic<[llvm_i32_ty], [llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_cndlt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_div : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_fract : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_AMDGPU_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + + // This is named backwards (instead of rsq_legacy) so we don't have + // to define it with the public builtins intrinsics. This is a + // workaround for how intrinsic names are parsed. If the name is + // llvm.AMDGPU.rsq.legacy, the parser assumes that you meant + // llvm.AMDGPU.rsq.{f32 | f64} and incorrectly mangled the name. 
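In source terms, the shape the new RcpPat/RsqPat multiclasses (from the AMDGPUInstructions.td hunk earlier) are built to catch looks like this (an editorial example, not patch code); the legacy_rsq definition the preceding comment describes continues right below:

#include <cmath>

// fdiv 1.0, (fsqrt x) -- or an already-formed rcp(sqrt(x)) --
// collapses to a single RSQ node per RsqPat.
static float rsqSourcePattern(float X) {
  return 1.0f / std::sqrt(X);
}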
+ def int_AMDGPU_legacy_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; + def int_AMDGPU_dp4 : Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, llvm_v4f32_ty], [IntrNoMem]>; def int_AMDGPU_kill : Intrinsic<[], [llvm_float_ty], []>; def int_AMDGPU_kilp : Intrinsic<[], [], []>; def int_AMDGPU_lrp : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_mul : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_pow : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_rcp : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; - def int_AMDGPU_rsq : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_seq : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_sgt : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; def int_AMDGPU_sge : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>; @@ -53,12 +61,27 @@ let TargetPrefix = "AMDGPU", isTarget = 1 in { def int_AMDGPU_imul24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_imad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_umad24 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_cvt_f32_ubyte0 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_cvt_f32_ubyte1 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_cvt_f32_ubyte2 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_cvt_f32_ubyte3 : Intrinsic<[llvm_float_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_cube : Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty], [IntrNoMem]>; def int_AMDGPU_bfi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfe_i32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfe_u32 : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_bfm : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; + def int_AMDGPU_brev : Intrinsic<[llvm_i32_ty], [llvm_i32_ty], [IntrNoMem]>; def int_AMDGPU_barrier_local : Intrinsic<[], [], []>; + def int_AMDGPU_barrier_global : Intrinsic<[], [], []>; +} + +// Legacy names for compatibility. 
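int_AMDGPU_brev maps onto the new AMDGPUISD::BREV node; as a reference for what "reverse bits" means here, a standard i32 bit-reversal (an illustrative model only):

#include <cstdint>

static uint32_t refBREV(uint32_t V) {
  V = ((V >> 1) & 0x55555555u) | ((V & 0x55555555u) << 1);  // swap adjacent bits
  V = ((V >> 2) & 0x33333333u) | ((V & 0x33333333u) << 2);  // swap bit pairs
  V = ((V >> 4) & 0x0F0F0F0Fu) | ((V & 0x0F0F0F0Fu) << 4);  // swap nibbles
  V = ((V >> 8) & 0x00FF00FFu) | ((V & 0x00FF00FFu) << 8);  // swap bytes
  return (V >> 16) | (V << 16);                             // swap half-words
}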
+let TargetPrefix = "AMDIL", isTarget = 1 in { + def int_AMDIL_abs : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_AMDIL_fraction : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_AMDIL_clamp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; + def int_AMDIL_exp : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; + def int_AMDIL_round_nearest : Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; } let TargetPrefix = "TGSI", isTarget = 1 in { diff --git a/lib/Target/R600/AMDGPUMCInstLower.cpp b/lib/Target/R600/AMDGPUMCInstLower.cpp index b759495..ac82e88 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.cpp +++ b/lib/Target/R600/AMDGPUMCInstLower.cpp @@ -15,6 +15,7 @@ #include "AMDGPUMCInstLower.h" #include "AMDGPUAsmPrinter.h" +#include "AMDGPUTargetMachine.h" #include "InstPrinter/AMDGPUInstPrinter.h" #include "R600InstrInfo.h" #include "SIInstrInfo.h" diff --git a/lib/Target/R600/AMDGPUMCInstLower.h b/lib/Target/R600/AMDGPUMCInstLower.h index 2b7f1e3..58fe34d 100644 --- a/lib/Target/R600/AMDGPUMCInstLower.h +++ b/lib/Target/R600/AMDGPUMCInstLower.h @@ -14,9 +14,9 @@ namespace llvm { class AMDGPUSubtarget; -class MCInst; -class MCContext; class MachineInstr; +class MCContext; +class MCInst; class AMDGPUMCInstLower { diff --git a/lib/Target/R600/AMDGPUPromoteAlloca.cpp b/lib/Target/R600/AMDGPUPromoteAlloca.cpp new file mode 100644 index 0000000..218750d --- /dev/null +++ b/lib/Target/R600/AMDGPUPromoteAlloca.cpp @@ -0,0 +1,387 @@ +//===-- AMDGPUPromoteAlloca.cpp - Promote Allocas -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass eliminates allocas by either converting them into vectors or +// by migrating them to local address space. +// +//===----------------------------------------------------------------------===// + +#include "AMDGPU.h" +#include "AMDGPUSubtarget.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstVisitor.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "amdgpu-promote-alloca" + +using namespace llvm; + +namespace { + +class AMDGPUPromoteAlloca : public FunctionPass, + public InstVisitor<AMDGPUPromoteAlloca> { + + static char ID; + Module *Mod; + const AMDGPUSubtarget &ST; + int LocalMemAvailable; + +public: + AMDGPUPromoteAlloca(const AMDGPUSubtarget &st) : FunctionPass(ID), ST(st), + LocalMemAvailable(0) { } + virtual bool doInitialization(Module &M); + virtual bool runOnFunction(Function &F); + virtual const char *getPassName() const { + return "AMDGPU Promote Alloca"; + } + void visitAlloca(AllocaInst &I); +}; + +} // End anonymous namespace + +char AMDGPUPromoteAlloca::ID = 0; + +bool AMDGPUPromoteAlloca::doInitialization(Module &M) { + Mod = &M; + return false; +} + +bool AMDGPUPromoteAlloca::runOnFunction(Function &F) { + + const FunctionType *FTy = F.getFunctionType(); + + LocalMemAvailable = ST.getLocalMemorySize(); + + + // If the function has any arguments in the local address space, then it's + // possible these arguments require the entire local memory space, so + // we cannot use local memory in the pass. 
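As a worked example of the budget being set up here (numbers are illustrative, not from the patch): if the subtarget reports 32768 bytes of local memory and the function uses one module-level [256 x i32] local-address-space array, the accounting loop below leaves 32768 - 1024 = 31744 bytes for promotion, while the argument scan that follows zeroes the budget outright as soon as it sees a single local-address-space pointer parameter.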
+ for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) { + const Type *ParamTy = FTy->getParamType(i); + if (ParamTy->isPointerTy() && + ParamTy->getPointerAddressSpace() == AMDGPUAS::LOCAL_ADDRESS) { + LocalMemAvailable = 0; + DEBUG(dbgs() << "Function has local memory argument. Promoting to " + "local memory disabled.\n"); + break; + } + } + + if (LocalMemAvailable > 0) { + // Check how much local memory is being used by global objects + for (Module::global_iterator I = Mod->global_begin(), + E = Mod->global_end(); I != E; ++I) { + GlobalVariable *GV = I; + PointerType *GVTy = GV->getType(); + if (GVTy->getAddressSpace() != AMDGPUAS::LOCAL_ADDRESS) + continue; + for (Value::use_iterator U = GV->use_begin(), + UE = GV->use_end(); U != UE; ++U) { + Instruction *Use = dyn_cast<Instruction>(*U); + if (!Use) + continue; + if (Use->getParent()->getParent() == &F) + LocalMemAvailable -= + Mod->getDataLayout()->getTypeAllocSize(GVTy->getElementType()); + } + } + } + + LocalMemAvailable = std::max(0, LocalMemAvailable); + DEBUG(dbgs() << LocalMemAvailable << "bytes free in local memory.\n"); + + visit(F); + + return false; +} + +static VectorType *arrayTypeToVecType(const Type *ArrayTy) { + return VectorType::get(ArrayTy->getArrayElementType(), + ArrayTy->getArrayNumElements()); +} + +static Value* calculateVectorIndex(Value *Ptr, + std::map<GetElementPtrInst*, Value*> GEPIdx) { + if (isa<AllocaInst>(Ptr)) + return Constant::getNullValue(Type::getInt32Ty(Ptr->getContext())); + + GetElementPtrInst *GEP = cast<GetElementPtrInst>(Ptr); + + return GEPIdx[GEP]; +} + +static Value* GEPToVectorIndex(GetElementPtrInst *GEP) { + // FIXME we only support simple cases + if (GEP->getNumOperands() != 3) + return NULL; + + ConstantInt *I0 = dyn_cast<ConstantInt>(GEP->getOperand(1)); + if (!I0 || !I0->isZero()) + return NULL; + + return GEP->getOperand(2); +} + +// Not an instruction handled below to turn into a vector. +// +// TODO: Check isTriviallyVectorizable for calls and handle other +// instructions. +static bool canVectorizeInst(Instruction *Inst) { + switch (Inst->getOpcode()) { + case Instruction::Load: + case Instruction::Store: + case Instruction::BitCast: + case Instruction::AddrSpaceCast: + return true; + default: + return false; + } +} + +static bool tryPromoteAllocaToVector(AllocaInst *Alloca) { + Type *AllocaTy = Alloca->getAllocatedType(); + + DEBUG(dbgs() << "Alloca Candidate for vectorization \n"); + + // FIXME: There is no reason why we can't support larger arrays, we + // are just being conservative for now. + if (!AllocaTy->isArrayTy() || + AllocaTy->getArrayElementType()->isVectorTy() || + AllocaTy->getArrayNumElements() > 4) { + + DEBUG(dbgs() << " Cannot convert type to vector"); + return false; + } + + std::map<GetElementPtrInst*, Value*> GEPVectorIdx; + std::vector<Value*> WorkList; + for (User *AllocaUser : Alloca->users()) { + GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(AllocaUser); + if (!GEP) { + if (!canVectorizeInst(cast<Instruction>(AllocaUser))) + return false; + + WorkList.push_back(AllocaUser); + continue; + } + + Value *Index = GEPToVectorIndex(GEP); + + // If we can't compute a vector index from this GEP, then we can't + // promote this alloca to vector. 
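The GEP restriction above (exactly one leading zero index plus one variable index) means only the simplest private arrays qualify; in source terms, roughly this shape (an editorial example; the bail-out for anything else follows below):

// A small fixed-length private array with dynamic indexing: per the
// rewrite loop that follows, the alloca becomes a <4 x float> value,
// stores become insertelement and loads become extractelement.
static float promotableShape(int I, int J) {
  float A[4];       // [4 x float] alloca -> <4 x float>
  A[I & 3] = 1.0f;  // store -> insertelement
  return A[J & 3];  // load  -> extractelement
}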
+ if (!Index) { + DEBUG(dbgs() << " Cannot compute vector index for GEP " << *GEP << '\n'); + return false; + } + + GEPVectorIdx[GEP] = Index; + for (User *GEPUser : AllocaUser->users()) { + if (!canVectorizeInst(cast<Instruction>(GEPUser))) + return false; + + WorkList.push_back(GEPUser); + } + } + + VectorType *VectorTy = arrayTypeToVecType(AllocaTy); + + DEBUG(dbgs() << " Converting alloca to vector " + << *AllocaTy << " -> " << *VectorTy << '\n'); + + for (std::vector<Value*>::iterator I = WorkList.begin(), + E = WorkList.end(); I != E; ++I) { + Instruction *Inst = cast<Instruction>(*I); + IRBuilder<> Builder(Inst); + switch (Inst->getOpcode()) { + case Instruction::Load: { + Value *Ptr = Inst->getOperand(0); + Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); + Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0)); + Value *VecValue = Builder.CreateLoad(BitCast); + Value *ExtractElement = Builder.CreateExtractElement(VecValue, Index); + Inst->replaceAllUsesWith(ExtractElement); + Inst->eraseFromParent(); + break; + } + case Instruction::Store: { + Value *Ptr = Inst->getOperand(1); + Value *Index = calculateVectorIndex(Ptr, GEPVectorIdx); + Value *BitCast = Builder.CreateBitCast(Alloca, VectorTy->getPointerTo(0)); + Value *VecValue = Builder.CreateLoad(BitCast); + Value *NewVecValue = Builder.CreateInsertElement(VecValue, + Inst->getOperand(0), + Index); + Builder.CreateStore(NewVecValue, BitCast); + Inst->eraseFromParent(); + break; + } + case Instruction::BitCast: + case Instruction::AddrSpaceCast: + break; + + default: + Inst->dump(); + llvm_unreachable("Inconsistency in instructions promotable to vector"); + } + } + return true; +} + +static void collectUsesWithPtrTypes(Value *Val, std::vector<Value*> &WorkList) { + for (User *User : Val->users()) { + if(std::find(WorkList.begin(), WorkList.end(), User) != WorkList.end()) + continue; + if (isa<CallInst>(User)) { + WorkList.push_back(User); + continue; + } + if (!User->getType()->isPointerTy()) + continue; + WorkList.push_back(User); + collectUsesWithPtrTypes(User, WorkList); + } +} + +void AMDGPUPromoteAlloca::visitAlloca(AllocaInst &I) { + IRBuilder<> Builder(&I); + + // First try to replace the alloca with a vector + Type *AllocaTy = I.getAllocatedType(); + + DEBUG(dbgs() << "Trying to promote " << I << '\n'); + + if (tryPromoteAllocaToVector(&I)) + return; + + DEBUG(dbgs() << " alloca is not a candidate for vectorization.\n"); + + // FIXME: This is the maximum work group size. We should try to get + // value from the reqd_work_group_size function attribute if it is + // available. 
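To make the size check below concrete (illustrative numbers): with the assumed maximum work-group size of 256, promoting a per-thread float A[64] (256 bytes) would cost 256 * 256 = 65536 bytes of local memory, so it is rejected on any part with less than 64 KiB still available at that point.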
+ unsigned WorkGroupSize = 256; + int AllocaSize = WorkGroupSize * + Mod->getDataLayout()->getTypeAllocSize(AllocaTy); + + if (AllocaSize > LocalMemAvailable) { + DEBUG(dbgs() << " Not enough local memory to promote alloca.\n"); + return; + } + + DEBUG(dbgs() << "Promoting alloca to local memory\n"); + LocalMemAvailable -= AllocaSize; + + GlobalVariable *GV = new GlobalVariable( + *Mod, ArrayType::get(I.getAllocatedType(), 256), false, + GlobalValue::ExternalLinkage, 0, I.getName(), 0, + GlobalVariable::NotThreadLocal, AMDGPUAS::LOCAL_ADDRESS); + + FunctionType *FTy = FunctionType::get( + Type::getInt32Ty(Mod->getContext()), false); + AttributeSet AttrSet; + AttrSet.addAttribute(Mod->getContext(), 0, Attribute::ReadNone); + + Value *ReadLocalSizeY = Mod->getOrInsertFunction( + "llvm.r600.read.local.size.y", FTy, AttrSet); + Value *ReadLocalSizeZ = Mod->getOrInsertFunction( + "llvm.r600.read.local.size.z", FTy, AttrSet); + Value *ReadTIDIGX = Mod->getOrInsertFunction( + "llvm.r600.read.tidig.x", FTy, AttrSet); + Value *ReadTIDIGY = Mod->getOrInsertFunction( + "llvm.r600.read.tidig.y", FTy, AttrSet); + Value *ReadTIDIGZ = Mod->getOrInsertFunction( + "llvm.r600.read.tidig.z", FTy, AttrSet); + + + Value *TCntY = Builder.CreateCall(ReadLocalSizeY); + Value *TCntZ = Builder.CreateCall(ReadLocalSizeZ); + Value *TIdX = Builder.CreateCall(ReadTIDIGX); + Value *TIdY = Builder.CreateCall(ReadTIDIGY); + Value *TIdZ = Builder.CreateCall(ReadTIDIGZ); + + Value *Tmp0 = Builder.CreateMul(TCntY, TCntZ); + Tmp0 = Builder.CreateMul(Tmp0, TIdX); + Value *Tmp1 = Builder.CreateMul(TIdY, TCntZ); + Value *TID = Builder.CreateAdd(Tmp0, Tmp1); + TID = Builder.CreateAdd(TID, TIdZ); + + std::vector<Value*> Indices; + Indices.push_back(Constant::getNullValue(Type::getInt32Ty(Mod->getContext()))); + Indices.push_back(TID); + + Value *Offset = Builder.CreateGEP(GV, Indices); + I.mutateType(Offset->getType()); + I.replaceAllUsesWith(Offset); + I.eraseFromParent(); + + std::vector<Value*> WorkList; + + collectUsesWithPtrTypes(Offset, WorkList); + + for (std::vector<Value*>::iterator i = WorkList.begin(), + e = WorkList.end(); i != e; ++i) { + Value *V = *i; + CallInst *Call = dyn_cast<CallInst>(V); + if (!Call) { + Type *EltTy = V->getType()->getPointerElementType(); + PointerType *NewTy = PointerType::get(EltTy, AMDGPUAS::LOCAL_ADDRESS); + V->mutateType(NewTy); + continue; + } + + IntrinsicInst *Intr = dyn_cast<IntrinsicInst>(Call); + if (!Intr) { + std::vector<Type*> ArgTypes; + for (unsigned ArgIdx = 0, ArgEnd = Call->getNumArgOperands(); + ArgIdx != ArgEnd; ++ArgIdx) { + ArgTypes.push_back(Call->getArgOperand(ArgIdx)->getType()); + } + Function *F = Call->getCalledFunction(); + FunctionType *NewType = FunctionType::get(Call->getType(), ArgTypes, + F->isVarArg()); + Constant *C = Mod->getOrInsertFunction(StringRef(F->getName().str() + ".local"), NewType, + F->getAttributes()); + Function *NewF = cast<Function>(C); + Call->setCalledFunction(NewF); + continue; + } + + Builder.SetInsertPoint(Intr); + switch (Intr->getIntrinsicID()) { + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + // These intrinsics are for address space 0 only + Intr->eraseFromParent(); + continue; + case Intrinsic::memcpy: { + MemCpyInst *MemCpy = cast<MemCpyInst>(Intr); + Builder.CreateMemCpy(MemCpy->getRawDest(), MemCpy->getRawSource(), + MemCpy->getLength(), MemCpy->getAlignment(), + MemCpy->isVolatile()); + Intr->eraseFromParent(); + continue; + } + case Intrinsic::memset: { + MemSetInst *MemSet = cast<MemSetInst>(Intr); + 
Builder.CreateMemSet(MemSet->getRawDest(), MemSet->getValue(), + MemSet->getLength(), MemSet->getAlignment(), + MemSet->isVolatile()); + Intr->eraseFromParent(); + continue; + } + default: + Intr->dump(); + llvm_unreachable("Don't know how to promote alloca intrinsic use."); + } + } +} + +FunctionPass *llvm::createAMDGPUPromoteAlloca(const AMDGPUSubtarget &ST) { + return new AMDGPUPromoteAlloca(ST); +} diff --git a/lib/Target/R600/AMDGPURegisterInfo.cpp b/lib/Target/R600/AMDGPURegisterInfo.cpp index 19927fa..3433280 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.cpp +++ b/lib/Target/R600/AMDGPURegisterInfo.cpp @@ -17,9 +17,9 @@ using namespace llvm; -AMDGPURegisterInfo::AMDGPURegisterInfo(TargetMachine &tm) +AMDGPURegisterInfo::AMDGPURegisterInfo(const AMDGPUSubtarget &st) : AMDGPUGenRegisterInfo(0), - TM(tm) + ST(st) { } //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/AMDGPURegisterInfo.h b/lib/Target/R600/AMDGPURegisterInfo.h index a7cba0d..4731595 100644 --- a/lib/Target/R600/AMDGPURegisterInfo.h +++ b/lib/Target/R600/AMDGPURegisterInfo.h @@ -25,27 +25,19 @@ namespace llvm { -class AMDGPUTargetMachine; +class AMDGPUSubtarget; class TargetInstrInfo; struct AMDGPURegisterInfo : public AMDGPUGenRegisterInfo { - TargetMachine &TM; static const MCPhysReg CalleeSavedReg; + const AMDGPUSubtarget &ST; - AMDGPURegisterInfo(TargetMachine &tm); + AMDGPURegisterInfo(const AMDGPUSubtarget &st); BitVector getReservedRegs(const MachineFunction &MF) const override { assert(!"Unimplemented"); return BitVector(); } - /// \param RC is an AMDIL reg class. - /// - /// \returns The ISA reg class that is equivalent to \p RC. - virtual const TargetRegisterClass * getISARegClass( - const TargetRegisterClass * RC) const { - assert(!"Unimplemented"); return nullptr; - } - virtual const TargetRegisterClass* getCFGStructurizerRegClass(MVT VT) const { assert(!"Unimplemented"); return nullptr; } diff --git a/lib/Target/R600/AMDGPUSubtarget.cpp b/lib/Target/R600/AMDGPUSubtarget.cpp index f3b9932..b83c290 100644 --- a/lib/Target/R600/AMDGPUSubtarget.cpp +++ b/lib/Target/R600/AMDGPUSubtarget.cpp @@ -13,6 +13,8 @@ //===----------------------------------------------------------------------===// #include "AMDGPUSubtarget.h" +#include "R600InstrInfo.h" +#include "SIInstrInfo.h" using namespace llvm; @@ -23,90 +25,42 @@ using namespace llvm; #define GET_SUBTARGETINFO_CTOR #include "AMDGPUGenSubtargetInfo.inc" -AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS) : - AMDGPUGenSubtargetInfo(TT, CPU, FS), DumpCode(false) { - InstrItins = getInstrItineraryForCPU(CPU); - - // Default card - StringRef GPU = CPU; - Is64bit = false; - HasVertexCache = false; - TexVTXClauseSize = 0; - Gen = AMDGPUSubtarget::R600; - FP64 = false; - CaymanISA = false; - EnableIRStructurizer = true; - EnableIfCvt = true; - WavefrontSize = 0; - CFALUBug = false; +AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS) : + AMDGPUGenSubtargetInfo(TT, GPU, FS), + DevName(GPU), + Is64bit(false), + DumpCode(false), + R600ALUInst(false), + HasVertexCache(false), + TexVTXClauseSize(0), + Gen(AMDGPUSubtarget::R600), + FP64(false), + CaymanISA(false), + EnableIRStructurizer(true), + EnableIfCvt(true), + WavefrontSize(0), + CFALUBug(false), + LocalMemorySize(0), + InstrItins(getInstrItineraryForCPU(GPU)) { ParseSubtargetFeatures(GPU, FS); - DevName = GPU; -} -bool -AMDGPUSubtarget::is64bit() const { - return Is64bit; -} -bool -AMDGPUSubtarget::hasVertexCache() 
const { - return HasVertexCache; -} -short -AMDGPUSubtarget::getTexVTXClauseSize() const { - return TexVTXClauseSize; -} -enum AMDGPUSubtarget::Generation -AMDGPUSubtarget::getGeneration() const { - return Gen; -} -bool -AMDGPUSubtarget::hasHWFP64() const { - return FP64; -} -bool -AMDGPUSubtarget::hasCaymanISA() const { - return CaymanISA; -} -bool -AMDGPUSubtarget::IsIRStructurizerEnabled() const { - return EnableIRStructurizer; -} -bool -AMDGPUSubtarget::isIfCvtEnabled() const { - return EnableIfCvt; -} -unsigned -AMDGPUSubtarget::getWavefrontSize() const { - return WavefrontSize; + if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { + InstrInfo.reset(new R600InstrInfo(*this)); + } else { + InstrInfo.reset(new SIInstrInfo(*this)); + } } -unsigned -AMDGPUSubtarget::getStackEntrySize() const { + +unsigned AMDGPUSubtarget::getStackEntrySize() const { assert(getGeneration() <= NORTHERN_ISLANDS); switch(getWavefrontSize()) { case 16: return 8; case 32: - if (hasCaymanISA()) - return 4; - else - return 8; + return hasCaymanISA() ? 4 : 8; case 64: return 4; default: llvm_unreachable("Illegal wavefront size."); } } -bool -AMDGPUSubtarget::hasCFAluBug() const { - assert(getGeneration() <= NORTHERN_ISLANDS); - return CFALUBug; -} -bool -AMDGPUSubtarget::isTargetELF() const { - return false; -} - -std::string -AMDGPUSubtarget::getDeviceName() const { - return DevName; -} diff --git a/lib/Target/R600/AMDGPUSubtarget.h b/lib/Target/R600/AMDGPUSubtarget.h index 1b041d6..0c388b3 100644 --- a/lib/Target/R600/AMDGPUSubtarget.h +++ b/lib/Target/R600/AMDGPUSubtarget.h @@ -15,6 +15,7 @@ #ifndef AMDGPUSUBTARGET_H #define AMDGPUSUBTARGET_H #include "AMDGPU.h" +#include "AMDGPUInstrInfo.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -27,6 +28,9 @@ namespace llvm { class AMDGPUSubtarget : public AMDGPUGenSubtargetInfo { + + std::unique_ptr<AMDGPUInstrInfo> InstrInfo; + public: enum Generation { R600 = 0, @@ -40,42 +44,78 @@ public: private: std::string DevName; bool Is64bit; - bool Is32on64bit; bool DumpCode; bool R600ALUInst; bool HasVertexCache; short TexVTXClauseSize; - enum Generation Gen; + Generation Gen; bool FP64; bool CaymanISA; bool EnableIRStructurizer; bool EnableIfCvt; unsigned WavefrontSize; bool CFALUBug; + int LocalMemorySize; InstrItineraryData InstrItins; public: AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS); - const InstrItineraryData &getInstrItineraryData() const { return InstrItins; } + const AMDGPUInstrInfo *getInstrInfo() const { + return InstrInfo.get(); + } + + const InstrItineraryData &getInstrItineraryData() const { + return InstrItins; + } + void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - bool is64bit() const; - bool hasVertexCache() const; - short getTexVTXClauseSize() const; - enum Generation getGeneration() const; - bool hasHWFP64() const; - bool hasCaymanISA() const; + bool is64bit() const { + return Is64bit; + } + + bool hasVertexCache() const { + return HasVertexCache; + } + + short getTexVTXClauseSize() const { + return TexVTXClauseSize; + } + + Generation getGeneration() const { + return Gen; + } + + bool hasHWFP64() const { + return FP64; + } + + bool hasCaymanISA() const { + return CaymanISA; + } bool hasBFE() const { return (getGeneration() >= EVERGREEN); } + bool hasBFI() const { + return (getGeneration() >= EVERGREEN); + } + bool hasBFM() const { return hasBFE(); } + bool hasBCNT(unsigned Size) const { + if (Size == 32) + return (getGeneration() >= EVERGREEN); + + 
assert(Size == 64); + return (getGeneration() >= SOUTHERN_ISLANDS); + } + bool hasMulU24() const { return (getGeneration() >= EVERGREEN); } @@ -85,22 +125,48 @@ public: hasCaymanISA()); } - bool IsIRStructurizerEnabled() const; - bool isIfCvtEnabled() const; - unsigned getWavefrontSize() const; + bool IsIRStructurizerEnabled() const { + return EnableIRStructurizer; + } + + bool isIfCvtEnabled() const { + return EnableIfCvt; + } + + unsigned getWavefrontSize() const { + return WavefrontSize; + } + unsigned getStackEntrySize() const; - bool hasCFAluBug() const; + + bool hasCFAluBug() const { + assert(getGeneration() <= NORTHERN_ISLANDS); + return CFALUBug; + } + + int getLocalMemorySize() const { + return LocalMemorySize; + } bool enableMachineScheduler() const override { return getGeneration() <= NORTHERN_ISLANDS; } // Helper functions to simplify if statements - bool isTargetELF() const; - std::string getDeviceName() const; - bool dumpCode() const { return DumpCode; } - bool r600ALUEncoding() const { return R600ALUInst; } + bool isTargetELF() const { + return false; + } + StringRef getDeviceName() const { + return DevName; + } + + bool dumpCode() const { + return DumpCode; + } + bool r600ALUEncoding() const { + return R600ALUInst; + } }; } // End namespace llvm diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp index 174fdca..8aab944 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.cpp +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp @@ -80,10 +80,8 @@ AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, StringRef TT, InstrItins(&Subtarget.getInstrItineraryData()) { // TLInfo uses InstrInfo so it must be initialized after. if (Subtarget.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { - InstrInfo.reset(new R600InstrInfo(*this)); TLInfo.reset(new R600TargetLowering(*this)); } else { - InstrInfo.reset(new SIInstrInfo(*this)); TLInfo.reset(new SITargetLowering(*this)); } setRequiresStructuredCFG(true); @@ -111,6 +109,7 @@ public: return nullptr; } + virtual void addCodeGenPrepare(); bool addPreISel() override; bool addInstSelector() override; bool addPreRegAlloc() override; @@ -136,6 +135,13 @@ void AMDGPUTargetMachine::addAnalysisPasses(PassManagerBase &PM) { PM.add(createAMDGPUTargetTransformInfoPass(this)); } +void AMDGPUPassConfig::addCodeGenPrepare() { + const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); + addPass(createAMDGPUPromoteAlloca(ST)); + addPass(createSROAPass()); + TargetPassConfig::addCodeGenPrepare(); +} + bool AMDGPUPassConfig::addPreISel() { const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); @@ -159,7 +165,6 @@ bool AMDGPUPassConfig::addInstSelector() { } bool AMDGPUPassConfig::addPreRegAlloc() { - addPass(createAMDGPUConvertToISAPass(*TM)); const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) { @@ -169,6 +174,8 @@ bool AMDGPUPassConfig::addPreRegAlloc() { // SIFixSGPRCopies can generate a lot of duplicate instructions, // so we need to run MachineCSE afterwards. 
addPass(&MachineCSEID); + initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry()); + insertPass(&RegisterCoalescerID, &SIFixSGPRLiveRangesID); } return false; } diff --git a/lib/Target/R600/AMDGPUTargetMachine.h b/lib/Target/R600/AMDGPUTargetMachine.h index 1287e13..3bb15be 100644 --- a/lib/Target/R600/AMDGPUTargetMachine.h +++ b/lib/Target/R600/AMDGPUTargetMachine.h @@ -17,8 +17,8 @@ #include "AMDGPUFrameLowering.h" #include "AMDGPUInstrInfo.h" +#include "AMDGPUIntrinsicInfo.h" #include "AMDGPUSubtarget.h" -#include "AMDILIntrinsicInfo.h" #include "R600ISelLowering.h" #include "llvm/IR/DataLayout.h" @@ -30,7 +30,6 @@ class AMDGPUTargetMachine : public LLVMTargetMachine { const DataLayout Layout; AMDGPUFrameLowering FrameLowering; AMDGPUIntrinsicInfo IntrinsicInfo; - std::unique_ptr<AMDGPUInstrInfo> InstrInfo; std::unique_ptr<AMDGPUTargetLowering> TLInfo; const InstrItineraryData *InstrItins; @@ -46,13 +45,13 @@ public: return &IntrinsicInfo; } const AMDGPUInstrInfo *getInstrInfo() const override { - return InstrInfo.get(); + return getSubtargetImpl()->getInstrInfo(); } const AMDGPUSubtarget *getSubtargetImpl() const override { return &Subtarget; } const AMDGPURegisterInfo *getRegisterInfo() const override { - return &InstrInfo->getRegisterInfo(); + return &getInstrInfo()->getRegisterInfo(); } AMDGPUTargetLowering *getTargetLowering() const override { return TLInfo.get(); diff --git a/lib/Target/R600/AMDILBase.td b/lib/Target/R600/AMDILBase.td deleted file mode 100644 index 5dcd478..0000000 --- a/lib/Target/R600/AMDILBase.td +++ /dev/null @@ -1,25 +0,0 @@ -//===- AMDIL.td - AMDIL Target Machine -------------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// Target-independent interfaces which we are implementing -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -// Dummy Instruction itineraries for pseudo instructions -def ALU_NULL : FuncUnit; -def NullALU : InstrItinClass; - -//===----------------------------------------------------------------------===// -// Register File, Calling Conv, Instruction Descriptions -//===----------------------------------------------------------------------===// - - -include "AMDILRegisterInfo.td" -include "AMDILInstrInfo.td" - diff --git a/lib/Target/R600/AMDILISelLowering.cpp b/lib/Target/R600/AMDILISelLowering.cpp deleted file mode 100644 index 7cea803..0000000 --- a/lib/Target/R600/AMDILISelLowering.cpp +++ /dev/null @@ -1,560 +0,0 @@ -//===-- AMDILISelLowering.cpp - AMDIL DAG Lowering Implementation ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -/// \file -/// \brief TargetLowering functions borrowed from AMDIL. 
-// -//===----------------------------------------------------------------------===// - -#include "AMDGPUISelLowering.h" -#include "AMDGPURegisterInfo.h" -#include "AMDGPUSubtarget.h" -#include "AMDILIntrinsicInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/IR/CallingConv.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Intrinsics.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetOptions.h" - -using namespace llvm; -//===----------------------------------------------------------------------===// -// TargetLowering Implementation Help Functions End -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// TargetLowering Class Implementation Begins -//===----------------------------------------------------------------------===// -void AMDGPUTargetLowering::InitAMDILLowering() { - static const MVT::SimpleValueType types[] = { - MVT::i8, - MVT::i16, - MVT::i32, - MVT::f32, - MVT::f64, - MVT::i64, - MVT::v2i8, - MVT::v4i8, - MVT::v2i16, - MVT::v4i16, - MVT::v4f32, - MVT::v4i32, - MVT::v2f32, - MVT::v2i32, - MVT::v2f64, - MVT::v2i64 - }; - - static const MVT::SimpleValueType IntTypes[] = { - MVT::i8, - MVT::i16, - MVT::i32, - MVT::i64 - }; - - static const MVT::SimpleValueType FloatTypes[] = { - MVT::f32, - MVT::f64 - }; - - static const MVT::SimpleValueType VectorTypes[] = { - MVT::v2i8, - MVT::v4i8, - MVT::v2i16, - MVT::v4i16, - MVT::v4f32, - MVT::v4i32, - MVT::v2f32, - MVT::v2i32, - MVT::v2f64, - MVT::v2i64 - }; - - const AMDGPUSubtarget &STM = getTargetMachine().getSubtarget<AMDGPUSubtarget>(); - // These are the current register classes that are - // supported - - for (MVT VT : types) { - setOperationAction(ISD::SUBE, VT, Expand); - setOperationAction(ISD::SUBC, VT, Expand); - setOperationAction(ISD::ADDE, VT, Expand); - setOperationAction(ISD::ADDC, VT, Expand); - setOperationAction(ISD::BRCOND, VT, Custom); - setOperationAction(ISD::BR_JT, VT, Expand); - setOperationAction(ISD::BRIND, VT, Expand); - // TODO: Implement custom UREM/SREM routines - setOperationAction(ISD::SREM, VT, Expand); - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - setOperationAction(ISD::UMUL_LOHI, VT, Expand); - if (VT != MVT::i64 && VT != MVT::v2i64) { - setOperationAction(ISD::SDIV, VT, Custom); - } - } - for (MVT VT : FloatTypes) { - // IL does not have these operations for floating point types - setOperationAction(ISD::FP_ROUND_INREG, VT, Expand); - setOperationAction(ISD::SETOLT, VT, Expand); - setOperationAction(ISD::SETOGE, VT, Expand); - setOperationAction(ISD::SETOGT, VT, Expand); - setOperationAction(ISD::SETOLE, VT, Expand); - setOperationAction(ISD::SETULT, VT, Expand); - setOperationAction(ISD::SETUGE, VT, Expand); - setOperationAction(ISD::SETUGT, VT, Expand); - setOperationAction(ISD::SETULE, VT, Expand); - } - - for (MVT VT : IntTypes) { - // GPU also does not have divrem function for signed or unsigned - setOperationAction(ISD::SDIVREM, VT, Expand); - - // GPU does not have [S|U]MUL_LOHI functions as a single instruction - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - setOperationAction(ISD::UMUL_LOHI, VT, Expand); - - 
setOperationAction(ISD::BSWAP, VT, Expand); - - // GPU doesn't have any counting operators - setOperationAction(ISD::CTPOP, VT, Expand); - setOperationAction(ISD::CTTZ, VT, Expand); - setOperationAction(ISD::CTLZ, VT, Expand); - } - - for (MVT VT : VectorTypes) { - setOperationAction(ISD::VECTOR_SHUFFLE, VT, Expand); - setOperationAction(ISD::SDIVREM, VT, Expand); - setOperationAction(ISD::SMUL_LOHI, VT, Expand); - // setOperationAction(ISD::VSETCC, VT, Expand); - setOperationAction(ISD::SELECT_CC, VT, Expand); - - } - setOperationAction(ISD::MULHU, MVT::i64, Expand); - setOperationAction(ISD::MULHU, MVT::v2i64, Expand); - setOperationAction(ISD::MULHS, MVT::i64, Expand); - setOperationAction(ISD::MULHS, MVT::v2i64, Expand); - setOperationAction(ISD::ADD, MVT::v2i64, Expand); - setOperationAction(ISD::SREM, MVT::v2i64, Expand); - setOperationAction(ISD::Constant , MVT::i64 , Legal); - setOperationAction(ISD::SDIV, MVT::v2i64, Expand); - setOperationAction(ISD::TRUNCATE, MVT::v2i64, Expand); - setOperationAction(ISD::SIGN_EXTEND, MVT::v2i64, Expand); - setOperationAction(ISD::ZERO_EXTEND, MVT::v2i64, Expand); - setOperationAction(ISD::ANY_EXTEND, MVT::v2i64, Expand); - if (STM.hasHWFP64()) { - // we support loading/storing v2f64 but not operations on the type - setOperationAction(ISD::FADD, MVT::v2f64, Expand); - setOperationAction(ISD::FSUB, MVT::v2f64, Expand); - setOperationAction(ISD::FMUL, MVT::v2f64, Expand); - setOperationAction(ISD::FP_ROUND_INREG, MVT::v2f64, Expand); - setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); - setOperationAction(ISD::ConstantFP , MVT::f64 , Legal); - // We want to expand vector conversions into their scalar - // counterparts. - setOperationAction(ISD::TRUNCATE, MVT::v2f64, Expand); - setOperationAction(ISD::SIGN_EXTEND, MVT::v2f64, Expand); - setOperationAction(ISD::ZERO_EXTEND, MVT::v2f64, Expand); - setOperationAction(ISD::ANY_EXTEND, MVT::v2f64, Expand); - setOperationAction(ISD::FABS, MVT::f64, Expand); - setOperationAction(ISD::FABS, MVT::v2f64, Expand); - } - // TODO: Fix the UDIV24 algorithm so it works for these - // types correctly. This needs vector comparisons - // for this to work correctly. - setOperationAction(ISD::UDIV, MVT::v2i8, Expand); - setOperationAction(ISD::UDIV, MVT::v4i8, Expand); - setOperationAction(ISD::UDIV, MVT::v2i16, Expand); - setOperationAction(ISD::UDIV, MVT::v4i16, Expand); - setOperationAction(ISD::SUBC, MVT::Other, Expand); - setOperationAction(ISD::ADDE, MVT::Other, Expand); - setOperationAction(ISD::ADDC, MVT::Other, Expand); - setOperationAction(ISD::BRCOND, MVT::Other, Custom); - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRIND, MVT::Other, Expand); - - - // Use the default implementation. 
- setOperationAction(ISD::ConstantFP , MVT::f32 , Legal); - setOperationAction(ISD::Constant , MVT::i32 , Legal); - - setSchedulingPreference(Sched::RegPressure); - setPow2DivIsCheap(false); - setSelectIsExpensive(true); - setJumpIsExpensive(true); - - MaxStoresPerMemcpy = 4096; - MaxStoresPerMemmove = 4096; - MaxStoresPerMemset = 4096; - -} - -bool -AMDGPUTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, - const CallInst &I, unsigned Intrinsic) const { - return false; -} - -// The backend supports 32 and 64 bit floating point immediates -bool -AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { - if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 - || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { - return true; - } else { - return false; - } -} - -bool -AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const { - if (VT.getScalarType().getSimpleVT().SimpleTy == MVT::f32 - || VT.getScalarType().getSimpleVT().SimpleTy == MVT::f64) { - return false; - } else { - return true; - } -} - - -// isMaskedValueZeroForTargetNode - Return true if 'Op & Mask' is known to -// be zero. Op is expected to be a target specific node. Used by DAG -// combiner. - -//===----------------------------------------------------------------------===// -// Other Lowering Hooks -//===----------------------------------------------------------------------===// - -SDValue -AMDGPUTargetLowering::LowerSDIV(SDValue Op, SelectionDAG &DAG) const { - EVT OVT = Op.getValueType(); - SDValue DST; - if (OVT.getScalarType() == MVT::i64) { - DST = LowerSDIV64(Op, DAG); - } else if (OVT.getScalarType() == MVT::i32) { - DST = LowerSDIV32(Op, DAG); - } else if (OVT.getScalarType() == MVT::i16 - || OVT.getScalarType() == MVT::i8) { - DST = LowerSDIV24(Op, DAG); - } else { - DST = SDValue(Op.getNode(), 0); - } - return DST; -} - -SDValue -AMDGPUTargetLowering::LowerSREM(SDValue Op, SelectionDAG &DAG) const { - EVT OVT = Op.getValueType(); - SDValue DST; - if (OVT.getScalarType() == MVT::i64) { - DST = LowerSREM64(Op, DAG); - } else if (OVT.getScalarType() == MVT::i32) { - DST = LowerSREM32(Op, DAG); - } else if (OVT.getScalarType() == MVT::i16) { - DST = LowerSREM16(Op, DAG); - } else if (OVT.getScalarType() == MVT::i8) { - DST = LowerSREM8(Op, DAG); - } else { - DST = SDValue(Op.getNode(), 0); - } - return DST; -} - -EVT -AMDGPUTargetLowering::genIntType(uint32_t size, uint32_t numEle) const { - int iSize = (size * numEle); - int vEle = (iSize >> ((size == 64) ? 
6 : 5)); - if (!vEle) { - vEle = 1; - } - if (size == 64) { - if (vEle == 1) { - return EVT(MVT::i64); - } else { - return EVT(MVT::getVectorVT(MVT::i64, vEle)); - } - } else { - if (vEle == 1) { - return EVT(MVT::i32); - } else { - return EVT(MVT::getVectorVT(MVT::i32, vEle)); - } - } -} - -SDValue -AMDGPUTargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); - SDValue Cond = Op.getOperand(1); - SDValue Jump = Op.getOperand(2); - SDValue Result; - Result = DAG.getNode( - AMDGPUISD::BRANCH_COND, - SDLoc(Op), - Op.getValueType(), - Chain, Jump, Cond); - return Result; -} - -SDValue -AMDGPUTargetLowering::LowerSDIV24(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT OVT = Op.getValueType(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - MVT INTTY; - MVT FLTTY; - if (!OVT.isVector()) { - INTTY = MVT::i32; - FLTTY = MVT::f32; - } else if (OVT.getVectorNumElements() == 2) { - INTTY = MVT::v2i32; - FLTTY = MVT::v2f32; - } else if (OVT.getVectorNumElements() == 4) { - INTTY = MVT::v4i32; - FLTTY = MVT::v4f32; - } - unsigned bitsize = OVT.getScalarType().getSizeInBits(); - // char|short jq = ia ^ ib; - SDValue jq = DAG.getNode(ISD::XOR, DL, OVT, LHS, RHS); - - // jq = jq >> (bitsize - 2) - jq = DAG.getNode(ISD::SRA, DL, OVT, jq, DAG.getConstant(bitsize - 2, OVT)); - - // jq = jq | 0x1 - jq = DAG.getNode(ISD::OR, DL, OVT, jq, DAG.getConstant(1, OVT)); - - // jq = (int)jq - jq = DAG.getSExtOrTrunc(jq, DL, INTTY); - - // int ia = (int)LHS; - SDValue ia = DAG.getSExtOrTrunc(LHS, DL, INTTY); - - // int ib, (int)RHS; - SDValue ib = DAG.getSExtOrTrunc(RHS, DL, INTTY); - - // float fa = (float)ia; - SDValue fa = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ia); - - // float fb = (float)ib; - SDValue fb = DAG.getNode(ISD::SINT_TO_FP, DL, FLTTY, ib); - - // float fq = native_divide(fa, fb); - SDValue fq = DAG.getNode(AMDGPUISD::DIV_INF, DL, FLTTY, fa, fb); - - // fq = trunc(fq); - fq = DAG.getNode(ISD::FTRUNC, DL, FLTTY, fq); - - // float fqneg = -fq; - SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FLTTY, fq); - - // float fr = mad(fqneg, fb, fa); - SDValue fr = DAG.getNode(ISD::FADD, DL, FLTTY, - DAG.getNode(ISD::MUL, DL, FLTTY, fqneg, fb), fa); - - // int iq = (int)fq; - SDValue iq = DAG.getNode(ISD::FP_TO_SINT, DL, INTTY, fq); - - // fr = fabs(fr); - fr = DAG.getNode(ISD::FABS, DL, FLTTY, fr); - - // fb = fabs(fb); - fb = DAG.getNode(ISD::FABS, DL, FLTTY, fb); - - // int cv = fr >= fb; - SDValue cv; - if (INTTY == MVT::i32) { - cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); - } else { - cv = DAG.getSetCC(DL, INTTY, fr, fb, ISD::SETOGE); - } - // jq = (cv ? jq : 0); - jq = DAG.getNode(ISD::SELECT, DL, OVT, cv, jq, - DAG.getConstant(0, OVT)); - // dst = iq + jq; - iq = DAG.getSExtOrTrunc(iq, DL, OVT); - iq = DAG.getNode(ISD::ADD, DL, OVT, iq, jq); - return iq; -} - -SDValue -AMDGPUTargetLowering::LowerSDIV32(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT OVT = Op.getValueType(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - // The LowerSDIV32 function generates equivalent to the following IL. 
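// As a reading aid for the listing below, here is the same sign-fixup trick
// in plain C++ (a sketch only; it ignores INT_MIN / -1, which is undefined
// for signed division anyway). With s = all-ones for a negative value and 0
// otherwise, (x + s) ^ s computes |x|, and the identical pair of operations
// restores the sign at the end.
//
//   #include <cstdint>
//   int32_t sdiv32_via_udiv(int32_t lhs, int32_t rhs) {
//     uint32_t sl = lhs < 0 ? ~0u : 0u;           // ilt r10, r0, 0
//     uint32_t sr = rhs < 0 ? ~0u : 0u;           // ilt r11, r1, 0
//     uint32_t ul = (uint32_t(lhs) + sl) ^ sl;    // |lhs|
//     uint32_t ur = (uint32_t(rhs) + sr) ^ sr;    // |rhs|
//     uint32_t q  = ul / ur;                      // udiv r0, r0, r1
//     uint32_t sq = sl ^ sr;                      // ixor r10, r10, r11
//     return int32_t((q + sq) ^ sq);              // iadd + ixor
//   }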
- // mov r0, LHS - // mov r1, RHS - // ilt r10, r0, 0 - // ilt r11, r1, 0 - // iadd r0, r0, r10 - // iadd r1, r1, r11 - // ixor r0, r0, r10 - // ixor r1, r1, r11 - // udiv r0, r0, r1 - // ixor r10, r10, r11 - // iadd r0, r0, r10 - // ixor DST, r0, r10 - - // mov r0, LHS - SDValue r0 = LHS; - - // mov r1, RHS - SDValue r1 = RHS; - - // ilt r10, r0, 0 - SDValue r10 = DAG.getSelectCC(DL, - r0, DAG.getConstant(0, OVT), - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - ISD::SETLT); - - // ilt r11, r1, 0 - SDValue r11 = DAG.getSelectCC(DL, - r1, DAG.getConstant(0, OVT), - DAG.getConstant(-1, MVT::i32), - DAG.getConstant(0, MVT::i32), - ISD::SETLT); - - // iadd r0, r0, r10 - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); - - // iadd r1, r1, r11 - r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); - - // ixor r0, r0, r10 - r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); - - // ixor r1, r1, r11 - r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); - - // udiv r0, r0, r1 - r0 = DAG.getNode(ISD::UDIV, DL, OVT, r0, r1); - - // ixor r10, r10, r11 - r10 = DAG.getNode(ISD::XOR, DL, OVT, r10, r11); - - // iadd r0, r0, r10 - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); - - // ixor DST, r0, r10 - SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); - return DST; -} - -SDValue -AMDGPUTargetLowering::LowerSDIV64(SDValue Op, SelectionDAG &DAG) const { - return SDValue(Op.getNode(), 0); -} - -SDValue -AMDGPUTargetLowering::LowerSREM8(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT OVT = Op.getValueType(); - MVT INTTY = MVT::i32; - if (OVT == MVT::v2i8) { - INTTY = MVT::v2i32; - } else if (OVT == MVT::v4i8) { - INTTY = MVT::v4i32; - } - SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); - SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); - LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); - LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); - return LHS; -} - -SDValue -AMDGPUTargetLowering::LowerSREM16(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT OVT = Op.getValueType(); - MVT INTTY = MVT::i32; - if (OVT == MVT::v2i16) { - INTTY = MVT::v2i32; - } else if (OVT == MVT::v4i16) { - INTTY = MVT::v4i32; - } - SDValue LHS = DAG.getSExtOrTrunc(Op.getOperand(0), DL, INTTY); - SDValue RHS = DAG.getSExtOrTrunc(Op.getOperand(1), DL, INTTY); - LHS = DAG.getNode(ISD::SREM, DL, INTTY, LHS, RHS); - LHS = DAG.getSExtOrTrunc(LHS, DL, OVT); - return LHS; -} - -SDValue -AMDGPUTargetLowering::LowerSREM32(SDValue Op, SelectionDAG &DAG) const { - SDLoc DL(Op); - EVT OVT = Op.getValueType(); - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - // The LowerSREM32 function generates equivalent to the following IL. 
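// Companion sketch for the remainder listing below: identical absolute-value
// setup, but the result takes the sign of the dividend only (note the final
// fixup uses r10 alone, never r11), matching C's % semantics. A sketch under
// the same caveats as the division example above:
//
//   int32_t srem32_via_urem(int32_t lhs, int32_t rhs) {
//     uint32_t sl = lhs < 0 ? ~0u : 0u;
//     uint32_t sr = rhs < 0 ? ~0u : 0u;
//     uint32_t ul = (uint32_t(lhs) + sl) ^ sl;
//     uint32_t ur = (uint32_t(rhs) + sr) ^ sr;
//     uint32_t r  = ul % ur;             // udiv + umul + sub in the listing
//     return int32_t((r + sl) ^ sl);     // dividend's sign only
//   }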
- // mov r0, LHS - // mov r1, RHS - // ilt r10, r0, 0 - // ilt r11, r1, 0 - // iadd r0, r0, r10 - // iadd r1, r1, r11 - // ixor r0, r0, r10 - // ixor r1, r1, r11 - // udiv r20, r0, r1 - // umul r20, r20, r1 - // sub r0, r0, r20 - // iadd r0, r0, r10 - // ixor DST, r0, r10 - - // mov r0, LHS - SDValue r0 = LHS; - - // mov r1, RHS - SDValue r1 = RHS; - - // ilt r10, r0, 0 - SDValue r10 = DAG.getSetCC(DL, OVT, r0, DAG.getConstant(0, OVT), ISD::SETLT); - - // ilt r11, r1, 0 - SDValue r11 = DAG.getSetCC(DL, OVT, r1, DAG.getConstant(0, OVT), ISD::SETLT); - - // iadd r0, r0, r10 - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); - - // iadd r1, r1, r11 - r1 = DAG.getNode(ISD::ADD, DL, OVT, r1, r11); - - // ixor r0, r0, r10 - r0 = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); - - // ixor r1, r1, r11 - r1 = DAG.getNode(ISD::XOR, DL, OVT, r1, r11); - - // udiv r20, r0, r1 - SDValue r20 = DAG.getNode(ISD::UREM, DL, OVT, r0, r1); - - // umul r20, r20, r1 - r20 = DAG.getNode(AMDGPUISD::UMUL, DL, OVT, r20, r1); - - // sub r0, r0, r20 - r0 = DAG.getNode(ISD::SUB, DL, OVT, r0, r20); - - // iadd r0, r0, r10 - r0 = DAG.getNode(ISD::ADD, DL, OVT, r0, r10); - - // ixor DST, r0, r10 - SDValue DST = DAG.getNode(ISD::XOR, DL, OVT, r0, r10); - return DST; -} - -SDValue -AMDGPUTargetLowering::LowerSREM64(SDValue Op, SelectionDAG &DAG) const { - return SDValue(Op.getNode(), 0); -} diff --git a/lib/Target/R600/AMDILInstrInfo.td b/lib/Target/R600/AMDILInstrInfo.td deleted file mode 100644 index 0f0c88d..0000000 --- a/lib/Target/R600/AMDILInstrInfo.td +++ /dev/null @@ -1,150 +0,0 @@ -//===------------ AMDILInstrInfo.td - AMDIL Target ------*-tablegen-*------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// This file describes the AMDIL instructions in TableGen format. 
-// -//===----------------------------------------------------------------------===// -//===--------------------------------------------------------------------===// -// Custom Operands -//===--------------------------------------------------------------------===// -def brtarget : Operand<OtherVT>; - -//===--------------------------------------------------------------------===// -// Custom Selection DAG Type Profiles -//===--------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Generic Profile Types -//===----------------------------------------------------------------------===// - -def SDTIL_GenBinaryOp : SDTypeProfile<1, 2, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2> - ]>; -def SDTIL_GenTernaryOp : SDTypeProfile<1, 3, [ - SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, SDTCisSameAs<2, 3> - ]>; -def SDTIL_GenVecBuild : SDTypeProfile<1, 1, [ - SDTCisEltOfVec<1, 0> - ]>; - -//===----------------------------------------------------------------------===// -// Flow Control Profile Types -//===----------------------------------------------------------------------===// -// Branch instruction where second and third are basic blocks -def SDTIL_BRCond : SDTypeProfile<0, 2, [ - SDTCisVT<0, OtherVT> - ]>; - -//===--------------------------------------------------------------------===// -// Custom Selection DAG Nodes -//===--------------------------------------------------------------------===// -//===----------------------------------------------------------------------===// -// Flow Control DAG Nodes -//===----------------------------------------------------------------------===// -def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChain]>; - -//===----------------------------------------------------------------------===// -// Call/Return DAG Nodes -//===----------------------------------------------------------------------===// -def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; - -//===--------------------------------------------------------------------===// -// Instructions -//===--------------------------------------------------------------------===// -// Floating point math functions -def IL_div_inf : SDNode<"AMDGPUISD::DIV_INF", SDTIL_GenBinaryOp>; - -//===----------------------------------------------------------------------===// -// Integer functions -//===----------------------------------------------------------------------===// -def IL_umul : SDNode<"AMDGPUISD::UMUL" , SDTIntBinOp, - [SDNPCommutative, SDNPAssociative]>; - -//===--------------------------------------------------------------------===// -// Custom Pattern DAG Nodes -//===--------------------------------------------------------------------===// -def global_store : PatFrag<(ops node:$val, node:$ptr), - (store node:$val, node:$ptr), [{ - return isGlobalStore(dyn_cast<StoreSDNode>(N)); -}]>; - -//===----------------------------------------------------------------------===// -// Load pattern fragments -//===----------------------------------------------------------------------===// -// Global address space loads -def global_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return isGlobalLoad(dyn_cast<LoadSDNode>(N)); -}]>; -// Constant address space loads -def constant_load : PatFrag<(ops node:$ptr), (load node:$ptr), [{ - return isConstantLoad(dyn_cast<LoadSDNode>(N), -1); -}]>; - -//===----------------------------------------------------------------------===// -// 
Complex addressing mode patterns -//===----------------------------------------------------------------------===// -def ADDR : ComplexPattern<i32, 2, "SelectADDR", [], []>; -def ADDRF : ComplexPattern<i32, 2, "SelectADDR", [frameindex], []>; -def ADDR64 : ComplexPattern<i64, 2, "SelectADDR64", [], []>; -def ADDR64F : ComplexPattern<i64, 2, "SelectADDR64", [frameindex], []>; - -//===----------------------------------------------------------------------===// -// Instruction format classes -//===----------------------------------------------------------------------===// -class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern> -: Instruction { - - let Namespace = "AMDGPU"; - dag OutOperandList = outs; - dag InOperandList = ins; - let Pattern = pattern; - let AsmString = !strconcat(asmstr, "\n"); - let isPseudo = 1; - let Itinerary = NullALU; - bit hasIEEEFlag = 0; - bit hasZeroOpFlag = 0; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; -} - -//===--------------------------------------------------------------------===// -// Multiclass Instruction formats -//===--------------------------------------------------------------------===// -// Multiclass that handles branch instructions -multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> { - def _i32 : ILFormat<(outs), - (ins brtarget:$target, rci:$src0), - "; i32 Pseudo branch instruction", - [(Op bb:$target, (i32 rci:$src0))]>; - def _f32 : ILFormat<(outs), - (ins brtarget:$target, rcf:$src0), - "; f32 Pseudo branch instruction", - [(Op bb:$target, (f32 rcf:$src0))]>; -} - -// Only scalar types should generate flow control -multiclass BranchInstr<string name> { - def _i32 : ILFormat<(outs), (ins GPRI32:$src), - !strconcat(name, " $src"), []>; - def _f32 : ILFormat<(outs), (ins GPRF32:$src), - !strconcat(name, " $src"), []>; -} -// Only scalar types should generate flow control -multiclass BranchInstr2<string name> { - def _i32 : ILFormat<(outs), (ins GPRI32:$src0, GPRI32:$src1), - !strconcat(name, " $src0, $src1"), []>; - def _f32 : ILFormat<(outs), (ins GPRF32:$src0, GPRF32:$src1), - !strconcat(name, " $src0, $src1"), []>; -} - -//===--------------------------------------------------------------------===// -// Intrinsics support -//===--------------------------------------------------------------------===// -include "AMDILIntrinsics.td" diff --git a/lib/Target/R600/AMDILIntrinsics.td b/lib/Target/R600/AMDILIntrinsics.td deleted file mode 100644 index 4a3e02e..0000000 --- a/lib/Target/R600/AMDILIntrinsics.td +++ /dev/null @@ -1,224 +0,0 @@ -//===- AMDILIntrinsics.td - Defines AMDIL Intrinscs -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//==-----------------------------------------------------------------------===// -// -// This file defines all of the amdil-specific intrinsics -// -//===---------------------------------------------------------------===// -//===--------------------------------------------------------------------===// -// Intrinsic classes -// Generic versions of the above classes but for Target specific intrinsics -// instead of SDNode patterns. 
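// For readers unfamiliar with these TableGen helpers: each class instantiates
// the TableGen Intrinsic class with a result-type list, a parameter-type
// list, and a property list. LLVMMatchType<0> ties a parameter's type to the
// first overloaded type, IntrNoMem marks an intrinsic as touching no memory
// (so the optimizer may freely CSE and reorder calls to it), and
// IntrReadWriteArgMem, used by the atomics below, restricts an intrinsic to
// memory reachable through its pointer arguments.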
-//===--------------------------------------------------------------------===// -let TargetPrefix = "AMDIL", isTarget = 1 in { - class VoidIntLong : - Intrinsic<[llvm_i64_ty], [], []>; - class VoidIntInt : - Intrinsic<[llvm_i32_ty], [], []>; - class VoidIntBool : - Intrinsic<[llvm_i32_ty], [], []>; - class UnaryIntInt : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], [IntrNoMem]>; - class UnaryIntFloat : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>], [IntrNoMem]>; - class ConvertIntFTOI : - Intrinsic<[llvm_anyint_ty], [llvm_anyfloat_ty], [IntrNoMem]>; - class ConvertIntITOF : - Intrinsic<[llvm_anyfloat_ty], [llvm_anyint_ty], [IntrNoMem]>; - class UnaryIntNoRetInt : - Intrinsic<[], [llvm_anyint_ty], []>; - class UnaryIntNoRetFloat : - Intrinsic<[], [llvm_anyfloat_ty], []>; - class BinaryIntInt : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class BinaryIntFloat : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class BinaryIntNoRetInt : - Intrinsic<[], [llvm_anyint_ty, LLVMMatchType<0>], []>; - class BinaryIntNoRetFloat : - Intrinsic<[], [llvm_anyfloat_ty, LLVMMatchType<0>], []>; - class TernaryIntInt : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, - LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class TernaryIntFloat : - Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, - LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class QuaternaryIntInt : - Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>, - LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class UnaryAtomicInt : - Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - class BinaryAtomicInt : - Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - class TernaryAtomicInt : - Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>; - class UnaryAtomicIntNoRet : - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - class BinaryAtomicIntNoRet : - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; - class TernaryAtomicIntNoRet : - Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty, llvm_i32_ty, llvm_i32_ty], [IntrReadWriteArgMem]>; -} - -let TargetPrefix = "AMDIL", isTarget = 1 in { - def int_AMDIL_abs : GCCBuiltin<"__amdil_abs">, UnaryIntInt; - - def int_AMDIL_bit_reverse_u32 : GCCBuiltin<"__amdil_ubit_reverse">, - UnaryIntInt; - def int_AMDIL_bit_count_i32 : GCCBuiltin<"__amdil_count_bits">, - UnaryIntInt; - def int_AMDIL_bit_find_first_lo : GCCBuiltin<"__amdil_ffb_lo">, - UnaryIntInt; - def int_AMDIL_bit_find_first_hi : GCCBuiltin<"__amdil_ffb_hi">, - UnaryIntInt; - def int_AMDIL_bit_find_first_sgn : GCCBuiltin<"__amdil_ffb_signed">, - UnaryIntInt; - def int_AMDIL_media_bitalign : GCCBuiltin<"__amdil_bitalign">, - TernaryIntInt; - def int_AMDIL_media_bytealign : GCCBuiltin<"__amdil_bytealign">, - TernaryIntInt; - def int_AMDIL_bit_insert_u32 : GCCBuiltin<"__amdil_ubit_insert">, - QuaternaryIntInt; - def int_AMDIL_bfi : GCCBuiltin<"__amdil_bfi">, - TernaryIntInt; - def int_AMDIL_bfm : GCCBuiltin<"__amdil_bfm">, - BinaryIntInt; - def int_AMDIL_mulhi_i32 : GCCBuiltin<"__amdil_imul_high">, - BinaryIntInt; - def int_AMDIL_mulhi_u32 : GCCBuiltin<"__amdil_umul_high">, - BinaryIntInt; - def int_AMDIL_mulhi24_i32 : GCCBuiltin<"__amdil_imul24_high">, - BinaryIntInt; - def int_AMDIL_mulhi24_u32 : GCCBuiltin<"__amdil_umul24_high">, - BinaryIntInt; - def int_AMDIL_carry_i32 : GCCBuiltin<"__amdil_carry">, - BinaryIntInt; - 
def int_AMDIL_borrow_i32 : GCCBuiltin<"__amdil_borrow">, - BinaryIntInt; - def int_AMDIL_min_i32 : GCCBuiltin<"__amdil_imin">, - BinaryIntInt; - def int_AMDIL_min_u32 : GCCBuiltin<"__amdil_umin">, - BinaryIntInt; - def int_AMDIL_min : GCCBuiltin<"__amdil_min">, - BinaryIntFloat; - def int_AMDIL_max_i32 : GCCBuiltin<"__amdil_imax">, - BinaryIntInt; - def int_AMDIL_max_u32 : GCCBuiltin<"__amdil_umax">, - BinaryIntInt; - def int_AMDIL_max : GCCBuiltin<"__amdil_max">, - BinaryIntFloat; - def int_AMDIL_media_lerp_u4 : GCCBuiltin<"__amdil_u4lerp">, - TernaryIntInt; - def int_AMDIL_media_sad : GCCBuiltin<"__amdil_sad">, - TernaryIntInt; - def int_AMDIL_media_sad_hi : GCCBuiltin<"__amdil_sadhi">, - TernaryIntInt; - def int_AMDIL_fraction : GCCBuiltin<"__amdil_fraction">, - UnaryIntFloat; - def int_AMDIL_clamp : GCCBuiltin<"__amdil_clamp">, - TernaryIntFloat; - def int_AMDIL_pireduce : GCCBuiltin<"__amdil_pireduce">, - UnaryIntFloat; - def int_AMDIL_round_nearest : GCCBuiltin<"__amdil_round_nearest">, - UnaryIntFloat; - def int_AMDIL_round_neginf : GCCBuiltin<"__amdil_round_neginf">, - UnaryIntFloat; - def int_AMDIL_round_zero : GCCBuiltin<"__amdil_round_zero">, - UnaryIntFloat; - def int_AMDIL_acos : GCCBuiltin<"__amdil_acos">, - UnaryIntFloat; - def int_AMDIL_atan : GCCBuiltin<"__amdil_atan">, - UnaryIntFloat; - def int_AMDIL_asin : GCCBuiltin<"__amdil_asin">, - UnaryIntFloat; - def int_AMDIL_cos : GCCBuiltin<"__amdil_cos">, - UnaryIntFloat; - def int_AMDIL_cos_vec : GCCBuiltin<"__amdil_cos_vec">, - UnaryIntFloat; - def int_AMDIL_tan : GCCBuiltin<"__amdil_tan">, - UnaryIntFloat; - def int_AMDIL_sin : GCCBuiltin<"__amdil_sin">, - UnaryIntFloat; - def int_AMDIL_sin_vec : GCCBuiltin<"__amdil_sin_vec">, - UnaryIntFloat; - def int_AMDIL_pow : GCCBuiltin<"__amdil_pow">, BinaryIntFloat; - def int_AMDIL_div : GCCBuiltin<"__amdil_div">, BinaryIntFloat; - def int_AMDIL_udiv : GCCBuiltin<"__amdil_udiv">, BinaryIntInt; - def int_AMDIL_sqrt: GCCBuiltin<"__amdil_sqrt">, - UnaryIntFloat; - def int_AMDIL_sqrt_vec: GCCBuiltin<"__amdil_sqrt_vec">, - UnaryIntFloat; - def int_AMDIL_exp : GCCBuiltin<"__amdil_exp">, - UnaryIntFloat; - def int_AMDIL_exp_vec : GCCBuiltin<"__amdil_exp_vec">, - UnaryIntFloat; - def int_AMDIL_exn : GCCBuiltin<"__amdil_exn">, - UnaryIntFloat; - def int_AMDIL_log_vec : GCCBuiltin<"__amdil_log_vec">, - UnaryIntFloat; - def int_AMDIL_ln : GCCBuiltin<"__amdil_ln">, - UnaryIntFloat; - def int_AMDIL_sign: GCCBuiltin<"__amdil_sign">, - UnaryIntFloat; - def int_AMDIL_fma: GCCBuiltin<"__amdil_fma">, - TernaryIntFloat; - def int_AMDIL_rsq : GCCBuiltin<"__amdil_rsq">, - UnaryIntFloat; - def int_AMDIL_rsq_vec : GCCBuiltin<"__amdil_rsq_vec">, - UnaryIntFloat; - def int_AMDIL_length : GCCBuiltin<"__amdil_length">, - UnaryIntFloat; - def int_AMDIL_lerp : GCCBuiltin<"__amdil_lerp">, - TernaryIntFloat; - def int_AMDIL_media_sad4 : GCCBuiltin<"__amdil_sad4">, - Intrinsic<[llvm_i32_ty], [llvm_v4i32_ty, - llvm_v4i32_ty, llvm_i32_ty], []>; - - def int_AMDIL_frexp_f64 : GCCBuiltin<"__amdil_frexp">, - Intrinsic<[llvm_v2i64_ty], [llvm_double_ty], []>; - def int_AMDIL_ldexp : GCCBuiltin<"__amdil_ldexp">, - Intrinsic<[llvm_anyfloat_ty], [llvm_anyfloat_ty, llvm_anyint_ty], []>; - def int_AMDIL_drcp : GCCBuiltin<"__amdil_rcp">, - Intrinsic<[llvm_double_ty], [llvm_double_ty], []>; - def int_AMDIL_convert_f16_f32 : GCCBuiltin<"__amdil_half_to_float">, - ConvertIntITOF; - def int_AMDIL_convert_f32_f16 : GCCBuiltin<"__amdil_float_to_half">, - ConvertIntFTOI; - def int_AMDIL_convert_f32_i32_rpi : 
GCCBuiltin<"__amdil_float_to_int_rpi">, - ConvertIntFTOI; - def int_AMDIL_convert_f32_i32_flr : GCCBuiltin<"__amdil_float_to_int_flr">, - ConvertIntFTOI; - def int_AMDIL_convert_f32_f16_near : GCCBuiltin<"__amdil_float_to_half_near">, - ConvertIntFTOI; - def int_AMDIL_convert_f32_f16_neg_inf : GCCBuiltin<"__amdil_float_to_half_neg_inf">, - ConvertIntFTOI; - def int_AMDIL_convert_f32_f16_plus_inf : GCCBuiltin<"__amdil_float_to_half_plus_inf">, - ConvertIntFTOI; - def int_AMDIL_media_convert_f2v4u8 : GCCBuiltin<"__amdil_f_2_u4">, - Intrinsic<[llvm_i32_ty], [llvm_v4f32_ty], []>; - def int_AMDIL_media_unpack_byte_0 : GCCBuiltin<"__amdil_unpack_0">, - ConvertIntITOF; - def int_AMDIL_media_unpack_byte_1 : GCCBuiltin<"__amdil_unpack_1">, - ConvertIntITOF; - def int_AMDIL_media_unpack_byte_2 : GCCBuiltin<"__amdil_unpack_2">, - ConvertIntITOF; - def int_AMDIL_media_unpack_byte_3 : GCCBuiltin<"__amdil_unpack_3">, - ConvertIntITOF; - def int_AMDIL_dp2_add : GCCBuiltin<"__amdil_dp2_add">, - Intrinsic<[llvm_float_ty], [llvm_v2f32_ty, - llvm_v2f32_ty, llvm_float_ty], []>; - def int_AMDIL_dp2 : GCCBuiltin<"__amdil_dp2">, - Intrinsic<[llvm_float_ty], [llvm_v2f32_ty, - llvm_v2f32_ty], []>; - def int_AMDIL_dp3 : GCCBuiltin<"__amdil_dp3">, - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, - llvm_v4f32_ty], []>; - def int_AMDIL_dp4 : GCCBuiltin<"__amdil_dp4">, - Intrinsic<[llvm_float_ty], [llvm_v4f32_ty, - llvm_v4f32_ty], []>; -} diff --git a/lib/Target/R600/AMDILRegisterInfo.td b/lib/Target/R600/AMDILRegisterInfo.td deleted file mode 100644 index b9d0334..0000000 --- a/lib/Target/R600/AMDILRegisterInfo.td +++ /dev/null @@ -1,107 +0,0 @@ -//===- AMDILRegisterInfo.td - AMDIL Register defs ----------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//==-----------------------------------------------------------------------===// -// -// Declarations that describe the AMDIL register file -// -//===----------------------------------------------------------------------===// - -class AMDILReg<bits<16> num, string n> : Register<n> { - field bits<16> Value; - let Value = num; - let Namespace = "AMDGPU"; -} - -// We will start with 8 registers for each class before expanding to more -// Since the swizzle is added based on the register class, we can leave it -// off here and just specify different registers for different register classes -def R1 : AMDILReg<1, "r1">, DwarfRegNum<[1]>; -def R2 : AMDILReg<2, "r2">, DwarfRegNum<[2]>; -def R3 : AMDILReg<3, "r3">, DwarfRegNum<[3]>; -def R4 : AMDILReg<4, "r4">, DwarfRegNum<[4]>; -def R5 : AMDILReg<5, "r5">, DwarfRegNum<[5]>; -def R6 : AMDILReg<6, "r6">, DwarfRegNum<[6]>; -def R7 : AMDILReg<7, "r7">, DwarfRegNum<[7]>; -def R8 : AMDILReg<8, "r8">, DwarfRegNum<[8]>; -def R9 : AMDILReg<9, "r9">, DwarfRegNum<[9]>; -def R10 : AMDILReg<10, "r10">, DwarfRegNum<[10]>; -def R11 : AMDILReg<11, "r11">, DwarfRegNum<[11]>; -def R12 : AMDILReg<12, "r12">, DwarfRegNum<[12]>; -def R13 : AMDILReg<13, "r13">, DwarfRegNum<[13]>; -def R14 : AMDILReg<14, "r14">, DwarfRegNum<[14]>; -def R15 : AMDILReg<15, "r15">, DwarfRegNum<[15]>; -def R16 : AMDILReg<16, "r16">, DwarfRegNum<[16]>; -def R17 : AMDILReg<17, "r17">, DwarfRegNum<[17]>; -def R18 : AMDILReg<18, "r18">, DwarfRegNum<[18]>; -def R19 : AMDILReg<19, "r19">, DwarfRegNum<[19]>; -def R20 : AMDILReg<20, "r20">, DwarfRegNum<[20]>; - -// All registers between 1000 and 1024 are reserved and cannot be used -// unless commented in this section -// r1021-r1025 are used to dynamically calculate the local/group/thread/region/region_local ID's -// r1020 is used to hold the frame index for local arrays -// r1019 is used to hold the dynamic stack allocation pointer -// r1018 is used as a temporary register for handwritten code -// r1017 is used as a temporary register for handwritten code -// r1016 is used as a temporary register for load/store code -// r1015 is used as a temporary register for data segment offset -// r1014 is used as a temporary register for store code -// r1013 is used as the section data pointer register -// r1012-r1010 and r1001-r1008 are used for temporary I/O registers -// r1009 is used as the frame pointer register -// r999 is used as the mem register. -// r998 is used as the return address register. 
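// The named definitions below realize the table above (SP is r1019, T1-T5
// are r1018 down to r1014, SDP is r1013, DFP is r1009, MEM is r999, RA is
// r998), and the DwarfRegNum<[...]> annotation on each one supplies the
// register number emitted into DWARF debug info.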
-//def R1025 : AMDILReg<1025, "r1025">, DwarfRegNum<[1025]>; -//def R1024 : AMDILReg<1024, "r1024">, DwarfRegNum<[1024]>; -//def R1023 : AMDILReg<1023, "r1023">, DwarfRegNum<[1023]>; -//def R1022 : AMDILReg<1022, "r1022">, DwarfRegNum<[1022]>; -//def R1021 : AMDILReg<1021, "r1021">, DwarfRegNum<[1021]>; -//def R1020 : AMDILReg<1020, "r1020">, DwarfRegNum<[1020]>; -def SP : AMDILReg<1019, "r1019">, DwarfRegNum<[1019]>; -def T1 : AMDILReg<1018, "r1018">, DwarfRegNum<[1018]>; -def T2 : AMDILReg<1017, "r1017">, DwarfRegNum<[1017]>; -def T3 : AMDILReg<1016, "r1016">, DwarfRegNum<[1016]>; -def T4 : AMDILReg<1015, "r1015">, DwarfRegNum<[1015]>; -def T5 : AMDILReg<1014, "r1014">, DwarfRegNum<[1014]>; -def SDP : AMDILReg<1013, "r1013">, DwarfRegNum<[1013]>; -def R1012: AMDILReg<1012, "r1012">, DwarfRegNum<[1012]>; -def R1011: AMDILReg<1011, "r1011">, DwarfRegNum<[1011]>; -def R1010: AMDILReg<1010, "r1010">, DwarfRegNum<[1010]>; -def DFP : AMDILReg<1009, "r1009">, DwarfRegNum<[1009]>; -def R1008: AMDILReg<1008, "r1008">, DwarfRegNum<[1008]>; -def R1007: AMDILReg<1007, "r1007">, DwarfRegNum<[1007]>; -def R1006: AMDILReg<1006, "r1006">, DwarfRegNum<[1006]>; -def R1005: AMDILReg<1005, "r1005">, DwarfRegNum<[1005]>; -def R1004: AMDILReg<1004, "r1004">, DwarfRegNum<[1004]>; -def R1003: AMDILReg<1003, "r1003">, DwarfRegNum<[1003]>; -def R1002: AMDILReg<1002, "r1002">, DwarfRegNum<[1002]>; -def R1001: AMDILReg<1001, "r1001">, DwarfRegNum<[1001]>; -def MEM : AMDILReg<999, "mem">, DwarfRegNum<[999]>; -def RA : AMDILReg<998, "r998">, DwarfRegNum<[998]>; -def FP : AMDILReg<997, "r997">, DwarfRegNum<[997]>; -def GPRI16 : RegisterClass<"AMDGPU", [i16], 16, - (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> { - let AltOrders = [(add (sequence "R%u", 1, 20))]; - let AltOrderSelect = [{ - return 1; - }]; - } -def GPRI32 : RegisterClass<"AMDGPU", [i32], 32, - (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> { - let AltOrders = [(add (sequence "R%u", 1, 20))]; - let AltOrderSelect = [{ - return 1; - }]; - } -def GPRF32 : RegisterClass<"AMDGPU", [f32], 32, - (add (sequence "R%u", 1, 20), RA, SP, T1, T2, T3, T4, T5, SDP, R1010, R1011, R1001, R1002, R1003, R1004, R1005, R1006, R1007, R1008, MEM, R1012)> { - let AltOrders = [(add (sequence "R%u", 1, 20))]; - let AltOrderSelect = [{ - return 1; - }]; - } diff --git a/lib/Target/R600/CMakeLists.txt b/lib/Target/R600/CMakeLists.txt index 3c6fa5a..4d16082 100644 --- a/lib/Target/R600/CMakeLists.txt +++ b/lib/Target/R600/CMakeLists.txt @@ -13,10 +13,9 @@ add_public_tablegen_target(AMDGPUCommonTableGen) add_llvm_target(R600CodeGen AMDILCFGStructurizer.cpp - AMDILIntrinsicInfo.cpp - AMDILISelLowering.cpp AMDGPUAsmPrinter.cpp AMDGPUFrameLowering.cpp + AMDGPUIntrinsicInfo.cpp AMDGPUISelDAGToDAG.cpp AMDGPUMCInstLower.cpp AMDGPUMachineFunction.cpp @@ -24,8 +23,8 @@ add_llvm_target(R600CodeGen AMDGPUTargetMachine.cpp AMDGPUTargetTransformInfo.cpp AMDGPUISelLowering.cpp - AMDGPUConvertToISA.cpp AMDGPUInstrInfo.cpp + AMDGPUPromoteAlloca.cpp AMDGPURegisterInfo.cpp R600ClauseMergePass.cpp R600ControlFlowFinalizer.cpp @@ -41,6 +40,7 @@ add_llvm_target(R600CodeGen R600TextureIntrinsicsReplacer.cpp SIAnnotateControlFlow.cpp SIFixSGPRCopies.cpp + SIFixSGPRLiveRanges.cpp SIInsertWaits.cpp SIInstrInfo.cpp SIISelLowering.cpp diff --git a/lib/Target/R600/EvergreenInstructions.td 
b/lib/Target/R600/EvergreenInstructions.td index 2065441..dcb7e98 100644 --- a/lib/Target/R600/EvergreenInstructions.td +++ b/lib/Target/R600/EvergreenInstructions.td @@ -295,7 +295,7 @@ def : Pat<(i32 (sext_inreg i32:$src, i8)), def : Pat<(i32 (sext_inreg i32:$src, i16)), (BFE_INT_eg i32:$src, (i32 ZERO), (MOV_IMM_I32 16))>; -defm : BFIPatterns <BFI_INT_eg>; +defm : BFIPatterns <BFI_INT_eg, MOV_IMM_I32>; def BFM_INT_eg : R600_2OP <0xA0, "BFM_INT", [(set i32:$dst, (AMDGPUbfm i32:$src0, i32:$src1))], @@ -326,6 +326,8 @@ def MUL_UINT24_eg : R600_2OP <0xB5, "MUL_UINT24", def DOT4_eg : DOT4_Common<0xBE>; defm CUBE_eg : CUBE_Common<0xC0>; +def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>; + let hasSideEffects = 1 in { def MOVA_INT_eg : R600_1OP <0xCC, "MOVA_INT", [], VecALU>; } @@ -346,7 +348,7 @@ def FLT_TO_UINT_eg : FLT_TO_UINT_Common<0x9A> { def UINT_TO_FLT_eg : UINT_TO_FLT_Common<0x9C>; def GROUP_BARRIER : InstR600 < - (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local)], AnyALU>, + (outs), (ins), " GROUP_BARRIER", [(int_AMDGPU_barrier_local), (int_AMDGPU_barrier_global)], AnyALU>, R600ALU_Word0, R600ALU_Word1_OP2 <0x54> { @@ -375,6 +377,11 @@ def GROUP_BARRIER : InstR600 < let ALUInst = 1; } +def : Pat < + (int_AMDGPU_barrier_global), + (GROUP_BARRIER) +>; + //===----------------------------------------------------------------------===// // LDS Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp index 11ae091..0927040 100644 --- a/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp +++ b/lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp @@ -99,9 +99,9 @@ void AMDGPUInstPrinter::printRegOperand(unsigned reg, raw_ostream &O) { return; } - // The low 8 bits encoding value is the register index, for both VGPRs and - // SGPRs. - unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1); + // The low 8 bits of the encoding value is the register index, for both VGPRs + // and SGPRs. 
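// That is, ((1 << 8) - 1) == 0xFF: the mask below keeps only the low byte of
// the encoding value.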
+ unsigned RegIdx = MRI.getEncodingValue(reg) & ((1 << 8) - 1); if (NumRegs == 1) { O << Type << RegIdx; return; @@ -216,13 +216,8 @@ void AMDGPUInstPrinter::printClamp(const MCInst *MI, unsigned OpNo, void AMDGPUInstPrinter::printLiteral(const MCInst *MI, unsigned OpNo, raw_ostream &O) { - union Literal { - float f; - int32_t i; - } L; - - L.i = MI->getOperand(OpNo).getImm(); - O << L.i << "(" << L.f << ")"; + int32_t Imm = MI->getOperand(OpNo).getImm(); + O << Imm << '(' << BitsToFloat(Imm) << ')'; } void AMDGPUInstPrinter::printLast(const MCInst *MI, unsigned OpNo, diff --git a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp index 5e7cefe..dc1344f 100644 --- a/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp +++ b/lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp @@ -172,17 +172,13 @@ uint64_t R600MCCodeEmitter::getMachineOpValue(const MCInst &MI, SmallVectorImpl<MCFixup> &Fixup, const MCSubtargetInfo &STI) const { if (MO.isReg()) { - if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) { + if (HAS_NATIVE_OPERANDS(MCII.get(MI.getOpcode()).TSFlags)) return MRI.getEncodingValue(MO.getReg()); - } else { - return getHWReg(MO.getReg()); - } - } else if (MO.isImm()) { - return MO.getImm(); - } else { - assert(0); - return 0; + return getHWReg(MO.getReg()); } + + assert(MO.isImm()); + return MO.getImm(); } #include "AMDGPUGenMCCodeEmitter.inc" diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp index d255e96..d98a6db 100644 --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/Debug.h" #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" diff --git a/lib/Target/R600/R600ISelLowering.cpp b/lib/Target/R600/R600ISelLowering.cpp index d6c6830..7f3560a 100644 --- a/lib/Target/R600/R600ISelLowering.cpp +++ b/lib/Target/R600/R600ISelLowering.cpp @@ -13,6 +13,9 @@ //===----------------------------------------------------------------------===// #include "R600ISelLowering.h" +#include "AMDGPUFrameLowering.h" +#include "AMDGPUIntrinsicInfo.h" +#include "AMDGPUSubtarget.h" #include "R600Defines.h" #include "R600InstrInfo.h" #include "R600MachineFunctionInfo.h" @@ -65,6 +68,7 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::BR_CC, MVT::i32, Expand); setOperationAction(ISD::BR_CC, MVT::f32, Expand); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); setOperationAction(ISD::FSUB, MVT::f32, Expand); @@ -133,19 +137,47 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) : setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setOperationAction(ISD::FrameIndex, MVT::i32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); + + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); + setTargetDAGCombine(ISD::FP_ROUND); setTargetDAGCombine(ISD::FP_TO_SINT); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); setTargetDAGCombine(ISD::SELECT_CC); 
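// Each setTargetDAGCombine call registers an opcode with the generic DAG
// combiner so that every node of that kind is offered to this target's
// PerformDAGCombine hook, where R600 folds things like redundant
// insert/extract-element chains.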
setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); + setOperationAction(ISD::SUB, MVT::i64, Expand); + // These should be replaced by UDVIREM, but it does not happen automatically // during Type Legalization setOperationAction(ISD::UDIV, MVT::i64, Custom); setOperationAction(ISD::UREM, MVT::i64, Custom); + setOperationAction(ISD::SDIV, MVT::i64, Custom); + setOperationAction(ISD::SREM, MVT::i64, Custom); + + // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32 + // to be Legal/Custom in order to avoid library calls. + setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom); + setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom); setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); + const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; + for (MVT VT : ScalarIntVTs) { + setOperationAction(ISD::ADDC, VT, Expand); + setOperationAction(ISD::SUBC, VT, Expand); + setOperationAction(ISD::ADDE, VT, Expand); + setOperationAction(ISD::SUBE, VT, Expand); + } + setBooleanContents(ZeroOrNegativeOneBooleanContent); setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); setSchedulingPreference(Sched::Source); @@ -537,11 +569,24 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); switch (Op.getOpcode()) { default: return AMDGPUTargetLowering::LowerOperation(Op, DAG); + case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); + case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); + case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG); + case ISD::SRA_PARTS: + case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG); case ISD::FCOS: case ISD::FSIN: return LowerTrig(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::LOAD: return LowerLOAD(Op, DAG); + case ISD::LOAD: { + SDValue Result = LowerLOAD(Op, DAG); + assert((!Result.getNode() || + Result.getNode()->getNumValues() == 2) && + "Load should return a value and a chain"); + return Result; + } + + case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); case ISD::INTRINSIC_VOID: { SDValue Chain = Op.getOperand(0); @@ -776,6 +821,9 @@ SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const case Intrinsic::r600_read_tidig_z: return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass, AMDGPU::T0_Z, VT); + case Intrinsic::AMDGPU_rsq: + // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior. 
+ return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1)); } // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode()) break; @@ -793,20 +841,172 @@ void R600TargetLowering::ReplaceNodeResults(SDNode *N, return; case ISD::FP_TO_UINT: Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG)); return; - case ISD::LOAD: { - SDNode *Node = LowerLOAD(SDValue(N, 0), DAG).getNode(); - Results.push_back(SDValue(Node, 0)); - Results.push_back(SDValue(Node, 1)); - // XXX: LLVM seems not to replace Chain Value inside CustomWidenLowerNode - // function - DAG.ReplaceAllUsesOfValueWith(SDValue(N,1), SDValue(Node, 1)); - return; + case ISD::UDIV: { + SDValue Op = SDValue(N, 0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), + N->getOperand(0), N->getOperand(1)); + Results.push_back(UDIVREM); + break; } - case ISD::STORE: - SDNode *Node = LowerSTORE(SDValue(N, 0), DAG).getNode(); - Results.push_back(SDValue(Node, 0)); - return; + case ISD::UREM: { + SDValue Op = SDValue(N, 0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue UDIVREM = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), + N->getOperand(0), N->getOperand(1)); + Results.push_back(UDIVREM.getValue(1)); + break; + } + case ISD::SDIV: { + SDValue Op = SDValue(N, 0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT), + N->getOperand(0), N->getOperand(1)); + Results.push_back(SDIVREM); + break; + } + case ISD::SREM: { + SDValue Op = SDValue(N, 0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + SDValue SDIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(VT, VT), + N->getOperand(0), N->getOperand(1)); + Results.push_back(SDIVREM.getValue(1)); + break; + } + case ISD::SDIVREM: { + SDValue Op = SDValue(N, 1); + SDValue RES = LowerSDIVREM(Op, DAG); + Results.push_back(RES); + Results.push_back(RES.getValue(1)); + break; + } + case ISD::UDIVREM: { + SDValue Op = SDValue(N, 0); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); + + SDValue one = DAG.getConstant(1, HalfVT); + SDValue zero = DAG.getConstant(0, HalfVT); + + //HiLo split + SDValue LHS = N->getOperand(0); + SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, zero); + SDValue LHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, one); + + SDValue RHS = N->getOperand(1); + SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, zero); + SDValue RHS_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, one); + + // Get Speculative values + SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo); + SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo); + + SDValue REM_Hi = zero; + SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, zero, REM_Part, LHS_Hi, ISD::SETEQ); + + SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, zero, DIV_Part, zero, ISD::SETEQ); + SDValue DIV_Lo = zero; + + const unsigned halfBitWidth = HalfVT.getSizeInBits(); + + for (unsigned i = 0; i < halfBitWidth; ++i) { + SDValue POS = DAG.getConstant(halfBitWidth - i - 1, HalfVT); + // Get Value of high bit + SDValue HBit; + if (halfBitWidth == 32 && Subtarget->hasBFE()) { + HBit = DAG.getNode(AMDGPUISD::BFE_U32, DL, HalfVT, LHS_Lo, POS, one); + } else { + HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); + HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, one); + } + + SDValue Carry = DAG.getNode(ISD::SRL, DL, HalfVT, REM_Lo, + 
DAG.getConstant(halfBitWidth - 1, HalfVT)); + REM_Hi = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Hi, one); + REM_Hi = DAG.getNode(ISD::OR, DL, HalfVT, REM_Hi, Carry); + + REM_Lo = DAG.getNode(ISD::SHL, DL, HalfVT, REM_Lo, one); + REM_Lo = DAG.getNode(ISD::OR, DL, HalfVT, REM_Lo, HBit); + + + SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi); + + SDValue BIT = DAG.getConstant(1 << (halfBitWidth - i - 1), HalfVT); + SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, zero, ISD::SETGE); + + DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT); + + // Update REM + + SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS); + + REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETGE); + REM_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, zero); + REM_Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, REM, one); + } + + SDValue REM = DAG.getNode(ISD::BUILD_PAIR, DL, VT, REM_Lo, REM_Hi); + SDValue DIV = DAG.getNode(ISD::BUILD_PAIR, DL, VT, DIV_Lo, DIV_Hi); + Results.push_back(DIV); + Results.push_back(REM); + break; } + } +} + +SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG, + SDValue Vector) const { + + SDLoc DL(Vector); + EVT VecVT = Vector.getValueType(); + EVT EltVT = VecVT.getVectorElementType(); + SmallVector<SDValue, 8> Args; + + for (unsigned i = 0, e = VecVT.getVectorNumElements(); + i != e; ++i) { + Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, + Vector, DAG.getConstant(i, getVectorIdxTy()))); + } + + return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args); +} + +SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + + SDLoc DL(Op); + SDValue Vector = Op.getOperand(0); + SDValue Index = Op.getOperand(1); + + if (isa<ConstantSDNode>(Index) || + Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) + return Op; + + Vector = vectorToVerticalVector(DAG, Vector); + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(), + Vector, Index); +} + +SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + SDValue Vector = Op.getOperand(0); + SDValue Value = Op.getOperand(1); + SDValue Index = Op.getOperand(2); + + if (isa<ConstantSDNode>(Index) || + Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR) + return Op; + + Vector = vectorToVerticalVector(DAG, Vector); + SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), + Vector, Value, Index); + return vectorToVerticalVector(DAG, Insert); } SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { @@ -840,6 +1040,80 @@ SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const { DAG.getConstantFP(3.14159265359, MVT::f32)); } +SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shift = Op.getOperand(2); + SDValue Zero = DAG.getConstant(0, VT); + SDValue One = DAG.getConstant(1, VT); + + SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT); + SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT); + SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); + SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); + + // The dance around Width1 is necessary for 0 special case. + // Without it the CompShift might be 32, producing incorrect results in + // Overflow. 
So we do the shift in two steps, the alternative is to + // add a conditional to filter the special case. + + SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift); + Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One); + + SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift); + HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow); + SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift); + + SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift); + SDValue LoBig = Zero; + + Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); + Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); + + return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); +} + +SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + SDValue Lo = Op.getOperand(0); + SDValue Hi = Op.getOperand(1); + SDValue Shift = Op.getOperand(2); + SDValue Zero = DAG.getConstant(0, VT); + SDValue One = DAG.getConstant(1, VT); + + const bool SRA = Op.getOpcode() == ISD::SRA_PARTS; + + SDValue Width = DAG.getConstant(VT.getSizeInBits(), VT); + SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, VT); + SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width); + SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift); + + // The dance around Width1 is necessary for 0 special case. + // Without it the CompShift might be 32, producing incorrect results in + // Overflow. So we do the shift in two steps, the alternative is to + // add a conditional to filter the special case. + + SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift); + Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One); + + SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift); + SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift); + LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow); + + SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift); + SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero; + + Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT); + Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT); + + return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi); +} + SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode( ISD::SETCC, @@ -1369,6 +1643,15 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const return DAG.getMergeValues(Ops, DL); } +SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { + SDValue Chain = Op.getOperand(0); + SDValue Cond = Op.getOperand(1); + SDValue Jump = Op.getOperand(2); + + return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(), + Chain, Jump, Cond); +} + /// XXX Only kernel functions are supported, so we can assume for now that /// every function is a kernel function, but in the future we should use /// separate calling conventions for kernel and non-kernel functions. 
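The Width1 comment above is worth a concrete restatement. A minimal C++ sketch of the SHL_PARTS case, assuming 32-bit parts; shl64_parts and its signature are illustrative, not code from this patch:

  #include <cstdint>

  // (hi:lo) << n for n in [0, 63], using only 32-bit shifts in [0, 31].
  // Shifting lo right by (31 - n) and then by one more bit is exactly the
  // two-step trick: n == 0 never has to request an (undefined) shift by 32.
  void shl64_parts(uint32_t lo, uint32_t hi, uint32_t n,
                   uint32_t &outLo, uint32_t &outHi) {
    uint32_t small    = n & 31;                     // hardware-style mod 32
    uint32_t overflow = (lo >> (31 - small)) >> 1;  // bits crossing lo -> hi
    uint32_t hiSmall  = (hi << small) | overflow;   // n < 32 case
    uint32_t loSmall  = lo << small;
    uint32_t hiBig    = lo << small;                // n >= 32: lo << (n - 32)
    outHi = (n < 32) ? hiSmall : hiBig;             // the SETULT selects above
    outLo = (n < 32) ? loSmall : 0;
  }

The SRX variant mirrors this with the overflow bits flowing from hi into lo, and with HiBig becoming either zero (SRL) or Hi shifted arithmetically by 31 (SRA) so the sign fills the high word.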
@@ -1902,9 +2185,8 @@ SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node, SDValue FakeOp; std::vector<SDValue> Ops; - for(SDNode::op_iterator I = Node->op_begin(), E = Node->op_end(); - I != E; ++I) - Ops.push_back(*I); + for (const SDUse &I : Node->ops()) + Ops.push_back(I); if (Opcode == AMDGPU::DOT_4) { int OperandIdx[] = { diff --git a/lib/Target/R600/R600ISelLowering.h b/lib/Target/R600/R600ISelLowering.h index a8a464f..d22c8c9 100644 --- a/lib/Target/R600/R600ISelLowering.h +++ b/lib/Target/R600/R600ISelLowering.h @@ -51,15 +51,18 @@ private: void lowerImplicitParameter(MachineInstr *MI, MachineBasicBlock &BB, MachineRegisterInfo & MRI, unsigned dword_offset) const; SDValue OptimizeSwizzle(SDValue BuildVector, SDValue Swz[], SelectionDAG &DAG) const; + SDValue vectorToVerticalVector(SelectionDAG &DAG, SDValue Vector) const; - /// \brief Lower ROTL opcode to BITALIGN - SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const; - + SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerTrig(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSHLParts(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSRXParts(SDValue Op, SelectionDAG &DAG) const; SDValue stackPtrToRegIndex(SDValue Ptr, unsigned StackWidth, SelectionDAG &DAG) const; diff --git a/lib/Target/R600/R600InstrInfo.cpp b/lib/Target/R600/R600InstrInfo.cpp index b0d9ae3..3972e2f 100644 --- a/lib/Target/R600/R600InstrInfo.cpp +++ b/lib/Target/R600/R600InstrInfo.cpp @@ -28,10 +28,9 @@ using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "AMDGPUGenDFAPacketizer.inc" -R600InstrInfo::R600InstrInfo(AMDGPUTargetMachine &tm) - : AMDGPUInstrInfo(tm), - RI(tm), - ST(tm.getSubtarget<AMDGPUSubtarget>()) +R600InstrInfo::R600InstrInfo(const AMDGPUSubtarget &st) + : AMDGPUInstrInfo(st), + RI(st) { } const R600RegisterInfo &R600InstrInfo::getRegisterInfo() const { @@ -52,11 +51,15 @@ R600InstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { unsigned VectorComponents = 0; - if (AMDGPU::R600_Reg128RegClass.contains(DestReg) && - AMDGPU::R600_Reg128RegClass.contains(SrcReg)) { + if ((AMDGPU::R600_Reg128RegClass.contains(DestReg) || + AMDGPU::R600_Reg128VerticalRegClass.contains(DestReg)) && + (AMDGPU::R600_Reg128RegClass.contains(SrcReg) || + AMDGPU::R600_Reg128VerticalRegClass.contains(SrcReg))) { VectorComponents = 4; - } else if(AMDGPU::R600_Reg64RegClass.contains(DestReg) && - AMDGPU::R600_Reg64RegClass.contains(SrcReg)) { + } else if((AMDGPU::R600_Reg64RegClass.contains(DestReg) || + AMDGPU::R600_Reg64VerticalRegClass.contains(DestReg)) && + (AMDGPU::R600_Reg64RegClass.contains(SrcReg) || + AMDGPU::R600_Reg64VerticalRegClass.contains(SrcReg))) { VectorComponents = 2; } @@ -768,16 +771,6 @@ R600InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, return true; } -int R600InstrInfo::getBranchInstr(const MachineOperand &op) const { - const MachineInstr *MI = op.getParent(); - - switch (MI->getDesc().OpInfo->RegClass) { - default: // FIXME: fallthrough?? 
- case AMDGPU::GPRI32RegClassID: return AMDGPU::BRANCH_COND_i32; - case AMDGPU::GPRF32RegClassID: return AMDGPU::BRANCH_COND_f32; - }; -} - static MachineBasicBlock::iterator FindLastAluClause(MachineBasicBlock &MBB) { for (MachineBasicBlock::reverse_iterator It = MBB.rbegin(), E = MBB.rend(); @@ -1064,10 +1057,34 @@ unsigned int R600InstrInfo::getInstrLatency(const InstrItineraryData *ItinData, return 2; } +bool R600InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { + + switch(MI->getOpcode()) { + default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); + case AMDGPU::R600_EXTRACT_ELT_V2: + case AMDGPU::R600_EXTRACT_ELT_V4: + buildIndirectRead(MI->getParent(), MI, MI->getOperand(0).getReg(), + RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address + MI->getOperand(2).getReg(), + RI.getHWRegChan(MI->getOperand(1).getReg())); + break; + case AMDGPU::R600_INSERT_ELT_V2: + case AMDGPU::R600_INSERT_ELT_V4: + buildIndirectWrite(MI->getParent(), MI, MI->getOperand(2).getReg(), // Value + RI.getHWRegIndex(MI->getOperand(1).getReg()), // Address + MI->getOperand(3).getReg(), // Offset + RI.getHWRegChan(MI->getOperand(1).getReg())); // Channel + break; + } + MI->eraseFromParent(); + return true; +} + void R600InstrInfo::reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const { const AMDGPUFrameLowering *TFL = - static_cast<const AMDGPUFrameLowering*>(TM.getFrameLowering()); + static_cast<const AMDGPUFrameLowering*>( + MF.getTarget().getFrameLowering()); unsigned StackWidth = TFL->getStackWidth(MF); int End = getIndirectIndexEnd(MF); @@ -1100,7 +1117,22 @@ MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, unsigned ValueReg, unsigned Address, unsigned OffsetReg) const { - unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); + return buildIndirectWrite(MBB, I, ValueReg, Address, OffsetReg, 0); +} + +MachineInstrBuilder R600InstrInfo::buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const { + unsigned AddrReg; + switch (AddrChan) { + default: llvm_unreachable("Invalid Channel"); + case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; + case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; + case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; + case 3: AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; + } MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); setImmOperand(MOVA, AMDGPU::OpName::write, 0); @@ -1117,7 +1149,22 @@ MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, MachineBasicBlock::iterator I, unsigned ValueReg, unsigned Address, unsigned OffsetReg) const { - unsigned AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); + return buildIndirectRead(MBB, I, ValueReg, Address, OffsetReg, 0); +} + +MachineInstrBuilder R600InstrInfo::buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const { + unsigned AddrReg; + switch (AddrChan) { + default: llvm_unreachable("Invalid Channel"); + case 0: AddrReg = AMDGPU::R600_AddrRegClass.getRegister(Address); break; + case 1: AddrReg = AMDGPU::R600_Addr_YRegClass.getRegister(Address); break; + case 2: AddrReg = AMDGPU::R600_Addr_ZRegClass.getRegister(Address); break; + case 3: 
AddrReg = AMDGPU::R600_Addr_WRegClass.getRegister(Address); break; + } MachineInstr *MOVA = buildDefaultInstruction(*MBB, I, AMDGPU::MOVA_INT_eg, AMDGPU::AR_X, OffsetReg); @@ -1220,7 +1267,6 @@ MachineInstr *R600InstrInfo::buildSlotOfVectorInstruction( const { assert (MI->getOpcode() == AMDGPU::DOT_4 && "Not Implemented"); unsigned Opcode; - const AMDGPUSubtarget &ST = TM.getSubtarget<AMDGPUSubtarget>(); if (ST.getGeneration() <= AMDGPUSubtarget::R700) Opcode = AMDGPU::DOT4_r600; else diff --git a/lib/Target/R600/R600InstrInfo.h b/lib/Target/R600/R600InstrInfo.h index b5304a0..45a57d3 100644 --- a/lib/Target/R600/R600InstrInfo.h +++ b/lib/Target/R600/R600InstrInfo.h @@ -32,12 +32,22 @@ namespace llvm { class R600InstrInfo : public AMDGPUInstrInfo { private: const R600RegisterInfo RI; - const AMDGPUSubtarget &ST; - int getBranchInstr(const MachineOperand &op) const; std::vector<std::pair<int, unsigned> > ExtractSrcs(MachineInstr *MI, const DenseMap<unsigned, unsigned> &PV, unsigned &ConstCount) const; + + MachineInstrBuilder buildIndirectRead(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const; + + MachineInstrBuilder buildIndirectWrite(MachineBasicBlock *MBB, + MachineBasicBlock::iterator I, + unsigned ValueReg, unsigned Address, + unsigned OffsetReg, + unsigned AddrChan) const; public: enum BankSwizzle { ALU_VEC_012_SCL_210 = 0, @@ -48,7 +58,7 @@ namespace llvm { ALU_VEC_210 }; - explicit R600InstrInfo(AMDGPUTargetMachine &tm); + explicit R600InstrInfo(const AMDGPUSubtarget &st); const R600RegisterInfo &getRegisterInfo() const override; void copyPhysReg(MachineBasicBlock &MBB, @@ -197,6 +207,8 @@ namespace llvm { int getInstrLatency(const InstrItineraryData *ItinData, SDNode *Node) const override { return 1;} + virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; + /// \brief Reserve the registers that may be accessed using indirect addressing.
void reserveIndirectRegisters(BitVector &Reserved, const MachineFunction &MF) const; diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td index 590fde2..73fa345 100644 --- a/lib/Target/R600/R600Instructions.td +++ b/lib/Target/R600/R600Instructions.td @@ -125,7 +125,7 @@ class R600_1OP <bits<11> inst, string opName, list<dag> pattern, class R600_1OP_Helper <bits<11> inst, string opName, SDPatternOperator node, InstrItinClass itin = AnyALU> : R600_1OP <inst, opName, - [(set R600_Reg32:$dst, (node R600_Reg32:$src0))] + [(set R600_Reg32:$dst, (node R600_Reg32:$src0))], itin >; // If you add or change the operands for R600_2OP instructions, you must @@ -161,10 +161,10 @@ class R600_2OP <bits<11> inst, string opName, list<dag> pattern, } class R600_2OP_Helper <bits<11> inst, string opName, SDPatternOperator node, - InstrItinClass itim = AnyALU> : + InstrItinClass itin = AnyALU> : R600_2OP <inst, opName, [(set R600_Reg32:$dst, (node R600_Reg32:$src0, - R600_Reg32:$src1))] + R600_Reg32:$src1))], itin >; // If you add or change the operands for R600_3OP instructions, you must @@ -721,14 +721,11 @@ def SETNE_DX10 : R600_2OP < >; def FRACT : R600_1OP_Helper <0x10, "FRACT", AMDGPUfract>; -def TRUNC : R600_1OP_Helper <0x11, "TRUNC", int_AMDGPU_trunc>; +def TRUNC : R600_1OP_Helper <0x11, "TRUNC", ftrunc>; def CEIL : R600_1OP_Helper <0x12, "CEIL", fceil>; def RNDNE : R600_1OP_Helper <0x13, "RNDNE", frint>; def FLOOR : R600_1OP_Helper <0x14, "FLOOR", ffloor>; -// Add also ftrunc intrinsic pattern -def : Pat<(ftrunc f32:$src0), (TRUNC $src0)>; - def MOV : R600_1OP <0x19, "MOV", []>; let isPseudo = 1, isCodeGenOnly = 1, usesCustomInserter = 1 in { @@ -1082,18 +1079,21 @@ class RECIP_UINT_Common <bits<11> inst> : R600_1OP_Helper < let Itinerary = TransALU; } +// Clamped to maximum. class RECIPSQRT_CLAMPED_Common <bits<11> inst> : R600_1OP_Helper < - inst, "RECIPSQRT_CLAMPED", int_AMDGPU_rsq + inst, "RECIPSQRT_CLAMPED", AMDGPUrsq_clamped > { let Itinerary = TransALU; } -class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP < - inst, "RECIPSQRT_IEEE", [] +class RECIPSQRT_IEEE_Common <bits<11> inst> : R600_1OP_Helper < + inst, "RECIPSQRT_IEEE", AMDGPUrsq_legacy > { let Itinerary = TransALU; } +// TODO: There is also RECIPSQRT_FF which clamps to zero.
+ class SIN_Common <bits<11> inst> : R600_1OP < inst, "SIN", [(set f32:$dst, (SIN_HW f32:$src0))]>{ let Trig = 1; @@ -1266,13 +1266,6 @@ defm R600_ : RegisterLoadStore <R600_Reg32, FRAMEri, ADDRIndirect>; //===----------------------------------------------------------------------===// -// Branch Instructions -//===----------------------------------------------------------------------===// - -def IF_PREDICATE_SET : ILFormat<(outs), (ins GPRI32:$src), - "IF_PREDICATE_SET $src", []>; - -//===----------------------------------------------------------------------===// // Pseudo instructions //===----------------------------------------------------------------------===// @@ -1345,15 +1338,6 @@ def TXD_SHADOW: InstR600 < } // End isPseudo = 1 } // End usesCustomInserter = 1 -//===---------------------------------------------------------------------===// -// Return instruction -//===---------------------------------------------------------------------===// -let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, - usesCustomInserter = 1 in { - def RETURN : ILFormat<(outs), (ins variable_ops), - "RETURN", [(IL_retflag)]>; -} - //===----------------------------------------------------------------------===// // Constant Buffer Addressing Support @@ -1480,11 +1464,52 @@ let Inst{63-32} = Word1; let VTXInst = 1; } +//===---------------------------------------------------------------------===// +// Flow and Program control Instructions +//===---------------------------------------------------------------------===// +class ILFormat<dag outs, dag ins, string asmstr, list<dag> pattern> +: Instruction { + + let Namespace = "AMDGPU"; + dag OutOperandList = outs; + dag InOperandList = ins; + let Pattern = pattern; + let AsmString = !strconcat(asmstr, "\n"); + let isPseudo = 1; + let Itinerary = NullALU; + bit hasIEEEFlag = 0; + bit hasZeroOpFlag = 0; + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; +} + +multiclass BranchConditional<SDNode Op, RegisterClass rci, RegisterClass rcf> { + def _i32 : ILFormat<(outs), + (ins brtarget:$target, rci:$src0), + "; i32 Pseudo branch instruction", + [(Op bb:$target, (i32 rci:$src0))]>; + def _f32 : ILFormat<(outs), + (ins brtarget:$target, rcf:$src0), + "; f32 Pseudo branch instruction", + [(Op bb:$target, (f32 rcf:$src0))]>; +} + +// Only scalar types should generate flow control +multiclass BranchInstr<string name> { + def _i32 : ILFormat<(outs), (ins R600_Reg32:$src), + !strconcat(name, " $src"), []>; + def _f32 : ILFormat<(outs), (ins R600_Reg32:$src), + !strconcat(name, " $src"), []>; +} +// Only scalar types should generate flow control +multiclass BranchInstr2<string name> { + def _i32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1), + !strconcat(name, " $src0, $src1"), []>; + def _f32 : ILFormat<(outs), (ins R600_Reg32:$src0, R600_Reg32:$src1), + !strconcat(name, " $src0, $src1"), []>; +} - -//===--------------------------------------------------------------------===// -// Instructions support -//===--------------------------------------------------------------------===// //===---------------------------------------------------------------------===// // Custom Inserter for Branches and returns, this eventually will be a // separate pass @@ -1497,13 +1522,22 @@ let isTerminator = 1, usesCustomInserter = 1, isBranch = 1, isBarrier = 1 in { } //===---------------------------------------------------------------------===// -// Flow and Program control Instructions +// Return instruction 
//===---------------------------------------------------------------------===// +let isTerminator = 1, isReturn = 1, hasCtrlDep = 1, + usesCustomInserter = 1 in { + def RETURN : ILFormat<(outs), (ins variable_ops), + "RETURN", [(IL_retflag)]>; +} + +//===----------------------------------------------------------------------===// +// Branch Instructions +//===----------------------------------------------------------------------===// + +def IF_PREDICATE_SET : ILFormat<(outs), (ins R600_Reg32:$src), + "IF_PREDICATE_SET $src", []>; + let isTerminator=1 in { - def SWITCH : ILFormat< (outs), (ins GPRI32:$src), - !strconcat("SWITCH", " $src"), []>; - def CASE : ILFormat< (outs), (ins GPRI32:$src), - !strconcat("CASE", " $src"), []>; def BREAK : ILFormat< (outs), (ins), "BREAK", []>; def CONTINUE : ILFormat< (outs), (ins), @@ -1548,6 +1582,60 @@ let isTerminator=1 in { } //===----------------------------------------------------------------------===// +// Indirect addressing pseudo instructions +//===----------------------------------------------------------------------===// + +let isPseudo = 1 in { + +class ExtractVertical <RegisterClass vec_rc> : InstR600 < + (outs R600_Reg32:$dst), + (ins vec_rc:$vec, R600_Reg32:$index), "", + [], + AnyALU +>; + +let Constraints = "$dst = $vec" in { + +class InsertVertical <RegisterClass vec_rc> : InstR600 < + (outs vec_rc:$dst), + (ins vec_rc:$vec, R600_Reg32:$value, R600_Reg32:$index), "", + [], + AnyALU +>; + +} // End Constraints = "$dst = $vec" + +} // End isPseudo = 1 + +def R600_EXTRACT_ELT_V2 : ExtractVertical <R600_Reg64Vertical>; +def R600_EXTRACT_ELT_V4 : ExtractVertical <R600_Reg128Vertical>; + +def R600_INSERT_ELT_V2 : InsertVertical <R600_Reg64Vertical>; +def R600_INSERT_ELT_V4 : InsertVertical <R600_Reg128Vertical>; + +class ExtractVerticalPat <Instruction inst, ValueType vec_ty, + ValueType scalar_ty> : Pat < + (scalar_ty (extractelt vec_ty:$vec, i32:$index)), + (inst $vec, $index) +>; + +def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2i32, i32>; +def : ExtractVerticalPat <R600_EXTRACT_ELT_V2, v2f32, f32>; +def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4i32, i32>; +def : ExtractVerticalPat <R600_EXTRACT_ELT_V4, v4f32, f32>; + +class InsertVerticalPat <Instruction inst, ValueType vec_ty, + ValueType scalar_ty> : Pat < + (vec_ty (insertelt vec_ty:$vec, scalar_ty:$value, i32:$index)), + (inst $vec, $value, $index) +>; + +def : InsertVerticalPat <R600_INSERT_ELT_V2, v2i32, i32>; +def : InsertVerticalPat <R600_INSERT_ELT_V2, v2f32, f32>; +def : InsertVerticalPat <R600_INSERT_ELT_V4, v4i32, i32>; +def : InsertVerticalPat <R600_INSERT_ELT_V4, v4f32, f32>; + +//===----------------------------------------------------------------------===// // ISel Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/R600MachineScheduler.cpp b/lib/Target/R600/R600MachineScheduler.cpp index d1655d1..7ea654c 100644 --- a/lib/Target/R600/R600MachineScheduler.cpp +++ b/lib/Target/R600/R600MachineScheduler.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "R600MachineScheduler.h" +#include "AMDGPUSubtarget.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Pass.h" diff --git a/lib/Target/R600/R600Packetizer.cpp b/lib/Target/R600/R600Packetizer.cpp index c2f6c03..74cf309 100644 --- a/lib/Target/R600/R600Packetizer.cpp +++ b/lib/Target/R600/R600Packetizer.cpp @@ -16,6 +16,7 @@ #include 
"llvm/Support/Debug.h" #include "AMDGPU.h" +#include "AMDGPUSubtarget.h" #include "R600InstrInfo.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineDominators.h" diff --git a/lib/Target/R600/R600RegisterInfo.cpp b/lib/Target/R600/R600RegisterInfo.cpp index f3bb88b..dc95675 100644 --- a/lib/Target/R600/R600RegisterInfo.cpp +++ b/lib/Target/R600/R600RegisterInfo.cpp @@ -20,15 +20,14 @@ using namespace llvm; -R600RegisterInfo::R600RegisterInfo(AMDGPUTargetMachine &tm) -: AMDGPURegisterInfo(tm), - TM(tm) +R600RegisterInfo::R600RegisterInfo(const AMDGPUSubtarget &st) +: AMDGPURegisterInfo(st) { RCW.RegWeight = 0; RCW.WeightLimit = 0;} BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); - const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(TM.getInstrInfo()); + const R600InstrInfo *TII = static_cast<const R600InstrInfo*>(ST.getInstrInfo()); Reserved.set(AMDGPU::ZERO); Reserved.set(AMDGPU::HALF); @@ -55,16 +54,6 @@ BitVector R600RegisterInfo::getReservedRegs(const MachineFunction &MF) const { return Reserved; } -const TargetRegisterClass * -R600RegisterInfo::getISARegClass(const TargetRegisterClass * rc) const { - switch (rc->getID()) { - case AMDGPU::GPRF32RegClassID: - case AMDGPU::GPRI32RegClassID: - return &AMDGPU::R600_Reg32RegClass; - default: return rc; - } -} - unsigned R600RegisterInfo::getHWRegChan(unsigned reg) const { return this->getEncodingValue(reg) >> HW_CHAN_SHIFT; } diff --git a/lib/Target/R600/R600RegisterInfo.h b/lib/Target/R600/R600RegisterInfo.h index 52e1a4b..247808b 100644 --- a/lib/Target/R600/R600RegisterInfo.h +++ b/lib/Target/R600/R600RegisterInfo.h @@ -16,26 +16,18 @@ #define R600REGISTERINFO_H_ #include "AMDGPURegisterInfo.h" -#include "AMDGPUTargetMachine.h" namespace llvm { -class R600TargetMachine; +class AMDGPUSubtarget; struct R600RegisterInfo : public AMDGPURegisterInfo { - AMDGPUTargetMachine &TM; RegClassWeight RCW; - R600RegisterInfo(AMDGPUTargetMachine &tm); + R600RegisterInfo(const AMDGPUSubtarget &st); BitVector getReservedRegs(const MachineFunction &MF) const override; - /// \param RC is an AMDIL reg class. - /// - /// \returns the R600 reg class that is equivalent to \p RC. - const TargetRegisterClass *getISARegClass( - const TargetRegisterClass *RC) const override; - /// \brief get the HW encoding for a register's channel. 
unsigned getHWRegChan(unsigned reg) const; diff --git a/lib/Target/R600/R600RegisterInfo.td b/lib/Target/R600/R600RegisterInfo.td index 68bcd20..cc667d9 100644 --- a/lib/Target/R600/R600RegisterInfo.td +++ b/lib/Target/R600/R600RegisterInfo.td @@ -18,18 +18,28 @@ class R600RegWithChan <string name, bits<9> sel, string chan> : class R600Reg_128<string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> { + field bits<2> chan_encoding = 0; let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1, sub2, sub3]; - let HWEncoding = encoding; + let HWEncoding{8-0} = encoding{8-0}; + let HWEncoding{10-9} = chan_encoding; } class R600Reg_64<string n, list<Register> subregs, bits<16> encoding> : RegisterWithSubRegs<n, subregs> { + field bits<2> chan_encoding = 0; let Namespace = "AMDGPU"; let SubRegIndices = [sub0, sub1]; let HWEncoding = encoding; + let HWEncoding{8-0} = encoding{8-0}; + let HWEncoding{10-9} = chan_encoding; } +class R600Reg_64Vertical<int lo, int hi, string chan> : R600Reg_64 < + "V"#lo#hi#"_"#chan, + [!cast<Register>("T"#lo#"_"#chan), !cast<Register>("T"#hi#"_"#chan)], + lo +>; foreach Index = 0-127 in { foreach Chan = [ "X", "Y", "Z", "W" ] in { @@ -54,6 +64,24 @@ foreach Index = 0-127 in { Index>; } +foreach Chan = [ "X", "Y", "Z", "W"] in { + + let chan_encoding = !if(!eq(Chan, "X"), 0, + !if(!eq(Chan, "Y"), 1, + !if(!eq(Chan, "Z"), 2, + !if(!eq(Chan, "W"), 3, 0)))) in { + def V0123_#Chan : R600Reg_128 <"V0123_"#Chan, + [!cast<Register>("T0_"#Chan), + !cast<Register>("T1_"#Chan), + !cast<Register>("T2_"#Chan), + !cast<Register>("T3_"#Chan)], + 0>; + def V01_#Chan : R600Reg_64Vertical<0, 1, Chan>; + def V23_#Chan : R600Reg_64Vertical<2, 3, Chan>; + } +} + + // KCACHE_BANK0 foreach Index = 159-128 in { foreach Chan = [ "X", "Y", "Z", "W" ] in { @@ -130,8 +158,14 @@ def ALU_PARAM : R600Reg<"Param", 0>; let isAllocatable = 0 in { -// XXX: Only use the X channel, until we support wider stack widths -def R600_Addr : RegisterClass <"AMDGPU", [i32], 127, (add (sequence "Addr%u_X", 0, 127))>; +def R600_Addr : RegisterClass <"AMDGPU", [i32], 32, (add (sequence "Addr%u_X", 0, 127))>; + +// We only use Addr_[YZW] for vertical vectors. +// FIXME if we add more vertical vector registers we will need to add more +// registers to these classes.
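// Editor's worked example (illustrative, not part of this patch): with the
// layout above, HWEncoding bits 8-0 carry the register index and bits 10-9
// the channel, so a vertical register in channel Z (chan_encoding = 2) at
// index 0 encodes as (2 << 9) | 0 = 0x400. R600RegisterInfo::getHWRegChan()
// recovers the channel by shifting the encoding right by HW_CHAN_SHIFT,
// which this layout assumes to be 9.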
+def R600_Addr_Y : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Y)>; +def R600_Addr_Z : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_Z)>; +def R600_Addr_W : RegisterClass <"AMDGPU", [i32], 32, (add Addr0_W)>; def R600_LDS_SRC_REG : RegisterClass<"AMDGPU", [i32], 32, (add OQA, OQB, OQAP, OQBP, LDS_DIRECT_A, LDS_DIRECT_B)>; @@ -206,5 +240,13 @@ def R600_Reg128 : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, let CopyCost = -1; } +def R600_Reg128Vertical : RegisterClass<"AMDGPU", [v4f32, v4i32], 128, + (add V0123_W, V0123_Z, V0123_Y, V0123_X) +>; + def R600_Reg64 : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, (add (sequence "T%u_XY", 0, 63))>; + +def R600_Reg64Vertical : RegisterClass<"AMDGPU", [v2f32, v2i32], 64, + (add V01_X, V01_Y, V01_Z, V01_W, + V23_X, V23_Y, V23_Z, V23_W)>; diff --git a/lib/Target/R600/SIAnnotateControlFlow.cpp b/lib/Target/R600/SIAnnotateControlFlow.cpp index d6e4451..91eb60b 100644 --- a/lib/Target/R600/SIAnnotateControlFlow.cpp +++ b/lib/Target/R600/SIAnnotateControlFlow.cpp @@ -65,7 +65,6 @@ class SIAnnotateControlFlow : public FunctionPass { DominatorTree *DT; StackVector Stack; - SSAUpdater PhiInserter; bool isTopOfStack(BasicBlock *BB); @@ -81,7 +80,7 @@ class SIAnnotateControlFlow : public FunctionPass { void insertElse(BranchInst *Term); - void handleLoopCondition(Value *Cond); + Value *handleLoopCondition(Value *Cond, PHINode *Broken); void handleLoop(BranchInst *Term); @@ -177,7 +176,7 @@ bool SIAnnotateControlFlow::isElse(PHINode *Phi) { } else { if (Phi->getIncomingValue(i) != BoolFalse) return false; - + } } return true; @@ -204,20 +203,26 @@ void SIAnnotateControlFlow::insertElse(BranchInst *Term) { } /// \brief Recursively handle the condition leading to a loop -void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) { +Value *SIAnnotateControlFlow::handleLoopCondition(Value *Cond, PHINode *Broken) { if (PHINode *Phi = dyn_cast<PHINode>(Cond)) { + BasicBlock *Parent = Phi->getParent(); + PHINode *NewPhi = PHINode::Create(Int64, 0, "", &Parent->front()); + Value *Ret = NewPhi; // Handle all non-constant incoming values first for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { Value *Incoming = Phi->getIncomingValue(i); - if (isa<ConstantInt>(Incoming)) + BasicBlock *From = Phi->getIncomingBlock(i); + if (isa<ConstantInt>(Incoming)) { + NewPhi->addIncoming(Broken, From); continue; + } Phi->setIncomingValue(i, BoolFalse); - handleLoopCondition(Incoming); + Value *PhiArg = handleLoopCondition(Incoming, Broken); + NewPhi->addIncoming(PhiArg, From); } - BasicBlock *Parent = Phi->getParent(); BasicBlock *IDom = DT->getNode(Parent)->getIDom()->getBlock(); for (unsigned i = 0, e = Phi->getNumIncomingValues(); i != e; ++i) { @@ -230,33 +235,28 @@ void SIAnnotateControlFlow::handleLoopCondition(Value *Cond) { if (From == IDom) { CallInst *OldEnd = dyn_cast<CallInst>(Parent->getFirstInsertionPt()); if (OldEnd && OldEnd->getCalledFunction() == EndCf) { - Value *Args[] = { - OldEnd->getArgOperand(0), - PhiInserter.GetValueAtEndOfBlock(Parent) - }; - Value *Ret = CallInst::Create(ElseBreak, Args, "", OldEnd); - PhiInserter.AddAvailableValue(Parent, Ret); + Value *Args[] = { OldEnd->getArgOperand(0), NewPhi }; + Ret = CallInst::Create(ElseBreak, Args, "", OldEnd); continue; } } - TerminatorInst *Insert = From->getTerminator(); - Value *Arg = PhiInserter.GetValueAtEndOfBlock(From); - Value *Ret = CallInst::Create(Break, Arg, "", Insert); - PhiInserter.AddAvailableValue(From, Ret); + Value *PhiArg = CallInst::Create(Break, Broken, "", Insert); + 
NewPhi->setIncomingValue(i, PhiArg); } eraseIfUnused(Phi); + return Ret; } else if (Instruction *Inst = dyn_cast<Instruction>(Cond)) { BasicBlock *Parent = Inst->getParent(); TerminatorInst *Insert = Parent->getTerminator(); - Value *Args[] = { Cond, PhiInserter.GetValueAtEndOfBlock(Parent) }; - Value *Ret = CallInst::Create(IfBreak, Args, "", Insert); - PhiInserter.AddAvailableValue(Parent, Ret); + Value *Args[] = { Cond, Broken }; + return CallInst::Create(IfBreak, Args, "", Insert); } else { llvm_unreachable("Unhandled loop condition!"); } + return 0; } /// \brief Handle a back edge (loop) @@ -264,15 +264,11 @@ void SIAnnotateControlFlow::handleLoop(BranchInst *Term) { BasicBlock *Target = Term->getSuccessor(1); PHINode *Broken = PHINode::Create(Int64, 0, "", &Target->front()); - PhiInserter.Initialize(Int64, ""); - PhiInserter.AddAvailableValue(Target, Broken); - Value *Cond = Term->getCondition(); Term->setCondition(BoolTrue); - handleLoopCondition(Cond); + Value *Arg = handleLoopCondition(Cond, Broken); BasicBlock *BB = Term->getParent(); - Value *Arg = PhiInserter.GetValueAtEndOfBlock(BB); for (pred_iterator PI = pred_begin(Target), PE = pred_end(Target); PI != PE; ++PI) { diff --git a/lib/Target/R600/SIDefines.h b/lib/Target/R600/SIDefines.h index 2cbce28..4d31a11 100644 --- a/lib/Target/R600/SIDefines.h +++ b/lib/Target/R600/SIDefines.h @@ -35,4 +35,54 @@ enum { #define S_00B84C_LDS_SIZE(x) (((x) & 0x1FF) << 15) #define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC + +#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848 +#define S_00B848_VGPRS(x) (((x) & 0x3F) << 0) +#define G_00B848_VGPRS(x) (((x) >> 0) & 0x3F) +#define C_00B848_VGPRS 0xFFFFFFC0 +#define S_00B848_SGPRS(x) (((x) & 0x0F) << 6) +#define G_00B848_SGPRS(x) (((x) >> 6) & 0x0F) +#define C_00B848_SGPRS 0xFFFFFC3F +#define S_00B848_PRIORITY(x) (((x) & 0x03) << 10) +#define G_00B848_PRIORITY(x) (((x) >> 10) & 0x03) +#define C_00B848_PRIORITY 0xFFFFF3FF +#define S_00B848_FLOAT_MODE(x) (((x) & 0xFF) << 12) +#define G_00B848_FLOAT_MODE(x) (((x) >> 12) & 0xFF) +#define C_00B848_FLOAT_MODE 0xFFF00FFF +#define S_00B848_PRIV(x) (((x) & 0x1) << 20) +#define G_00B848_PRIV(x) (((x) >> 20) & 0x1) +#define C_00B848_PRIV 0xFFEFFFFF +#define S_00B848_DX10_CLAMP(x) (((x) & 0x1) << 21) +#define G_00B848_DX10_CLAMP(x) (((x) >> 21) & 0x1) +#define C_00B848_DX10_CLAMP 0xFFDFFFFF +#define S_00B848_DEBUG_MODE(x) (((x) & 0x1) << 22) +#define G_00B848_DEBUG_MODE(x) (((x) >> 22) & 0x1) +#define C_00B848_DEBUG_MODE 0xFFBFFFFF +#define S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23) +#define G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1) +#define C_00B848_IEEE_MODE 0xFF7FFFFF + + +// Helpers for setting FLOAT_MODE +#define FP_ROUND_ROUND_TO_NEAREST 0 +#define FP_ROUND_ROUND_TO_INF 1 +#define FP_ROUND_ROUND_TO_NEGINF 2 +#define FP_ROUND_ROUND_TO_ZERO 3 + +// Bits 3:0 control rounding mode. 1:0 control single precision, 3:2 double +// precision. +#define FP_ROUND_MODE_SP(x) ((x) & 0x3) +#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2) + +#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0 +#define FP_DENORM_FLUSH_OUT 1 +#define FP_DENORM_FLUSH_IN 2 +#define FP_DENORM_FLUSH_NONE 3 + + +// Bits 7:4 control denormal handling. 5:4 control single precision, 7:6 double +// precision.
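// Editor's usage sketch (illustrative, not part of this patch): combining the
// helpers above and just below, a FLOAT_MODE word selecting round-to-nearest
// for single precision, round-to-zero for double precision, and full denormal
// support in both could be assembled as
//   unsigned FloatMode = FP_ROUND_MODE_SP(FP_ROUND_ROUND_TO_NEAREST) |
//                        FP_ROUND_MODE_DP(FP_ROUND_ROUND_TO_ZERO) |
//                        FP_DENORM_MODE_SP(FP_DENORM_FLUSH_NONE) |
//                        FP_DENORM_MODE_DP(FP_DENORM_FLUSH_NONE);
// and then packed into the program resource word with
// S_00B848_FLOAT_MODE(FloatMode).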
+#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4) +#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6) + #endif // SIDEFINES_H_ diff --git a/lib/Target/R600/SIFixSGPRLiveRanges.cpp b/lib/Target/R600/SIFixSGPRLiveRanges.cpp new file mode 100644 index 0000000..7d116ee --- /dev/null +++ b/lib/Target/R600/SIFixSGPRLiveRanges.cpp @@ -0,0 +1,110 @@ +//===-- SIFixSGPRLiveRanges.cpp - Fix SGPR live ranges ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +/// \file +/// SALU instructions ignore control flow, so we need to modify the live ranges +/// of the registers they define. +/// +/// The strategy is to view the entire program as if it were a single basic +/// block and calculate the intervals accordingly. We implement this +/// by walking this list of segments for each LiveRange and setting the +/// end of each segment equal to the start of the segment that immediately +/// follows it. + +#include "AMDGPU.h" +#include "SIRegisterInfo.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "si-fix-sgpr-live-ranges" + +namespace { + +class SIFixSGPRLiveRanges : public MachineFunctionPass { +public: + static char ID; + +public: + SIFixSGPRLiveRanges() : MachineFunctionPass(ID) { + initializeSIFixSGPRLiveRangesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnMachineFunction(MachineFunction &MF) override; + + virtual const char *getPassName() const override { + return "SI Fix SGPR live ranges"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<LiveIntervals>(); + AU.addPreserved<LiveIntervals>(); + AU.addPreserved<SlotIndexes>(); + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } +}; + +} // End anonymous namespace. 
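// Editor's sketch (illustrative, not part of this patch): the strategy from
// the file comment above reduces to stitching each segment of an SGPR
// LiveInterval to the start of the segment that follows it, so the interval
// behaves as if the whole function were one straight-line block. Assuming a
// LiveInterval LI with its segments sorted by start index, the core is:
//
//   for (unsigned i = 0, e = LI.size() - 1; i != e; ++i)
//     LI.segments[i].end = LI.segments[i + 1].start;
//
// This is the inner loop of runOnMachineFunction below.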
+ +INITIALIZE_PASS_BEGIN(SIFixSGPRLiveRanges, DEBUG_TYPE, + "SI Fix SGPR Live Ranges", false, false) +INITIALIZE_PASS_DEPENDENCY(LiveIntervals) +INITIALIZE_PASS_END(SIFixSGPRLiveRanges, DEBUG_TYPE, + "SI Fix SGPR Live Ranges", false, false) + +char SIFixSGPRLiveRanges::ID = 0; + +char &llvm::SIFixSGPRLiveRangesID = SIFixSGPRLiveRanges::ID; + +FunctionPass *llvm::createSIFixSGPRLiveRangesPass() { + return new SIFixSGPRLiveRanges(); +} + +bool SIFixSGPRLiveRanges::runOnMachineFunction(MachineFunction &MF) { + MachineRegisterInfo &MRI = MF.getRegInfo(); + const SIRegisterInfo *TRI = static_cast<const SIRegisterInfo *>( + MF.getTarget().getRegisterInfo()); + LiveIntervals *LIS = &getAnalysis<LiveIntervals>(); + + for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); + BI != BE; ++BI) { + + MachineBasicBlock &MBB = *BI; + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); + I != E; ++I) { + MachineInstr &MI = *I; + MachineOperand *ExecUse = MI.findRegisterUseOperand(AMDGPU::EXEC); + if (ExecUse) + continue; + + for (const MachineOperand &Def : MI.operands()) { + if (!Def.isReg() || !Def.isDef() ||!TargetRegisterInfo::isVirtualRegister(Def.getReg())) + continue; + + const TargetRegisterClass *RC = MRI.getRegClass(Def.getReg()); + + if (!TRI->isSGPRClass(RC)) + continue; + LiveInterval &LI = LIS->getInterval(Def.getReg()); + for (unsigned i = 0, e = LI.size() - 1; i != e; ++i) { + LiveRange::Segment &Seg = LI.segments[i]; + LiveRange::Segment &Next = LI.segments[i + 1]; + Seg.end = Next.start; + } + } + } + } + + return false; +} diff --git a/lib/Target/R600/SIISelLowering.cpp b/lib/Target/R600/SIISelLowering.cpp index c9e247c..b13c3b8 100644 --- a/lib/Target/R600/SIISelLowering.cpp +++ b/lib/Target/R600/SIISelLowering.cpp @@ -14,8 +14,8 @@ #include "SIISelLowering.h" #include "AMDGPU.h" +#include "AMDGPUIntrinsicInfo.h" #include "AMDGPUSubtarget.h" -#include "AMDILIntrinsicInfo.h" #include "SIInstrInfo.h" #include "SIMachineFunctionInfo.h" #include "SIRegisterInfo.h" @@ -24,6 +24,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/Function.h" +#include "llvm/ADT/SmallString.h" using namespace llvm; @@ -76,6 +77,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::ADD, MVT::i32, Legal); setOperationAction(ISD::ADDC, MVT::i32, Legal); setOperationAction(ISD::ADDE, MVT::i32, Legal); + setOperationAction(ISD::SUBC, MVT::i32, Legal); + setOperationAction(ISD::SUBE, MVT::i32, Legal); // We need to custom lower vector stores from local memory setOperationAction(ISD::LOAD, MVT::v2i32, Custom); @@ -88,14 +91,12 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : // We need to custom lower loads/stores from private memory setOperationAction(ISD::LOAD, MVT::i32, Custom); - setOperationAction(ISD::LOAD, MVT::i64, Custom); setOperationAction(ISD::LOAD, MVT::v2i32, Custom); setOperationAction(ISD::LOAD, MVT::v4i32, Custom); setOperationAction(ISD::LOAD, MVT::v8i32, Custom); setOperationAction(ISD::STORE, MVT::i1, Custom); setOperationAction(ISD::STORE, MVT::i32, Custom); - setOperationAction(ISD::STORE, MVT::i64, Custom); setOperationAction(ISD::STORE, MVT::v2i32, Custom); setOperationAction(ISD::STORE, MVT::v4i32, Custom); @@ -105,18 +106,14 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::SELECT, MVT::f64, Promote); AddPromotedToType(ISD::SELECT, MVT::f64, MVT::i64); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, 
MVT::i32, Custom); - - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); + setOperationAction(ISD::SELECT_CC, MVT::f64, Expand); setOperationAction(ISD::SETCC, MVT::v2i1, Expand); setOperationAction(ISD::SETCC, MVT::v4i1, Expand); - setOperationAction(ISD::ANY_EXTEND, MVT::i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::i64, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::i64, Custom); - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Custom); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Custom); @@ -139,6 +136,7 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::v4f32, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); + setOperationAction(ISD::BRCOND, MVT::Other, Custom); setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom); @@ -215,9 +213,16 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) : setOperationAction(ISD::FRINT, MVT::f64, Legal); } + // FIXME: These should be removed and handled the same way as f32 fneg. Source + // modifiers also work for the double instructions. + setOperationAction(ISD::FNEG, MVT::f64, Expand); + setOperationAction(ISD::FABS, MVT::f64, Expand); + setTargetDAGCombine(ISD::SELECT_CC); setTargetDAGCombine(ISD::SETCC); + setTargetDAGCombine(ISD::UINT_TO_FP); + setSchedulingPreference(Sched::RegPressure); } @@ -265,8 +270,12 @@ bool SITargetLowering::allowsUnalignedMemoryAccesses(EVT VT, return VT.bitsGT(MVT::i32); } -bool SITargetLowering::shouldSplitVectorType(EVT VT) const { - return VT.getScalarType().bitsLE(MVT::i16); +TargetLoweringBase::LegalizeTypeAction +SITargetLowering::getPreferredVectorAction(EVT VT) const { + if (VT.getVectorNumElements() != 1 && VT.getScalarType().bitsLE(MVT::i16)) + return TypeSplitVector; + + return TargetLoweringBase::getPreferredVectorAction(VT); } bool SITargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, @@ -482,19 +491,20 @@ MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter( MI->eraseFromParent(); break; } - case AMDGPU::V_SUB_F64: - BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), - MI->getOperand(0).getReg()) - .addReg(MI->getOperand(1).getReg()) - .addReg(MI->getOperand(2).getReg()) - .addImm(0) /* src2 */ - .addImm(0) /* ABS */ - .addImm(0) /* CLAMP */ - .addImm(0) /* OMOD */ - .addImm(2); /* NEG */ + case AMDGPU::V_SUB_F64: { + unsigned DestReg = MI->getOperand(0).getReg(); + BuildMI(*BB, I, MI->getDebugLoc(), TII->get(AMDGPU::V_ADD_F64), DestReg) + .addImm(0) // SRC0 modifiers + .addReg(MI->getOperand(1).getReg()) + .addImm(1) // SRC1 modifiers + .addReg(MI->getOperand(2).getReg()) + .addImm(0) // SRC2 modifiers + .addImm(0) // src2 + .addImm(0) // CLAMP + .addImm(0); // OMOD MI->eraseFromParent(); break; - + } case AMDGPU::SI_RegisterStorePseudo: { MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); unsigned Reg = MRI.createVirtualRegister(&AMDGPU::SReg_64RegClass); @@ -595,27 +605,31 @@ SDValue SITargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BRCOND: return LowerBRCOND(Op, DAG); case ISD::LOAD: { LoadSDNode *Load = dyn_cast<LoadSDNode>(Op); + EVT VT = Op.getValueType(); + + // These loads are legal.
+ if (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && + VT.isVector() && VT.getVectorNumElements() == 2 && + VT.getVectorElementType() == MVT::i32) + return SDValue(); + if (Op.getValueType().isVector() && (Load->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || Load->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS || (Load->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && Op.getValueType().getVectorNumElements() > 4))) { - SDValue MergedValues[2] = { - SplitVectorLoad(Op, DAG), - Load->getChain() - }; - return DAG.getMergeValues(MergedValues, SDLoc(Op)); + return SplitVectorLoad(Op, DAG); } else { - return LowerLOAD(Op, DAG); + SDValue Result = LowerLOAD(Op, DAG); + assert((!Result.getNode() || + Result.getNode()->getNumValues() == 2) && + "Load should return a value and a chain"); + return Result; } } case ISD::SELECT: return LowerSELECT(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::ANY_EXTEND: // Fall-through - case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, DAG); case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG); case ISD::INTRINSIC_WO_CHAIN: { unsigned IntrinsicID = @@ -827,13 +841,9 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND, SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); LoadSDNode *Load = cast<LoadSDNode>(Op); - SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG); - SDValue MergedValues[2]; - MergedValues[1] = Load->getChain(); - if (Ret.getNode()) { - MergedValues[0] = Ret; - return DAG.getMergeValues(MergedValues, DL); - } + SDValue Lowered = AMDGPUTargetLowering::LowerLOAD(Op, DAG); + if (Lowered.getNode()) + return Lowered; if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) { return SDValue(); @@ -846,25 +856,38 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(), DAG.getConstant(2, MVT::i32)); - Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32, - Load->getChain(), Ptr, - DAG.getTargetConstant(0, MVT::i32), - Op.getOperand(2)); + + // FIXME: REGISTER_LOAD should probably have a chain result. + SDValue Chain = Load->getChain(); + SDValue LoLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32, + Chain, Ptr, + DAG.getTargetConstant(0, MVT::i32), + Op.getOperand(2)); + + SDValue Ret = LoLoad.getValue(0); if (MemVT.getSizeInBits() == 64) { + // TODO: This needs a test to make sure the right thing is happening with + // the chain. That is hard without general function support. 
+ SDValue IncPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr, DAG.getConstant(1, MVT::i32)); - SDValue LoadUpper = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32, - Load->getChain(), IncPtr, - DAG.getTargetConstant(0, MVT::i32), - Op.getOperand(2)); + SDValue HiLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32, + Chain, IncPtr, + DAG.getTargetConstant(0, MVT::i32), + Op.getOperand(2)); - Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ret, LoadUpper); + Ret = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, LoLoad, HiLoad); + // Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, + // LoLoad.getValue(1), HiLoad.getValue(1)); } - MergedValues[0] = Ret; - return DAG.getMergeValues(MergedValues, DL); + SDValue Ops[] = { + Ret, + Chain + }; + return DAG.getMergeValues(Ops, DL); } SDValue SITargetLowering::LowerSampleIntrinsic(unsigned Opcode, @@ -903,39 +926,17 @@ SDValue SITargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BITCAST, DL, MVT::i64, Res); } -SDValue SITargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue True = Op.getOperand(2); - SDValue False = Op.getOperand(3); - SDValue CC = Op.getOperand(4); - EVT VT = Op.getValueType(); - SDLoc DL(Op); - - SDValue Cond = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, CC); - return DAG.getNode(ISD::SELECT, DL, VT, Cond, True, False); -} - -SDValue SITargetLowering::LowerSIGN_EXTEND(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - SDLoc DL(Op); - - if (VT != MVT::i64) { - return SDValue(); - } - - SDValue Hi = DAG.getNode(ISD::SRA, DL, MVT::i32, Op.getOperand(0), - DAG.getConstant(31, MVT::i32)); - - return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Op.getOperand(0), Hi); -} - SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { SDLoc DL(Op); StoreSDNode *Store = cast<StoreSDNode>(Op); EVT VT = Store->getMemoryVT(); + // These stores are legal. + if (Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && + VT.isVector() && VT.getVectorNumElements() == 2 && + VT.getVectorElementType() == MVT::i32) + return SDValue(); + SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG); if (Ret.getNode()) return Ret; @@ -1011,27 +1012,99 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const { return Chain; } +//===----------------------------------------------------------------------===// +// Custom DAG optimizations +//===----------------------------------------------------------------------===// + +SDValue SITargetLowering::performUCharToFloatCombine(SDNode *N, + DAGCombinerInfo &DCI) { + EVT VT = N->getValueType(0); + EVT ScalarVT = VT.getScalarType(); + if (ScalarVT != MVT::f32) + return SDValue(); -SDValue SITargetLowering::LowerZERO_EXTEND(SDValue Op, - SelectionDAG &DAG) const { - EVT VT = Op.getValueType(); - SDLoc DL(Op); + SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); - if (VT != MVT::i64) { + SDValue Src = N->getOperand(0); + EVT SrcVT = Src.getValueType(); + + // TODO: We could try to match extracting the higher bytes, which would be + // easier if i8 vectors weren't promoted to i32 vectors, particularly after + // types are legalized. v4i8 -> v4f32 is probably the only case to worry + // about in practice. 
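// Editor's illustration (not part of this patch): the scalar case below turns
// an i32-to-f32 conversion whose top 24 bits are provably zero, e.g.
//   %b = and i32 %x, 255
//   %f = uitofp i32 %b to float
// into a single AMDGPUISD::CVT_F32_UBYTE0 node (selected as v_cvt_f32_ubyte0)
// instead of a full u32-to-f32 convert; the CVT_F32_UBYTE combine further
// below can then drop the now-redundant mask via SimplifyDemandedBits.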
+ if (DCI.isAfterLegalizeVectorOps() && SrcVT == MVT::i32) { + if (DAG.MaskedValueIsZero(Src, APInt::getHighBitsSet(32, 24))) { + SDValue Cvt = DAG.getNode(AMDGPUISD::CVT_F32_UBYTE0, DL, VT, Src); + DCI.AddToWorklist(Cvt.getNode()); + return Cvt; + } + } + + // We are primarily trying to catch operations on illegal vector types + // before they are expanded. + // For scalars, we can use the more flexible method of checking masked bits + // after legalization. + if (!DCI.isBeforeLegalize() || + !SrcVT.isVector() || + SrcVT.getVectorElementType() != MVT::i8) { return SDValue(); } - SDValue Src = Op.getOperand(0); - if (Src.getValueType() != MVT::i32) - Src = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src); + assert(DCI.isBeforeLegalize() && "Unexpected legal type"); - SDValue Zero = DAG.getConstant(0, MVT::i32); - return DAG.getNode(ISD::BUILD_PAIR, DL, VT, Src, Zero); -} + // Weird sized vectors are a pain to handle, but we know 3 is really the same + // size as 4. + unsigned NElts = SrcVT.getVectorNumElements(); + if (!SrcVT.isSimple() && NElts != 3) + return SDValue(); -//===----------------------------------------------------------------------===// -// Custom DAG optimizations -//===----------------------------------------------------------------------===// + // Handle v4i8 -> v4f32 extload. Replace the v4i8 with a legal i32 load to + // prevent a mess from expanding to v4i32 and repacking. + if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) { + EVT LoadVT = getEquivalentMemType(*DAG.getContext(), SrcVT); + EVT RegVT = getEquivalentLoadRegType(*DAG.getContext(), SrcVT); + EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, NElts); + + LoadSDNode *Load = cast<LoadSDNode>(Src); + SDValue NewLoad = DAG.getExtLoad(ISD::ZEXTLOAD, DL, RegVT, + Load->getChain(), + Load->getBasePtr(), + LoadVT, + Load->getMemOperand()); + + // Make sure successors of the original load stay after it by updating + // them to use the new Chain. 
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), NewLoad.getValue(1)); + + SmallVector<SDValue, 4> Elts; + if (RegVT.isVector()) + DAG.ExtractVectorElements(NewLoad, Elts); + else + Elts.push_back(NewLoad); + + SmallVector<SDValue, 4> Ops; + + unsigned EltIdx = 0; + for (SDValue Elt : Elts) { + unsigned ComponentsInElt = std::min(4u, NElts - 4 * EltIdx); + for (unsigned I = 0; I < ComponentsInElt; ++I) { + unsigned Opc = AMDGPUISD::CVT_F32_UBYTE0 + I; + SDValue Cvt = DAG.getNode(Opc, DL, MVT::f32, Elt); + DCI.AddToWorklist(Cvt.getNode()); + Ops.push_back(Cvt); + } + + ++EltIdx; + } + + assert(Ops.size() == NElts); + + return DAG.getNode(ISD::BUILD_VECTOR, DL, FloatVT, Ops); + } + + return SDValue(); +} SDValue SITargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { @@ -1074,6 +1147,31 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N, } break; } + + case AMDGPUISD::CVT_F32_UBYTE0: + case AMDGPUISD::CVT_F32_UBYTE1: + case AMDGPUISD::CVT_F32_UBYTE2: + case AMDGPUISD::CVT_F32_UBYTE3: { + unsigned Offset = N->getOpcode() - AMDGPUISD::CVT_F32_UBYTE0; + + SDValue Src = N->getOperand(0); + APInt Demanded = APInt::getBitsSet(32, 8 * Offset, 8 * Offset + 8); + + APInt KnownZero, KnownOne; + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLO.ShrinkDemandedConstant(Src, Demanded) || + TLI.SimplifyDemandedBits(Src, Demanded, KnownZero, KnownOne, TLO)) { + DCI.CommitTargetLoweringOpt(TLO); + } + + break; + } + + case ISD::UINT_TO_FP: { + return performUCharToFloatCombine(N, DCI); + } } return AMDGPUTargetLowering::PerformDAGCombine(N, DCI); @@ -1297,7 +1395,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node, int32_t Immediate = Desc->getSize() == 4 ? 0 : -1; bool HaveVSrc = false, HaveSSrc = false; - // First figure out what we alread have in this instruction + // First figure out what we already have in this instruction. for (unsigned i = 0, e = Node->getNumOperands(), Op = NumDefs; i != e && Op < NumOps; ++i, ++Op) { @@ -1316,7 +1414,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node, } } - // If we neither have VSrc nor SSrc it makes no sense to continue + // If we neither have VSrc nor SSrc, it makes no sense to continue. if (!HaveVSrc && !HaveSSrc) return Node; @@ -1332,17 +1430,17 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node, const SDValue &Operand = Node->getOperand(i); Ops.push_back(Operand); - // Already folded immediate ? + // Already folded immediate? if (isa<ConstantSDNode>(Operand.getNode()) || isa<ConstantFPSDNode>(Operand.getNode())) continue; - // Is this a VSrc or SSrc operand ? + // Is this a VSrc or SSrc operand? unsigned RegClass = Desc->OpInfo[Op].RegClass; if (isVSrc(RegClass) || isSSrc(RegClass)) { // Try to fold the immediates if (!foldImm(Ops[i], Immediate, ScalarSlotUsed)) { - // Folding didn't worked, make sure we don't hit the SReg limit + // Folding didn't work, make sure we don't hit the SReg limit. 
ensureSRegLimit(DAG, Ops[i], RegClass, ScalarSlotUsed); } continue; @@ -1371,7 +1469,6 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node, continue; if (DescE64) { - // Test if it makes sense to switch to e64 encoding unsigned OtherRegClass = DescE64->OpInfo[Op].RegClass; if (!isVSrc(OtherRegClass) && !isSSrc(OtherRegClass)) @@ -1402,7 +1499,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node, if (!DescE64) continue; Desc = DescE64; - DescE64 = 0; + DescE64 = nullptr; } else if (Operand.getMachineOpcode() == AMDGPU::FABS_SI) { Ops.pop_back(); @@ -1412,7 +1509,7 @@ SDNode *SITargetLowering::foldOperands(MachineSDNode *Node, if (!DescE64) continue; Desc = DescE64; - DescE64 = 0; + DescE64 = nullptr; } } @@ -1535,7 +1632,7 @@ void SITargetLowering::adjustWritemask(MachineSDNode *&Node, } } -/// \brief Fold the instructions after slecting them +/// \brief Fold the instructions after selecting them. SDNode *SITargetLowering::PostISelFolding(MachineSDNode *Node, SelectionDAG &DAG) const { const SIInstrInfo *TII = diff --git a/lib/Target/R600/SIISelLowering.h b/lib/Target/R600/SIISelLowering.h index c6eaa81..e25323a 100644 --- a/lib/Target/R600/SIISelLowering.h +++ b/lib/Target/R600/SIISelLowering.h @@ -27,10 +27,7 @@ class SITargetLowering : public AMDGPUTargetLowering { SelectionDAG &DAG) const; SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerZERO_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBRCOND(SDValue Op, SelectionDAG &DAG) const; bool foldImm(SDValue &Operand, int32_t &Immediate, @@ -46,11 +43,16 @@ class SITargetLowering : public AMDGPUTargetLowering { void adjustWritemask(MachineSDNode *&N, SelectionDAG &DAG) const; MachineSDNode *AdjustRegClass(MachineSDNode *N, SelectionDAG &DAG) const; + static SDValue performUCharToFloatCombine(SDNode *N, + DAGCombinerInfo &DCI); + public: SITargetLowering(TargetMachine &tm); bool allowsUnalignedMemoryAccesses(EVT VT, unsigned AS, bool *IsFast) const override; - bool shouldSplitVectorType(EVT VT) const override; + + TargetLoweringBase::LegalizeTypeAction + getPreferredVectorAction(EVT VT) const override; bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override; diff --git a/lib/Target/R600/SIInsertWaits.cpp b/lib/Target/R600/SIInsertWaits.cpp index a17fed7..1733326 100644 --- a/lib/Target/R600/SIInsertWaits.cpp +++ b/lib/Target/R600/SIInsertWaits.cpp @@ -341,6 +341,8 @@ Counters SIInsertWaits::handleOperands(MachineInstr &MI) { return Result; } +// FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States" +// around other non-memory instructions. 
bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) { bool Changes = false; diff --git a/lib/Target/R600/SIInstrFormats.td b/lib/Target/R600/SIInstrFormats.td index 168eff2..7cae9fc 100644 --- a/lib/Target/R600/SIInstrFormats.td +++ b/lib/Target/R600/SIInstrFormats.td @@ -51,6 +51,16 @@ class Enc64 <dag outs, dag ins, string asm, list<dag> pattern> : let Size = 8; } +class VOP3Common <dag outs, dag ins, string asm, list<dag> pattern> : + Enc64 <outs, ins, asm, pattern> { + + let mayLoad = 0; + let mayStore = 0; + let hasSideEffects = 0; + let UseNamedOperandTable = 1; + let VOP3 = 1; +} + //===----------------------------------------------------------------------===// // Scalar operations //===----------------------------------------------------------------------===// @@ -207,7 +217,7 @@ class VOP2 <bits<6> op, dag outs, dag ins, string asm, list<dag> pattern> : } class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc64 <outs, ins, asm, pattern> { + VOP3Common <outs, ins, asm, pattern> { bits<8> dst; bits<2> src0_modifiers; @@ -233,16 +243,11 @@ class VOP3 <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : let Inst{61} = src0_modifiers{0}; let Inst{62} = src1_modifiers{0}; let Inst{63} = src2_modifiers{0}; - - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let UseNamedOperandTable = 1; - let VOP3 = 1; + } class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : - Enc64 <outs, ins, asm, pattern> { + VOP3Common <outs, ins, asm, pattern> { bits<8> dst; bits<2> src0_modifiers; @@ -266,11 +271,6 @@ class VOP3b <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern> : let Inst{62} = src1_modifiers{0}; let Inst{63} = src2_modifiers{0}; - let mayLoad = 0; - let mayStore = 0; - let hasSideEffects = 0; - let UseNamedOperandTable = 1; - let VOP3 = 1; } class VOPC <bits<8> op, dag ins, string asm, list<dag> pattern> : diff --git a/lib/Target/R600/SIInstrInfo.cpp b/lib/Target/R600/SIInstrInfo.cpp index 4a9e346..455c890 100644 --- a/lib/Target/R600/SIInstrInfo.cpp +++ b/lib/Target/R600/SIInstrInfo.cpp @@ -19,13 +19,14 @@ #include "SIMachineFunctionInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCInstrDesc.h" using namespace llvm; -SIInstrInfo::SIInstrInfo(AMDGPUTargetMachine &tm) - : AMDGPUInstrInfo(tm), - RI(tm) { } +SIInstrInfo::SIInstrInfo(const AMDGPUSubtarget &st) + : AMDGPUInstrInfo(st), + RI(st) { } //===----------------------------------------------------------------------===// // TargetInstrInfo callbacks @@ -187,18 +188,25 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>(); + MachineFunction *MF = MBB.getParent(); + SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); + MachineRegisterInfo &MRI = MF->getRegInfo(); DebugLoc DL = MBB.findDebugLoc(MI); unsigned KillFlag = isKill ? 
RegState::Kill : 0; - MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); - if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) { - unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent()); + if (RI.hasVGPRs(RC)) { + LLVMContext &Ctx = MF->getFunction()->getContext(); + Ctx.emitError("SIInstrInfo::storeRegToStackSlot - Can't spill VGPR!"); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), AMDGPU::VGPR0) + .addReg(SrcReg); + } else if (TRI->getCommonSubClass(RC, &AMDGPU::SGPR_32RegClass)) { + unsigned Lane = MFI->SpillTracker.reserveLanes(MRI, MF); + unsigned TgtReg = MFI->SpillTracker.LaneVGPR; - BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), MFI->SpillTracker.LaneVGPR) + BuildMI(MBB, MI, DL, get(AMDGPU::V_WRITELANE_B32), TgtReg) .addReg(SrcReg, KillFlag) .addImm(Lane); - MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, Lane); + MFI->SpillTracker.addSpilledReg(FrameIndex, TgtReg, Lane); } else if (RI.isSGPRClass(RC)) { // We are only allowed to create one new instruction when spilling // registers, so we need to use pseudo instruction for vector @@ -207,8 +215,7 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, // Reserve a spot in the spill tracker for each sub-register of // the vector register. unsigned NumSubRegs = RC->getSize() / 4; - unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MBB.getParent(), - NumSubRegs); + unsigned FirstLane = MFI->SpillTracker.reserveLanes(MRI, MF, NumSubRegs); MFI->SpillTracker.addSpilledReg(FrameIndex, MFI->SpillTracker.LaneVGPR, FirstLane); @@ -234,19 +241,19 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, unsigned DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI) const { - SIMachineFunctionInfo *MFI = MBB.getParent()->getInfo<SIMachineFunctionInfo>(); + MachineFunction *MF = MBB.getParent(); + SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); DebugLoc DL = MBB.findDebugLoc(MI); - if (TRI->getCommonSubClass(RC, &AMDGPU::SReg_32RegClass)) { - SIMachineFunctionInfo::SpilledReg Spill = - MFI->SpillTracker.getSpilledReg(FrameIndex); - assert(Spill.VGPR); - BuildMI(MBB, MI, DL, get(AMDGPU::V_READLANE_B32), DestReg) - .addReg(Spill.VGPR) - .addImm(Spill.Lane); - insertNOPs(MI, 3); + + if (RI.hasVGPRs(RC)) { + LLVMContext &Ctx = MF->getFunction()->getContext(); + Ctx.emitError("SIInstrInfo::loadRegToStackSlot - Can't retrieve spilled VGPR!"); + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), DestReg) + .addImm(0); } else if (RI.isSGPRClass(RC)){ unsigned Opcode; switch(RC->getSize() * 8) { + case 32: Opcode = AMDGPU::SI_SPILL_S32_RESTORE; break; case 64: Opcode = AMDGPU::SI_SPILL_S64_RESTORE; break; case 128: Opcode = AMDGPU::SI_SPILL_S128_RESTORE; break; case 256: Opcode = AMDGPU::SI_SPILL_S256_RESTORE; break; @@ -260,7 +267,6 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, BuildMI(MBB, MI, DL, get(Opcode), DestReg) .addReg(Spill.VGPR) .addImm(FrameIndex); - insertNOPs(MI, 3); } else { llvm_unreachable("VGPR spilling not supported"); } @@ -281,6 +287,8 @@ static unsigned getNumSubRegsForSpillOp(unsigned Op) { case AMDGPU::SI_SPILL_S64_SAVE: case AMDGPU::SI_SPILL_S64_RESTORE: return 2; + case AMDGPU::SI_SPILL_S32_RESTORE: + return 1; default: llvm_unreachable("Invalid spill opcode"); } } @@ -334,7 +342,8 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { case AMDGPU::SI_SPILL_S512_RESTORE: case AMDGPU::SI_SPILL_S256_RESTORE: case AMDGPU::SI_SPILL_S128_RESTORE: - case 
AMDGPU::SI_SPILL_S64_RESTORE: { + case AMDGPU::SI_SPILL_S64_RESTORE: + case AMDGPU::SI_SPILL_S32_RESTORE: { unsigned NumSubRegs = getNumSubRegsForSpillOp(MI->getOpcode()); for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { @@ -348,6 +357,7 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { .addReg(MI->getOperand(1).getReg()) .addImm(Spill.Lane + i); } + insertNOPs(MI, 3); MI->eraseFromParent(); break; } @@ -514,6 +524,23 @@ bool SIInstrInfo::isLiteralConstant(const MachineOperand &MO) const { return (MO.isImm() || MO.isFPImm()) && !isInlineConstant(MO); } +static bool compareMachineOp(const MachineOperand &Op0, + const MachineOperand &Op1) { + if (Op0.getType() != Op1.getType()) + return false; + + switch (Op0.getType()) { + case MachineOperand::MO_Register: + return Op0.getReg() == Op1.getReg(); + case MachineOperand::MO_Immediate: + return Op0.getImm() == Op1.getImm(); + case MachineOperand::MO_FPImmediate: + return Op0.getFPImm() == Op1.getFPImm(); + default: + llvm_unreachable("Didn't expect to be comparing these operand types"); + } +} + bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, StringRef &ErrInfo) const { uint16_t Opcode = MI->getOpcode(); @@ -532,7 +559,14 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, // Make sure the register classes are correct for (unsigned i = 0, e = Desc.getNumOperands(); i != e; ++i) { switch (Desc.OpInfo[i].OperandType) { - case MCOI::OPERAND_REGISTER: + case MCOI::OPERAND_REGISTER: { + int RegClass = Desc.OpInfo[i].RegClass; + if (!RI.regClassCanUseImmediate(RegClass) && + (MI->getOperand(i).isImm() || MI->getOperand(i).isFPImm())) { + ErrInfo = "Expected register, but got immediate"; + return false; + } + } break; case MCOI::OPERAND_IMMEDIATE: if (!MI->getOperand(i).isImm() && !MI->getOperand(i).isFPImm()) { @@ -620,6 +654,24 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr *MI, return false; } } + + // Verify misc. restrictions on specific instructions. 
+ if (Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F32 || + Desc.getOpcode() == AMDGPU::V_DIV_SCALE_F64) { + MI->dump(); + + const MachineOperand &Src0 = MI->getOperand(2); + const MachineOperand &Src1 = MI->getOperand(3); + const MachineOperand &Src2 = MI->getOperand(4); + if (Src0.isReg() && Src1.isReg() && Src2.isReg()) { + if (!compareMachineOp(Src0, Src1) && + !compareMachineOp(Src0, Src2)) { + ErrInfo = "v_div_scale_{f32|f64} require src0 = src1 or src2"; + return false; + } + } + } + return true; } @@ -654,7 +706,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_SEXT_I32_I16: return AMDGPU::V_BFE_I32; case AMDGPU::S_BFE_U32: return AMDGPU::V_BFE_U32; case AMDGPU::S_BFE_I32: return AMDGPU::V_BFE_I32; + case AMDGPU::S_BREV_B32: return AMDGPU::V_BFREV_B32_e32; case AMDGPU::S_NOT_B32: return AMDGPU::V_NOT_B32_e32; + case AMDGPU::S_NOT_B64: return AMDGPU::V_NOT_B32_e32; case AMDGPU::S_CMP_EQ_I32: return AMDGPU::V_CMP_EQ_I32_e32; case AMDGPU::S_CMP_LG_I32: return AMDGPU::V_CMP_NE_I32_e32; case AMDGPU::S_CMP_GT_I32: return AMDGPU::V_CMP_GT_I32_e32; @@ -667,6 +721,9 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) { case AMDGPU::S_LOAD_DWORDX2_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX2_ADDR64; case AMDGPU::S_LOAD_DWORDX4_IMM: case AMDGPU::S_LOAD_DWORDX4_SGPR: return AMDGPU::BUFFER_LOAD_DWORDX4_ADDR64; + case AMDGPU::S_BCNT1_I32_B32: return AMDGPU::V_BCNT_U32_B32_e32; + case AMDGPU::S_FF1_I32_B32: return AMDGPU::V_FFBL_B32_e32; + case AMDGPU::S_FLBIT_I32_B32: return AMDGPU::V_FFBH_U32_e32; } } @@ -731,8 +788,8 @@ unsigned SIInstrInfo::buildExtractSubReg(MachineBasicBlock::iterator MI, unsigned SubReg = MRI.createVirtualRegister(SubRC); // Just in case the super register is itself a sub-register, copy it to a new - // value so we don't need to wory about merging its subreg index with the - // SubIdx passed to this function. The register coalescer should be able to + // value so we don't need to worry about merging its subreg index with the + // SubIdx passed to this function. The register coalescer should be able to // eliminate this extra copy. BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(TargetOpcode::COPY), NewSuperReg) @@ -1157,22 +1214,27 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { continue; } case AMDGPU::S_AND_B64: - splitScalar64BitOp(Worklist, Inst, AMDGPU::S_AND_B32); + splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_AND_B32); Inst->eraseFromParent(); continue; case AMDGPU::S_OR_B64: - splitScalar64BitOp(Worklist, Inst, AMDGPU::S_OR_B32); + splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_OR_B32); Inst->eraseFromParent(); continue; case AMDGPU::S_XOR_B64: - splitScalar64BitOp(Worklist, Inst, AMDGPU::S_XOR_B32); + splitScalar64BitBinaryOp(Worklist, Inst, AMDGPU::S_XOR_B32); Inst->eraseFromParent(); continue; case AMDGPU::S_NOT_B64: - splitScalar64BitOp(Worklist, Inst, AMDGPU::S_NOT_B32); + splitScalar64BitUnaryOp(Worklist, Inst, AMDGPU::S_NOT_B32); + Inst->eraseFromParent(); + continue; + + case AMDGPU::S_BCNT1_I32_B64: + splitScalar64BitBCNT(Worklist, Inst); Inst->eraseFromParent(); continue; @@ -1217,6 +1279,10 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const { // 3 to not hit an assertion later in MCInstLower. Inst->addOperand(MachineOperand::CreateImm(0)); Inst->addOperand(MachineOperand::CreateImm(0)); + } else if (Opcode == AMDGPU::S_BCNT1_I32_B32) { + // The VALU version adds the second operand to the result, so insert an + // extra 0 operand. 
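// Editor's note (illustrative, not part of this patch): concretely,
//   s_bcnt1_i32_b32 dst, src        ; dst = popcount(src)
// is rewritten to the accumulating VALU form
//   v_bcnt_u32_b32 dst, src, 0      ; dst = popcount(src) + 0
// so the zero immediate appended just below leaves the result unchanged.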
+ Inst->addOperand(MachineOperand::CreateImm(0)); } addDescImplicitUseDef(NewDesc, Inst); @@ -1297,9 +1363,62 @@ const TargetRegisterClass *SIInstrInfo::getIndirectAddrRegClass() const { return &AMDGPU::VReg_32RegClass; } -void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist, - MachineInstr *Inst, - unsigned Opcode) const { +void SIInstrInfo::splitScalar64BitUnaryOp( + SmallVectorImpl<MachineInstr *> &Worklist, + MachineInstr *Inst, + unsigned Opcode) const { + MachineBasicBlock &MBB = *Inst->getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + + MachineOperand &Dest = Inst->getOperand(0); + MachineOperand &Src0 = Inst->getOperand(1); + DebugLoc DL = Inst->getDebugLoc(); + + MachineBasicBlock::iterator MII = Inst; + + const MCInstrDesc &InstDesc = get(Opcode); + const TargetRegisterClass *Src0RC = Src0.isReg() ? + MRI.getRegClass(Src0.getReg()) : + &AMDGPU::SGPR_32RegClass; + + const TargetRegisterClass *Src0SubRC = RI.getSubRegClass(Src0RC, AMDGPU::sub0); + + MachineOperand SrcReg0Sub0 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, + AMDGPU::sub0, Src0SubRC); + + const TargetRegisterClass *DestRC = MRI.getRegClass(Dest.getReg()); + const TargetRegisterClass *DestSubRC = RI.getSubRegClass(DestRC, AMDGPU::sub0); + + unsigned DestSub0 = MRI.createVirtualRegister(DestRC); + MachineInstr *LoHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub0) + .addOperand(SrcReg0Sub0); + + MachineOperand SrcReg0Sub1 = buildExtractSubRegOrImm(MII, MRI, Src0, Src0RC, + AMDGPU::sub1, Src0SubRC); + + unsigned DestSub1 = MRI.createVirtualRegister(DestSubRC); + MachineInstr *HiHalf = BuildMI(MBB, MII, DL, InstDesc, DestSub1) + .addOperand(SrcReg0Sub1); + + unsigned FullDestReg = MRI.createVirtualRegister(DestRC); + BuildMI(MBB, MII, DL, get(TargetOpcode::REG_SEQUENCE), FullDestReg) + .addReg(DestSub0) + .addImm(AMDGPU::sub0) + .addReg(DestSub1) + .addImm(AMDGPU::sub1); + + MRI.replaceRegWith(Dest.getReg(), FullDestReg); + + // Try to legalize the operands in case we need to swap the order to keep it + // valid. + Worklist.push_back(LoHalf); + Worklist.push_back(HiHalf); +} + +void SIInstrInfo::splitScalar64BitBinaryOp( + SmallVectorImpl<MachineInstr *> &Worklist, + MachineInstr *Inst, + unsigned Opcode) const { MachineBasicBlock &MBB = *Inst->getParent(); MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); @@ -1360,6 +1479,46 @@ void SIInstrInfo::splitScalar64BitOp(SmallVectorImpl<MachineInstr *> &Worklist, Worklist.push_back(HiHalf); } +void SIInstrInfo::splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, + MachineInstr *Inst) const { + MachineBasicBlock &MBB = *Inst->getParent(); + MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); + + MachineBasicBlock::iterator MII = Inst; + DebugLoc DL = Inst->getDebugLoc(); + + MachineOperand &Dest = Inst->getOperand(0); + MachineOperand &Src = Inst->getOperand(1); + + const MCInstrDesc &InstDesc = get(AMDGPU::V_BCNT_U32_B32_e32); + const TargetRegisterClass *SrcRC = Src.isReg() ? 
+ MRI.getRegClass(Src.getReg()) : + &AMDGPU::SGPR_32RegClass; + + unsigned MidReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + unsigned ResultReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); + + const TargetRegisterClass *SrcSubRC = RI.getSubRegClass(SrcRC, AMDGPU::sub0); + + MachineOperand SrcRegSub0 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, + AMDGPU::sub0, SrcSubRC); + MachineOperand SrcRegSub1 = buildExtractSubRegOrImm(MII, MRI, Src, SrcRC, + AMDGPU::sub1, SrcSubRC); + + MachineInstr *First = BuildMI(MBB, MII, DL, InstDesc, MidReg) + .addOperand(SrcRegSub0) + .addImm(0); + + MachineInstr *Second = BuildMI(MBB, MII, DL, InstDesc, ResultReg) + .addOperand(SrcRegSub1) + .addReg(MidReg); + + MRI.replaceRegWith(Dest.getReg(), ResultReg); + + Worklist.push_back(First); + Worklist.push_back(Second); +} + void SIInstrInfo::addDescImplicitUseDef(const MCInstrDesc &NewDesc, MachineInstr *Inst) const { // Add the implicit and explicit register definitions. diff --git a/lib/Target/R600/SIInstrInfo.h b/lib/Target/R600/SIInstrInfo.h index 7b31a81..4c204d8 100644 --- a/lib/Target/R600/SIInstrInfo.h +++ b/lib/Target/R600/SIInstrInfo.h @@ -44,13 +44,19 @@ private: const TargetRegisterClass *RC, const MachineOperand &Op) const; - void splitScalar64BitOp(SmallVectorImpl<MachineInstr *> & Worklist, - MachineInstr *Inst, unsigned Opcode) const; + void splitScalar64BitUnaryOp(SmallVectorImpl<MachineInstr *> &Worklist, + MachineInstr *Inst, unsigned Opcode) const; + + void splitScalar64BitBinaryOp(SmallVectorImpl<MachineInstr *> &Worklist, + MachineInstr *Inst, unsigned Opcode) const; + + void splitScalar64BitBCNT(SmallVectorImpl<MachineInstr *> &Worklist, + MachineInstr *Inst) const; void addDescImplicitUseDef(const MCInstrDesc &Desc, MachineInstr *MI) const; public: - explicit SIInstrInfo(AMDGPUTargetMachine &tm); + explicit SIInstrInfo(const AMDGPUSubtarget &st); const SIRegisterInfo &getRegisterInfo() const override { return RI; diff --git a/lib/Target/R600/SIInstrInfo.td b/lib/Target/R600/SIInstrInfo.td index 2242e6d..774c9d1 100644 --- a/lib/Target/R600/SIInstrInfo.td +++ b/lib/Target/R600/SIInstrInfo.td @@ -147,6 +147,12 @@ def FRAMEri32 : Operand<iPTR> { } //===----------------------------------------------------------------------===// +// Complex patterns +//===----------------------------------------------------------------------===// + +def MUBUFAddr64 : ComplexPattern<i64, 3, "SelectMUBUFAddr64">; + +//===----------------------------------------------------------------------===// // SI assembler operands //===----------------------------------------------------------------------===// @@ -187,6 +193,12 @@ class SOP1_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 < opName#" $dst, $src0", pattern >; +// 64-bit input, 32-bit output.
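// (Instantiated below by S_BCNT1_I32_B64, which counts the set bits of a
// 64-bit scalar source into a 32-bit scalar result.)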
+class SOP1_32_64 <bits<8> op, string opName, list<dag> pattern> : SOP1 < + op, (outs SReg_32:$dst), (ins SSrc_64:$src0), + opName#" $dst, $src0", pattern +>; + class SOP2_32 <bits<7> op, string opName, list<dag> pattern> : SOP2 < op, (outs SReg_32:$dst), (ins SSrc_32:$src0, SSrc_32:$src1), opName#" $dst, $src0, $src1", pattern @@ -260,7 +272,7 @@ class SIMCInstr <string pseudo, int subtarget> { multiclass VOP3_m <bits<9> op, dag outs, dag ins, string asm, list<dag> pattern, string opName> { - def "" : InstSI <outs, ins, "", pattern>, VOP <opName>, + def "" : VOP3Common <outs, ins, "", pattern>, VOP <opName>, SIMCInstr<OpName, SISubtarget.NONE> { let isPseudo = 1; } @@ -357,12 +369,13 @@ multiclass VOP2b_32 <bits<6> op, string opName, list<dag> pattern, } multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, - string opName, ValueType vt, PatLeaf cond> { - + string opName, ValueType vt, PatLeaf cond, bit defExec = 0> { def _e32 : VOPC < op, (ins arc:$src0, vrc:$src1), opName#"_e32 $dst, $src0, $src1", [] - >, VOP <opName>; + >, VOP <opName> { + let Defs = !if(defExec, [VCC, EXEC], [VCC]); + } def _e64 : VOP3 < {0, op{7}, op{6}, op{5}, op{4}, op{3}, op{2}, op{1}, op{0}}, @@ -375,6 +388,7 @@ multiclass VOPC_Helper <bits<8> op, RegisterClass vrc, RegisterClass arc, [(set SReg_64:$dst, (i1 (setcc (vt arc:$src0), arc:$src1, cond)))] ) >, VOP <opName> { + let Defs = !if(defExec, [EXEC], []); let src2 = SIOperand.ZERO; let src2_modifiers = 0; } @@ -388,6 +402,14 @@ multiclass VOPC_64 <bits<8> op, string opName, ValueType vt = untyped, PatLeaf cond = COND_NULL> : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond>; +multiclass VOPCX_32 <bits<8> op, string opName, + ValueType vt = untyped, PatLeaf cond = COND_NULL> + : VOPC_Helper <op, VReg_32, VSrc_32, opName, vt, cond, 1>; + +multiclass VOPCX_64 <bits<8> op, string opName, + ValueType vt = untyped, PatLeaf cond = COND_NULL> + : VOPC_Helper <op, VReg_64, VSrc_64, opName, vt, cond, 1>; + multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m < op, (outs VReg_32:$dst), (ins InputMods: $src0_modifiers, VSrc_32:$src0, InputMods:$src1_modifiers, @@ -396,7 +418,7 @@ multiclass VOP3_32 <bits<9> op, string opName, list<dag> pattern> : VOP3_m < opName#" $dst, $src0_modifiers, $src1, $src2, $clamp, $omod", pattern, opName >; -class VOP3_64_Shift <bits <9> op, string opName, list<dag> pattern> : VOP3 < +class VOP3_64_32 <bits <9> op, string opName, list<dag> pattern> : VOP3 < op, (outs VReg_64:$dst), (ins VSrc_64:$src0, VSrc_32:$src1), opName#" $dst, $src0, $src1", pattern @@ -410,11 +432,29 @@ class VOP3_64_Shift <bits <9> op, string opName, list<dag> pattern> : VOP3 < class VOP3_64 <bits<9> op, string opName, list<dag> pattern> : VOP3 < op, (outs VReg_64:$dst), - (ins VSrc_64:$src0, VSrc_64:$src1, VSrc_64:$src2, + (ins InputMods:$src0_modifiers, VSrc_64:$src0, + InputMods:$src1_modifiers, VSrc_64:$src1, + InputMods:$src2_modifiers, VSrc_64:$src2, + InstFlag:$clamp, InstFlag:$omod), + opName#" $dst, $src0_modifiers, $src1_modifiers, $src2_modifiers, $clamp, $omod", pattern +>, VOP <opName>; + + +class VOP3b_Helper <bits<9> op, RegisterClass vrc, RegisterClass arc, + string opName, list<dag> pattern> : VOP3 < + op, (outs vrc:$dst0, SReg_64:$dst1), + (ins arc:$src0, arc:$src1, arc:$src2, InstFlag:$abs, InstFlag:$clamp, InstFlag:$omod, InstFlag:$neg), - opName#" $dst, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern + opName#" $dst0, $dst1, $src0, $src1, $src2, $abs, $clamp, $omod, $neg", pattern >, VOP 
<opName>; + +class VOP3b_64 <bits<9> op, string opName, list<dag> pattern> : + VOP3b_Helper <op, VReg_64, VSrc_64, opName, pattern>; + +class VOP3b_32 <bits<9> op, string opName, list<dag> pattern> : + VOP3b_Helper <op, VReg_32, VSrc_32, opName, pattern>; + //===----------------------------------------------------------------------===// // Vector I/O classes //===----------------------------------------------------------------------===// @@ -475,10 +515,11 @@ class DS_Store2_Helper <bits<8> op, string asm, RegisterClass regClass> : DS_1A let vdst = 0; } +// 1 address, 1 data. class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A < op, (outs rc:$vdst), - (ins i1imm:$gds, VReg_32:$addr, VReg_32:$data0, u16imm:$offset), + (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset), asm#" $vdst, $addr, $data0, $offset, [M0]", []> { @@ -487,6 +528,41 @@ class DS_1A1D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A < let mayLoad = 1; } +// 1 address, 2 data. +class DS_1A2D_RET <bits<8> op, string asm, RegisterClass rc> : DS_1A < + op, + (outs rc:$vdst), + (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset), + asm#" $vdst, $addr, $data0, $data1, $offset, [M0]", + []> { + let mayStore = 1; + let mayLoad = 1; +} + +// 1 address, 2 data. +class DS_1A2D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A < + op, + (outs), + (ins i1imm:$gds, VReg_32:$addr, rc:$data0, rc:$data1, u16imm:$offset), + asm#" $addr, $data0, $data1, $offset, [M0]", + []> { + let mayStore = 1; + let mayLoad = 1; +} + +// 1 address, 1 data. +class DS_1A1D_NORET <bits<8> op, string asm, RegisterClass rc> : DS_1A < + op, + (outs), + (ins i1imm:$gds, VReg_32:$addr, rc:$data0, u16imm:$offset), + asm#" $addr, $data0, $offset, [M0]", + []> { + + let data1 = 0; + let mayStore = 1; + let mayLoad = 1; +} + class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBUF < op, (outs), @@ -500,7 +576,9 @@ class MTBUF_Store_Helper <bits<3> op, string asm, RegisterClass regClass> : MTBU let mayLoad = 0; } -multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> { +multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass, + ValueType load_vt = i32, + SDPatternOperator ld = null_frag> { let lds = 0, mayLoad = 1 in { @@ -542,16 +620,19 @@ multiclass MUBUF_Load_Helper <bits<7> op, string asm, RegisterClass regClass> { let offen = 0, idxen = 0, addr64 = 1, glc = 0, slc = 0, tfe = 0, soffset = 128 /* ZERO */ in { def _ADDR64 : MUBUF <op, (outs regClass:$vdata), (ins SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset), - asm#" $vdata, $srsrc + $vaddr + $offset", []>; + asm#" $vdata, $srsrc + $vaddr + $offset", + [(set load_vt:$vdata, (ld (MUBUFAddr64 v4i32:$srsrc, + i64:$vaddr, u16imm:$offset)))]>; } } } -class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass> : +class MUBUF_Store_Helper <bits<7> op, string name, RegisterClass vdataClass, + ValueType store_vt, SDPatternOperator st> : MUBUF <op, (outs), (ins vdataClass:$vdata, SReg_128:$srsrc, VReg_64:$vaddr, u16imm:$offset), name#" $vdata, $srsrc + $vaddr + $offset", - []> { + [(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, u16imm:$offset))]> { let mayLoad = 0; let mayStore = 1; @@ -658,6 +739,53 @@ multiclass MIMG_Sampler <bits<7> op, string asm> { defm _V4 : MIMG_Sampler_Src_Helper<op, asm, VReg_128, 4>; } +class MIMG_Gather_Helper <bits<7> op, string asm, + RegisterClass dst_rc, + RegisterClass src_rc> : MIMG < + op, + (outs dst_rc:$vdata), + (ins i32imm:$dmask, 
i1imm:$unorm, i1imm:$glc, i1imm:$da, i1imm:$r128, + i1imm:$tfe, i1imm:$lwe, i1imm:$slc, src_rc:$vaddr, + SReg_256:$srsrc, SReg_128:$ssamp), + asm#" $vdata, $dmask, $unorm, $glc, $da, $r128," + #" $tfe, $lwe, $slc, $vaddr, $srsrc, $ssamp", + []> { + let mayLoad = 1; + let mayStore = 0; + + // DMASK was repurposed for GATHER4. 4 components are always + // returned and DMASK works like a swizzle - it selects + // the component to fetch. The only useful DMASK values are + // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns + // (red,red,red,red) etc.) The ISA document doesn't mention + // this. + // Therefore, disable all code which updates DMASK by setting these two: + let MIMG = 0; + let hasPostISelHook = 0; +} + +multiclass MIMG_Gather_Src_Helper <bits<7> op, string asm, + RegisterClass dst_rc, + int channels> { + def _V1 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_32>, + MIMG_Mask<asm#"_V1", channels>; + def _V2 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_64>, + MIMG_Mask<asm#"_V2", channels>; + def _V4 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_128>, + MIMG_Mask<asm#"_V4", channels>; + def _V8 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_256>, + MIMG_Mask<asm#"_V8", channels>; + def _V16 : MIMG_Gather_Helper <op, asm, dst_rc, VReg_512>, + MIMG_Mask<asm#"_V16", channels>; +} + +multiclass MIMG_Gather <bits<7> op, string asm> { + defm _V1 : MIMG_Gather_Src_Helper<op, asm, VReg_32, 1>; + defm _V2 : MIMG_Gather_Src_Helper<op, asm, VReg_64, 2>; + defm _V3 : MIMG_Gather_Src_Helper<op, asm, VReg_96, 3>; + defm _V4 : MIMG_Gather_Src_Helper<op, asm, VReg_128, 4>; +} + //===----------------------------------------------------------------------===// // Vector instruction mappings //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIInstructions.td b/lib/Target/R600/SIInstructions.td index 500fa78..b3b44e2 100644 --- a/lib/Target/R600/SIInstructions.td +++ b/lib/Target/R600/SIInstructions.td @@ -96,22 +96,35 @@ def S_NOT_B32 : SOP1_32 <0x00000007, "S_NOT_B32", [(set i32:$dst, (not i32:$src0))] >; -def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", []>; +def S_NOT_B64 : SOP1_64 <0x00000008, "S_NOT_B64", + [(set i64:$dst, (not i64:$src0))] +>; def S_WQM_B32 : SOP1_32 <0x00000009, "S_WQM_B32", []>; def S_WQM_B64 : SOP1_64 <0x0000000a, "S_WQM_B64", []>; -def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", []>; +def S_BREV_B32 : SOP1_32 <0x0000000b, "S_BREV_B32", + [(set i32:$dst, (AMDGPUbrev i32:$src0))] +>; def S_BREV_B64 : SOP1_64 <0x0000000c, "S_BREV_B64", []>; } // End neverHasSideEffects = 1 ////def S_BCNT0_I32_B32 : SOP1_BCNT0 <0x0000000d, "S_BCNT0_I32_B32", []>; ////def S_BCNT0_I32_B64 : SOP1_BCNT0 <0x0000000e, "S_BCNT0_I32_B64", []>; -////def S_BCNT1_I32_B32 : SOP1_BCNT1 <0x0000000f, "S_BCNT1_I32_B32", []>; -////def S_BCNT1_I32_B64 : SOP1_BCNT1 <0x00000010, "S_BCNT1_I32_B64", []>; -////def S_FF0_I32_B32 : SOP1_FF0 <0x00000011, "S_FF0_I32_B32", []>; +def S_BCNT1_I32_B32 : SOP1_32 <0x0000000f, "S_BCNT1_I32_B32", + [(set i32:$dst, (ctpop i32:$src0))] +>; +def S_BCNT1_I32_B64 : SOP1_32_64 <0x00000010, "S_BCNT1_I32_B64", []>; + +////def S_FF0_I32_B32 : SOP1_32 <0x00000011, "S_FF0_I32_B32", []>; ////def S_FF0_I32_B64 : SOP1_FF0 <0x00000012, "S_FF0_I32_B64", []>; -////def S_FF1_I32_B32 : SOP1_FF1 <0x00000013, "S_FF1_I32_B32", []>; +def S_FF1_I32_B32 : SOP1_32 <0x00000013, "S_FF1_I32_B32", + [(set i32:$dst, (cttz_zero_undef i32:$src0))] +>; ////def S_FF1_I32_B64 : SOP1_FF1 <0x00000014, "S_FF1_I32_B64", []>; -//def S_FLBIT_I32_B32 : SOP1_32 
<0x00000015, "S_FLBIT_I32_B32", []>; + +def S_FLBIT_I32_B32 : SOP1_32 <0x00000015, "S_FLBIT_I32_B32", + [(set i32:$dst, (ctlz_zero_undef i32:$src0))] +>; + //def S_FLBIT_I32_B64 : SOP1_32 <0x00000016, "S_FLBIT_I32_B64", []>; def S_FLBIT_I32 : SOP1_32 <0x00000017, "S_FLBIT_I32", []>; //def S_FLBIT_I32_I64 : SOP1_32 <0x00000018, "S_FLBIT_I32_I64", []>; @@ -320,7 +333,7 @@ def S_CMPK_EQ_I32 : SOPK < >; */ -let isCompare = 1 in { +let isCompare = 1, Defs = [SCC] in { def S_CMPK_LG_I32 : SOPK_32 <0x00000004, "S_CMPK_LG_I32", []>; def S_CMPK_GT_I32 : SOPK_32 <0x00000005, "S_CMPK_GT_I32", []>; def S_CMPK_GE_I32 : SOPK_32 <0x00000006, "S_CMPK_GE_I32", []>; @@ -332,7 +345,7 @@ def S_CMPK_GT_U32 : SOPK_32 <0x0000000b, "S_CMPK_GT_U32", []>; def S_CMPK_GE_U32 : SOPK_32 <0x0000000c, "S_CMPK_GE_U32", []>; def S_CMPK_LT_U32 : SOPK_32 <0x0000000d, "S_CMPK_LT_U32", []>; def S_CMPK_LE_U32 : SOPK_32 <0x0000000e, "S_CMPK_LE_U32", []>; -} // End isCompare = 1 +} // End isCompare = 1, Defs = [SCC] let Defs = [SCC], isCommutable = 1 in { def S_ADDK_I32 : SOPK_32 <0x0000000f, "S_ADDK_I32", []>; @@ -467,26 +480,26 @@ defm V_CMP_NEQ_F32 : VOPC_32 <0x0000000d, "V_CMP_NEQ_F32", f32, COND_UNE>; defm V_CMP_NLT_F32 : VOPC_32 <0x0000000e, "V_CMP_NLT_F32">; defm V_CMP_TRU_F32 : VOPC_32 <0x0000000f, "V_CMP_TRU_F32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_F32 : VOPC_32 <0x00000010, "V_CMPX_F_F32">; -defm V_CMPX_LT_F32 : VOPC_32 <0x00000011, "V_CMPX_LT_F32">; -defm V_CMPX_EQ_F32 : VOPC_32 <0x00000012, "V_CMPX_EQ_F32">; -defm V_CMPX_LE_F32 : VOPC_32 <0x00000013, "V_CMPX_LE_F32">; -defm V_CMPX_GT_F32 : VOPC_32 <0x00000014, "V_CMPX_GT_F32">; -defm V_CMPX_LG_F32 : VOPC_32 <0x00000015, "V_CMPX_LG_F32">; -defm V_CMPX_GE_F32 : VOPC_32 <0x00000016, "V_CMPX_GE_F32">; -defm V_CMPX_O_F32 : VOPC_32 <0x00000017, "V_CMPX_O_F32">; -defm V_CMPX_U_F32 : VOPC_32 <0x00000018, "V_CMPX_U_F32">; -defm V_CMPX_NGE_F32 : VOPC_32 <0x00000019, "V_CMPX_NGE_F32">; -defm V_CMPX_NLG_F32 : VOPC_32 <0x0000001a, "V_CMPX_NLG_F32">; -defm V_CMPX_NGT_F32 : VOPC_32 <0x0000001b, "V_CMPX_NGT_F32">; -defm V_CMPX_NLE_F32 : VOPC_32 <0x0000001c, "V_CMPX_NLE_F32">; -defm V_CMPX_NEQ_F32 : VOPC_32 <0x0000001d, "V_CMPX_NEQ_F32">; -defm V_CMPX_NLT_F32 : VOPC_32 <0x0000001e, "V_CMPX_NLT_F32">; -defm V_CMPX_TRU_F32 : VOPC_32 <0x0000001f, "V_CMPX_TRU_F32">; +defm V_CMPX_F_F32 : VOPCX_32 <0x00000010, "V_CMPX_F_F32">; +defm V_CMPX_LT_F32 : VOPCX_32 <0x00000011, "V_CMPX_LT_F32">; +defm V_CMPX_EQ_F32 : VOPCX_32 <0x00000012, "V_CMPX_EQ_F32">; +defm V_CMPX_LE_F32 : VOPCX_32 <0x00000013, "V_CMPX_LE_F32">; +defm V_CMPX_GT_F32 : VOPCX_32 <0x00000014, "V_CMPX_GT_F32">; +defm V_CMPX_LG_F32 : VOPCX_32 <0x00000015, "V_CMPX_LG_F32">; +defm V_CMPX_GE_F32 : VOPCX_32 <0x00000016, "V_CMPX_GE_F32">; +defm V_CMPX_O_F32 : VOPCX_32 <0x00000017, "V_CMPX_O_F32">; +defm V_CMPX_U_F32 : VOPCX_32 <0x00000018, "V_CMPX_U_F32">; +defm V_CMPX_NGE_F32 : VOPCX_32 <0x00000019, "V_CMPX_NGE_F32">; +defm V_CMPX_NLG_F32 : VOPCX_32 <0x0000001a, "V_CMPX_NLG_F32">; +defm V_CMPX_NGT_F32 : VOPCX_32 <0x0000001b, "V_CMPX_NGT_F32">; +defm V_CMPX_NLE_F32 : VOPCX_32 <0x0000001c, "V_CMPX_NLE_F32">; +defm V_CMPX_NEQ_F32 : VOPCX_32 <0x0000001d, "V_CMPX_NEQ_F32">; +defm V_CMPX_NLT_F32 : VOPCX_32 <0x0000001e, "V_CMPX_NLT_F32">; +defm V_CMPX_TRU_F32 : VOPCX_32 <0x0000001f, "V_CMPX_TRU_F32">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_F_F64 : VOPC_64 <0x00000020, "V_CMP_F_F64">; defm V_CMP_LT_F64 : VOPC_64 <0x00000021, "V_CMP_LT_F64", f64, 
COND_OLT>; @@ -505,26 +518,26 @@ defm V_CMP_NEQ_F64 : VOPC_64 <0x0000002d, "V_CMP_NEQ_F64", f64, COND_UNE>; defm V_CMP_NLT_F64 : VOPC_64 <0x0000002e, "V_CMP_NLT_F64">; defm V_CMP_TRU_F64 : VOPC_64 <0x0000002f, "V_CMP_TRU_F64">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_F64 : VOPC_64 <0x00000030, "V_CMPX_F_F64">; -defm V_CMPX_LT_F64 : VOPC_64 <0x00000031, "V_CMPX_LT_F64">; -defm V_CMPX_EQ_F64 : VOPC_64 <0x00000032, "V_CMPX_EQ_F64">; -defm V_CMPX_LE_F64 : VOPC_64 <0x00000033, "V_CMPX_LE_F64">; -defm V_CMPX_GT_F64 : VOPC_64 <0x00000034, "V_CMPX_GT_F64">; -defm V_CMPX_LG_F64 : VOPC_64 <0x00000035, "V_CMPX_LG_F64">; -defm V_CMPX_GE_F64 : VOPC_64 <0x00000036, "V_CMPX_GE_F64">; -defm V_CMPX_O_F64 : VOPC_64 <0x00000037, "V_CMPX_O_F64">; -defm V_CMPX_U_F64 : VOPC_64 <0x00000038, "V_CMPX_U_F64">; -defm V_CMPX_NGE_F64 : VOPC_64 <0x00000039, "V_CMPX_NGE_F64">; -defm V_CMPX_NLG_F64 : VOPC_64 <0x0000003a, "V_CMPX_NLG_F64">; -defm V_CMPX_NGT_F64 : VOPC_64 <0x0000003b, "V_CMPX_NGT_F64">; -defm V_CMPX_NLE_F64 : VOPC_64 <0x0000003c, "V_CMPX_NLE_F64">; -defm V_CMPX_NEQ_F64 : VOPC_64 <0x0000003d, "V_CMPX_NEQ_F64">; -defm V_CMPX_NLT_F64 : VOPC_64 <0x0000003e, "V_CMPX_NLT_F64">; -defm V_CMPX_TRU_F64 : VOPC_64 <0x0000003f, "V_CMPX_TRU_F64">; +defm V_CMPX_F_F64 : VOPCX_64 <0x00000030, "V_CMPX_F_F64">; +defm V_CMPX_LT_F64 : VOPCX_64 <0x00000031, "V_CMPX_LT_F64">; +defm V_CMPX_EQ_F64 : VOPCX_64 <0x00000032, "V_CMPX_EQ_F64">; +defm V_CMPX_LE_F64 : VOPCX_64 <0x00000033, "V_CMPX_LE_F64">; +defm V_CMPX_GT_F64 : VOPCX_64 <0x00000034, "V_CMPX_GT_F64">; +defm V_CMPX_LG_F64 : VOPCX_64 <0x00000035, "V_CMPX_LG_F64">; +defm V_CMPX_GE_F64 : VOPCX_64 <0x00000036, "V_CMPX_GE_F64">; +defm V_CMPX_O_F64 : VOPCX_64 <0x00000037, "V_CMPX_O_F64">; +defm V_CMPX_U_F64 : VOPCX_64 <0x00000038, "V_CMPX_U_F64">; +defm V_CMPX_NGE_F64 : VOPCX_64 <0x00000039, "V_CMPX_NGE_F64">; +defm V_CMPX_NLG_F64 : VOPCX_64 <0x0000003a, "V_CMPX_NLG_F64">; +defm V_CMPX_NGT_F64 : VOPCX_64 <0x0000003b, "V_CMPX_NGT_F64">; +defm V_CMPX_NLE_F64 : VOPCX_64 <0x0000003c, "V_CMPX_NLE_F64">; +defm V_CMPX_NEQ_F64 : VOPCX_64 <0x0000003d, "V_CMPX_NEQ_F64">; +defm V_CMPX_NLT_F64 : VOPCX_64 <0x0000003e, "V_CMPX_NLT_F64">; +defm V_CMPX_TRU_F64 : VOPCX_64 <0x0000003f, "V_CMPX_TRU_F64">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMPS_F_F32 : VOPC_32 <0x00000040, "V_CMPS_F_F32">; defm V_CMPS_LT_F32 : VOPC_32 <0x00000041, "V_CMPS_LT_F32">; @@ -543,26 +556,26 @@ defm V_CMPS_NEQ_F32 : VOPC_32 <0x0000004d, "V_CMPS_NEQ_F32">; defm V_CMPS_NLT_F32 : VOPC_32 <0x0000004e, "V_CMPS_NLT_F32">; defm V_CMPS_TRU_F32 : VOPC_32 <0x0000004f, "V_CMPS_TRU_F32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPSX_F_F32 : VOPC_32 <0x00000050, "V_CMPSX_F_F32">; -defm V_CMPSX_LT_F32 : VOPC_32 <0x00000051, "V_CMPSX_LT_F32">; -defm V_CMPSX_EQ_F32 : VOPC_32 <0x00000052, "V_CMPSX_EQ_F32">; -defm V_CMPSX_LE_F32 : VOPC_32 <0x00000053, "V_CMPSX_LE_F32">; -defm V_CMPSX_GT_F32 : VOPC_32 <0x00000054, "V_CMPSX_GT_F32">; -defm V_CMPSX_LG_F32 : VOPC_32 <0x00000055, "V_CMPSX_LG_F32">; -defm V_CMPSX_GE_F32 : VOPC_32 <0x00000056, "V_CMPSX_GE_F32">; -defm V_CMPSX_O_F32 : VOPC_32 <0x00000057, "V_CMPSX_O_F32">; -defm V_CMPSX_U_F32 : VOPC_32 <0x00000058, "V_CMPSX_U_F32">; -defm V_CMPSX_NGE_F32 : VOPC_32 <0x00000059, "V_CMPSX_NGE_F32">; -defm V_CMPSX_NLG_F32 : VOPC_32 <0x0000005a, "V_CMPSX_NLG_F32">; -defm V_CMPSX_NGT_F32 : VOPC_32 <0x0000005b, "V_CMPSX_NGT_F32">; -defm V_CMPSX_NLE_F32 : VOPC_32 
<0x0000005c, "V_CMPSX_NLE_F32">; -defm V_CMPSX_NEQ_F32 : VOPC_32 <0x0000005d, "V_CMPSX_NEQ_F32">; -defm V_CMPSX_NLT_F32 : VOPC_32 <0x0000005e, "V_CMPSX_NLT_F32">; -defm V_CMPSX_TRU_F32 : VOPC_32 <0x0000005f, "V_CMPSX_TRU_F32">; +defm V_CMPSX_F_F32 : VOPCX_32 <0x00000050, "V_CMPSX_F_F32">; +defm V_CMPSX_LT_F32 : VOPCX_32 <0x00000051, "V_CMPSX_LT_F32">; +defm V_CMPSX_EQ_F32 : VOPCX_32 <0x00000052, "V_CMPSX_EQ_F32">; +defm V_CMPSX_LE_F32 : VOPCX_32 <0x00000053, "V_CMPSX_LE_F32">; +defm V_CMPSX_GT_F32 : VOPCX_32 <0x00000054, "V_CMPSX_GT_F32">; +defm V_CMPSX_LG_F32 : VOPCX_32 <0x00000055, "V_CMPSX_LG_F32">; +defm V_CMPSX_GE_F32 : VOPCX_32 <0x00000056, "V_CMPSX_GE_F32">; +defm V_CMPSX_O_F32 : VOPCX_32 <0x00000057, "V_CMPSX_O_F32">; +defm V_CMPSX_U_F32 : VOPCX_32 <0x00000058, "V_CMPSX_U_F32">; +defm V_CMPSX_NGE_F32 : VOPCX_32 <0x00000059, "V_CMPSX_NGE_F32">; +defm V_CMPSX_NLG_F32 : VOPCX_32 <0x0000005a, "V_CMPSX_NLG_F32">; +defm V_CMPSX_NGT_F32 : VOPCX_32 <0x0000005b, "V_CMPSX_NGT_F32">; +defm V_CMPSX_NLE_F32 : VOPCX_32 <0x0000005c, "V_CMPSX_NLE_F32">; +defm V_CMPSX_NEQ_F32 : VOPCX_32 <0x0000005d, "V_CMPSX_NEQ_F32">; +defm V_CMPSX_NLT_F32 : VOPCX_32 <0x0000005e, "V_CMPSX_NLT_F32">; +defm V_CMPSX_TRU_F32 : VOPCX_32 <0x0000005f, "V_CMPSX_TRU_F32">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMPS_F_F64 : VOPC_64 <0x00000060, "V_CMPS_F_F64">; defm V_CMPS_LT_F64 : VOPC_64 <0x00000061, "V_CMPS_LT_F64">; @@ -611,18 +624,18 @@ defm V_CMP_NE_I32 : VOPC_32 <0x00000085, "V_CMP_NE_I32", i32, COND_NE>; defm V_CMP_GE_I32 : VOPC_32 <0x00000086, "V_CMP_GE_I32", i32, COND_SGE>; defm V_CMP_T_I32 : VOPC_32 <0x00000087, "V_CMP_T_I32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_I32 : VOPC_32 <0x00000090, "V_CMPX_F_I32">; -defm V_CMPX_LT_I32 : VOPC_32 <0x00000091, "V_CMPX_LT_I32">; -defm V_CMPX_EQ_I32 : VOPC_32 <0x00000092, "V_CMPX_EQ_I32">; -defm V_CMPX_LE_I32 : VOPC_32 <0x00000093, "V_CMPX_LE_I32">; -defm V_CMPX_GT_I32 : VOPC_32 <0x00000094, "V_CMPX_GT_I32">; -defm V_CMPX_NE_I32 : VOPC_32 <0x00000095, "V_CMPX_NE_I32">; -defm V_CMPX_GE_I32 : VOPC_32 <0x00000096, "V_CMPX_GE_I32">; -defm V_CMPX_T_I32 : VOPC_32 <0x00000097, "V_CMPX_T_I32">; +defm V_CMPX_F_I32 : VOPCX_32 <0x00000090, "V_CMPX_F_I32">; +defm V_CMPX_LT_I32 : VOPCX_32 <0x00000091, "V_CMPX_LT_I32">; +defm V_CMPX_EQ_I32 : VOPCX_32 <0x00000092, "V_CMPX_EQ_I32">; +defm V_CMPX_LE_I32 : VOPCX_32 <0x00000093, "V_CMPX_LE_I32">; +defm V_CMPX_GT_I32 : VOPCX_32 <0x00000094, "V_CMPX_GT_I32">; +defm V_CMPX_NE_I32 : VOPCX_32 <0x00000095, "V_CMPX_NE_I32">; +defm V_CMPX_GE_I32 : VOPCX_32 <0x00000096, "V_CMPX_GE_I32">; +defm V_CMPX_T_I32 : VOPCX_32 <0x00000097, "V_CMPX_T_I32">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_F_I64 : VOPC_64 <0x000000a0, "V_CMP_F_I64">; defm V_CMP_LT_I64 : VOPC_64 <0x000000a1, "V_CMP_LT_I64", i64, COND_SLT>; @@ -633,18 +646,18 @@ defm V_CMP_NE_I64 : VOPC_64 <0x000000a5, "V_CMP_NE_I64", i64, COND_NE>; defm V_CMP_GE_I64 : VOPC_64 <0x000000a6, "V_CMP_GE_I64", i64, COND_SGE>; defm V_CMP_T_I64 : VOPC_64 <0x000000a7, "V_CMP_T_I64">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_I64 : VOPC_64 <0x000000b0, "V_CMPX_F_I64">; -defm V_CMPX_LT_I64 : VOPC_64 <0x000000b1, "V_CMPX_LT_I64">; -defm V_CMPX_EQ_I64 : VOPC_64 <0x000000b2, "V_CMPX_EQ_I64">; -defm V_CMPX_LE_I64 : VOPC_64 <0x000000b3, "V_CMPX_LE_I64">; -defm V_CMPX_GT_I64 : VOPC_64 <0x000000b4, "V_CMPX_GT_I64">; -defm V_CMPX_NE_I64 : VOPC_64 
<0x000000b5, "V_CMPX_NE_I64">; -defm V_CMPX_GE_I64 : VOPC_64 <0x000000b6, "V_CMPX_GE_I64">; -defm V_CMPX_T_I64 : VOPC_64 <0x000000b7, "V_CMPX_T_I64">; +defm V_CMPX_F_I64 : VOPCX_64 <0x000000b0, "V_CMPX_F_I64">; +defm V_CMPX_LT_I64 : VOPCX_64 <0x000000b1, "V_CMPX_LT_I64">; +defm V_CMPX_EQ_I64 : VOPCX_64 <0x000000b2, "V_CMPX_EQ_I64">; +defm V_CMPX_LE_I64 : VOPCX_64 <0x000000b3, "V_CMPX_LE_I64">; +defm V_CMPX_GT_I64 : VOPCX_64 <0x000000b4, "V_CMPX_GT_I64">; +defm V_CMPX_NE_I64 : VOPCX_64 <0x000000b5, "V_CMPX_NE_I64">; +defm V_CMPX_GE_I64 : VOPCX_64 <0x000000b6, "V_CMPX_GE_I64">; +defm V_CMPX_T_I64 : VOPCX_64 <0x000000b7, "V_CMPX_T_I64">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_F_U32 : VOPC_32 <0x000000c0, "V_CMP_F_U32">; defm V_CMP_LT_U32 : VOPC_32 <0x000000c1, "V_CMP_LT_U32", i32, COND_ULT>; @@ -655,18 +668,18 @@ defm V_CMP_NE_U32 : VOPC_32 <0x000000c5, "V_CMP_NE_U32", i32, COND_NE>; defm V_CMP_GE_U32 : VOPC_32 <0x000000c6, "V_CMP_GE_U32", i32, COND_UGE>; defm V_CMP_T_U32 : VOPC_32 <0x000000c7, "V_CMP_T_U32">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_U32 : VOPC_32 <0x000000d0, "V_CMPX_F_U32">; -defm V_CMPX_LT_U32 : VOPC_32 <0x000000d1, "V_CMPX_LT_U32">; -defm V_CMPX_EQ_U32 : VOPC_32 <0x000000d2, "V_CMPX_EQ_U32">; -defm V_CMPX_LE_U32 : VOPC_32 <0x000000d3, "V_CMPX_LE_U32">; -defm V_CMPX_GT_U32 : VOPC_32 <0x000000d4, "V_CMPX_GT_U32">; -defm V_CMPX_NE_U32 : VOPC_32 <0x000000d5, "V_CMPX_NE_U32">; -defm V_CMPX_GE_U32 : VOPC_32 <0x000000d6, "V_CMPX_GE_U32">; -defm V_CMPX_T_U32 : VOPC_32 <0x000000d7, "V_CMPX_T_U32">; +defm V_CMPX_F_U32 : VOPCX_32 <0x000000d0, "V_CMPX_F_U32">; +defm V_CMPX_LT_U32 : VOPCX_32 <0x000000d1, "V_CMPX_LT_U32">; +defm V_CMPX_EQ_U32 : VOPCX_32 <0x000000d2, "V_CMPX_EQ_U32">; +defm V_CMPX_LE_U32 : VOPCX_32 <0x000000d3, "V_CMPX_LE_U32">; +defm V_CMPX_GT_U32 : VOPCX_32 <0x000000d4, "V_CMPX_GT_U32">; +defm V_CMPX_NE_U32 : VOPCX_32 <0x000000d5, "V_CMPX_NE_U32">; +defm V_CMPX_GE_U32 : VOPCX_32 <0x000000d6, "V_CMPX_GE_U32">; +defm V_CMPX_T_U32 : VOPCX_32 <0x000000d7, "V_CMPX_T_U32">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_F_U64 : VOPC_64 <0x000000e0, "V_CMP_F_U64">; defm V_CMP_LT_U64 : VOPC_64 <0x000000e1, "V_CMP_LT_U64", i64, COND_ULT>; @@ -677,30 +690,30 @@ defm V_CMP_NE_U64 : VOPC_64 <0x000000e5, "V_CMP_NE_U64", i64, COND_NE>; defm V_CMP_GE_U64 : VOPC_64 <0x000000e6, "V_CMP_GE_U64", i64, COND_UGE>; defm V_CMP_T_U64 : VOPC_64 <0x000000e7, "V_CMP_T_U64">; -let hasSideEffects = 1, Defs = [EXEC] in { +let hasSideEffects = 1 in { -defm V_CMPX_F_U64 : VOPC_64 <0x000000f0, "V_CMPX_F_U64">; -defm V_CMPX_LT_U64 : VOPC_64 <0x000000f1, "V_CMPX_LT_U64">; -defm V_CMPX_EQ_U64 : VOPC_64 <0x000000f2, "V_CMPX_EQ_U64">; -defm V_CMPX_LE_U64 : VOPC_64 <0x000000f3, "V_CMPX_LE_U64">; -defm V_CMPX_GT_U64 : VOPC_64 <0x000000f4, "V_CMPX_GT_U64">; -defm V_CMPX_NE_U64 : VOPC_64 <0x000000f5, "V_CMPX_NE_U64">; -defm V_CMPX_GE_U64 : VOPC_64 <0x000000f6, "V_CMPX_GE_U64">; -defm V_CMPX_T_U64 : VOPC_64 <0x000000f7, "V_CMPX_T_U64">; +defm V_CMPX_F_U64 : VOPCX_64 <0x000000f0, "V_CMPX_F_U64">; +defm V_CMPX_LT_U64 : VOPCX_64 <0x000000f1, "V_CMPX_LT_U64">; +defm V_CMPX_EQ_U64 : VOPCX_64 <0x000000f2, "V_CMPX_EQ_U64">; +defm V_CMPX_LE_U64 : VOPCX_64 <0x000000f3, "V_CMPX_LE_U64">; +defm V_CMPX_GT_U64 : VOPCX_64 <0x000000f4, "V_CMPX_GT_U64">; +defm V_CMPX_NE_U64 : VOPCX_64 <0x000000f5, "V_CMPX_NE_U64">; +defm V_CMPX_GE_U64 : VOPCX_64 <0x000000f6, "V_CMPX_GE_U64">; +defm V_CMPX_T_U64 : 
VOPCX_64 <0x000000f7, "V_CMPX_T_U64">; -} // End hasSideEffects = 1, Defs = [EXEC] +} // End hasSideEffects = 1 defm V_CMP_CLASS_F32 : VOPC_32 <0x00000088, "V_CMP_CLASS_F32">; -let hasSideEffects = 1, Defs = [EXEC] in { -defm V_CMPX_CLASS_F32 : VOPC_32 <0x00000098, "V_CMPX_CLASS_F32">; -} // End hasSideEffects = 1, Defs = [EXEC] +let hasSideEffects = 1 in { +defm V_CMPX_CLASS_F32 : VOPCX_32 <0x00000098, "V_CMPX_CLASS_F32">; +} // End hasSideEffects = 1 defm V_CMP_CLASS_F64 : VOPC_64 <0x000000a8, "V_CMP_CLASS_F64">; -let hasSideEffects = 1, Defs = [EXEC] in { -defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; -} // End hasSideEffects = 1, Defs = [EXEC] +let hasSideEffects = 1 in { +defm V_CMPX_CLASS_F64 : VOPCX_64 <0x000000b8, "V_CMPX_CLASS_F64">; +} // End hasSideEffects = 1 } // End isCompare = 1 @@ -708,8 +721,97 @@ defm V_CMPX_CLASS_F64 : VOPC_64 <0x000000b8, "V_CMPX_CLASS_F64">; // DS Instructions //===----------------------------------------------------------------------===// -def DS_ADD_U32_RTN : DS_1A1D_RET <0x20, "DS_ADD_U32_RTN", VReg_32>; -def DS_SUB_U32_RTN : DS_1A1D_RET <0x21, "DS_SUB_U32_RTN", VReg_32>; + +def DS_ADD_U32 : DS_1A1D_NORET <0x0, "DS_ADD_U32", VReg_32>; +def DS_SUB_U32 : DS_1A1D_NORET <0x1, "DS_SUB_U32", VReg_32>; +def DS_RSUB_U32 : DS_1A1D_NORET <0x2, "DS_RSUB_U32", VReg_32>; +def DS_INC_U32 : DS_1A1D_NORET <0x3, "DS_INC_U32", VReg_32>; +def DS_DEC_U32 : DS_1A1D_NORET <0x4, "DS_DEC_U32", VReg_32>; +def DS_MIN_I32 : DS_1A1D_NORET <0x5, "DS_MIN_I32", VReg_32>; +def DS_MAX_I32 : DS_1A1D_NORET <0x6, "DS_MAX_I32", VReg_32>; +def DS_MIN_U32 : DS_1A1D_NORET <0x7, "DS_MIN_U32", VReg_32>; +def DS_MAX_U32 : DS_1A1D_NORET <0x8, "DS_MAX_U32", VReg_32>; +def DS_AND_B32 : DS_1A1D_NORET <0x9, "DS_AND_B32", VReg_32>; +def DS_OR_B32 : DS_1A1D_NORET <0xa, "DS_OR_B32", VReg_32>; +def DS_XOR_B32 : DS_1A1D_NORET <0xb, "DS_XOR_B32", VReg_32>; +def DS_MSKOR_B32 : DS_1A1D_NORET <0xc, "DS_MSKOR_B32", VReg_32>; +def DS_CMPST_B32 : DS_1A2D_NORET <0x10, "DS_CMPST_B32", VReg_32>; +def DS_CMPST_F32 : DS_1A2D_NORET <0x11, "DS_CMPST_F32", VReg_32>; +def DS_MIN_F32 : DS_1A1D_NORET <0x12, "DS_MIN_F32", VReg_32>; +def DS_MAX_F32 : DS_1A1D_NORET <0x13, "DS_MAX_F32", VReg_32>; + +def DS_ADD_RTN_U32 : DS_1A1D_RET <0x20, "DS_ADD_RTN_U32", VReg_32>; +def DS_SUB_RTN_U32 : DS_1A1D_RET <0x21, "DS_SUB_RTN_U32", VReg_32>; +def DS_RSUB_RTN_U32 : DS_1A1D_RET <0x22, "DS_RSUB_RTN_U32", VReg_32>; +def DS_INC_RTN_U32 : DS_1A1D_RET <0x23, "DS_INC_RTN_U32", VReg_32>; +def DS_DEC_RTN_U32 : DS_1A1D_RET <0x24, "DS_DEC_RTN_U32", VReg_32>; +def DS_MIN_RTN_I32 : DS_1A1D_RET <0x25, "DS_MIN_RTN_I32", VReg_32>; +def DS_MAX_RTN_I32 : DS_1A1D_RET <0x26, "DS_MAX_RTN_I32", VReg_32>; +def DS_MIN_RTN_U32 : DS_1A1D_RET <0x27, "DS_MIN_RTN_U32", VReg_32>; +def DS_MAX_RTN_U32 : DS_1A1D_RET <0x28, "DS_MAX_RTN_U32", VReg_32>; +def DS_AND_RTN_B32 : DS_1A1D_RET <0x29, "DS_AND_RTN_B32", VReg_32>; +def DS_OR_RTN_B32 : DS_1A1D_RET <0x2a, "DS_OR_RTN_B32", VReg_32>; +def DS_XOR_RTN_B32 : DS_1A1D_RET <0x2b, "DS_XOR_RTN_B32", VReg_32>; +def DS_MSKOR_RTN_B32 : DS_1A1D_RET <0x2c, "DS_MSKOR_RTN_B32", VReg_32>; +def DS_WRXCHG_RTN_B32 : DS_1A1D_RET <0x2d, "DS_WRXCHG_RTN_B32", VReg_32>; +//def DS_WRXCHG2_RTN_B32 : DS_2A0D_RET <0x2e, "DS_WRXCHG2_RTN_B32", VReg_32>; +//def DS_WRXCHG2ST64_RTN_B32 : DS_2A0D_RET <0x2f, "DS_WRXCHG2_RTN_B32", VReg_32>; +def DS_CMPST_RTN_B32 : DS_1A2D_RET <0x30, "DS_CMPST_RTN_B32", VReg_32>; +def DS_CMPST_RTN_F32 : DS_1A2D_RET <0x31, "DS_CMPST_RTN_F32", VReg_32>; +def DS_MIN_RTN_F32 : DS_1A1D_RET <0x32, 
"DS_MIN_RTN_F32", VReg_32>; +def DS_MAX_RTN_F32 : DS_1A1D_RET <0x33, "DS_MAX_RTN_F32", VReg_32>; + +let SubtargetPredicate = isCI in { +def DS_WRAP_RTN_F32 : DS_1A1D_RET <0x34, "DS_WRAP_RTN_F32", VReg_32>; +} // End isCI + + +def DS_ADD_U64 : DS_1A1D_NORET <0x40, "DS_ADD_U64", VReg_32>; +def DS_SUB_U64 : DS_1A1D_NORET <0x41, "DS_SUB_U64", VReg_32>; +def DS_RSUB_U64 : DS_1A1D_NORET <0x42, "DS_RSUB_U64", VReg_32>; +def DS_INC_U64 : DS_1A1D_NORET <0x43, "DS_INC_U64", VReg_32>; +def DS_DEC_U64 : DS_1A1D_NORET <0x44, "DS_DEC_U64", VReg_32>; +def DS_MIN_I64 : DS_1A1D_NORET <0x45, "DS_MIN_I64", VReg_64>; +def DS_MAX_I64 : DS_1A1D_NORET <0x46, "DS_MAX_I64", VReg_64>; +def DS_MIN_U64 : DS_1A1D_NORET <0x47, "DS_MIN_U64", VReg_64>; +def DS_MAX_U64 : DS_1A1D_NORET <0x48, "DS_MAX_U64", VReg_64>; +def DS_AND_B64 : DS_1A1D_NORET <0x49, "DS_AND_B64", VReg_64>; +def DS_OR_B64 : DS_1A1D_NORET <0x4a, "DS_OR_B64", VReg_64>; +def DS_XOR_B64 : DS_1A1D_NORET <0x4b, "DS_XOR_B64", VReg_64>; +def DS_MSKOR_B64 : DS_1A1D_NORET <0x4c, "DS_MSKOR_B64", VReg_64>; +def DS_CMPST_B64 : DS_1A2D_NORET <0x50, "DS_CMPST_B64", VReg_64>; +def DS_CMPST_F64 : DS_1A2D_NORET <0x51, "DS_CMPST_F64", VReg_64>; +def DS_MIN_F64 : DS_1A1D_NORET <0x52, "DS_MIN_F64", VReg_64>; +def DS_MAX_F64 : DS_1A1D_NORET <0x53, "DS_MAX_F64", VReg_64>; + +def DS_ADD_RTN_U64 : DS_1A1D_RET <0x60, "DS_ADD_RTN_U64", VReg_64>; +def DS_SUB_RTN_U64 : DS_1A1D_RET <0x61, "DS_SUB_RTN_U64", VReg_64>; +def DS_RSUB_RTN_U64 : DS_1A1D_RET <0x62, "DS_RSUB_RTN_U64", VReg_64>; +def DS_INC_RTN_U64 : DS_1A1D_RET <0x63, "DS_INC_RTN_U64", VReg_64>; +def DS_DEC_RTN_U64 : DS_1A1D_RET <0x64, "DS_DEC_RTN_U64", VReg_64>; +def DS_MIN_RTN_I64 : DS_1A1D_RET <0x65, "DS_MIN_RTN_I64", VReg_64>; +def DS_MAX_RTN_I64 : DS_1A1D_RET <0x66, "DS_MAX_RTN_I64", VReg_64>; +def DS_MIN_RTN_U64 : DS_1A1D_RET <0x67, "DS_MIN_RTN_U64", VReg_64>; +def DS_MAX_RTN_U64 : DS_1A1D_RET <0x68, "DS_MAX_RTN_U64", VReg_64>; +def DS_AND_RTN_B64 : DS_1A1D_RET <0x69, "DS_AND_RTN_B64", VReg_64>; +def DS_OR_RTN_B64 : DS_1A1D_RET <0x6a, "DS_OR_RTN_B64", VReg_64>; +def DS_XOR_RTN_B64 : DS_1A1D_RET <0x6b, "DS_XOR_RTN_B64", VReg_64>; +def DS_MSKOR_RTN_B64 : DS_1A1D_RET <0x6c, "DS_MSKOR_RTN_B64", VReg_64>; +def DS_WRXCHG_RTN_B64 : DS_1A1D_RET <0x6d, "DS_WRXCHG_RTN_B64", VReg_64>; +//def DS_WRXCHG2_RTN_B64 : DS_2A0D_RET <0x6e, "DS_WRXCHG2_RTN_B64", VReg_64>; +//def DS_WRXCHG2ST64_RTN_B64 : DS_2A0D_RET <0x6f, "DS_WRXCHG2_RTN_B64", VReg_64>; +def DS_CMPST_RTN_B64 : DS_1A2D_RET <0x70, "DS_CMPST_RTN_B64", VReg_64>; +def DS_CMPST_RTN_F64 : DS_1A2D_RET <0x71, "DS_CMPST_RTN_F64", VReg_64>; +def DS_MIN_RTN_F64 : DS_1A1D_RET <0x72, "DS_MIN_F64", VReg_64>; +def DS_MAX_RTN_F64 : DS_1A1D_RET <0x73, "DS_MAX_F64", VReg_64>; + +//let SubtargetPredicate = isCI in { +// DS_CONDXCHG32_RTN_B64 +// DS_CONDXCHG32_RTN_B128 +//} // End isCI + +// TODO: _SRC2_* forms + def DS_WRITE_B32 : DS_Store_Helper <0x0000000d, "DS_WRITE_B32", VReg_32>; def DS_WRITE_B8 : DS_Store_Helper <0x00000001e, "DS_WRITE_B8", VReg_32>; def DS_WRITE_B16 : DS_Store_Helper <0x00000001f, "DS_WRITE_B16", VReg_32>; @@ -744,32 +846,46 @@ defm BUFFER_LOAD_FORMAT_XYZW : MUBUF_Load_Helper <0x00000003, "BUFFER_LOAD_FORMA //def BUFFER_STORE_FORMAT_XY : MUBUF_ <0x00000005, "BUFFER_STORE_FORMAT_XY", []>; //def BUFFER_STORE_FORMAT_XYZ : MUBUF_ <0x00000006, "BUFFER_STORE_FORMAT_XYZ", []>; //def BUFFER_STORE_FORMAT_XYZW : MUBUF_ <0x00000007, "BUFFER_STORE_FORMAT_XYZW", []>; -defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper <0x00000008, "BUFFER_LOAD_UBYTE", VReg_32>; -defm BUFFER_LOAD_SBYTE : 
MUBUF_Load_Helper <0x00000009, "BUFFER_LOAD_SBYTE", VReg_32>; -defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper <0x0000000a, "BUFFER_LOAD_USHORT", VReg_32>; -defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper <0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32>; -defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper <0x0000000c, "BUFFER_LOAD_DWORD", VReg_32>; -defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper <0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64>; -defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper <0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128>; +defm BUFFER_LOAD_UBYTE : MUBUF_Load_Helper < + 0x00000008, "BUFFER_LOAD_UBYTE", VReg_32, i32, az_extloadi8_global +>; +defm BUFFER_LOAD_SBYTE : MUBUF_Load_Helper < + 0x00000009, "BUFFER_LOAD_SBYTE", VReg_32, i32, sextloadi8_global +>; +defm BUFFER_LOAD_USHORT : MUBUF_Load_Helper < + 0x0000000a, "BUFFER_LOAD_USHORT", VReg_32, i32, az_extloadi16_global +>; +defm BUFFER_LOAD_SSHORT : MUBUF_Load_Helper < + 0x0000000b, "BUFFER_LOAD_SSHORT", VReg_32, i32, sextloadi16_global +>; +defm BUFFER_LOAD_DWORD : MUBUF_Load_Helper < + 0x0000000c, "BUFFER_LOAD_DWORD", VReg_32, i32, global_load +>; +defm BUFFER_LOAD_DWORDX2 : MUBUF_Load_Helper < + 0x0000000d, "BUFFER_LOAD_DWORDX2", VReg_64, v2i32, global_load +>; +defm BUFFER_LOAD_DWORDX4 : MUBUF_Load_Helper < + 0x0000000e, "BUFFER_LOAD_DWORDX4", VReg_128, v4i32, global_load +>; def BUFFER_STORE_BYTE : MUBUF_Store_Helper < - 0x00000018, "BUFFER_STORE_BYTE", VReg_32 + 0x00000018, "BUFFER_STORE_BYTE", VReg_32, i32, truncstorei8_global >; def BUFFER_STORE_SHORT : MUBUF_Store_Helper < - 0x0000001a, "BUFFER_STORE_SHORT", VReg_32 + 0x0000001a, "BUFFER_STORE_SHORT", VReg_32, i32, truncstorei16_global >; def BUFFER_STORE_DWORD : MUBUF_Store_Helper < - 0x0000001c, "BUFFER_STORE_DWORD", VReg_32 + 0x0000001c, "BUFFER_STORE_DWORD", VReg_32, i32, global_store >; def BUFFER_STORE_DWORDX2 : MUBUF_Store_Helper < - 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64 + 0x0000001d, "BUFFER_STORE_DWORDX2", VReg_64, v2i32, global_store >; def BUFFER_STORE_DWORDX4 : MUBUF_Store_Helper < - 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128 + 0x0000001e, "BUFFER_STORE_DWORDX4", VReg_128, v4i32, global_store >; //def BUFFER_ATOMIC_SWAP : MUBUF_ <0x00000030, "BUFFER_ATOMIC_SWAP", []>; //def BUFFER_ATOMIC_CMPSWAP : MUBUF_ <0x00000031, "BUFFER_ATOMIC_CMPSWAP", []>; @@ -885,31 +1001,31 @@ defm IMAGE_SAMPLE_C_B : MIMG_Sampler <0x0000002d, "IMAGE_SAMPLE_C_B">; //def IMAGE_SAMPLE_C_B_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_O", 0x0000003d>; //def IMAGE_SAMPLE_C_B_CL_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_B_CL_O", 0x0000003e>; //def IMAGE_SAMPLE_C_LZ_O : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_LZ_O", 0x0000003f>; -//def IMAGE_GATHER4 : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4", 0x00000040>; -//def IMAGE_GATHER4_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL", 0x00000041>; -//def IMAGE_GATHER4_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L", 0x00000044>; -//def IMAGE_GATHER4_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B", 0x00000045>; -//def IMAGE_GATHER4_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL", 0x00000046>; -//def IMAGE_GATHER4_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ", 0x00000047>; -//def IMAGE_GATHER4_C : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C", 0x00000048>; -//def IMAGE_GATHER4_C_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL", 0x00000049>; -//def IMAGE_GATHER4_C_L : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L", 0x0000004c>; -//def IMAGE_GATHER4_C_B : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B", 0x0000004d>; -//def IMAGE_GATHER4_C_B_CL : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL", 
0x0000004e>; -//def IMAGE_GATHER4_C_LZ : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ", 0x0000004f>; -//def IMAGE_GATHER4_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_O", 0x00000050>; -//def IMAGE_GATHER4_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_CL_O", 0x00000051>; -//def IMAGE_GATHER4_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_L_O", 0x00000054>; -//def IMAGE_GATHER4_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_O", 0x00000055>; -//def IMAGE_GATHER4_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_B_CL_O", 0x00000056>; -//def IMAGE_GATHER4_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_LZ_O", 0x00000057>; -//def IMAGE_GATHER4_C_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_O", 0x00000058>; -//def IMAGE_GATHER4_C_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_CL_O", 0x00000059>; -//def IMAGE_GATHER4_C_L_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_L_O", 0x0000005c>; -//def IMAGE_GATHER4_C_B_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_O", 0x0000005d>; -//def IMAGE_GATHER4_C_B_CL_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_B_CL_O", 0x0000005e>; -//def IMAGE_GATHER4_C_LZ_O : MIMG_NoPattern_GATHER4 <"IMAGE_GATHER4_C_LZ_O", 0x0000005f>; -//def IMAGE_GET_LOD : MIMG_NoPattern_ <"IMAGE_GET_LOD", 0x00000060>; +defm IMAGE_GATHER4 : MIMG_Gather <0x00000040, "IMAGE_GATHER4">; +defm IMAGE_GATHER4_CL : MIMG_Gather <0x00000041, "IMAGE_GATHER4_CL">; +defm IMAGE_GATHER4_L : MIMG_Gather <0x00000044, "IMAGE_GATHER4_L">; +defm IMAGE_GATHER4_B : MIMG_Gather <0x00000045, "IMAGE_GATHER4_B">; +defm IMAGE_GATHER4_B_CL : MIMG_Gather <0x00000046, "IMAGE_GATHER4_B_CL">; +defm IMAGE_GATHER4_LZ : MIMG_Gather <0x00000047, "IMAGE_GATHER4_LZ">; +defm IMAGE_GATHER4_C : MIMG_Gather <0x00000048, "IMAGE_GATHER4_C">; +defm IMAGE_GATHER4_C_CL : MIMG_Gather <0x00000049, "IMAGE_GATHER4_C_CL">; +defm IMAGE_GATHER4_C_L : MIMG_Gather <0x0000004c, "IMAGE_GATHER4_C_L">; +defm IMAGE_GATHER4_C_B : MIMG_Gather <0x0000004d, "IMAGE_GATHER4_C_B">; +defm IMAGE_GATHER4_C_B_CL : MIMG_Gather <0x0000004e, "IMAGE_GATHER4_C_B_CL">; +defm IMAGE_GATHER4_C_LZ : MIMG_Gather <0x0000004f, "IMAGE_GATHER4_C_LZ">; +defm IMAGE_GATHER4_O : MIMG_Gather <0x00000050, "IMAGE_GATHER4_O">; +defm IMAGE_GATHER4_CL_O : MIMG_Gather <0x00000051, "IMAGE_GATHER4_CL_O">; +defm IMAGE_GATHER4_L_O : MIMG_Gather <0x00000054, "IMAGE_GATHER4_L_O">; +defm IMAGE_GATHER4_B_O : MIMG_Gather <0x00000055, "IMAGE_GATHER4_B_O">; +defm IMAGE_GATHER4_B_CL_O : MIMG_Gather <0x00000056, "IMAGE_GATHER4_B_CL_O">; +defm IMAGE_GATHER4_LZ_O : MIMG_Gather <0x00000057, "IMAGE_GATHER4_LZ_O">; +defm IMAGE_GATHER4_C_O : MIMG_Gather <0x00000058, "IMAGE_GATHER4_C_O">; +defm IMAGE_GATHER4_C_CL_O : MIMG_Gather <0x00000059, "IMAGE_GATHER4_C_CL_O">; +defm IMAGE_GATHER4_C_L_O : MIMG_Gather <0x0000005c, "IMAGE_GATHER4_C_L_O">; +defm IMAGE_GATHER4_C_B_O : MIMG_Gather <0x0000005d, "IMAGE_GATHER4_C_B_O">; +defm IMAGE_GATHER4_C_B_CL_O : MIMG_Gather <0x0000005e, "IMAGE_GATHER4_C_B_CL_O">; +defm IMAGE_GATHER4_C_LZ_O : MIMG_Gather <0x0000005f, "IMAGE_GATHER4_C_LZ_O">; +defm IMAGE_GET_LOD : MIMG_Sampler <0x00000060, "IMAGE_GET_LOD">; //def IMAGE_SAMPLE_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD", 0x00000068>; //def IMAGE_SAMPLE_CD_CL : MIMG_NoPattern_ <"IMAGE_SAMPLE_CD_CL", 0x00000069>; //def IMAGE_SAMPLE_C_CD : MIMG_NoPattern_ <"IMAGE_SAMPLE_C_CD", 0x0000006a>; @@ -962,8 +1078,12 @@ defm V_CVT_I32_F32 : VOP1_32 <0x00000008, "V_CVT_I32_F32", [(set i32:$dst, (fp_to_sint f32:$src0))] >; defm V_MOV_FED_B32 : VOP1_32 <0x00000009, "V_MOV_FED_B32", []>; -////def V_CVT_F16_F32 : VOP1_F16 <0x0000000a, "V_CVT_F16_F32", []>; 
-//defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", []>; +defm V_CVT_F16_F32 : VOP1_32 <0x0000000a, "V_CVT_F16_F32", + [(set i32:$dst, (f32_to_f16 f32:$src0))] +>; +defm V_CVT_F32_F16 : VOP1_32 <0x0000000b, "V_CVT_F32_F16", + [(set f32:$dst, (f16_to_f32 i32:$src0))] +>; //defm V_CVT_RPI_I32_F32 : VOP1_32 <0x0000000c, "V_CVT_RPI_I32_F32", []>; //defm V_CVT_FLR_I32_F32 : VOP1_32 <0x0000000d, "V_CVT_FLR_I32_F32", []>; //defm V_CVT_OFF_F32_I4 : VOP1_32 <0x0000000e, "V_CVT_OFF_F32_I4", []>; @@ -973,10 +1093,18 @@ defm V_CVT_F32_F64 : VOP1_32_64 <0x0000000f, "V_CVT_F32_F64", defm V_CVT_F64_F32 : VOP1_64_32 <0x00000010, "V_CVT_F64_F32", [(set f64:$dst, (fextend f32:$src0))] >; -//defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", []>; -//defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", []>; -//defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", []>; -//defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", []>; +defm V_CVT_F32_UBYTE0 : VOP1_32 <0x00000011, "V_CVT_F32_UBYTE0", + [(set f32:$dst, (AMDGPUcvt_f32_ubyte0 i32:$src0))] +>; +defm V_CVT_F32_UBYTE1 : VOP1_32 <0x00000012, "V_CVT_F32_UBYTE1", + [(set f32:$dst, (AMDGPUcvt_f32_ubyte1 i32:$src0))] +>; +defm V_CVT_F32_UBYTE2 : VOP1_32 <0x00000013, "V_CVT_F32_UBYTE2", + [(set f32:$dst, (AMDGPUcvt_f32_ubyte2 i32:$src0))] +>; +defm V_CVT_F32_UBYTE3 : VOP1_32 <0x00000014, "V_CVT_F32_UBYTE3", + [(set f32:$dst, (AMDGPUcvt_f32_ubyte3 i32:$src0))] +>; defm V_CVT_U32_F64 : VOP1_32_64 <0x00000015, "V_CVT_U32_F64", [(set i32:$dst, (fp_to_uint f64:$src0))] >; @@ -988,7 +1116,7 @@ defm V_FRACT_F32 : VOP1_32 <0x00000020, "V_FRACT_F32", [(set f32:$dst, (AMDGPUfract f32:$src0))] >; defm V_TRUNC_F32 : VOP1_32 <0x00000021, "V_TRUNC_F32", - [(set f32:$dst, (int_AMDGPU_trunc f32:$src0))] + [(set f32:$dst, (ftrunc f32:$src0))] >; defm V_CEIL_F32 : VOP1_32 <0x00000022, "V_CEIL_F32", [(set f32:$dst, (fceil f32:$src0))] @@ -1006,24 +1134,33 @@ defm V_LOG_CLAMP_F32 : VOP1_32 <0x00000026, "V_LOG_CLAMP_F32", []>; defm V_LOG_F32 : VOP1_32 <0x00000027, "V_LOG_F32", [(set f32:$dst, (flog2 f32:$src0))] >; + defm V_RCP_CLAMP_F32 : VOP1_32 <0x00000028, "V_RCP_CLAMP_F32", []>; defm V_RCP_LEGACY_F32 : VOP1_32 <0x00000029, "V_RCP_LEGACY_F32", []>; defm V_RCP_F32 : VOP1_32 <0x0000002a, "V_RCP_F32", - [(set f32:$dst, (fdiv FP_ONE, f32:$src0))] + [(set f32:$dst, (AMDGPUrcp f32:$src0))] >; defm V_RCP_IFLAG_F32 : VOP1_32 <0x0000002b, "V_RCP_IFLAG_F32", []>; -defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", []>; +defm V_RSQ_CLAMP_F32 : VOP1_32 <0x0000002c, "V_RSQ_CLAMP_F32", + [(set f32:$dst, (AMDGPUrsq_clamped f32:$src0))] +>; defm V_RSQ_LEGACY_F32 : VOP1_32 < 0x0000002d, "V_RSQ_LEGACY_F32", - [(set f32:$dst, (int_AMDGPU_rsq f32:$src0))] + [(set f32:$dst, (AMDGPUrsq_legacy f32:$src0))] +>; +defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", + [(set f32:$dst, (AMDGPUrsq f32:$src0))] >; -defm V_RSQ_F32 : VOP1_32 <0x0000002e, "V_RSQ_F32", []>; defm V_RCP_F64 : VOP1_64 <0x0000002f, "V_RCP_F64", - [(set f64:$dst, (fdiv FP_ONE, f64:$src0))] + [(set f64:$dst, (AMDGPUrcp f64:$src0))] >; defm V_RCP_CLAMP_F64 : VOP1_64 <0x00000030, "V_RCP_CLAMP_F64", []>; -defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", []>; -defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", []>; +defm V_RSQ_F64 : VOP1_64 <0x00000031, "V_RSQ_F64", + [(set f64:$dst, (AMDGPUrsq f64:$src0))] +>; +defm V_RSQ_CLAMP_F64 : VOP1_64 <0x00000032, "V_RSQ_CLAMP_F64", + [(set f64:$dst, (AMDGPUrsq_clamped f64:$src0))] +>; defm V_SQRT_F32 : VOP1_32 
<0x00000033, "V_SQRT_F32", [(set f32:$dst, (fsqrt f32:$src0))] >; @@ -1211,7 +1348,7 @@ defm V_BFM_B32 : VOP2_32 <0x0000001e, "V_BFM_B32", defm V_MAC_F32 : VOP2_32 <0x0000001f, "V_MAC_F32", []>; defm V_MADMK_F32 : VOP2_32 <0x00000020, "V_MADMK_F32", []>; defm V_MADAK_F32 : VOP2_32 <0x00000021, "V_MADAK_F32", []>; -//defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; +defm V_BCNT_U32_B32 : VOP2_32 <0x00000022, "V_BCNT_U32_B32", []>; defm V_MBCNT_LO_U32_B32 : VOP2_32 <0x00000023, "V_MBCNT_LO_U32_B32", []>; defm V_MBCNT_HI_U32_B32 : VOP2_32 <0x00000024, "V_MBCNT_HI_U32_B32", []>; @@ -1303,16 +1440,20 @@ defm V_MULLIT_F32 : VOP3_32 <0x00000150, "V_MULLIT_F32", []>; //def V_SAD_U16 : VOP3_U16 <0x0000015c, "V_SAD_U16", []>; defm V_SAD_U32 : VOP3_32 <0x0000015d, "V_SAD_U32", []>; ////def V_CVT_PK_U8_F32 : VOP3_U8 <0x0000015e, "V_CVT_PK_U8_F32", []>; -defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", []>; -def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", []>; +defm V_DIV_FIXUP_F32 : VOP3_32 <0x0000015f, "V_DIV_FIXUP_F32", + [(set f32:$dst, (AMDGPUdiv_fixup f32:$src0, f32:$src1, f32:$src2))] +>; +def V_DIV_FIXUP_F64 : VOP3_64 <0x00000160, "V_DIV_FIXUP_F64", + [(set f64:$dst, (AMDGPUdiv_fixup f64:$src0, f64:$src1, f64:$src2))] +>; -def V_LSHL_B64 : VOP3_64_Shift <0x00000161, "V_LSHL_B64", +def V_LSHL_B64 : VOP3_64_32 <0x00000161, "V_LSHL_B64", [(set i64:$dst, (shl i64:$src0, i32:$src1))] >; -def V_LSHR_B64 : VOP3_64_Shift <0x00000162, "V_LSHR_B64", +def V_LSHR_B64 : VOP3_64_32 <0x00000162, "V_LSHR_B64", [(set i64:$dst, (srl i64:$src0, i32:$src1))] >; -def V_ASHR_I64 : VOP3_64_Shift <0x00000163, "V_ASHR_I64", +def V_ASHR_I64 : VOP3_64_32 <0x00000163, "V_ASHR_I64", [(set i64:$dst, (sra i64:$src0, i32:$src1))] >; @@ -1336,14 +1477,23 @@ defm V_MUL_HI_I32 : VOP3_32 <0x0000016c, "V_MUL_HI_I32", []>; } // isCommutable = 1 -defm V_DIV_SCALE_F32 : VOP3_32 <0x0000016d, "V_DIV_SCALE_F32", []>; -def V_DIV_SCALE_F64 : VOP3_64 <0x0000016e, "V_DIV_SCALE_F64", []>; -defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", []>; -def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", []>; +def V_DIV_SCALE_F32 : VOP3b_32 <0x0000016d, "V_DIV_SCALE_F32", []>; + +// Double precision division pre-scale. +def V_DIV_SCALE_F64 : VOP3b_64 <0x0000016e, "V_DIV_SCALE_F64", []>; + +defm V_DIV_FMAS_F32 : VOP3_32 <0x0000016f, "V_DIV_FMAS_F32", + [(set f32:$dst, (AMDGPUdiv_fmas f32:$src0, f32:$src1, f32:$src2))] +>; +def V_DIV_FMAS_F64 : VOP3_64 <0x00000170, "V_DIV_FMAS_F64", + [(set f64:$dst, (AMDGPUdiv_fmas f64:$src0, f64:$src1, f64:$src2))] +>; //def V_MSAD_U8 : VOP3_U8 <0x00000171, "V_MSAD_U8", []>; //def V_QSAD_U8 : VOP3_U8 <0x00000172, "V_QSAD_U8", []>; //def V_MQSAD_U8 : VOP3_U8 <0x00000173, "V_MQSAD_U8", []>; -def V_TRIG_PREOP_F64 : VOP3_64 <0x00000174, "V_TRIG_PREOP_F64", []>; +def V_TRIG_PREOP_F64 : VOP3_64_32 <0x00000174, "V_TRIG_PREOP_F64", + [(set f64:$dst, (AMDGPUtrig_preop f64:$src0, i32:$src1))] +>; //===----------------------------------------------------------------------===// // Pseudo Instructions @@ -1500,7 +1650,7 @@ let usesCustomInserter = 1 in { // constant that can be used with the ADDR64 MUBUF instructions. 
def SI_ADDR64_RSRC : InstSI < (outs SReg_128:$srsrc), - (ins SReg_64:$ptr), + (ins SSrc_64:$ptr), "", [] >; @@ -1508,7 +1658,7 @@ def V_SUB_F64 : InstSI < (outs VReg_64:$dst), (ins VReg_64:$src0, VReg_64:$src1), "V_SUB_F64 $dst, $src0, $src1", - [] + [(set f64:$dst, (fsub f64:$src0, f64:$src1))] >; } // end usesCustomInserter @@ -1529,6 +1679,7 @@ multiclass SI_SPILL_SGPR <RegisterClass sgpr_class> { } +defm SI_SPILL_S32 : SI_SPILL_SGPR <SReg_32>; defm SI_SPILL_S64 : SI_SPILL_SGPR <SReg_64>; defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>; defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>; @@ -1552,7 +1703,7 @@ def : Pat < /* int_SI_vs_load_input */ def : Pat< - (SIload_input v4i32:$tlst, IMM12bit:$attr_offset, i32:$buf_idx_vgpr), + (SIload_input v4i32:$tlst, imm:$attr_offset, i32:$buf_idx_vgpr), (BUFFER_LOAD_FORMAT_XYZW_IDXEN $tlst, $buf_idx_vgpr, imm:$attr_offset, 0, 0, 0, 0) >; @@ -1564,11 +1715,6 @@ def : Pat < $src0, $src1, $src2, $src3) >; -def : Pat < - (f64 (fsub f64:$src0, f64:$src1)), - (V_SUB_F64 $src0, $src1) ->; - //===----------------------------------------------------------------------===// // SMRD Patterns //===----------------------------------------------------------------------===// @@ -1596,7 +1742,6 @@ multiclass SMRD_Pattern <SMRD Instr_IMM, SMRD Instr_SGPR, ValueType vt> { defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, f32>; defm : SMRD_Pattern <S_LOAD_DWORD_IMM, S_LOAD_DWORD_SGPR, i32>; -defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, i64>; defm : SMRD_Pattern <S_LOAD_DWORDX2_IMM, S_LOAD_DWORDX2_SGPR, v2i32>; defm : SMRD_Pattern <S_LOAD_DWORDX4_IMM, S_LOAD_DWORDX4_SGPR, v4i32>; defm : SMRD_Pattern <S_LOAD_DWORDX8_IMM, S_LOAD_DWORDX8_SGPR, v32i8>; @@ -1615,6 +1760,24 @@ def : Pat < (S_BUFFER_LOAD_DWORD_SGPR $sbase, (S_MOV_B32 imm:$offset)) >; +} // Predicates = [isSI] in { + +//===----------------------------------------------------------------------===// +// SOP1 Patterns +//===----------------------------------------------------------------------===// + +let Predicates = [isSI, isCFDepth0] in { + +def : Pat < + (i64 (ctpop i64:$src)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (S_BCNT1_I32_B64 $src), sub0), + (S_MOV_B32 0), sub1) +>; + +} // Predicates = [isSI, isCFDepth0] + +let Predicates = [isSI] in { //===----------------------------------------------------------------------===// // SOP2 Patterns //===----------------------------------------------------------------------===// @@ -1625,18 +1788,39 @@ def : Pat < >; //===----------------------------------------------------------------------===// -// VOP2 Patterns +// SOPP Patterns //===----------------------------------------------------------------------===// def : Pat < - (or i64:$src0, i64:$src1), + (int_AMDGPU_barrier_global), + (S_BARRIER) +>; + +//===----------------------------------------------------------------------===// +// VOP1 Patterns +//===----------------------------------------------------------------------===// + +def : RcpPat<V_RCP_F32_e32, f32>; +def : RcpPat<V_RCP_F64_e32, f64>; +defm : RsqPat<V_RSQ_F32_e32, f32>; +defm : RsqPat<V_RSQ_F64_e32, f64>; + +//===----------------------------------------------------------------------===// +// VOP2 Patterns +//===----------------------------------------------------------------------===// + +class BinOp64Pat <SDNode node, Instruction inst> : Pat < + (node i64:$src0, i64:$src1), (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), - (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub0), + (inst (EXTRACT_SUBREG i64:$src0, sub0), 
(EXTRACT_SUBREG i64:$src1, sub0)), sub0), - (V_OR_B32_e32 (EXTRACT_SUBREG i64:$src0, sub1), + (inst (EXTRACT_SUBREG i64:$src0, sub1), (EXTRACT_SUBREG i64:$src1, sub1)), sub1) >; +def : BinOp64Pat <or, V_OR_B32_e32>; +def : BinOp64Pat <xor, V_XOR_B32_e32>; + class SextInReg <ValueType vt, int ShiftAmt> : Pat < (sext_inreg i32:$src0, vt), (V_ASHRREV_I32_e32 ShiftAmt, (V_LSHLREV_B32_e32 ShiftAmt, $src0)) @@ -1645,10 +1829,82 @@ class SextInReg <ValueType vt, int ShiftAmt> : Pat < def : SextInReg <i8, 24>; def : SextInReg <i16, 16>; +def : Pat < + (i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)), + (V_BCNT_U32_B32_e32 $popcnt, $val) +>; + +def : Pat < + (i32 (ctpop i32:$popcnt)), + (V_BCNT_U32_B32_e64 $popcnt, 0, 0, 0) +>; + +def : Pat < + (i64 (ctpop i64:$src)), + (INSERT_SUBREG + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (V_BCNT_U32_B32_e32 (EXTRACT_SUBREG $src, sub1), + (V_BCNT_U32_B32_e64 (EXTRACT_SUBREG $src, sub0), 0, 0, 0)), + sub0), + (V_MOV_B32_e32 0), sub1) +>; + /********** ======================= **********/ /********** Image sampling patterns **********/ /********** ======================= **********/ +class SampleRawPattern<SDPatternOperator name, MIMG opcode, ValueType vt> : Pat < + (name vt:$addr, v32i8:$rsrc, v16i8:$sampler, i32:$dmask, i32:$unorm, + i32:$r128, i32:$da, i32:$glc, i32:$slc, i32:$tfe, i32:$lwe), + (opcode (as_i32imm $dmask), (as_i1imm $unorm), (as_i1imm $glc), (as_i1imm $da), + (as_i1imm $r128), (as_i1imm $tfe), (as_i1imm $lwe), (as_i1imm $slc), + $addr, $rsrc, $sampler) +>; + +// Only the variants which make sense are defined. +def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V2, v2i32>; +def : SampleRawPattern<int_SI_gather4, IMAGE_GATHER4_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_cl, IMAGE_GATHER4_CL_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_l, IMAGE_GATHER4_L_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_b, IMAGE_GATHER4_B_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_b_cl, IMAGE_GATHER4_B_CL_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V2, v2i32>; +def : SampleRawPattern<int_SI_gather4_lz, IMAGE_GATHER4_LZ_V4_V4, v4i32>; + +def : SampleRawPattern<int_SI_gather4_c, IMAGE_GATHER4_C_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_c_cl, IMAGE_GATHER4_C_CL_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_c_l, IMAGE_GATHER4_C_L_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_c_b, IMAGE_GATHER4_C_B_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_c_b_cl, IMAGE_GATHER4_C_B_CL_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_c_lz, IMAGE_GATHER4_C_LZ_V4_V4, v4i32>; + +def : SampleRawPattern<int_SI_gather4_o, IMAGE_GATHER4_O_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_cl_o, IMAGE_GATHER4_CL_O_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_l_o, IMAGE_GATHER4_L_O_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_b_o, IMAGE_GATHER4_B_O_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_b_cl_o, 
IMAGE_GATHER4_B_CL_O_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_lz_o, IMAGE_GATHER4_LZ_O_V4_V4, v4i32>; + +def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_c_o, IMAGE_GATHER4_C_O_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_c_cl_o, IMAGE_GATHER4_C_CL_O_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_c_l_o, IMAGE_GATHER4_C_L_O_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_c_b_o, IMAGE_GATHER4_C_B_O_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_c_b_cl_o, IMAGE_GATHER4_C_B_CL_O_V4_V8, v8i32>; +def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V4, v4i32>; +def : SampleRawPattern<int_SI_gather4_c_lz_o, IMAGE_GATHER4_C_LZ_O_V4_V8, v8i32>; + +def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V1, i32>; +def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V2, v2i32>; +def : SampleRawPattern<int_SI_getlod, IMAGE_GET_LOD_V4_V4, v4i32>; + /* SIsample for simple 1D texture lookup */ def : Pat < (SIsample i32:$addr, v32i8:$rsrc, v4i32:$sampler, imm), @@ -1864,7 +2120,10 @@ def : BitConvert <v2f32, v2i32, VReg_64>; def : BitConvert <v2i32, v2f32, VReg_64>; def : BitConvert <v2i32, i64, VReg_64>; def : BitConvert <i64, v2i32, VReg_64>; - +def : BitConvert <v2f32, i64, VReg_64>; +def : BitConvert <i64, v2f32, VReg_64>; +def : BitConvert <v2i32, f64, VReg_64>; +def : BitConvert <f64, v2i32, VReg_64>; def : BitConvert <v4f32, v4i32, VReg_128>; def : BitConvert <v4i32, v4f32, VReg_128>; @@ -1894,7 +2153,7 @@ def FCLAMP_SI : AMDGPUShaderInst < } def : Pat < - (int_AMDIL_clamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)), + (AMDGPUclamp f32:$src, (f32 FP_ZERO), (f32 FP_ONE)), (FCLAMP_SI f32:$src) >; @@ -2106,7 +2365,7 @@ def : Pat < (V_MUL_HI_I32 $src0, $src1, (i32 0)) >; -defm : BFIPatterns <V_BFI_B32>; +defm : BFIPatterns <V_BFI_B32, S_MOV_B32>; def : ROTRPattern <V_ALIGNBIT_B32>; /********** ======================= **********/ @@ -2130,7 +2389,7 @@ defm : DSReadPat <DS_READ_U8, i32, az_extloadi8_local>; defm : DSReadPat <DS_READ_I16, i32, sextloadi16_local>; defm : DSReadPat <DS_READ_U16, i32, az_extloadi16_local>; defm : DSReadPat <DS_READ_B32, i32, local_load>; -defm : DSReadPat <DS_READ_B64, i64, local_load>; +defm : DSReadPat <DS_READ_B64, v2i32, local_load>; multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> { def : Pat < @@ -2139,48 +2398,109 @@ multiclass DSWritePat <DS inst, ValueType vt, PatFrag frag> { >; def : Pat < - (frag vt:$src1, i32:$src0), - (inst 0, $src0, $src1, 0) + (frag vt:$val, i32:$ptr), + (inst 0, $ptr, $val, 0) >; } defm : DSWritePat <DS_WRITE_B8, i32, truncstorei8_local>; defm : DSWritePat <DS_WRITE_B16, i32, truncstorei16_local>; defm : DSWritePat <DS_WRITE_B32, i32, local_store>; -defm : DSWritePat <DS_WRITE_B64, i64, local_store>; +defm : DSWritePat <DS_WRITE_B64, v2i32, local_store>; -def : Pat <(atomic_load_add_local i32:$ptr, i32:$val), - (DS_ADD_U32_RTN 0, $ptr, $val, 0)>; - -def : Pat <(atomic_load_sub_local i32:$ptr, i32:$val), - (DS_SUB_U32_RTN 0, $ptr, $val, 0)>; +multiclass DSAtomicRetPat<DS inst, ValueType vt, PatFrag frag> { + def : Pat < + (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$value), + (inst (i1 0), $ptr, $value, (as_i16imm $offset)) + >; -//===----------------------------------------------------------------------===// -// MUBUF Patterns -//===----------------------------------------------------------------------===// + def : Pat < + (frag i32:$ptr, vt:$val), + (inst 0, $ptr, $val, 0) + >; +} -multiclass 
MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt, - PatFrag global_ld, PatFrag constant_ld> { +// Special case of DSAtomicRetPat for add / sub 1 -> inc / dec +// +// We need to use something for the data0, so we set a register to +// -1. For the non-rtn variants, the manual says it does +// DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1, and setting D0 to uint_max +// will always do the increment so I'm assuming it's the same. +// +// We also load this -1 with s_mov_b32 / s_mov_b64 even though this +// needs to be a VGPR. The SGPR copy pass will fix this, and it's +// easier since there is no v_mov_b64. +multiclass DSAtomicIncRetPat<DS inst, ValueType vt, + Instruction LoadImm, PatFrag frag> { def : Pat < - (vt (global_ld (mubuf_vaddr_offset i64:$ptr, i64:$offset, IMM12bit:$imm_offset))), - (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, (as_i16imm $imm_offset)) + (frag (add i32:$ptr, (i32 IMM16bit:$offset)), (vt 1)), + (inst (i1 0), $ptr, (LoadImm (vt -1)), (as_i16imm $offset)) >; def : Pat < - (vt (global_ld (add i64:$ptr, (i64 IMM12bit:$offset)))), - (Instr_ADDR64 (SI_ADDR64_RSRC (i64 0)), $ptr, (as_i16imm $offset)) + (frag i32:$ptr, (vt 1)), + (inst 0, $ptr, (LoadImm (vt -1)), 0) >; +} +multiclass DSAtomicCmpXChg <DS inst, ValueType vt, PatFrag frag> { def : Pat < - (vt (global_ld i64:$ptr)), - (Instr_ADDR64 (SI_ADDR64_RSRC (i64 0)), $ptr, 0) + (frag (add i32:$ptr, (i32 IMM16bit:$offset)), vt:$cmp, vt:$swap), + (inst (i1 0), $ptr, $cmp, $swap, (as_i16imm $offset)) >; def : Pat < - (vt (global_ld (add i64:$ptr, i64:$offset))), - (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0) + (frag i32:$ptr, vt:$cmp, vt:$swap), + (inst 0, $ptr, $cmp, $swap, 0) >; +} + + +// 32-bit atomics. +defm : DSAtomicIncRetPat<DS_INC_RTN_U32, i32, + S_MOV_B32, atomic_load_add_local>; +defm : DSAtomicIncRetPat<DS_DEC_RTN_U32, i32, + S_MOV_B32, atomic_load_sub_local>; + +defm : DSAtomicRetPat<DS_WRXCHG_RTN_B32, i32, atomic_swap_local>; +defm : DSAtomicRetPat<DS_ADD_RTN_U32, i32, atomic_load_add_local>; +defm : DSAtomicRetPat<DS_SUB_RTN_U32, i32, atomic_load_sub_local>; +defm : DSAtomicRetPat<DS_AND_RTN_B32, i32, atomic_load_and_local>; +defm : DSAtomicRetPat<DS_OR_RTN_B32, i32, atomic_load_or_local>; +defm : DSAtomicRetPat<DS_XOR_RTN_B32, i32, atomic_load_xor_local>; +defm : DSAtomicRetPat<DS_MIN_RTN_I32, i32, atomic_load_min_local>; +defm : DSAtomicRetPat<DS_MAX_RTN_I32, i32, atomic_load_max_local>; +defm : DSAtomicRetPat<DS_MIN_RTN_U32, i32, atomic_load_umin_local>; +defm : DSAtomicRetPat<DS_MAX_RTN_U32, i32, atomic_load_umax_local>; + +defm : DSAtomicCmpXChg<DS_CMPST_RTN_B32, i32, atomic_cmp_swap_32_local>; + +// 64-bit atomics. 
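// Spelling out why D0 = uint_max is safe for the inc/dec patterns above and
// below: with D0 = 0xffffffff, the compare in
//   DS[A] = (DS[A] >= D0) ? 0 : DS[A] + 1
// only takes the wrap-to-zero branch when DS[A] is itself 0xffffffff, and in
// that one case 0 is exactly what DS[A] + 1 would wrap to anyway, so the
// result equals an unconditional 32-bit add of 1; the decrement form
// presumably wraps 0 back to D0 = 0xffffffff in the same way.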
+defm : DSAtomicIncRetPat<DS_INC_RTN_U64, i64, + S_MOV_B64, atomic_load_add_local>; +defm : DSAtomicIncRetPat<DS_DEC_RTN_U64, i64, + S_MOV_B64, atomic_load_sub_local>; + +defm : DSAtomicRetPat<DS_WRXCHG_RTN_B64, i64, atomic_swap_local>; +defm : DSAtomicRetPat<DS_ADD_RTN_U64, i64, atomic_load_add_local>; +defm : DSAtomicRetPat<DS_SUB_RTN_U64, i64, atomic_load_sub_local>; +defm : DSAtomicRetPat<DS_AND_RTN_B64, i64, atomic_load_and_local>; +defm : DSAtomicRetPat<DS_OR_RTN_B64, i64, atomic_load_or_local>; +defm : DSAtomicRetPat<DS_XOR_RTN_B64, i64, atomic_load_xor_local>; +defm : DSAtomicRetPat<DS_MIN_RTN_I64, i64, atomic_load_min_local>; +defm : DSAtomicRetPat<DS_MAX_RTN_I64, i64, atomic_load_max_local>; +defm : DSAtomicRetPat<DS_MIN_RTN_U64, i64, atomic_load_umin_local>; +defm : DSAtomicRetPat<DS_MAX_RTN_U64, i64, atomic_load_umax_local>; + +defm : DSAtomicCmpXChg<DS_CMPST_RTN_B64, i64, atomic_cmp_swap_64_local>; + +//===----------------------------------------------------------------------===// +// MUBUF Patterns +//===----------------------------------------------------------------------===// + +multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt, + PatFrag constant_ld> { def : Pat < (vt (constant_ld (add i64:$ptr, i64:$offset))), (Instr_ADDR64 (SI_ADDR64_RSRC $ptr), $offset, 0) @@ -2188,53 +2508,19 @@ multiclass MUBUFLoad_Pattern <MUBUF Instr_ADDR64, ValueType vt, } defm : MUBUFLoad_Pattern <BUFFER_LOAD_SBYTE_ADDR64, i32, - sextloadi8_global, sextloadi8_constant>; + sextloadi8_constant>; defm : MUBUFLoad_Pattern <BUFFER_LOAD_UBYTE_ADDR64, i32, - az_extloadi8_global, az_extloadi8_constant>; + az_extloadi8_constant>; defm : MUBUFLoad_Pattern <BUFFER_LOAD_SSHORT_ADDR64, i32, - sextloadi16_global, sextloadi16_constant>; + sextloadi16_constant>; defm : MUBUFLoad_Pattern <BUFFER_LOAD_USHORT_ADDR64, i32, - az_extloadi16_global, az_extloadi16_constant>; + az_extloadi16_constant>; defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORD_ADDR64, i32, - global_load, constant_load>; -defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64, - global_load, constant_load>; -defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, i64, - az_extloadi32_global, az_extloadi32_constant>; + constant_load>; defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX2_ADDR64, v2i32, - global_load, constant_load>; + constant_load>; defm : MUBUFLoad_Pattern <BUFFER_LOAD_DWORDX4_ADDR64, v4i32, - global_load, constant_load>; - -multiclass MUBUFStore_Pattern <MUBUF Instr, ValueType vt, PatFrag st> { - - def : Pat < - (st vt:$value, (mubuf_vaddr_offset i64:$ptr, i64:$offset, IMM12bit:$imm_offset)), - (Instr $value, (SI_ADDR64_RSRC $ptr), $offset, (as_i16imm $imm_offset)) - >; - - def : Pat < - (st vt:$value, (add i64:$ptr, IMM12bit:$offset)), - (Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, (as_i16imm $offset)) - >; - - def : Pat < - (st vt:$value, i64:$ptr), - (Instr $value, (SI_ADDR64_RSRC (i64 0)), $ptr, 0) - >; - - def : Pat < - (st vt:$value, (add i64:$ptr, i64:$offset)), - (Instr $value, (SI_ADDR64_RSRC $ptr), $offset, 0) - >; -} - -defm : MUBUFStore_Pattern <BUFFER_STORE_BYTE, i32, truncstorei8_global>; -defm : MUBUFStore_Pattern <BUFFER_STORE_SHORT, i32, truncstorei16_global>; -defm : MUBUFStore_Pattern <BUFFER_STORE_DWORD, i32, global_store>; -defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, i64, global_store>; -defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX2, v2i32, global_store>; -defm : MUBUFStore_Pattern <BUFFER_STORE_DWORDX4, v4i32, global_store>; + constant_load>; // BUFFER_LOAD_DWORD*, addr64=0 multiclass 
MUBUF_Load_Dword <ValueType vt, MUBUF offset, MUBUF offen, MUBUF idxen, @@ -2301,7 +2587,7 @@ def : MTBUF_StoreResource <v2i32, 2, TBUFFER_STORE_FORMAT_XY>; def : MTBUF_StoreResource <v4i32, 3, TBUFFER_STORE_FORMAT_XYZ>; def : MTBUF_StoreResource <v4i32, 4, TBUFFER_STORE_FORMAT_XYZW>; -let Predicates = [isCI] in { +let SubtargetPredicate = isCI in { // Sea Islands new arithmetic instructions let neverHasSideEffects = 1 in { @@ -2348,7 +2634,7 @@ def V_MAD_I64_I32 : VOP3_64 <0x00000177, "V_MAD_I64_I32", []>; // BUFFER_LOAD_DWORDX3 // BUFFER_STORE_DWORDX3 -} // End Predicates = [isCI] +} // End isCI /********** ====================== **********/ @@ -2360,13 +2646,13 @@ multiclass SI_INDIRECT_Pattern <ValueType vt, ValueType eltvt, SI_INDIRECT_DST I // 1. Extract with offset def : Pat< (vector_extract vt:$vec, (add i32:$idx, imm:$off)), - (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off)) + (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, imm:$off)) >; // 2. Extract without offset def : Pat< (vector_extract vt:$vec, i32:$idx), - (f32 (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0)) + (eltvt (SI_INDIRECT_SRC (IMPLICIT_DEF), $vec, $idx, 0)) >; // 3. Insert with offset @@ -2392,20 +2678,6 @@ defm : SI_INDIRECT_Pattern <v4i32, i32, SI_INDIRECT_DST_V4>; defm : SI_INDIRECT_Pattern <v8i32, i32, SI_INDIRECT_DST_V8>; defm : SI_INDIRECT_Pattern <v16i32, i32, SI_INDIRECT_DST_V16>; -/********** =============== **********/ -/********** Conditions **********/ -/********** =============== **********/ - -def : Pat< - (i1 (setcc f32:$src0, f32:$src1, SETO)), - (V_CMP_O_F32_e64 $src0, $src1) ->; - -def : Pat< - (i1 (setcc f32:$src0, f32:$src1, SETUO)), - (V_CMP_U_F32_e64 $src0, $src1) ->; - //===----------------------------------------------------------------------===// // Conversion Patterns //===----------------------------------------------------------------------===// @@ -2439,6 +2711,62 @@ def : Pat < (S_MOV_B32 -1), sub1) >; +class ZExt_i64_i32_Pat <SDNode ext> : Pat < + (i64 (ext i32:$src)), + (INSERT_SUBREG (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0), + (S_MOV_B32 0), sub1) +>; + +class ZExt_i64_i1_Pat <SDNode ext> : Pat < + (i64 (ext i1:$src)), + (INSERT_SUBREG + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), + (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src), sub0), + (S_MOV_B32 0), sub1) +>; + + +def : ZExt_i64_i32_Pat<zext>; +def : ZExt_i64_i32_Pat<anyext>; +def : ZExt_i64_i1_Pat<zext>; +def : ZExt_i64_i1_Pat<anyext>; + +def : Pat < + (i64 (sext i32:$src)), + (INSERT_SUBREG + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), $src, sub0), + (S_ASHR_I32 $src, 31), sub1) +>; + +def : Pat < + (i64 (sext i1:$src)), + (INSERT_SUBREG + (INSERT_SUBREG + (i64 (IMPLICIT_DEF)), + (V_CNDMASK_B32_e64 0, -1, $src), sub0), + (V_CNDMASK_B32_e64 0, -1, $src), sub1) +>; + +def : Pat < + (f32 (sint_to_fp i1:$src)), + (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src) +>; + +def : Pat < + (f32 (uint_to_fp i1:$src)), + (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_ONE, $src) +>; + +def : Pat < + (f64 (sint_to_fp i1:$src)), + (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)) +>; + +def : Pat < + (f64 (uint_to_fp i1:$src)), + (V_CVT_F64_U32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 1), $src)) +>; + //===----------------------------------------------------------------------===// // Miscellaneous Patterns //===----------------------------------------------------------------------===// diff --git a/lib/Target/R600/SIIntrinsics.td b/lib/Target/R600/SIIntrinsics.td index 00e32c0..df690a4 100644 --- a/lib/Target/R600/SIIntrinsics.td
+++ b/lib/Target/R600/SIIntrinsics.td @@ -56,11 +56,61 @@ let TargetPrefix = "SI", isTarget = 1 in { class Sample : Intrinsic <[llvm_v4f32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_anyint_ty, llvm_i32_ty], [IntrNoMem]>; + // Fully-flexible SAMPLE instruction. + class SampleRaw : Intrinsic < + [llvm_v4f32_ty], // vdata(VGPR) + [llvm_anyint_ty, // vaddr(VGPR) + llvm_v32i8_ty, // rsrc(SGPR) + llvm_v16i8_ty, // sampler(SGPR) + llvm_i32_ty, // dmask(imm) + llvm_i32_ty, // unorm(imm) + llvm_i32_ty, // r128(imm) + llvm_i32_ty, // da(imm) + llvm_i32_ty, // glc(imm) + llvm_i32_ty, // slc(imm) + llvm_i32_ty, // tfe(imm) + llvm_i32_ty], // lwe(imm) + [IntrNoMem]>; + def int_SI_sample : Sample; def int_SI_sampleb : Sample; def int_SI_sampled : Sample; def int_SI_samplel : Sample; + // Basic gather4 + def int_SI_gather4 : SampleRaw; + def int_SI_gather4_cl : SampleRaw; + def int_SI_gather4_l : SampleRaw; + def int_SI_gather4_b : SampleRaw; + def int_SI_gather4_b_cl : SampleRaw; + def int_SI_gather4_lz : SampleRaw; + + // Gather4 with comparison + def int_SI_gather4_c : SampleRaw; + def int_SI_gather4_c_cl : SampleRaw; + def int_SI_gather4_c_l : SampleRaw; + def int_SI_gather4_c_b : SampleRaw; + def int_SI_gather4_c_b_cl : SampleRaw; + def int_SI_gather4_c_lz : SampleRaw; + + // Gather4 with offsets + def int_SI_gather4_o : SampleRaw; + def int_SI_gather4_cl_o : SampleRaw; + def int_SI_gather4_l_o : SampleRaw; + def int_SI_gather4_b_o : SampleRaw; + def int_SI_gather4_b_cl_o : SampleRaw; + def int_SI_gather4_lz_o : SampleRaw; + + // Gather4 with comparison and offsets + def int_SI_gather4_c_o : SampleRaw; + def int_SI_gather4_c_cl_o : SampleRaw; + def int_SI_gather4_c_l_o : SampleRaw; + def int_SI_gather4_c_b_o : SampleRaw; + def int_SI_gather4_c_b_cl_o : SampleRaw; + def int_SI_gather4_c_lz_o : SampleRaw; + + def int_SI_getlod : SampleRaw; + def int_SI_imageload : Intrinsic <[llvm_v4i32_ty], [llvm_anyvector_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_SI_resinfo : Intrinsic <[llvm_v4i32_ty], [llvm_i32_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/lib/Target/R600/SILowerControlFlow.cpp b/lib/Target/R600/SILowerControlFlow.cpp index 6601f2a..9f5ff29 100644 --- a/lib/Target/R600/SILowerControlFlow.cpp +++ b/lib/Target/R600/SILowerControlFlow.cpp @@ -86,6 +86,7 @@ private: void Kill(MachineInstr &MI); void Branch(MachineInstr &MI); + void InitM0ForLDS(MachineBasicBlock::iterator MI); void LoadM0(MachineInstr &MI, MachineInstr *MovRel); void IndirectSrc(MachineInstr &MI); void IndirectDst(MachineInstr &MI); @@ -320,6 +321,14 @@ void SILowerControlFlowPass::Kill(MachineInstr &MI) { MI.eraseFromParent(); } +/// The m0 register stores the maximum allowable address for LDS reads and +/// writes. Its value must be at least the size in bytes of LDS allocated by +/// the shader. For simplicity, we set it to the maximum possible value. 
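/// For intuition, the assumed clamp behaves roughly like
///   if (lds_byte_address > m0) /* the access is discarded */;
/// so m0 = 0xffffffff turns the clamp into a no-op for any in-range address.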
+void SILowerControlFlowPass::InitM0ForLDS(MachineBasicBlock::iterator MI) { + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), TII->get(AMDGPU::S_MOV_B32), + AMDGPU::M0).addImm(0xffffffff); +} + void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { MachineBasicBlock &MBB = *MI.getParent(); @@ -333,52 +342,57 @@ void SILowerControlFlowPass::LoadM0(MachineInstr &MI, MachineInstr *MovRel) { BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) .addReg(Idx); MBB.insert(I, MovRel); - MI.eraseFromParent(); - return; - } + } else { - assert(AMDGPU::SReg_64RegClass.contains(Save)); - assert(AMDGPU::VReg_32RegClass.contains(Idx)); + assert(AMDGPU::SReg_64RegClass.contains(Save)); + assert(AMDGPU::VReg_32RegClass.contains(Idx)); - // Save the EXEC mask - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) - .addReg(AMDGPU::EXEC); + // Save the EXEC mask + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), Save) + .addReg(AMDGPU::EXEC); - // Read the next variant into VCC (lower 32 bits) <- also loop target - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), - AMDGPU::VCC_LO) - .addReg(Idx); + // Read the next variant into VCC (lower 32 bits) <- also loop target + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), + AMDGPU::VCC_LO) + .addReg(Idx); - // Move index from VCC into M0 - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) - .addReg(AMDGPU::VCC_LO); + // Move index from VCC into M0 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B32), AMDGPU::M0) + .addReg(AMDGPU::VCC_LO); - // Compare the just read M0 value to all possible Idx values - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) - .addReg(AMDGPU::M0) - .addReg(Idx); + // Compare the just read M0 value to all possible Idx values + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CMP_EQ_U32_e32), AMDGPU::VCC) + .addReg(AMDGPU::M0) + .addReg(Idx); - // Update EXEC, save the original EXEC value to VCC - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) - .addReg(AMDGPU::VCC); + // Update EXEC, save the original EXEC value to VCC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_AND_SAVEEXEC_B64), AMDGPU::VCC) + .addReg(AMDGPU::VCC); - // Do the actual move - MBB.insert(I, MovRel); + // Do the actual move + MBB.insert(I, MovRel); - // Update EXEC, switch all done bits to 0 and all todo bits to 1 - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) - .addReg(AMDGPU::EXEC) - .addReg(AMDGPU::VCC); + // Update EXEC, switch all done bits to 0 and all todo bits to 1 + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) + .addReg(AMDGPU::EXEC) + .addReg(AMDGPU::VCC); - // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) - .addImm(-7) - .addReg(AMDGPU::EXEC); + // Loop back to V_READFIRSTLANE_B32 if there are still variants to cover + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_CBRANCH_EXECNZ)) + .addImm(-7) + .addReg(AMDGPU::EXEC); - // Restore EXEC - BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) - .addReg(Save); + // Restore EXEC + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::S_MOV_B64), AMDGPU::EXEC) + .addReg(Save); + } + // FIXME: Are there any values other than the LDS address clamp that need to + // be stored in the m0 register and may be live for more than a few + // instructions? If so, we should save the m0 register at the beginning + // of this function and restore it here. + // FIXME: Add support for LDS direct loads. 
+ InitM0ForLDS(&MI); MI.eraseFromParent(); } @@ -523,8 +537,7 @@ bool SILowerControlFlowPass::runOnMachineFunction(MachineFunction &MF) { MachineBasicBlock &MBB = MF.front(); // Initialize M0 to a value that won't cause LDS access to be discarded // due to offset clamping - BuildMI(MBB, MBB.getFirstNonPHI(), DebugLoc(), TII->get(AMDGPU::S_MOV_B32), - AMDGPU::M0).addImm(0xffffffff); + InitM0ForLDS(MBB.getFirstNonPHI()); } if (NeedWQM && MFI->ShaderType == ShaderType::PIXEL) { diff --git a/lib/Target/R600/SIMachineFunctionInfo.cpp b/lib/Target/R600/SIMachineFunctionInfo.cpp index af60995..e2df950 100644 --- a/lib/Target/R600/SIMachineFunctionInfo.cpp +++ b/lib/Target/R600/SIMachineFunctionInfo.cpp @@ -62,8 +62,10 @@ static unsigned createLaneVGPR(MachineRegisterInfo &MRI, MachineFunction *MF) { return VGPR; } } - MF->getFunction()->getContext().emitError( - "Could not found S_ENGPGM instrtuction."); + + LLVMContext &Ctx = MF->getFunction()->getContext(); + Ctx.emitError("Could not find S_ENDPGM instruction."); + return VGPR; } diff --git a/lib/Target/R600/SIRegisterInfo.cpp b/lib/Target/R600/SIRegisterInfo.cpp index c72d549..d0b677a 100644 --- a/lib/Target/R600/SIRegisterInfo.cpp +++ b/lib/Target/R600/SIRegisterInfo.cpp @@ -14,21 +14,20 @@ #include "SIRegisterInfo.h" -#include "AMDGPUTargetMachine.h" +#include "AMDGPUSubtarget.h" #include "SIInstrInfo.h" using namespace llvm; -SIRegisterInfo::SIRegisterInfo(AMDGPUTargetMachine &tm) -: AMDGPURegisterInfo(tm), - TM(tm) +SIRegisterInfo::SIRegisterInfo(const AMDGPUSubtarget &st) +: AMDGPURegisterInfo(st) { } BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); Reserved.set(AMDGPU::EXEC); Reserved.set(AMDGPU::INDIRECT_BASE_ADDR); - const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(TM.getInstrInfo()); + const SIInstrInfo *TII = static_cast<const SIInstrInfo*>(ST.getInstrInfo()); TII->reserveIndirectRegisters(Reserved, MF); return Reserved; } @@ -38,15 +37,6 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, return RC->getNumRegs(); } -const TargetRegisterClass * -SIRegisterInfo::getISARegClass(const TargetRegisterClass * rc) const { - switch (rc->getID()) { - case AMDGPU::GPRF32RegClassID: - return &AMDGPU::VReg_32RegClass; - default: return rc; - } -} - const TargetRegisterClass * SIRegisterInfo::getCFGStructurizerRegClass( MVT VT) const { switch(VT.SimpleTy) { @@ -135,3 +125,19 @@ unsigned SIRegisterInfo::getPhysRegSubReg(unsigned Reg, unsigned Index = getHWRegIndex(Reg); return SubRC->getRegister(Index + Channel); } + +bool SIRegisterInfo::regClassCanUseImmediate(int RCID) const { + switch (RCID) { + default: return false; + case AMDGPU::SSrc_32RegClassID: + case AMDGPU::SSrc_64RegClassID: + case AMDGPU::VSrc_32RegClassID: + case AMDGPU::VSrc_64RegClassID: + return true; + } +} + +bool SIRegisterInfo::regClassCanUseImmediate( + const TargetRegisterClass *RC) const { + return regClassCanUseImmediate(RC->getID()); +} diff --git a/lib/Target/R600/SIRegisterInfo.h b/lib/Target/R600/SIRegisterInfo.h index 36b4fcd..c9305fb 100644 --- a/lib/Target/R600/SIRegisterInfo.h +++ b/lib/Target/R600/SIRegisterInfo.h @@ -20,24 +20,15 @@ namespace llvm { -class AMDGPUTargetMachine; - struct SIRegisterInfo : public AMDGPURegisterInfo { - AMDGPUTargetMachine &TM; - SIRegisterInfo(AMDGPUTargetMachine &tm); + SIRegisterInfo(const AMDGPUSubtarget &st); BitVector getReservedRegs(const MachineFunction &MF) const override; unsigned getRegPressureLimit(const TargetRegisterClass 
*RC, MachineFunction &MF) const override; - /// \param RC is an AMDIL reg class. - /// - /// \returns the SI register class that is equivalent to \p RC. - const TargetRegisterClass * - getISARegClass(const TargetRegisterClass *RC) const override; - /// \brief get the register class of the specified type to use in the /// CFGStructurizer const TargetRegisterClass * getCFGStructurizerRegClass(MVT VT) const override; @@ -69,6 +60,14 @@ struct SIRegisterInfo : public AMDGPURegisterInfo { /// \returns The sub-register of Reg that is in Channel. unsigned getPhysRegSubReg(unsigned Reg, const TargetRegisterClass *SubRC, unsigned Channel) const; + + /// \returns True if operands defined with this register class can accept + /// inline immediates. + bool regClassCanUseImmediate(int RCID) const; + + /// \returns True if operands defined with this register class can accept + /// inline immediates. + bool regClassCanUseImmediate(const TargetRegisterClass *RC) const; }; } // End namespace llvm diff --git a/lib/Target/R600/SIRegisterInfo.td b/lib/Target/R600/SIRegisterInfo.td index f1f01de..8974b63 100644 --- a/lib/Target/R600/SIRegisterInfo.td +++ b/lib/Target/R600/SIRegisterInfo.td @@ -168,7 +168,7 @@ def SReg_64 : RegisterClass<"AMDGPU", [v2i32, i64, i1], 64, (add SGPR_64Regs, VCCReg, EXECReg) >; -def SReg_128 : RegisterClass<"AMDGPU", [v4i32], 128, (add SGPR_128)>; +def SReg_128 : RegisterClass<"AMDGPU", [v4i32, v16i8], 128, (add SGPR_128)>; def SReg_256 : RegisterClass<"AMDGPU", [v32i8, v8i32, v8f32], 256, (add SGPR_256)>; diff --git a/lib/Target/R600/SITypeRewriter.cpp b/lib/Target/R600/SITypeRewriter.cpp index a0b6907..367963a 100644 --- a/lib/Target/R600/SITypeRewriter.cpp +++ b/lib/Target/R600/SITypeRewriter.cpp @@ -119,8 +119,7 @@ void SITypeRewriter::visitCallInst(CallInst &I) { Type::getInt32Ty(I.getContext())){ Type *ElementTy = Arg->getType()->getVectorElementType(); std::string TypeName = "i32"; - InsertElementInst *Def = dyn_cast<InsertElementInst>(Arg); - assert(Def); + InsertElementInst *Def = cast<InsertElementInst>(Arg); Args.push_back(Def->getOperand(1)); Types.push_back(ElementTy); std::string VecTypeName = "v1" + TypeName; diff --git a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp index da88820..9df0054 100644 --- a/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp +++ b/lib/Target/Sparc/AsmParser/SparcAsmParser.cpp @@ -47,31 +47,27 @@ class SparcAsmParser : public MCTargetAsmParser { // public interface of the MCTargetAsmParser. bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, + OperandVector &Operands, MCStreamer &Out, + unsigned &ErrorInfo, bool MatchingInlineAsm) override; bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) override; + SMLoc NameLoc, OperandVector &Operands) override; bool ParseDirective(AsmToken DirectiveID) override; - unsigned validateTargetOperandClass(MCParsedAsmOperand *Op, + unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, unsigned Kind) override; // Custom parse functions for Sparc specific operands. 
- OperandMatchResultTy - parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + OperandMatchResultTy parseMEMOperand(OperandVector &Operands); - OperandMatchResultTy - parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Name); + OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Name); OperandMatchResultTy - parseSparcAsmOperand(SparcOperand *&Operand, bool isCall = false); + parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Operand, + bool isCall = false); - OperandMatchResultTy - parseBranchModifiers(SmallVectorImpl<MCParsedAsmOperand*> &Operands); + OperandMatchResultTy parseBranchModifiers(OperandVector &Operands); // returns true if Tok is matched to a register and returns register in RegNo. bool matchRegisterName(const AsmToken &Tok, unsigned &RegNo, @@ -153,8 +149,6 @@ private: SMLoc StartLoc, EndLoc; - SparcOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} - struct Token { const char *Data; unsigned Length; @@ -182,6 +176,8 @@ private: struct MemOp Mem; }; public: + SparcOperand(KindTy K) : MCParsedAsmOperand(), Kind(K) {} + bool isToken() const override { return Kind == k_Token; } bool isReg() const override { return Kind == k_Register; } bool isImm() const override { return Kind == k_Immediate; } @@ -291,8 +287,8 @@ public: addExpr(Inst, Expr); } - static SparcOperand *CreateToken(StringRef Str, SMLoc S) { - SparcOperand *Op = new SparcOperand(k_Token); + static std::unique_ptr<SparcOperand> CreateToken(StringRef Str, SMLoc S) { + auto Op = make_unique<SparcOperand>(k_Token); Op->Tok.Data = Str.data(); Op->Tok.Length = Str.size(); Op->StartLoc = S; @@ -300,10 +296,9 @@ public: return Op; } - static SparcOperand *CreateReg(unsigned RegNum, - unsigned Kind, - SMLoc S, SMLoc E) { - SparcOperand *Op = new SparcOperand(k_Register); + static std::unique_ptr<SparcOperand> CreateReg(unsigned RegNum, unsigned Kind, + SMLoc S, SMLoc E) { + auto Op = make_unique<SparcOperand>(k_Register); Op->Reg.RegNum = RegNum; Op->Reg.Kind = (SparcOperand::RegisterKind)Kind; Op->StartLoc = S; @@ -311,49 +306,51 @@ public: return Op; } - static SparcOperand *CreateImm(const MCExpr *Val, SMLoc S, SMLoc E) { - SparcOperand *Op = new SparcOperand(k_Immediate); + static std::unique_ptr<SparcOperand> CreateImm(const MCExpr *Val, SMLoc S, + SMLoc E) { + auto Op = make_unique<SparcOperand>(k_Immediate); Op->Imm.Val = Val; Op->StartLoc = S; Op->EndLoc = E; return Op; } - static SparcOperand *MorphToDoubleReg(SparcOperand *Op) { - unsigned Reg = Op->getReg(); - assert(Op->Reg.Kind == rk_FloatReg); + static bool MorphToDoubleReg(SparcOperand &Op) { + unsigned Reg = Op.getReg(); + assert(Op.Reg.Kind == rk_FloatReg); unsigned regIdx = Reg - Sparc::F0; if (regIdx % 2 || regIdx > 31) - return nullptr; - Op->Reg.RegNum = DoubleRegs[regIdx / 2]; - Op->Reg.Kind = rk_DoubleReg; - return Op; + return false; + Op.Reg.RegNum = DoubleRegs[regIdx / 2]; + Op.Reg.Kind = rk_DoubleReg; + return true; } - static SparcOperand *MorphToQuadReg(SparcOperand *Op) { - unsigned Reg = Op->getReg(); + static bool MorphToQuadReg(SparcOperand &Op) { + unsigned Reg = Op.getReg(); unsigned regIdx = 0; - switch (Op->Reg.Kind) { - default: assert(0 && "Unexpected register kind!"); + switch (Op.Reg.Kind) { + default: llvm_unreachable("Unexpected register kind!"); case rk_FloatReg: regIdx = Reg - Sparc::F0; if (regIdx % 4 || regIdx > 31) - return nullptr; + return false; Reg = QuadFPRegs[regIdx / 4]; break; case rk_DoubleReg: regIdx = Reg - Sparc::D0; if (regIdx % 2 || regIdx > 31) - return 
nullptr; + return false; Reg = QuadFPRegs[regIdx / 2]; break; } - Op->Reg.RegNum = Reg; - Op->Reg.Kind = rk_QuadReg; - return Op; + Op.Reg.RegNum = Reg; + Op.Reg.Kind = rk_QuadReg; + return true; } - static SparcOperand *MorphToMEMrr(unsigned Base, SparcOperand *Op) { + static std::unique_ptr<SparcOperand> + MorphToMEMrr(unsigned Base, std::unique_ptr<SparcOperand> Op) { unsigned offsetReg = Op->getReg(); Op->Kind = k_MemoryReg; Op->Mem.Base = Base; @@ -362,10 +359,9 @@ public: return Op; } - static SparcOperand *CreateMEMri(unsigned Base, - const MCExpr *Off, - SMLoc S, SMLoc E) { - SparcOperand *Op = new SparcOperand(k_MemoryImm); + static std::unique_ptr<SparcOperand> + CreateMEMri(unsigned Base, const MCExpr *Off, SMLoc S, SMLoc E) { + auto Op = make_unique<SparcOperand>(k_MemoryImm); Op->Mem.Base = Base; Op->Mem.OffsetReg = 0; Op->Mem.Off = Off; @@ -374,7 +370,8 @@ public: return Op; } - static SparcOperand *MorphToMEMri(unsigned Base, SparcOperand *Op) { + static std::unique_ptr<SparcOperand> + MorphToMEMri(unsigned Base, std::unique_ptr<SparcOperand> Op) { const MCExpr *Imm = Op->getImm(); Op->Kind = k_MemoryImm; Op->Mem.Base = Base; @@ -386,11 +383,11 @@ public: } // end namespace -bool SparcAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) { +bool SparcAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + unsigned &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; SmallVector<MCInst, 8> Instructions; unsigned MatchResult = MatchInstructionImpl(Operands, Inst, ErrorInfo, @@ -415,7 +412,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((SparcOperand*) Operands[ErrorInfo])->getStartLoc(); + ErrorLoc = ((SparcOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } @@ -450,11 +447,9 @@ ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) static void applyMnemonicAliases(StringRef &Mnemonic, unsigned Features, unsigned VariantID); -bool SparcAsmParser:: -ParseInstruction(ParseInstructionInfo &Info, StringRef Name, - SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) -{ +bool SparcAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { // First operand in MCInst is instruction mnemonic. Operands.push_back(SparcOperand::CreateToken(Name, NameLoc)); @@ -548,9 +543,8 @@ bool SparcAsmParser:: parseDirectiveWord(unsigned Size, SMLoc L) { return false; } -SparcAsmParser::OperandMatchResultTy SparcAsmParser:: -parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) -{ +SparcAsmParser::OperandMatchResultTy +SparcAsmParser::parseMEMOperand(OperandVector &Operands) { SMLoc S, E; unsigned BaseReg = 0; @@ -575,23 +569,20 @@ parseMEMOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands) break; } - SparcOperand *Offset = nullptr; + std::unique_ptr<SparcOperand> Offset; OperandMatchResultTy ResTy = parseSparcAsmOperand(Offset); if (ResTy != MatchOperand_Success || !Offset) return MatchOperand_NoMatch; - Offset = (Offset->isImm() - ? SparcOperand::MorphToMEMri(BaseReg, Offset) - : SparcOperand::MorphToMEMrr(BaseReg, Offset)); + Operands.push_back( + Offset->isImm() ? 
SparcOperand::MorphToMEMri(BaseReg, std::move(Offset)) + : SparcOperand::MorphToMEMrr(BaseReg, std::move(Offset))); - Operands.push_back(Offset); return MatchOperand_Success; } -SparcAsmParser::OperandMatchResultTy SparcAsmParser:: -parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Mnemonic) -{ +SparcAsmParser::OperandMatchResultTy +SparcAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic) { OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); @@ -637,21 +628,21 @@ parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return MatchOperand_Success; } - SparcOperand *Op = nullptr; + std::unique_ptr<SparcOperand> Op; ResTy = parseSparcAsmOperand(Op, (Mnemonic == "call")); if (ResTy != MatchOperand_Success || !Op) return MatchOperand_ParseFail; // Push the parsed operand into the list of operands - Operands.push_back(Op); + Operands.push_back(std::move(Op)); return MatchOperand_Success; } SparcAsmParser::OperandMatchResultTy -SparcAsmParser::parseSparcAsmOperand(SparcOperand *&Op, bool isCall) -{ +SparcAsmParser::parseSparcAsmOperand(std::unique_ptr<SparcOperand> &Op, + bool isCall) { SMLoc S = Parser.getTok().getLoc(); SMLoc E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); @@ -718,8 +709,8 @@ SparcAsmParser::parseSparcAsmOperand(SparcOperand *&Op, bool isCall) return (Op) ? MatchOperand_Success : MatchOperand_ParseFail; } -SparcAsmParser::OperandMatchResultTy SparcAsmParser:: -parseBranchModifiers(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +SparcAsmParser::OperandMatchResultTy +SparcAsmParser::parseBranchModifiers(OperandVector &Operands) { // parse (,a|,pn|,pt)+ @@ -928,18 +919,14 @@ extern "C" void LLVMInitializeSparcAsmParser() { #define GET_MATCHER_IMPLEMENTATION #include "SparcGenAsmMatcher.inc" - - -unsigned SparcAsmParser:: -validateTargetOperandClass(MCParsedAsmOperand *GOp, - unsigned Kind) -{ - SparcOperand *Op = (SparcOperand*)GOp; - if (Op->isFloatOrDoubleReg()) { +unsigned SparcAsmParser::validateTargetOperandClass(MCParsedAsmOperand &GOp, + unsigned Kind) { + SparcOperand &Op = (SparcOperand &)GOp; + if (Op.isFloatOrDoubleReg()) { switch (Kind) { default: break; case MCK_DFPRegs: - if (!Op->isFloatReg() || SparcOperand::MorphToDoubleReg(Op)) + if (!Op.isFloatReg() || SparcOperand::MorphToDoubleReg(Op)) return MCTargetAsmParser::Match_Success; break; case MCK_QFPRegs: diff --git a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp index 261fb38..5975a51 100644 --- a/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp +++ b/lib/Target/Sparc/InstPrinter/SparcInstPrinter.cpp @@ -173,6 +173,6 @@ void SparcInstPrinter::printCCOperand(const MCInst *MI, int opNum, bool SparcInstPrinter::printGetPCX(const MCInst *MI, unsigned opNum, raw_ostream &O) { - assert(0 && "FIXME: Implement SparcInstPrinter::printGetPCX."); + llvm_unreachable("FIXME: Implement SparcInstPrinter::printGetPCX."); return true; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 7d517b6..dcd81e3 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -196,12 +196,12 @@ namespace { const MCRelaxableFragment *DF, const MCAsmLayout &Layout) const override { // FIXME. 
- assert(0 && "fixupNeedsRelaxation() unimplemented"); + llvm_unreachable("fixupNeedsRelaxation() unimplemented"); return false; } void relaxInstruction(const MCInst &Inst, MCInst &Res) const override { // FIXME. - assert(0 && "relaxInstruction() unimplemented"); + llvm_unreachable("relaxInstruction() unimplemented"); } bool writeNopData(uint64_t Count, MCObjectWriter *OW) const override { diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp index b19ad7b..eea9626 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCCodeEmitter.cpp @@ -133,7 +133,7 @@ getMachineOpValue(const MCInst &MI, const MCOperand &MO, if (Expr->EvaluateAsAbsolute(Res)) return Res; - assert(0 && "Unhandled expression!"); + llvm_unreachable("Unhandled expression!"); return 0; } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp index ae57fdc..7f01ab0 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELF.h" +#include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCSymbol.h" #include "llvm/Object/ELF.h" @@ -124,7 +125,7 @@ SparcMCExpr::VariantKind SparcMCExpr::parseVariantKind(StringRef name) Sparc::Fixups SparcMCExpr::getFixupKind(SparcMCExpr::VariantKind Kind) { switch (Kind) { - default: assert(0 && "Unhandled SparcMCExpr::VariantKind"); + default: llvm_unreachable("Unhandled SparcMCExpr::VariantKind"); case VK_Sparc_LO: return Sparc::fixup_sparc_lo10; case VK_Sparc_HI: return Sparc::fixup_sparc_hi22; case VK_Sparc_H44: return Sparc::fixup_sparc_h44; @@ -219,35 +220,6 @@ void SparcMCExpr::fixELFSymbolsInTLSFixups(MCAssembler &Asm) const { fixELFSymbolsInTLSFixupsImpl(getSubExpr(), Asm); } -// FIXME: This basically copies MCObjectStreamer::AddValueSymbols. Perhaps -// that method should be made public? -// FIXME: really do above: now that at least three other backends are using it. 
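// The replacement below, visitUsedExpr, is what resolves these FIXMEs: rather
// than each backend carrying its own copy of this walk, the target expression
// forwards to MCStreamer::visitUsedExpr, which performs the equivalent
// traversal (register the symbol of a SymbolRef, recurse into Binary/Unary
// sub-expressions) in one shared place.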
-static void AddValueSymbolsImpl(const MCExpr *Value, MCAssembler *Asm) { - switch (Value->getKind()) { - case MCExpr::Target: - llvm_unreachable("Can't handle nested target expr!"); - break; - - case MCExpr::Constant: - break; - - case MCExpr::Binary: { - const MCBinaryExpr *BE = cast<MCBinaryExpr>(Value); - AddValueSymbolsImpl(BE->getLHS(), Asm); - AddValueSymbolsImpl(BE->getRHS(), Asm); - break; - } - - case MCExpr::SymbolRef: - Asm->getOrCreateSymbolData(cast<MCSymbolRefExpr>(Value)->getSymbol()); - break; - - case MCExpr::Unary: - AddValueSymbolsImpl(cast<MCUnaryExpr>(Value)->getSubExpr(), Asm); - break; - } -} - -void SparcMCExpr::AddValueSymbols(MCAssembler *Asm) const { - AddValueSymbolsImpl(getSubExpr(), Asm); +void SparcMCExpr::visitUsedExpr(MCStreamer &Streamer) const { + Streamer.visitUsedExpr(*getSubExpr()); } diff --git a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h index 78dd945..f0d0ef3 100644 --- a/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h +++ b/lib/Target/Sparc/MCTargetDesc/SparcMCExpr.h @@ -88,7 +88,7 @@ public: void PrintImpl(raw_ostream &OS) const override; bool EvaluateAsRelocatableImpl(MCValue &Res, const MCAsmLayout *Layout) const override; - void AddValueSymbols(MCAssembler *) const override; + void visitUsedExpr(MCStreamer &Streamer) const override; const MCSection *FindAssociatedSection() const override { return getSubExpr()->FindAssociatedSection(); } diff --git a/lib/Target/Sparc/SparcFrameLowering.cpp b/lib/Target/Sparc/SparcFrameLowering.cpp index a37da94..3cdfda3 100644 --- a/lib/Target/Sparc/SparcFrameLowering.cpp +++ b/lib/Target/Sparc/SparcFrameLowering.cpp @@ -14,6 +14,7 @@ #include "SparcFrameLowering.h" #include "SparcInstrInfo.h" #include "SparcMachineFunctionInfo.h" +#include "SparcSubtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -32,6 +33,9 @@ DisableLeafProc("disable-sparc-leaf-proc", cl::desc("Disable Sparc leaf procedure optimization."), cl::Hidden); +SparcFrameLowering::SparcFrameLowering(const SparcSubtarget &ST) + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, + ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 
16 : 8) {} void SparcFrameLowering::emitSPAdjustment(MachineFunction &MF, MachineBasicBlock &MBB, @@ -99,7 +103,9 @@ void SparcFrameLowering::emitPrologue(MachineFunction &MF) const { SAVEri = SP::ADDri; SAVErr = SP::ADDrr; } - NumBytes = - SubTarget.getAdjustedFrameSize(NumBytes); + NumBytes = + -MF.getTarget().getSubtarget<SparcSubtarget>().getAdjustedFrameSize( + NumBytes); emitSPAdjustment(MF, MBB, MBBI, NumBytes, SAVErr, SAVEri); MachineModuleInfo &MMI = MF.getMMI(); @@ -162,7 +168,8 @@ void SparcFrameLowering::emitEpilogue(MachineFunction &MF, if (NumBytes == 0) return; - NumBytes = SubTarget.getAdjustedFrameSize(NumBytes); + NumBytes = MF.getTarget().getSubtarget<SparcSubtarget>().getAdjustedFrameSize( + NumBytes); emitSPAdjustment(MF, MBB, MBBI, NumBytes, SP::ADDrr, SP::ADDri); } diff --git a/lib/Target/Sparc/SparcFrameLowering.h b/lib/Target/Sparc/SparcFrameLowering.h index bda7b7c..a7d1b89 100644 --- a/lib/Target/Sparc/SparcFrameLowering.h +++ b/lib/Target/Sparc/SparcFrameLowering.h @@ -15,19 +15,14 @@ #define SPARC_FRAMEINFO_H #include "Sparc.h" -#include "SparcSubtarget.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { - class SparcSubtarget; +class SparcSubtarget; class SparcFrameLowering : public TargetFrameLowering { - const SparcSubtarget &SubTarget; public: - explicit SparcFrameLowering(const SparcSubtarget &ST) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, - ST.is64Bit() ? 16 : 8, 0, ST.is64Bit() ? 16 : 8), - SubTarget(ST) {} + explicit SparcFrameLowering(const SparcSubtarget &ST); /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index ef61466..990f52a 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -2030,7 +2030,7 @@ SparcTargetLowering::LowerF128Op(SDValue Op, SelectionDAG &DAG, } TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(SDLoc(Op)).setChain(Chain) - .setCallee(CallingConv::C, RetTyABI, Callee, &Args, 0); + .setCallee(CallingConv::C, RetTyABI, Callee, std::move(Args), 0); std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); @@ -2086,7 +2086,7 @@ SparcTargetLowering::LowerF128Compare(SDValue LHS, SDValue RHS, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(DL).setChain(Chain) - .setCallee(CallingConv::C, RetTy, Callee, &Args, 0); + .setCallee(CallingConv::C, RetTy, Callee, std::move(Args), 0); std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); diff --git a/lib/Target/Sparc/SparcJITInfo.cpp b/lib/Target/Sparc/SparcJITInfo.cpp index c775e9e..d0eec98 100644 --- a/lib/Target/Sparc/SparcJITInfo.cpp +++ b/lib/Target/Sparc/SparcJITInfo.cpp @@ -213,7 +213,8 @@ extern "C" void *SparcCompilationCallbackC(intptr_t StubAddr) { void SparcJITInfo::replaceMachineCodeForFunction(void *Old, void *New) { - assert(0 && "FIXME: Implement SparcJITInfo::replaceMachineCodeForFunction"); + llvm_unreachable("FIXME: Implement SparcJITInfo::" + "replaceMachineCodeForFunction"); } diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp index eb36d29..a308fc5 100644 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.cpp +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.cpp @@ -11,13 +11,13 @@ // //===----------------------------------------------------------------------===// -#include "SparcTargetMachine.h" +#include "SparcSelectionDAGInfo.h" using namespace llvm; #define DEBUG_TYPE "sparc-selectiondag-info" 
-SparcSelectionDAGInfo::SparcSelectionDAGInfo(const SparcTargetMachine &TM) - : TargetSelectionDAGInfo(TM) { +SparcSelectionDAGInfo::SparcSelectionDAGInfo(const DataLayout &DL) + : TargetSelectionDAGInfo(&DL) { } SparcSelectionDAGInfo::~SparcSelectionDAGInfo() { diff --git a/lib/Target/Sparc/SparcSelectionDAGInfo.h b/lib/Target/Sparc/SparcSelectionDAGInfo.h index dcd4203..2346f41 100644 --- a/lib/Target/Sparc/SparcSelectionDAGInfo.h +++ b/lib/Target/Sparc/SparcSelectionDAGInfo.h @@ -22,7 +22,7 @@ class SparcTargetMachine; class SparcSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit SparcSelectionDAGInfo(const SparcTargetMachine &TM); + explicit SparcSelectionDAGInfo(const DataLayout &DL); ~SparcSelectionDAGInfo(); }; diff --git a/lib/Target/Sparc/SparcSubtarget.cpp b/lib/Target/Sparc/SparcSubtarget.cpp index e38fb02..eea0c8c 100644 --- a/lib/Target/Sparc/SparcSubtarget.cpp +++ b/lib/Target/Sparc/SparcSubtarget.cpp @@ -26,20 +26,44 @@ using namespace llvm; void SparcSubtarget::anchor() { } -SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64Bit) : - SparcGenSubtargetInfo(TT, CPU, FS), - IsV9(false), - V8DeprecatedInsts(false), - IsVIS(false), - Is64Bit(is64Bit), - HasHardQuad(false), - UsePopc(false) { +static std::string computeDataLayout(const SparcSubtarget &ST) { + // Sparc is big endian. + std::string Ret = "E-m:e"; + + // Some ABIs have 32bit pointers. + if (!ST.is64Bit()) + Ret += "-p:32:32"; + + // Alignments for 64 bit integers. + Ret += "-i64:64"; + + // On SparcV9 128 floats are aligned to 128 bits, on others only to 64. + // On SparcV9 registers can hold 64 or 32 bits, on others only 32. + if (ST.is64Bit()) + Ret += "-n32:64"; + else + Ret += "-f128:64-n32"; + + if (ST.is64Bit()) + Ret += "-S128"; + else + Ret += "-S64"; + + return Ret; +} + +SparcSubtarget &SparcSubtarget::initializeSubtargetDependencies(StringRef CPU, + StringRef FS) { + IsV9 = false; + V8DeprecatedInsts = false; + IsVIS = false; + HasHardQuad = false; + UsePopc = false; // Determine default and user specified characteristics std::string CPUName = CPU; if (CPUName.empty()) - CPUName = (is64Bit) ? "v9" : "v8"; + CPUName = (Is64Bit) ? "v9" : "v8"; // Parse features string. ParseSubtargetFeatures(CPUName, FS); @@ -47,8 +71,16 @@ SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, // Popc is a v9-only instruction. 
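// For reference, the strings assembled by computeDataLayout above work out to
//   "E-m:e-p:32:32-i64:64-f128:64-n32-S64" for 32-bit targets and
//   "E-m:e-i64:64-n32:64-S128" for 64-bit targets.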
if (!IsV9) UsePopc = false; + + return *this; } +SparcSubtarget::SparcSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, TargetMachine &TM, + bool is64Bit) + : SparcGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), + DL(computeDataLayout(initializeSubtargetDependencies(CPU, FS))), + InstrInfo(*this), TLInfo(TM), TSInfo(DL), FrameLowering(*this) {} int SparcSubtarget::getAdjustedFrameSize(int frameSize) const { diff --git a/lib/Target/Sparc/SparcSubtarget.h b/lib/Target/Sparc/SparcSubtarget.h index 4025622..a335778 100644 --- a/lib/Target/Sparc/SparcSubtarget.h +++ b/lib/Target/Sparc/SparcSubtarget.h @@ -14,6 +14,13 @@ #ifndef SPARC_SUBTARGET_H #define SPARC_SUBTARGET_H +#include "SparcFrameLowering.h" +#include "SparcInstrInfo.h" +#include "SparcISelLowering.h" +#include "SparcJITInfo.h" +#include "SparcSelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -31,10 +38,26 @@ class SparcSubtarget : public SparcGenSubtargetInfo { bool Is64Bit; bool HasHardQuad; bool UsePopc; + const DataLayout DL; // Calculates type size & alignment + SparcInstrInfo InstrInfo; + SparcTargetLowering TLInfo; + SparcSelectionDAGInfo TSInfo; + SparcFrameLowering FrameLowering; + SparcJITInfo JITInfo; public: SparcSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, bool is64bit); + const std::string &FS, TargetMachine &TM, bool is64bit); + + const SparcInstrInfo *getInstrInfo() const { return &InstrInfo; } + const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } + const SparcRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + const SparcTargetLowering *getTargetLowering() const { return &TLInfo; } + const SparcSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + SparcJITInfo *getJITInfo() { return &JITInfo; } + const DataLayout *getDataLayout() const { return &DL; } bool isV9() const { return IsV9; } bool isVIS() const { return IsVIS; } @@ -47,6 +70,7 @@ public: /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + SparcSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); bool is64Bit() const { return Is64Bit; } diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp index 2469d93..0130fac 100644 --- a/lib/Target/Sparc/SparcTargetMachine.cpp +++ b/lib/Target/Sparc/SparcTargetMachine.cpp @@ -23,32 +23,6 @@ extern "C" void LLVMInitializeSparcTarget() { RegisterTargetMachine<SparcV9TargetMachine> Y(TheSparcV9Target); } -static std::string computeDataLayout(const SparcSubtarget &ST) { - // Sparc is big endian. - std::string Ret = "E-m:e"; - - // Some ABIs have 32bit pointers. - if (!ST.is64Bit()) - Ret += "-p:32:32"; - - // Alignments for 64 bit integers. - Ret += "-i64:64"; - - // On SparcV9 128 floats are aligned to 128 bits, on others only to 64. - // On SparcV9 registers can hold 64 or 32 bits, on others only 32. 
- if (ST.is64Bit()) - Ret += "-n32:64"; - else - Ret += "-f128:64-n32"; - - if (ST.is64Bit()) - Ret += "-S128"; - else - Ret += "-S64"; - - return Ret; -} - /// SparcTargetMachine ctor - Create an ILP32 architecture model /// SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, @@ -58,11 +32,7 @@ SparcTargetMachine::SparcTargetMachine(const Target &T, StringRef TT, CodeGenOpt::Level OL, bool is64bit) : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, is64bit), - DL(computeDataLayout(Subtarget)), - InstrInfo(Subtarget), - TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { + Subtarget(TT, CPU, FS, *this, is64bit) { initAsmInfo(); } diff --git a/lib/Target/Sparc/SparcTargetMachine.h b/lib/Target/Sparc/SparcTargetMachine.h index 7d04338..03b5137 100644 --- a/lib/Target/Sparc/SparcTargetMachine.h +++ b/lib/Target/Sparc/SparcTargetMachine.h @@ -14,50 +14,40 @@ #ifndef SPARCTARGETMACHINE_H #define SPARCTARGETMACHINE_H -#include "SparcFrameLowering.h" -#include "SparcISelLowering.h" #include "SparcInstrInfo.h" -#include "SparcJITInfo.h" -#include "SparcSelectionDAGInfo.h" #include "SparcSubtarget.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { class SparcTargetMachine : public LLVMTargetMachine { SparcSubtarget Subtarget; - const DataLayout DL; // Calculates type size & alignment - SparcInstrInfo InstrInfo; - SparcTargetLowering TLInfo; - SparcSelectionDAGInfo TSInfo; - SparcFrameLowering FrameLowering; - SparcJITInfo JITInfo; public: SparcTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit); - const SparcInstrInfo *getInstrInfo() const override { return &InstrInfo; } - const TargetFrameLowering *getFrameLowering() const override { - return &FrameLowering; + const SparcInstrInfo *getInstrInfo() const override { + return getSubtargetImpl()->getInstrInfo(); + } + const TargetFrameLowering *getFrameLowering() const override { + return getSubtargetImpl()->getFrameLowering(); } const SparcSubtarget *getSubtargetImpl() const override { return &Subtarget; } const SparcRegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); + return getSubtargetImpl()->getRegisterInfo(); } - const SparcTargetLowering* getTargetLowering() const override { - return &TLInfo; + const SparcTargetLowering *getTargetLowering() const override { + return getSubtargetImpl()->getTargetLowering(); } - const SparcSelectionDAGInfo* getSelectionDAGInfo() const override { - return &TSInfo; + const SparcSelectionDAGInfo *getSelectionDAGInfo() const override { + return getSubtargetImpl()->getSelectionDAGInfo(); } - SparcJITInfo *getJITInfo() override { - return &JITInfo; + SparcJITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); } - const DataLayout *getDataLayout() const override { return &DL; } // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp index 71de64f..758be41 100644 --- a/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +++ b/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp @@ -104,10 +104,6 @@ private: MemOp Mem; }; - SystemZOperand(OperandKind kind, SMLoc 
startLoc, SMLoc endLoc) - : Kind(kind), StartLoc(startLoc), EndLoc(endLoc) - {} - void addExpr(MCInst &Inst, const MCExpr *Expr) const { // Add as immediates when possible. Null MCExpr = 0. if (!Expr) @@ -119,40 +115,44 @@ private: } public: + SystemZOperand(OperandKind kind, SMLoc startLoc, SMLoc endLoc) + : Kind(kind), StartLoc(startLoc), EndLoc(endLoc) {} + // Create particular kinds of operand. - static SystemZOperand *createInvalid(SMLoc StartLoc, SMLoc EndLoc) { - return new SystemZOperand(KindInvalid, StartLoc, EndLoc); + static std::unique_ptr<SystemZOperand> createInvalid(SMLoc StartLoc, + SMLoc EndLoc) { + return make_unique<SystemZOperand>(KindInvalid, StartLoc, EndLoc); } - static SystemZOperand *createToken(StringRef Str, SMLoc Loc) { - SystemZOperand *Op = new SystemZOperand(KindToken, Loc, Loc); + static std::unique_ptr<SystemZOperand> createToken(StringRef Str, SMLoc Loc) { + auto Op = make_unique<SystemZOperand>(KindToken, Loc, Loc); Op->Token.Data = Str.data(); Op->Token.Length = Str.size(); return Op; } - static SystemZOperand *createReg(RegisterKind Kind, unsigned Num, - SMLoc StartLoc, SMLoc EndLoc) { - SystemZOperand *Op = new SystemZOperand(KindReg, StartLoc, EndLoc); + static std::unique_ptr<SystemZOperand> + createReg(RegisterKind Kind, unsigned Num, SMLoc StartLoc, SMLoc EndLoc) { + auto Op = make_unique<SystemZOperand>(KindReg, StartLoc, EndLoc); Op->Reg.Kind = Kind; Op->Reg.Num = Num; return Op; } - static SystemZOperand *createAccessReg(unsigned Num, SMLoc StartLoc, - SMLoc EndLoc) { - SystemZOperand *Op = new SystemZOperand(KindAccessReg, StartLoc, EndLoc); + static std::unique_ptr<SystemZOperand> + createAccessReg(unsigned Num, SMLoc StartLoc, SMLoc EndLoc) { + auto Op = make_unique<SystemZOperand>(KindAccessReg, StartLoc, EndLoc); Op->AccessReg = Num; return Op; } - static SystemZOperand *createImm(const MCExpr *Expr, SMLoc StartLoc, - SMLoc EndLoc) { - SystemZOperand *Op = new SystemZOperand(KindImm, StartLoc, EndLoc); + static std::unique_ptr<SystemZOperand> + createImm(const MCExpr *Expr, SMLoc StartLoc, SMLoc EndLoc) { + auto Op = make_unique<SystemZOperand>(KindImm, StartLoc, EndLoc); Op->Imm = Expr; return Op; } - static SystemZOperand *createMem(RegisterKind RegKind, unsigned Base, - const MCExpr *Disp, unsigned Index, - const MCExpr *Length, SMLoc StartLoc, - SMLoc EndLoc) { - SystemZOperand *Op = new SystemZOperand(KindMem, StartLoc, EndLoc); + static std::unique_ptr<SystemZOperand> + createMem(RegisterKind RegKind, unsigned Base, const MCExpr *Disp, + unsigned Index, const MCExpr *Length, SMLoc StartLoc, + SMLoc EndLoc) { + auto Op = make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc); Op->Mem.RegKind = RegKind; Op->Mem.Base = Base; Op->Mem.Index = Index; @@ -313,21 +313,19 @@ private: bool parseRegister(Register &Reg, RegisterGroup Group, const unsigned *Regs, bool IsAddress = false); - OperandMatchResultTy - parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - RegisterGroup Group, const unsigned *Regs, RegisterKind Kind); + OperandMatchResultTy parseRegister(OperandVector &Operands, + RegisterGroup Group, const unsigned *Regs, + RegisterKind Kind); bool parseAddress(unsigned &Base, const MCExpr *&Disp, unsigned &Index, const MCExpr *&Length, const unsigned *Regs, RegisterKind RegKind); - OperandMatchResultTy - parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - const unsigned *Regs, RegisterKind RegKind, - MemoryKind MemKind); + OperandMatchResultTy parseAddress(OperandVector &Operands, + const unsigned *Regs, 
RegisterKind RegKind, + MemoryKind MemKind); - bool parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Mnemonic); + bool parseOperand(OperandVector &Operands, StringRef Mnemonic); public: SystemZAsmParser(MCSubtargetInfo &sti, MCAsmParser &parser, @@ -343,87 +341,66 @@ public: // Override MCTargetAsmParser. bool ParseDirective(AsmToken DirectiveID) override; bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool ParseInstruction(ParseInstructionInfo &Info, - StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) - override; + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, + OperandVector &Operands, MCStreamer &Out, + unsigned &ErrorInfo, bool MatchingInlineAsm) override; // Used by the TableGen code to parse particular operand types. - OperandMatchResultTy - parseGR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseGR32(OperandVector &Operands) { return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, GR32Reg); } - OperandMatchResultTy - parseGRH32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseGRH32(OperandVector &Operands) { return parseRegister(Operands, RegGR, SystemZMC::GRH32Regs, GRH32Reg); } - OperandMatchResultTy - parseGRX32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseGRX32(OperandVector &Operands) { llvm_unreachable("GRX32 should only be used for pseudo instructions"); } - OperandMatchResultTy - parseGR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseGR64(OperandVector &Operands) { return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, GR64Reg); } - OperandMatchResultTy - parseGR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseGR128(OperandVector &Operands) { return parseRegister(Operands, RegGR, SystemZMC::GR128Regs, GR128Reg); } - OperandMatchResultTy - parseADDR32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseADDR32(OperandVector &Operands) { return parseRegister(Operands, RegGR, SystemZMC::GR32Regs, ADDR32Reg); } - OperandMatchResultTy - parseADDR64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseADDR64(OperandVector &Operands) { return parseRegister(Operands, RegGR, SystemZMC::GR64Regs, ADDR64Reg); } - OperandMatchResultTy - parseADDR128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseADDR128(OperandVector &Operands) { llvm_unreachable("Shouldn't be used as an operand"); } - OperandMatchResultTy - parseFP32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseFP32(OperandVector &Operands) { return parseRegister(Operands, RegFP, SystemZMC::FP32Regs, FP32Reg); } - OperandMatchResultTy - parseFP64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseFP64(OperandVector &Operands) { return parseRegister(Operands, RegFP, SystemZMC::FP64Regs, FP64Reg); } - OperandMatchResultTy - parseFP128(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseFP128(OperandVector &Operands) { return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg); } - OperandMatchResultTy - parseBDAddr32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy 
parseBDAddr32(OperandVector &Operands) { return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem); } - OperandMatchResultTy - parseBDAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseBDAddr64(OperandVector &Operands) { return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem); } - OperandMatchResultTy - parseBDXAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) { return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem); } - OperandMatchResultTy - parseBDLAddr64(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) { return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem); } - OperandMatchResultTy - parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands); - OperandMatchResultTy - parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - int64_t MinVal, int64_t MaxVal); - OperandMatchResultTy - parsePCRel16(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parseAccessReg(OperandVector &Operands); + OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal, + int64_t MaxVal); + OperandMatchResultTy parsePCRel16(OperandVector &Operands) { return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1); } - OperandMatchResultTy - parsePCRel32(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { + OperandMatchResultTy parsePCRel32(OperandVector &Operands) { return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1); } }; @@ -497,9 +474,8 @@ bool SystemZAsmParser::parseRegister(Register &Reg, RegisterGroup Group, // Parse a register and add it to Operands. The other arguments are as above. SystemZAsmParser::OperandMatchResultTy -SystemZAsmParser::parseRegister(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - RegisterGroup Group, const unsigned *Regs, - RegisterKind Kind) { +SystemZAsmParser::parseRegister(OperandVector &Operands, RegisterGroup Group, + const unsigned *Regs, RegisterKind Kind) { if (Parser.getTok().isNot(AsmToken::Percent)) return MatchOperand_NoMatch; @@ -566,9 +542,8 @@ bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, // Parse a memory operand and add it to Operands. The other arguments // are as above. SystemZAsmParser::OperandMatchResultTy -SystemZAsmParser::parseAddress(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - const unsigned *Regs, RegisterKind RegKind, - MemoryKind MemKind) { +SystemZAsmParser::parseAddress(OperandVector &Operands, const unsigned *Regs, + RegisterKind RegKind, MemoryKind MemKind) { SMLoc StartLoc = Parser.getTok().getLoc(); unsigned Base, Index; const MCExpr *Disp; @@ -622,9 +597,9 @@ bool SystemZAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, return false; } -bool SystemZAsmParser:: -ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool SystemZAsmParser::ParseInstruction(ParseInstructionInfo &Info, + StringRef Name, SMLoc NameLoc, + OperandVector &Operands) { Operands.push_back(SystemZOperand::createToken(Name, NameLoc)); // Read the remaining operands. 
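The hunks above convert SystemZOperand creation from raw new expressions to factory functions returning std::unique_ptr, which is also why the constructor moves from the private section to public: make_unique has to be able to call it. Below is a minimal standalone sketch of that ownership pattern, assuming a simplified Operand class and OperandVector alias in place of the real MCParsedAsmOperand hierarchy, and std::make_unique in place of LLVM's pre-C++14 make_unique helper.

#include <iostream>
#include <memory>
#include <string>
#include <vector>

class Operand {
  std::string Token; // simplified payload; the real class holds a tagged union
public:
  // Public constructor so make_unique can call it, mirroring the change above.
  explicit Operand(std::string Tok) : Token(std::move(Tok)) {}

  // Factory returns an owning pointer; callers never see a raw new.
  static std::unique_ptr<Operand> createToken(std::string Tok) {
    return std::make_unique<Operand>(std::move(Tok));
  }

  const std::string &token() const { return Token; }
};

// Stand-in for the parser-side OperandVector: the vector owns its elements.
using OperandVector = std::vector<std::unique_ptr<Operand>>;

int main() {
  OperandVector Operands;
  Operands.push_back(Operand::createToken("lgr"));
  Operands.push_back(Operand::createToken("%r1"));
  for (const auto &Op : Operands)
    std::cout << Op->token() << '\n';
} // every operand is freed here automatically

Because the vector owns its elements, a parse that fails partway through needs no manual delete loop over the operands built so far.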
@@ -655,9 +630,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, return false; } -bool SystemZAsmParser:: -parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - StringRef Mnemonic) { +bool SystemZAsmParser::parseOperand(OperandVector &Operands, + StringRef Mnemonic) { // Check if the current operand has a custom associated parser, if so, try to // custom parse the operand, or fallback to the general approach. OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); @@ -700,11 +674,11 @@ parseOperand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, return false; } -bool SystemZAsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) { +bool SystemZAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, + unsigned &ErrorInfo, + bool MatchingInlineAsm) { MCInst Inst; unsigned MatchResult; @@ -739,7 +713,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if (ErrorInfo >= Operands.size()) return Error(IDLoc, "too few operands for instruction"); - ErrorLoc = ((SystemZOperand*)Operands[ErrorInfo])->getStartLoc(); + ErrorLoc = ((SystemZOperand &)*Operands[ErrorInfo]).getStartLoc(); if (ErrorLoc == SMLoc()) ErrorLoc = IDLoc; } @@ -753,8 +727,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, llvm_unreachable("Unexpected match type"); } -SystemZAsmParser::OperandMatchResultTy SystemZAsmParser:: -parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parseAccessReg(OperandVector &Operands) { if (Parser.getTok().isNot(AsmToken::Percent)) return MatchOperand_NoMatch; @@ -768,9 +742,9 @@ parseAccessReg(SmallVectorImpl<MCParsedAsmOperand*> &Operands) { return MatchOperand_Success; } -SystemZAsmParser::OperandMatchResultTy SystemZAsmParser:: -parsePCRel(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - int64_t MinVal, int64_t MaxVal) { +SystemZAsmParser::OperandMatchResultTy +SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, + int64_t MaxVal) { MCContext &Ctx = getContext(); MCStreamer &Out = getStreamer(); const MCExpr *Expr; diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td index c4f641e..fb0d1d8 100644 --- a/lib/Target/SystemZ/SystemZCallingConv.td +++ b/lib/Target/SystemZ/SystemZCallingConv.td @@ -13,7 +13,7 @@ class CCIfExtend<CCAction A> : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>; //===----------------------------------------------------------------------===// -// SVR4 return value calling convention +// z/Linux return value calling convention //===----------------------------------------------------------------------===// def RetCC_SystemZ : CallingConv<[ // Promote i32 to i64 if it has an explicit extension type. @@ -39,7 +39,7 @@ def RetCC_SystemZ : CallingConv<[ ]>; //===----------------------------------------------------------------------===// -// SVR4 argument calling conventions +// z/Linux argument calling conventions //===----------------------------------------------------------------------===// def CC_SystemZ : CallingConv<[ // Promote i32 to i64 if it has an explicit extension type. @@ -63,3 +63,9 @@ def CC_SystemZ : CallingConv<[ // Other arguments are passed in 8-byte-aligned 8-byte stack slots. 
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> ]>; + +//===----------------------------------------------------------------------===// +// z/Linux callee-saved registers +//===----------------------------------------------------------------------===// +def CSR_SystemZ : CalleeSavedRegs<(add (sequence "R%dD", 6, 15), + (sequence "F%dD", 8, 15))>; diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp index 65f3caf..055dbe9 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ b/lib/Target/SystemZ/SystemZFrameLowering.cpp @@ -10,8 +10,9 @@ #include "SystemZFrameLowering.h" #include "SystemZCallingConv.h" #include "SystemZInstrBuilder.h" +#include "SystemZInstrInfo.h" #include "SystemZMachineFunctionInfo.h" -#include "SystemZTargetMachine.h" +#include "SystemZRegisterInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" @@ -44,11 +45,9 @@ static const TargetFrameLowering::SpillSlot SpillOffsetTable[] = { }; } // end anonymous namespace -SystemZFrameLowering::SystemZFrameLowering(const SystemZTargetMachine &tm, - const SystemZSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, - -SystemZMC::CallFrameSize, 8), - TM(tm), STI(sti) { +SystemZFrameLowering::SystemZFrameLowering() + : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, + -SystemZMC::CallFrameSize, 8) { // Create a mapping from register number to save slot offset. RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); for (unsigned I = 0, E = array_lengthof(SpillOffsetTable); I != E; ++I) @@ -108,9 +107,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // instruction, or an implicit one that comes between the explicit start // and end registers. static void addSavedGPR(MachineBasicBlock &MBB, MachineInstrBuilder &MIB, - const SystemZTargetMachine &TM, unsigned GPR64, bool IsImplicit) { - const SystemZRegisterInfo *RI = TM.getRegisterInfo(); + const TargetRegisterInfo *RI = MBB.getParent()->getTarget().getRegisterInfo(); unsigned GPR32 = RI->getSubReg(GPR64, SystemZ::subreg_l32); bool IsLive = MBB.isLiveIn(GPR64) || MBB.isLiveIn(GPR32); if (!IsLive || !IsImplicit) { @@ -176,8 +174,8 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::STMG)); // Add the explicit register operands. - addSavedGPR(MBB, MIB, TM, LowGPR, false); - addSavedGPR(MBB, MIB, TM, HighGPR, false); + addSavedGPR(MBB, MIB, LowGPR, false); + addSavedGPR(MBB, MIB, HighGPR, false); // Add the address. MIB.addReg(SystemZ::R15D).addImm(StartOffset); @@ -187,13 +185,13 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, for (unsigned I = 0, E = CSI.size(); I != E; ++I) { unsigned Reg = CSI[I].getReg(); if (SystemZ::GR64BitRegClass.contains(Reg)) - addSavedGPR(MBB, MIB, TM, Reg, true); + addSavedGPR(MBB, MIB, Reg, true); } // ...likewise GPR varargs. if (IsVarArg) for (unsigned I = ZFI->getVarArgsFirstGPR(); I < SystemZ::NumArgGPRs; ++I) - addSavedGPR(MBB, MIB, TM, SystemZ::ArgGPRs[I], true); + addSavedGPR(MBB, MIB, SystemZ::ArgGPRs[I], true); } // Save FPRs in the normal TargetInstrInfo way. 
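SystemZFrameLowering now carries no TargetMachine or Subtarget members, so helpers such as addSavedGPR recover the register info from the block itself via MBB.getParent()->getTarget().getRegisterInfo(). Below is a compilable sketch of that lookup chain; every class here is an illustrative stand-in for its LLVM namesake, and the subReg32 mapping is fake.

#include <iostream>

struct RegisterInfo {
  // Fake subregister mapping, standing in for getSubReg(GPR64, subreg_l32).
  unsigned subReg32(unsigned GPR64) const { return GPR64 + 100; }
};

struct Target {
  RegisterInfo RI;
  const RegisterInfo *getRegisterInfo() const { return &RI; }
};

struct Function {
  const Target *TheTarget;
  const Target &getTarget() const { return *TheTarget; }
};

struct BasicBlock {
  Function *Parent;
  Function *getParent() const { return Parent; }
};

// Mirrors the new addSavedGPR shape: no TargetMachine parameter; the
// register info is derived from the block's parent function on demand.
void addSavedGPR(BasicBlock &MBB, unsigned GPR64) {
  const RegisterInfo *RI = MBB.getParent()->getTarget().getRegisterInfo();
  std::cout << "saving r" << GPR64 << " (low half r"
            << RI->subReg32(GPR64) << ")\n";
}

int main() {
  Target T;
  Function F{&T};
  BasicBlock MBB{&F};
  addSavedGPR(MBB, 6);
}

Deriving context at the point of use, rather than caching it in every helper object's constructor, is the theme running through the Sparc and SystemZ hunks in this patch.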
diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h index 70e25fb..4d5fe6d 100644 --- a/lib/Target/SystemZ/SystemZFrameLowering.h +++ b/lib/Target/SystemZ/SystemZFrameLowering.h @@ -10,7 +10,6 @@ #ifndef SYSTEMZFRAMELOWERING_H #define SYSTEMZFRAMELOWERING_H -#include "SystemZSubtarget.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/Target/TargetFrameLowering.h" @@ -21,13 +20,8 @@ class SystemZSubtarget; class SystemZFrameLowering : public TargetFrameLowering { IndexedMap<unsigned> RegSpillOffsets; -protected: - const SystemZTargetMachine &TM; - const SystemZSubtarget &STI; - public: - SystemZFrameLowering(const SystemZTargetMachine &tm, - const SystemZSubtarget &sti); + SystemZFrameLowering(); // Override TargetFrameLowering. bool isFPCloseToIncomingSP() const override { return false; } diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp index 6fe1fb9..00c65f5 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ b/lib/Target/SystemZ/SystemZISelLowering.cpp @@ -80,9 +80,9 @@ static MachineOperand earlyUseOperand(MachineOperand Op) { return Op; } -SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) - : TargetLowering(tm, new TargetLoweringObjectFileELF()), - Subtarget(*tm.getSubtargetImpl()), TM(tm) { +SystemZTargetLowering::SystemZTargetLowering(const TargetMachine &tm) + : TargetLowering(tm, new TargetLoweringObjectFileELF()), + Subtarget(tm.getSubtarget<SystemZSubtarget>()) { MVT PtrVT = getPointerTy(); // Set up the register classes. @@ -673,11 +673,13 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, MachineRegisterInfo &MRI = MF.getRegInfo(); SystemZMachineFunctionInfo *FuncInfo = MF.getInfo<SystemZMachineFunctionInfo>(); - auto *TFL = static_cast<const SystemZFrameLowering *>(TM.getFrameLowering()); + auto *TFL = static_cast<const SystemZFrameLowering *>( + DAG.getTarget().getFrameLowering()); // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext()); + CCState CCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs, + *DAG.getContext()); CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); unsigned NumFixedGPRs = 0; @@ -815,7 +817,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Analyze the operands of the call, assigning locations to each operand. SmallVector<CCValAssign, 16> ArgLocs; - CCState ArgCCInfo(CallConv, IsVarArg, MF, TM, ArgLocs, *DAG.getContext()); + CCState ArgCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), ArgLocs, + *DAG.getContext()); ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); // We don't support GuaranteedTailCallOpt, only automatically-detected @@ -911,6 +914,12 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, Ops.push_back(DAG.getRegister(RegsToPass[I].first, RegsToPass[I].second.getValueType())); + // Add a register mask operand representing the call-preserved registers. + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + // Glue the call to the argument copies, if any. if (Glue.getNode()) Ops.push_back(Glue); @@ -931,7 +940,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI, // Assign locations to each value returned by this call. 
SmallVector<CCValAssign, 16> RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs, + *DAG.getContext()); RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); // Copy all of the result registers out of their specified physreg. @@ -962,7 +972,8 @@ SystemZTargetLowering::LowerReturn(SDValue Chain, // Assign locations to each returned value. SmallVector<CCValAssign, 16> RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, TM, RetLocs, *DAG.getContext()); + CCState RetCCInfo(CallConv, IsVarArg, MF, DAG.getTarget(), RetLocs, + *DAG.getContext()); RetCCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); // Quick exit for void returns @@ -1786,8 +1797,8 @@ SDValue SystemZTargetLowering::lowerGlobalAddress(GlobalAddressSDNode *Node, const GlobalValue *GV = Node->getGlobal(); int64_t Offset = Node->getOffset(); EVT PtrVT = getPointerTy(); - Reloc::Model RM = TM.getRelocationModel(); - CodeModel::Model CM = TM.getCodeModel(); + Reloc::Model RM = DAG.getTarget().getRelocationModel(); + CodeModel::Model CM = DAG.getTarget().getCodeModel(); SDValue Result; if (Subtarget.isPC32DBLSymbol(GV, RM, CM)) { @@ -1824,7 +1835,7 @@ SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, SDLoc DL(Node); const GlobalValue *GV = Node->getGlobal(); EVT PtrVT = getPointerTy(); - TLSModel::Model model = TM.getTLSModel(GV); + TLSModel::Model model = DAG.getTarget().getTLSModel(GV); if (model != TLSModel::LocalExec) llvm_unreachable("only local-exec TLS mode supported"); @@ -2287,9 +2298,9 @@ SDValue SystemZTargetLowering::lowerATOMIC_LOAD_SUB(SDValue Op, // Use an addition if the operand is constant and either LAA(G) is // available or the negative value is in the range of A(G)FHI. int64_t Value = (-Op2->getAPIntValue()).getSExtValue(); - if (isInt<32>(Value) || TM.getSubtargetImpl()->hasInterlockedAccess1()) + if (isInt<32>(Value) || Subtarget.hasInterlockedAccess1()) NegSrc2 = DAG.getConstant(Value, MemVT); - } else if (TM.getSubtargetImpl()->hasInterlockedAccess1()) + } else if (Subtarget.hasInterlockedAccess1()) // Use LAA(G) if available. NegSrc2 = DAG.getNode(ISD::SUB, DL, MemVT, DAG.getConstant(0, MemVT), Src2); @@ -2602,7 +2613,8 @@ static unsigned forceReg(MachineInstr *MI, MachineOperand &Base, MachineBasicBlock * SystemZTargetLowering::emitSelect(MachineInstr *MI, MachineBasicBlock *MBB) const { - const SystemZInstrInfo *TII = TM.getInstrInfo(); + const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>( + MBB->getParent()->getTarget().getInstrInfo()); unsigned DestReg = MI->getOperand(0).getReg(); unsigned TrueReg = MI->getOperand(1).getReg(); @@ -2650,7 +2662,8 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, MachineBasicBlock *MBB, unsigned StoreOpcode, unsigned STOCOpcode, bool Invert) const { - const SystemZInstrInfo *TII = TM.getInstrInfo(); + const SystemZInstrInfo *TII = static_cast<const SystemZInstrInfo *>( + MBB->getParent()->getTarget().getInstrInfo()); unsigned SrcReg = MI->getOperand(0).getReg(); MachineOperand Base = MI->getOperand(1); @@ -2665,7 +2678,7 @@ SystemZTargetLowering::emitCondStore(MachineInstr *MI, // Use STOCOpcode if possible. We could use different store patterns in // order to avoid matching the index register, but the performance trade-offs // might be more complicated in that case. 
- if (STOCOpcode && !IndexReg && TM.getSubtargetImpl()->hasLoadStoreOnCond()) { + if (STOCOpcode && !IndexReg && Subtarget.hasLoadStoreOnCond()) { if (Invert) CCMask ^= CCValid; BuildMI(*MBB, MI, DL, TII->get(STOCOpcode)) @@ -2717,8 +2730,9 @@ SystemZTargetLowering::emitAtomicLoadBinary(MachineInstr *MI, unsigned BinOpcode, unsigned BitSize, bool Invert) const { - const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); @@ -2840,8 +2854,9 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, unsigned CompareOpcode, unsigned KeepOldMask, unsigned BitSize) const { - const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); bool IsSubWord = (BitSize < 32); @@ -2951,8 +2966,9 @@ SystemZTargetLowering::emitAtomicLoadMinMax(MachineInstr *MI, MachineBasicBlock * SystemZTargetLowering::emitAtomicCmpSwapW(MachineInstr *MI, MachineBasicBlock *MBB) const { - const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); // Extract the operands. Base can be a register or a frame index. @@ -3067,8 +3083,9 @@ MachineBasicBlock * SystemZTargetLowering::emitExt128(MachineInstr *MI, MachineBasicBlock *MBB, bool ClearEven, unsigned SubReg) const { - const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -3098,8 +3115,9 @@ MachineBasicBlock * SystemZTargetLowering::emitMemMemWrapper(MachineInstr *MI, MachineBasicBlock *MBB, unsigned Opcode) const { - const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); @@ -3267,8 +3285,9 @@ MachineBasicBlock * SystemZTargetLowering::emitStringWrapper(MachineInstr *MI, MachineBasicBlock *MBB, unsigned Opcode) const { - const SystemZInstrInfo *TII = TM.getInstrInfo(); MachineFunction &MF = *MBB->getParent(); + const SystemZInstrInfo *TII = + static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo()); MachineRegisterInfo &MRI = MF.getRegInfo(); DebugLoc DL = MI->getDebugLoc(); diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h index bceb25e..e21b050 100644 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ b/lib/Target/SystemZ/SystemZISelLowering.h @@ -198,7 +198,7 @@ class SystemZTargetMachine; class SystemZTargetLowering : public TargetLowering { public: - explicit SystemZTargetLowering(SystemZTargetMachine &TM); + explicit SystemZTargetLowering(const TargetMachine &TM); // Override TargetLowering. MVT getScalarShiftAmountTy(EVT LHSTy) const override { @@ -249,7 +249,6 @@ public: private: const SystemZSubtarget &Subtarget; - const SystemZTargetMachine &TM; // Implement LowerOperation for individual opcodes. 
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td index a1e782c..e8841e1 100644 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ b/lib/Target/SystemZ/SystemZInstrFP.td @@ -133,6 +133,13 @@ def LEDBR : UnaryRRE<"ledb", 0xB344, fround, FP32, FP64>; def LEXBR : UnaryRRE<"lexb", 0xB346, null_frag, FP128, FP128>; def LDXBR : UnaryRRE<"ldxb", 0xB345, null_frag, FP128, FP128>; +def LEDBRA : UnaryRRF4<"ledbra", 0xB344, FP32, FP64>, + Requires<[FeatureFPExtension]>; +def LEXBRA : UnaryRRF4<"lexbra", 0xB346, FP128, FP128>, + Requires<[FeatureFPExtension]>; +def LDXBRA : UnaryRRF4<"ldxbra", 0xB345, FP128, FP128>, + Requires<[FeatureFPExtension]>; + def : Pat<(f32 (fround FP128:$src)), (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>; def : Pat<(f64 (fround FP128:$src)), diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td index add675a..9f59a1c 100644 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ b/lib/Target/SystemZ/SystemZInstrFormats.td @@ -511,34 +511,24 @@ class InstSS<bits<8> op, dag outs, dag ins, string asmstr, list<dag> pattern> // to store. Other stored registers are added as implicit uses. // // Unary: -// One register output operand and one input operand. The input -// operand may be a register, immediate or memory. +// One register output operand and one input operand. // // Binary: -// One register output operand and two input operands. The first -// input operand is always a register and the second may be a register, -// immediate or memory. -// -// Shift: -// One register output operand and two input operands. The first -// input operand is a register and the second has the same form as -// an address (although it isn't actually used to address memory). +// One register output operand and two input operands. // // Compare: -// Two input operands. The first operand is always a register, -// the second may be a register, immediate or memory. +// Two input operands and an implicit CC output operand. // // Ternary: -// One register output operand and three register input operands. +// One register output operand and three input operands. // // LoadAndOp: -// One output operand and two input operands. The first input operand -// is a register and the second is an address. +// One output operand and two input operands, one of which is an address. +// The instruction both reads from and writes to the address. // // CmpSwap: -// One output operand and three input operands. The first two -// operands are registers and the third is an address. The instruction -// both reads from and writes to the address. +// One output operand and three input operands, one of which is an address. +// The instruction both reads from and writes to the address. // // RotateSelect: // One output operand and five input operands. 
The first two operands @@ -691,7 +681,7 @@ class CondStoreRSY<string mnemonic, bits<16> opcode, class AsmCondStoreRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, bits<5> bytes, AddressingMode mode = bdaddr20only> - : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, uimm8zx4:$R3), + : InstRSY<opcode, (outs), (ins cls:$R1, mode:$BD2, imm32zx4:$R3), mnemonic#"\t$R1, $BD2, $R3", []>, Requires<[FeatureLoadStoreOnCond]> { let mayStore = 1; @@ -730,7 +720,7 @@ class UnaryRRE<string mnemonic, bits<16> opcode, SDPatternOperator operator, class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2), + : InstRRF<opcode, (outs cls1:$R1), (ins imm32zx4:$R3, cls2:$R2), mnemonic#"r\t$R1, $R3, $R2", []> { let OpKey = mnemonic ## cls1; let OpType = "reg"; @@ -739,7 +729,7 @@ class UnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, class UnaryRRF4<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$R1), (ins uimm8zx4:$R3, cls2:$R2, uimm8zx4:$R4), + : InstRRF<opcode, (outs cls1:$R1), (ins imm32zx4:$R3, cls2:$R2, imm32zx4:$R4), mnemonic#"\t$R1, $R3, $R2, $R4", []>; // These instructions are generated by if conversion. The old value of R1 @@ -757,7 +747,7 @@ class CondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, // mask is the third operand rather than being part of the mnemonic. class AsmCondUnaryRRF<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> - : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, uimm8zx4:$R3), + : InstRRF<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2, imm32zx4:$R3), mnemonic#"r\t$R1, $R2, $R3", []>, Requires<[FeatureLoadStoreOnCond]> { let Constraints = "$R1 = $R1src"; @@ -823,7 +813,7 @@ class CondUnaryRSY<string mnemonic, bits<16> opcode, class AsmCondUnaryRSY<string mnemonic, bits<16> opcode, RegisterOperand cls, bits<5> bytes, AddressingMode mode = bdaddr20only> - : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, uimm8zx4:$R3), + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R1src, mode:$BD2, imm32zx4:$R3), mnemonic#"\t$R1, $BD2, $R3", []>, Requires<[FeatureLoadStoreOnCond]> { let mayLoad = 1; @@ -993,6 +983,33 @@ class BinaryRIL<string mnemonic, bits<12> opcode, SDPatternOperator operator, let DisableEncoding = "$R1src"; } +class BinaryRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2), + mnemonic#"\t$R1, $BD2", + [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> { + let R3 = 0; + let Constraints = "$R1 = $R1src"; + let DisableEncoding = "$R1src"; +} + +class BinaryRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2), + mnemonic#"\t$R1, $R3, $BD2", + [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>; + +multiclass BinaryRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, + SDPatternOperator operator, RegisterOperand cls> { + let NumOpsKey = mnemonic in { + let NumOpsValue = "3" in + def K : BinaryRSY<mnemonic##"k", opcode2, null_frag, cls>, + Requires<[FeatureDistinctOps]>; + let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in + def "" : BinaryRS<mnemonic, opcode1, operator, cls>; + } +} + class BinaryRX<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls, 
SDPatternOperator load, bits<5> bytes, AddressingMode mode = bdxaddr12only> @@ -1077,33 +1094,6 @@ multiclass BinarySIPair<string mnemonic, bits<8> siOpcode, } } -class ShiftRS<string mnemonic, bits<8> opcode, SDPatternOperator operator, - RegisterOperand cls> - : InstRS<opcode, (outs cls:$R1), (ins cls:$R1src, shift12only:$BD2), - mnemonic#"\t$R1, $BD2", - [(set cls:$R1, (operator cls:$R1src, shift12only:$BD2))]> { - let R3 = 0; - let Constraints = "$R1 = $R1src"; - let DisableEncoding = "$R1src"; -} - -class ShiftRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator, - RegisterOperand cls> - : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, shift20only:$BD2), - mnemonic#"\t$R1, $R3, $BD2", - [(set cls:$R1, (operator cls:$R3, shift20only:$BD2))]>; - -multiclass ShiftRSAndK<string mnemonic, bits<8> opcode1, bits<16> opcode2, - SDPatternOperator operator, RegisterOperand cls> { - let NumOpsKey = mnemonic in { - let NumOpsValue = "3" in - def K : ShiftRSY<mnemonic##"k", opcode2, null_frag, cls>, - Requires<[FeatureDistinctOps]>; - let NumOpsValue = "2", isConvertibleToThreeAddress = 1 in - def "" : ShiftRS<mnemonic, opcode1, operator, cls>; - } -} - class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator, RegisterOperand cls1, RegisterOperand cls2> : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2), @@ -1315,22 +1305,23 @@ multiclass CmpSwapRSPair<string mnemonic, bits<8> rsOpcode, bits<16> rsyOpcode, class RotateSelectRIEf<string mnemonic, bits<16> opcode, RegisterOperand cls1, RegisterOperand cls2> : InstRIEf<opcode, (outs cls1:$R1), - (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), + (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4, + imm32zx6:$I5), mnemonic#"\t$R1, $R2, $I3, $I4, $I5", []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; } class PrefetchRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator> - : InstRXY<opcode, (outs), (ins uimm8zx4:$R1, bdxaddr20only:$XBD2), + : InstRXY<opcode, (outs), (ins imm32zx4:$R1, bdxaddr20only:$XBD2), mnemonic##"\t$R1, $XBD2", - [(operator uimm8zx4:$R1, bdxaddr20only:$XBD2)]>; + [(operator imm32zx4:$R1, bdxaddr20only:$XBD2)]>; class PrefetchRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator> - : InstRIL<opcode, (outs), (ins uimm8zx4:$R1, pcrel32:$I2), + : InstRIL<opcode, (outs), (ins imm32zx4:$R1, pcrel32:$I2), mnemonic##"\t$R1, $I2", - [(operator uimm8zx4:$R1, pcrel32:$I2)]> { + [(operator imm32zx4:$R1, pcrel32:$I2)]> { // We want PC-relative addresses to be tried ahead of BD and BDX addresses. // However, BDXs have two extra operands and are therefore 6 units more // complex. @@ -1450,7 +1441,8 @@ class StoreRXYPseudo<SDPatternOperator operator, RegisterOperand cls, // of registers. class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2> : Pseudo<(outs cls1:$R1), - (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), + (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4, + imm32zx6:$I5), []> { let Constraints = "$R1 = $R1src"; let DisableEncoding = "$R1src"; @@ -1460,9 +1452,9 @@ class RotateSelectRIEfPseudo<RegisterOperand cls1, RegisterOperand cls2> // the value of the PSW's 2-bit condition code field. 
class SelectWrapper<RegisterOperand cls> : Pseudo<(outs cls:$dst), - (ins cls:$src1, cls:$src2, uimm8zx4:$valid, uimm8zx4:$cc), + (ins cls:$src1, cls:$src2, imm32zx4:$valid, imm32zx4:$cc), [(set cls:$dst, (z_select_ccmask cls:$src1, cls:$src2, - uimm8zx4:$valid, uimm8zx4:$cc))]> { + imm32zx4:$valid, imm32zx4:$cc))]> { let usesCustomInserter = 1; // Although the instructions used by these nodes do not in themselves // change CC, the insertion requires new blocks, and CC cannot be live @@ -1476,14 +1468,14 @@ multiclass CondStores<RegisterOperand cls, SDPatternOperator store, SDPatternOperator load, AddressingMode mode> { let Defs = [CC], Uses = [CC], usesCustomInserter = 1 in { def "" : Pseudo<(outs), - (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc), + (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), [(store (z_select_ccmask cls:$new, (load mode:$addr), - uimm8zx4:$valid, uimm8zx4:$cc), + imm32zx4:$valid, imm32zx4:$cc), mode:$addr)]>; def Inv : Pseudo<(outs), - (ins cls:$new, mode:$addr, uimm8zx4:$valid, uimm8zx4:$cc), + (ins cls:$new, mode:$addr, imm32zx4:$valid, imm32zx4:$cc), [(store (z_select_ccmask (load mode:$addr), cls:$new, - uimm8zx4:$valid, uimm8zx4:$cc), + imm32zx4:$valid, imm32zx4:$cc), mode:$addr)]>; } } @@ -1611,6 +1603,7 @@ class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls, // An alias of a RotateSelectRIEf, but with different register sizes. class RotateSelectAliasRIEf<RegisterOperand cls1, RegisterOperand cls2> : Alias<6, (outs cls1:$R1), - (ins cls1:$R1src, cls2:$R2, uimm8:$I3, uimm8:$I4, uimm8zx6:$I5), []> { + (ins cls1:$R1src, cls2:$R2, imm32zx8:$I3, imm32zx8:$I4, + imm32zx6:$I5), []> { let Constraints = "$R1 = $R1src"; } diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 6a18b2d..f58ab47 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -40,9 +40,9 @@ static bool isHighReg(unsigned int Reg) { // Pin the vtable to this file. void SystemZInstrInfo::anchor() {} -SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) +SystemZInstrInfo::SystemZInstrInfo(SystemZSubtarget &sti) : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKDOWN, SystemZ::ADJCALLSTACKUP), - RI(tm), TM(tm) { + RI(), STI(sti) { } // MI is a 128-bit load or store. 
Split it into two 64-bit loads or stores, @@ -488,7 +488,7 @@ SystemZInstrInfo::optimizeCompareInstr(MachineInstr *Compare, bool IsLogical = (Compare->getDesc().TSFlags & SystemZII::IsLogical) != 0; if (Value == 0 && !IsLogical && - removeIPMBasedCompare(Compare, SrcReg, MRI, TM.getRegisterInfo())) + removeIPMBasedCompare(Compare, SrcReg, MRI, &RI)) return true; return false; } @@ -505,7 +505,7 @@ static unsigned getConditionalMove(unsigned Opcode) { bool SystemZInstrInfo::isPredicable(MachineInstr *MI) const { unsigned Opcode = MI->getOpcode(); - if (TM.getSubtargetImpl()->hasLoadStoreOnCond() && + if (STI.hasLoadStoreOnCond() && getConditionalMove(Opcode)) return true; return false; @@ -537,7 +537,7 @@ PredicateInstruction(MachineInstr *MI, unsigned CCMask = Pred[1].getImm(); assert(CCMask > 0 && CCMask < 15 && "Invalid predicate"); unsigned Opcode = MI->getOpcode(); - if (TM.getSubtargetImpl()->hasLoadStoreOnCond()) { + if (STI.hasLoadStoreOnCond()) { if (unsigned CondOpcode = getConditionalMove(Opcode)) { MI->setDesc(get(CondOpcode)); MachineInstrBuilder(*MI->getParent()->getParent(), MI) @@ -685,7 +685,7 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // We prefer to keep the two-operand form where possible both // because it tends to be shorter and because some instructions // have memory forms that can be used during spilling. - if (TM.getSubtargetImpl()->hasDistinctOps()) { + if (STI.hasDistinctOps()) { MachineOperand &Dest = MI->getOperand(0); MachineOperand &Src = MI->getOperand(1); unsigned DestReg = Dest.getReg(); diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h index 09aee5d..83009cb 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ b/lib/Target/SystemZ/SystemZInstrInfo.h @@ -110,9 +110,10 @@ struct Branch { }; } // end namespace SystemZII +class SystemZSubtarget; class SystemZInstrInfo : public SystemZGenInstrInfo { const SystemZRegisterInfo RI; - SystemZTargetMachine &TM; + SystemZSubtarget &STI; void splitMove(MachineBasicBlock::iterator MI, unsigned NewOpcode) const; void splitAdjDynAlloc(MachineBasicBlock::iterator MI) const; @@ -130,7 +131,7 @@ class SystemZInstrInfo : public SystemZGenInstrInfo { virtual void anchor(); public: - explicit SystemZInstrInfo(SystemZTargetMachine &TM); + explicit SystemZInstrInfo(SystemZSubtarget &STI); // Override TargetInstrInfo. 
unsigned isLoadFromStackSlot(const MachineInstr *MI, diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td index e70df92..f4951ad 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ b/lib/Target/SystemZ/SystemZInstrInfo.td @@ -63,11 +63,11 @@ let isBranch = 1, isTerminator = 1, Uses = [CC] in { def BRCL : InstRIL<0xC04, (outs), (ins cond4:$valid, cond4:$R1, brtarget32:$I2), "jg$R1\t$I2", []>; } - def AsmBRC : InstRI<0xA74, (outs), (ins uimm8zx4:$R1, brtarget16:$I2), + def AsmBRC : InstRI<0xA74, (outs), (ins imm32zx4:$R1, brtarget16:$I2), "brc\t$R1, $I2", []>; - def AsmBRCL : InstRIL<0xC04, (outs), (ins uimm8zx4:$R1, brtarget32:$I2), + def AsmBRCL : InstRIL<0xC04, (outs), (ins imm32zx4:$R1, brtarget32:$I2), "brcl\t$R1, $I2", []>; - def AsmBCR : InstRR<0x07, (outs), (ins uimm8zx4:$R1, GR64:$R2), + def AsmBCR : InstRR<0x07, (outs), (ins imm32zx4:$R1, GR64:$R2), "bcr\t$R1, $R2", []>; } @@ -109,7 +109,7 @@ multiclass CompareBranches<Operand ccmask, string pos1, string pos2> { } let isCodeGenOnly = 1 in defm C : CompareBranches<cond4, "$M3", "">; -defm AsmC : CompareBranches<uimm8zx4, "", "$M3, ">; +defm AsmC : CompareBranches<imm32zx4, "", "$M3, ">; // Define AsmParser mnemonics for each general condition-code mask // (integer or floating-point) @@ -233,9 +233,7 @@ defm CondStore64 : CondStores<GR64, nonvolatile_store, // Call instructions //===----------------------------------------------------------------------===// -// The definitions here are for the call-clobbered registers. -let isCall = 1, Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, - F0D, F1D, F2D, F3D, F4D, F5D, F6D, F7D, CC] in { +let isCall = 1, Defs = [R14D, CC] in { def CallBRASL : Alias<6, (outs), (ins pcrel32:$I2, variable_ops), [(z_call pcrel32:$I2)]>; def CallBASR : Alias<2, (outs), (ins ADDR64:$R2, variable_ops), @@ -855,7 +853,7 @@ let Defs = [CC] in { } // AND to memory - defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, uimm8>; + defm NI : BinarySIPair<"ni", 0x94, 0xEB54, null_frag, imm32zx8>; // Block AND. let mayLoad = 1, mayStore = 1 in @@ -912,7 +910,7 @@ let Defs = [CC] in { } // OR to memory - defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, uimm8>; + defm OI : BinarySIPair<"oi", 0x96, 0xEB56, null_frag, imm32zx8>; // Block OR. let mayLoad = 1, mayStore = 1 in @@ -952,7 +950,7 @@ let Defs = [CC] in { } // XOR to memory - defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, uimm8>; + defm XI : BinarySIPair<"xi", 0x97, 0xEB57, null_frag, imm32zx8>; // Block XOR. let mayLoad = 1, mayStore = 1 in @@ -1015,26 +1013,26 @@ def DLG : BinaryRXY<"dlg", 0xE387, z_udivrem64, GR128, load, 8>; // Shift left. let neverHasSideEffects = 1 in { - defm SLL : ShiftRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; - def SLLG : ShiftRSY<"sllg", 0xEB0D, shl, GR64>; + defm SLL : BinaryRSAndK<"sll", 0x89, 0xEBDF, shl, GR32>; + def SLLG : BinaryRSY<"sllg", 0xEB0D, shl, GR64>; } // Logical shift right. let neverHasSideEffects = 1 in { - defm SRL : ShiftRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; - def SRLG : ShiftRSY<"srlg", 0xEB0C, srl, GR64>; + defm SRL : BinaryRSAndK<"srl", 0x88, 0xEBDE, srl, GR32>; + def SRLG : BinaryRSY<"srlg", 0xEB0C, srl, GR64>; } // Arithmetic shift right. let Defs = [CC], CCValues = 0xE, CompareZeroCCMask = 0xE in { - defm SRA : ShiftRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; - def SRAG : ShiftRSY<"srag", 0xEB0A, sra, GR64>; + defm SRA : BinaryRSAndK<"sra", 0x8A, 0xEBDC, sra, GR32>; + def SRAG : BinaryRSY<"srag", 0xEB0A, sra, GR64>; } // Rotate left. 
let neverHasSideEffects = 1 in { - def RLL : ShiftRSY<"rll", 0xEB1D, rotl, GR32>; - def RLLG : ShiftRSY<"rllg", 0xEB1C, rotl, GR64>; + def RLL : BinaryRSY<"rll", 0xEB1D, rotl, GR32>; + def RLLG : BinaryRSY<"rllg", 0xEB1C, rotl, GR64>; } // Rotate second operand left and inserted selected bits into first operand. @@ -1403,15 +1401,15 @@ def : Pat<(sub GR64:$src1, (azextloadi32 bdxaddr20only:$addr)), // Optimize sign-extended 1/0 selects to -1/0 selects. This is important // for vector legalization. -def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, uimm8zx4:$cc)), +def : Pat<(sra (shl (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, imm32zx4:$cc)), (i32 31)), (i32 31)), - (Select32 (LHI -1), (LHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>; -def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, uimm8zx4:$valid, - uimm8zx4:$cc)))), + (Select32 (LHI -1), (LHI 0), imm32zx4:$valid, imm32zx4:$cc)>; +def : Pat<(sra (shl (i64 (anyext (i32 (z_select_ccmask 1, 0, imm32zx4:$valid, + imm32zx4:$cc)))), (i32 63)), (i32 63)), - (Select64 (LGHI -1), (LGHI 0), uimm8zx4:$valid, uimm8zx4:$cc)>; + (Select64 (LGHI -1), (LGHI 0), imm32zx4:$valid, imm32zx4:$cc)>; // Peepholes for turning scalar operations into block operations. defm : BlockLoadStore<anyextloadi8, i32, MVCSequence, NCSequence, OCSequence, diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td index 3ad146c..7be81dc 100644 --- a/lib/Target/SystemZ/SystemZOperands.td +++ b/lib/Target/SystemZ/SystemZOperands.td @@ -202,21 +202,6 @@ def S32Imm : ImmediateAsmOperand<"S32Imm">; def U32Imm : ImmediateAsmOperand<"U32Imm">; //===----------------------------------------------------------------------===// -// 8-bit immediates -//===----------------------------------------------------------------------===// - -def uimm8zx4 : Immediate<i8, [{ - return isUInt<4>(N->getZExtValue()); -}], NOOP_SDNodeXForm, "U4Imm">; - -def uimm8zx6 : Immediate<i8, [{ - return isUInt<6>(N->getZExtValue()); -}], NOOP_SDNodeXForm, "U6Imm">; - -def simm8 : Immediate<i8, [{}], SIMM8, "S8Imm">; -def uimm8 : Immediate<i8, [{}], UIMM8, "U8Imm">; - -//===----------------------------------------------------------------------===// // i32 immediates //===----------------------------------------------------------------------===// @@ -241,6 +226,14 @@ def imm32lh16c : Immediate<i32, [{ }], LH16, "U16Imm">; // Short immediates +def imm32zx4 : Immediate<i32, [{ + return isUInt<4>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U4Imm">; + +def imm32zx6 : Immediate<i32, [{ + return isUInt<6>(N->getZExtValue()); +}], NOOP_SDNodeXForm, "U6Imm">; + def imm32sx8 : Immediate<i32, [{ return isInt<8>(N->getSExtValue()); }], SIMM8, "S8Imm">; @@ -470,13 +463,13 @@ def AccessReg : AsmOperandClass { let Name = "AccessReg"; let ParserMethod = "parseAccessReg"; } -def access_reg : Immediate<i8, [{ return N->getZExtValue() < 16; }], +def access_reg : Immediate<i32, [{ return N->getZExtValue() < 16; }], NOOP_SDNodeXForm, "AccessReg"> { let ParserMatchClass = AccessReg; } // A 4-bit condition-code mask. 
-def cond4 : PatLeaf<(i8 imm), [{ return (N->getZExtValue() < 16); }]>, - Operand<i8> { +def cond4 : PatLeaf<(i32 imm), [{ return (N->getZExtValue() < 16); }]>, + Operand<i32> { let PrintMethod = "printCond4Operand"; } diff --git a/lib/Target/SystemZ/SystemZOperators.td b/lib/Target/SystemZ/SystemZOperators.td index a391961..c70e662 100644 --- a/lib/Target/SystemZ/SystemZOperators.td +++ b/lib/Target/SystemZ/SystemZOperators.td @@ -19,14 +19,14 @@ def SDT_ZICmp : SDTypeProfile<0, 3, [SDTCisSameAs<0, 1>, SDTCisVT<2, i32>]>; def SDT_ZBRCCMask : SDTypeProfile<0, 3, - [SDTCisVT<0, i8>, - SDTCisVT<1, i8>, + [SDTCisVT<0, i32>, + SDTCisVT<1, i32>, SDTCisVT<2, OtherVT>]>; def SDT_ZSelectCCMask : SDTypeProfile<1, 4, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, - SDTCisVT<3, i8>, - SDTCisVT<4, i8>]>; + SDTCisVT<3, i32>, + SDTCisVT<4, i32>]>; def SDT_ZWrapPtr : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; @@ -37,7 +37,7 @@ def SDT_ZWrapOffset : SDTypeProfile<1, 2, def SDT_ZAdjDynAlloc : SDTypeProfile<1, 0, [SDTCisVT<0, i64>]>; def SDT_ZExtractAccess : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, i8>]>; + SDTCisVT<1, i32>]>; def SDT_ZGR128Binary32 : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisVT<1, untyped>, @@ -77,7 +77,7 @@ def SDT_ZString : SDTypeProfile<1, 3, SDTCisVT<3, i32>]>; def SDT_ZI32Intrinsic : SDTypeProfile<1, 0, [SDTCisVT<0, i32>]>; def SDT_ZPrefetch : SDTypeProfile<0, 2, - [SDTCisVT<0, i8>, + [SDTCisVT<0, i32>, SDTCisPtrTy<1>]>; //===----------------------------------------------------------------------===// diff --git a/lib/Target/SystemZ/SystemZPatterns.td b/lib/Target/SystemZ/SystemZPatterns.td index c0f94ec..e307f8a 100644 --- a/lib/Target/SystemZ/SystemZPatterns.td +++ b/lib/Target/SystemZ/SystemZPatterns.td @@ -101,15 +101,15 @@ multiclass CondStores64<Instruction insn, Instruction insninv, SDPatternOperator store, SDPatternOperator load, AddressingMode mode> { def : Pat<(store (z_select_ccmask GR64:$new, (load mode:$addr), - uimm8zx4:$valid, uimm8zx4:$cc), + imm32zx4:$valid, imm32zx4:$cc), mode:$addr), (insn (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, - uimm8zx4:$valid, uimm8zx4:$cc)>; + imm32zx4:$valid, imm32zx4:$cc)>; def : Pat<(store (z_select_ccmask (load mode:$addr), GR64:$new, - uimm8zx4:$valid, uimm8zx4:$cc), + imm32zx4:$valid, imm32zx4:$cc), mode:$addr), (insninv (EXTRACT_SUBREG GR64:$new, subreg_l32), mode:$addr, - uimm8zx4:$valid, uimm8zx4:$cc)>; + imm32zx4:$valid, imm32zx4:$cc)>; } // Try to use MVC instruction INSN for a load of type LOAD followed by a store diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp index a04d703..f03bcc4 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp @@ -7,31 +7,29 @@ // //===----------------------------------------------------------------------===// +#include "SystemZInstrInfo.h" #include "SystemZRegisterInfo.h" -#include "SystemZTargetMachine.h" +#include "SystemZSubtarget.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetFrameLowering.h" using namespace llvm; #define GET_REGINFO_TARGET_DESC #include "SystemZGenRegisterInfo.inc" -SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm) - : SystemZGenRegisterInfo(SystemZ::R14D), TM(tm) {} +SystemZRegisterInfo::SystemZRegisterInfo() + : SystemZGenRegisterInfo(SystemZ::R14D) {} -const MCPhysReg* +const MCPhysReg * SystemZRegisterInfo::getCalleeSavedRegs(const 
MachineFunction *MF) const { - static const MCPhysReg CalleeSavedRegs[] = { - SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D, - SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D, - SystemZ::R14D, SystemZ::R15D, - SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, - SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D, - 0 - }; - - return CalleeSavedRegs; + return CSR_SystemZ_SaveList; +} + +const uint32_t * +SystemZRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + return CSR_SystemZ_RegMask; } BitVector @@ -63,7 +61,8 @@ SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI, MachineBasicBlock &MBB = *MI->getParent(); MachineFunction &MF = *MBB.getParent(); - auto *TII = static_cast<const SystemZInstrInfo*>(TM.getInstrInfo()); + auto *TII = + static_cast<const SystemZInstrInfo *>(MF.getTarget().getInstrInfo()); const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); DebugLoc DL = MI->getDebugLoc(); diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h index e236f71..9bffa46 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ b/lib/Target/SystemZ/SystemZRegisterInfo.h @@ -29,15 +29,9 @@ inline unsigned odd128(bool Is32bit) { } } // end namespace SystemZ -class SystemZSubtarget; -class SystemZInstrInfo; - struct SystemZRegisterInfo : public SystemZGenRegisterInfo { -private: - SystemZTargetMachine &TM; - public: - SystemZRegisterInfo(SystemZTargetMachine &tm); + SystemZRegisterInfo(); // Override TargetRegisterInfo.h. bool requiresRegisterScavenging(const MachineFunction &MF) const override { @@ -51,6 +45,7 @@ public: } const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF = nullptr) const override; + const uint32_t *getCallPreservedMask(CallingConv::ID CC) const override; BitVector getReservedRegs(const MachineFunction &MF) const override; void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td index 93d7c83..47ac20d 100644 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ b/lib/Target/SystemZ/SystemZRegisterInfo.td @@ -119,6 +119,29 @@ defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>; // Floating-point registers //===----------------------------------------------------------------------===// +// Maps FPR register numbers to their DWARF encoding. 
+class DwarfMapping<int id> { int Id = id; } + +def F0Dwarf : DwarfMapping<16>; +def F2Dwarf : DwarfMapping<17>; +def F4Dwarf : DwarfMapping<18>; +def F6Dwarf : DwarfMapping<19>; + +def F1Dwarf : DwarfMapping<20>; +def F3Dwarf : DwarfMapping<21>; +def F5Dwarf : DwarfMapping<22>; +def F7Dwarf : DwarfMapping<23>; + +def F8Dwarf : DwarfMapping<24>; +def F10Dwarf : DwarfMapping<25>; +def F12Dwarf : DwarfMapping<26>; +def F14Dwarf : DwarfMapping<27>; + +def F9Dwarf : DwarfMapping<28>; +def F11Dwarf : DwarfMapping<29>; +def F13Dwarf : DwarfMapping<30>; +def F15Dwarf : DwarfMapping<31>; + // Lower 32 bits of one of the 16 64-bit floating-point registers class FPR32<bits<16> num, string n> : SystemZReg<n> { let HWEncoding = num; @@ -142,7 +165,7 @@ class FPR128<bits<16> num, string n, FPR64 low, FPR64 high> foreach I = 0-15 in { def F#I#S : FPR32<I, "f"#I>; def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>, - DwarfRegNum<[!add(I, 16)]>; + DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>; } foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp index 97abee3..a3cba64 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp @@ -18,10 +18,8 @@ using namespace llvm; #define DEBUG_TYPE "systemz-selectiondag-info" -SystemZSelectionDAGInfo:: -SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} +SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const DataLayout &DL) + : TargetSelectionDAGInfo(&DL) {} SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { } diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h index 79e7fab..e9de146 100644 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h @@ -22,7 +22,7 @@ class SystemZTargetMachine; class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM); + explicit SystemZSelectionDAGInfo(const DataLayout &DL); ~SystemZSelectionDAGInfo(); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc DL, SDValue Chain, diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp index a011157..e160bc8 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ b/lib/Target/SystemZ/SystemZSubtarget.cpp @@ -20,16 +20,11 @@ using namespace llvm; #define GET_SUBTARGETINFO_CTOR #include "SystemZGenSubtargetInfo.inc" -// Pin the vtabel to this file. +// Pin the vtable to this file. void SystemZSubtarget::anchor() {} -SystemZSubtarget::SystemZSubtarget(const std::string &TT, - const std::string &CPU, - const std::string &FS) - : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), - HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), - HasFastSerialization(false), HasInterlockedAccess1(false), - TargetTriple(TT) { +SystemZSubtarget & +SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { std::string CPUName = CPU; if (CPUName.empty()) CPUName = "generic"; @@ -37,11 +32,26 @@ SystemZSubtarget::SystemZSubtarget(const std::string &TT, if (CPUName == "generic") CPUName = sys::getHostCPUName(); #endif - // Parse features string. 
ParseSubtargetFeatures(CPUName, FS); + return *this; } +SystemZSubtarget::SystemZSubtarget(const std::string &TT, + const std::string &CPU, + const std::string &FS, + const TargetMachine &TM) + : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), + HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), + HasFastSerialization(false), HasInterlockedAccess1(false), + TargetTriple(TT), + // Make sure that global data has at least 16 bits of alignment by + // default, so that we can refer to it using LARL. We don't have any + // special requirements for stack variables though. + DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM), + TSInfo(DL), FrameLowering() {} + // Return true if GV binds locally under reloc model RM. static bool bindsLocally(const GlobalValue *GV, Reloc::Model RM) { // For non-PIC, all symbols bind locally. diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h index ffca2d8..4e8c710 100644 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ b/lib/Target/SystemZ/SystemZSubtarget.h @@ -14,6 +14,12 @@ #ifndef SYSTEMZSUBTARGET_H #define SYSTEMZSUBTARGET_H +#include "SystemZFrameLowering.h" +#include "SystemZISelLowering.h" +#include "SystemZInstrInfo.h" +#include "SystemZRegisterInfo.h" +#include "SystemZSelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" #include "llvm/ADT/Triple.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -37,10 +43,26 @@ protected: private: Triple TargetTriple; - + const DataLayout DL; + SystemZInstrInfo InstrInfo; + SystemZTargetLowering TLInfo; + SystemZSelectionDAGInfo TSInfo; + SystemZFrameLowering FrameLowering; + + SystemZSubtarget &initializeSubtargetDependencies(StringRef CPU, + StringRef FS); public: SystemZSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); + const std::string &FS, const TargetMachine &TM); + + const TargetFrameLowering *getFrameLowering() const { return &FrameLowering; } + const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; } + const DataLayout *getDataLayout() const { return &DL; } + const SystemZRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + const SystemZTargetLowering *getTargetLowering() const { return &TLInfo; } + const TargetSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } // This is important for reducing register pressure in vector code. bool useAA() const override { return true; } diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 4c9ce29..0122e99 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -22,17 +22,10 @@ extern "C" void LLVMInitializeSystemZTarget() { SystemZTargetMachine::SystemZTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, - Reloc::Model RM, - CodeModel::Model CM, + Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), - // Make sure that global data has at least 16 bits of alignment by default, - // so that we can refer to it using LARL. We don't have any special - // requirements for stack variables though. 
- DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(*this, Subtarget) { + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } @@ -65,7 +58,8 @@ bool SystemZPassConfig::addInstSelector() { } bool SystemZPassConfig::addPreSched2() { - if (getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond()) + if (getOptLevel() != CodeGenOpt::None && + getSystemZTargetMachine().getSubtargetImpl()->hasLoadStoreOnCond()) addPass(&IfConverterID); return true; } diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 1db717b..ded07e9 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -15,25 +15,15 @@ #ifndef SYSTEMZTARGETMACHINE_H #define SYSTEMZTARGETMACHINE_H -#include "SystemZFrameLowering.h" -#include "SystemZISelLowering.h" -#include "SystemZInstrInfo.h" -#include "SystemZRegisterInfo.h" -#include "SystemZSelectionDAGInfo.h" #include "SystemZSubtarget.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" namespace llvm { +class TargetFrameLowering; + class SystemZTargetMachine : public LLVMTargetMachine { SystemZSubtarget Subtarget; - const DataLayout DL; - SystemZInstrInfo InstrInfo; - SystemZTargetLowering TLInfo; - SystemZSelectionDAGInfo TSInfo; - SystemZFrameLowering FrameLowering; public: SystemZTargetMachine(const Target &T, StringRef TT, StringRef CPU, @@ -43,25 +33,25 @@ public: // Override TargetMachine. const TargetFrameLowering *getFrameLowering() const override { - return &FrameLowering; + return getSubtargetImpl()->getFrameLowering(); } const SystemZInstrInfo *getInstrInfo() const override { - return &InstrInfo; + return getSubtargetImpl()->getInstrInfo(); } const SystemZSubtarget *getSubtargetImpl() const override { return &Subtarget; } const DataLayout *getDataLayout() const override { - return &DL; + return getSubtargetImpl()->getDataLayout(); } const SystemZRegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); + return getSubtargetImpl()->getRegisterInfo(); } const SystemZTargetLowering *getTargetLowering() const override { - return &TLInfo; + return getSubtargetImpl()->getTargetLowering(); } const TargetSelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; + return getSubtargetImpl()->getSelectionDAGInfo(); } // Override LLVMTargetMachine diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 8365f64..95c8cb6 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -88,8 +88,8 @@ CodeModel::Model TargetMachine::getCodeModel() const { } /// Get the IR-specified TLS model for Var. -static TLSModel::Model getSelectedTLSModel(const GlobalVariable *Var) { - switch (Var->getThreadLocalMode()) { +static TLSModel::Model getSelectedTLSModel(const GlobalValue *GV) { + switch (GV->getThreadLocalMode()) { case GlobalVariable::NotThreadLocal: llvm_unreachable("getSelectedTLSModel for non-TLS variable"); break; @@ -127,13 +127,10 @@ TLSModel::Model TargetMachine::getTLSModel(const GlobalValue *GV) const { Model = TLSModel::InitialExec; } - const GlobalVariable *Var = dyn_cast<GlobalVariable>(GV); - if (Var) { - // If the user specified a more specific model, use that. 
- TLSModel::Model SelectedModel = getSelectedTLSModel(Var); - if (SelectedModel > Model) - return SelectedModel; - } + // If the user specified a more specific model, use that. + TLSModel::Model SelectedModel = getSelectedTLSModel(GV); + if (SelectedModel > Model) + return SelectedModel; return Model; } diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp index 3ca13da..87b6b66 100644 --- a/lib/Target/TargetSubtargetInfo.cpp +++ b/lib/Target/TargetSubtargetInfo.cpp @@ -39,10 +39,23 @@ bool TargetSubtargetInfo::useMachineScheduler() const { return enableMachineScheduler(); } +bool TargetSubtargetInfo::enableAtomicExpandLoadLinked() const { + return true; +} + bool TargetSubtargetInfo::enableMachineScheduler() const { return false; } +bool TargetSubtargetInfo::enableRALocalReassignment( + CodeGenOpt::Level OptLevel) const { + return true; +} + +bool TargetSubtargetInfo::enablePostMachineScheduler() const { + return false; +} + bool TargetSubtargetInfo::enablePostRAScheduler( CodeGenOpt::Level OptLevel, AntiDepBreakMode& Mode, diff --git a/lib/Target/X86/Android.mk b/lib/Target/X86/Android.mk index 0d0a9ca..e2c4be7 100644 --- a/lib/Target/X86/Android.mk +++ b/lib/Target/X86/Android.mk @@ -12,6 +12,7 @@ x86_codegen_TBLGEN_TABLES := \ x86_codegen_SRC_FILES := \ X86AsmPrinter.cpp \ + X86AtomicExpandPass.cpp \ X86CodeEmitter.cpp \ X86FastISel.cpp \ X86FixupLEAs.cpp \ diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp index f3e6b3f..a365f62 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.cpp @@ -20,6 +20,7 @@ #include "llvm/MC/MCParser/MCParsedAsmOperand.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetAsmParser.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/Support/CommandLine.h" @@ -36,8 +37,8 @@ bool IsStackReg(unsigned Reg) { } std::string FuncName(unsigned AccessSize, bool IsWrite) { - return std::string("__sanitizer_sanitize_") + (IsWrite ? "store" : "load") + - (utostr(AccessSize)); + return std::string("__asan_report_") + (IsWrite ? "store" : "load") + + utostr(AccessSize); } class X86AddressSanitizer : public X86AsmInstrumentation { @@ -47,47 +48,55 @@ public: // X86AsmInstrumentation implementation: virtual void InstrumentInstruction( - const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands, - MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) override { + const MCInst &Inst, OperandVector &Operands, MCContext &Ctx, + const MCInstrInfo &MII, MCStreamer &Out) override { InstrumentMOV(Inst, Operands, Ctx, MII, Out); } // Should be implemented differently in x86_32 and x86_64 subclasses. 
- virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize, - bool IsWrite, MCContext &Ctx, - MCStreamer &Out) = 0; + virtual void InstrumentMemOperandSmallImpl( + X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, + MCStreamer &Out) = 0; + virtual void InstrumentMemOperandLargeImpl( + X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, + MCStreamer &Out) = 0; - void InstrumentMemOperand(MCParsedAsmOperand *Op, unsigned AccessSize, + void InstrumentMemOperand(MCParsedAsmOperand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, MCStreamer &Out); - void InstrumentMOV(const MCInst &Inst, - SmallVectorImpl<MCParsedAsmOperand *> &Operands, + void InstrumentMOV(const MCInst &Inst, OperandVector &Operands, MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out); void EmitInstruction(MCStreamer &Out, const MCInst &Inst) { Out.EmitInstruction(Inst, STI); } + void EmitLabel(MCStreamer &Out, MCSymbol *Label) { Out.EmitLabel(Label); } + protected: const MCSubtargetInfo &STI; }; void X86AddressSanitizer::InstrumentMemOperand( - MCParsedAsmOperand *Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, + MCParsedAsmOperand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, MCStreamer &Out) { - assert(Op && Op->isMem() && "Op should be a memory operand."); + assert(Op.isMem() && "Op should be a memory operand."); assert((AccessSize & (AccessSize - 1)) == 0 && AccessSize <= 16 && "AccessSize should be a power of two, less or equal than 16."); - X86Operand *MemOp = static_cast<X86Operand *>(Op); + X86Operand &MemOp = static_cast<X86Operand &>(Op); // FIXME: get rid of this limitation. - if (IsStackReg(MemOp->getMemBaseReg()) || IsStackReg(MemOp->getMemIndexReg())) + if (IsStackReg(MemOp.getMemBaseReg()) || IsStackReg(MemOp.getMemIndexReg())) return; - InstrumentMemOperandImpl(MemOp, AccessSize, IsWrite, Ctx, Out); + // FIXME: take into account load/store alignment. + if (AccessSize < 8) + InstrumentMemOperandSmallImpl(MemOp, AccessSize, IsWrite, Ctx, Out); + else + InstrumentMemOperandLargeImpl(MemOp, AccessSize, IsWrite, Ctx, Out); } void X86AddressSanitizer::InstrumentMOV( - const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands, - MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) { + const MCInst &Inst, OperandVector &Operands, MCContext &Ctx, + const MCInstrInfo &MII, MCStreamer &Out) { // Access size in bytes. 
unsigned AccessSize = 0; @@ -124,107 +133,351 @@ void X86AddressSanitizer::InstrumentMOV( const bool IsWrite = MII.get(Inst.getOpcode()).mayStore(); for (unsigned Ix = 0; Ix < Operands.size(); ++Ix) { - MCParsedAsmOperand *Op = Operands[Ix]; - if (Op && Op->isMem()) + assert(Operands[Ix]); + MCParsedAsmOperand &Op = *Operands[Ix]; + if (Op.isMem()) InstrumentMemOperand(Op, AccessSize, IsWrite, Ctx, Out); } } class X86AddressSanitizer32 : public X86AddressSanitizer { public: + static const long kShadowOffset = 0x20000000; + X86AddressSanitizer32(const MCSubtargetInfo &STI) : X86AddressSanitizer(STI) {} virtual ~X86AddressSanitizer32() {} - virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize, - bool IsWrite, MCContext &Ctx, - MCStreamer &Out) override; + virtual void InstrumentMemOperandSmallImpl( + X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, + MCStreamer &Out) override; + virtual void InstrumentMemOperandLargeImpl( + X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, + MCStreamer &Out) override; + + private: + void EmitCallAsanReport(MCContext &Ctx, MCStreamer &Out, unsigned AccessSize, + bool IsWrite, unsigned AddressReg) { + EmitInstruction(Out, MCInstBuilder(X86::CLD)); + EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS)); + + EmitInstruction(Out, MCInstBuilder(X86::AND64ri8).addReg(X86::ESP) + .addReg(X86::ESP).addImm(-16)); + EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(AddressReg)); + + + const std::string& Fn = FuncName(AccessSize, IsWrite); + MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn)); + const MCSymbolRefExpr *FnExpr = + MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); + EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FnExpr)); + } }; -void X86AddressSanitizer32::InstrumentMemOperandImpl( - X86Operand *Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, +void X86AddressSanitizer32::InstrumentMemOperandSmallImpl( + X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, MCStreamer &Out) { - // FIXME: emit .cfi directives for correct stack unwinding. 
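// Aside: what the instruction sequences emitted below compute, written as
// plain C++ (a sketch, not from the patch). Every 8 application bytes map to
// one shadow byte at (Addr >> 3) + kShadowOffset -- 0x20000000 here, while
// the 64-bit instrumenter further down uses 0x7fff8000. 1/2/4-byte accesses
// take the "small" path, 8/16-byte accesses the "large" path, as dispatched
// in InstrumentMemOperand above; reportError stands in for the
// __asan_report_{load,store}N runtime entry points.
#include <cstdint>
#include <cstdlib>

static void reportError(uintptr_t Addr) { (void)Addr; std::abort(); }

static void checkSmall(uintptr_t Addr, unsigned AccessSize, uintptr_t Offset) {
  int8_t Shadow = *reinterpret_cast<int8_t *>((Addr >> 3) + Offset);
  if (Shadow == 0)
    return; // the whole 8-byte granule is addressable
  // Offset of the access's last byte within its granule, compared with how
  // many leading bytes of the granule the shadow value marks as valid.
  if (int8_t((Addr & 7) + AccessSize - 1) >= Shadow)
    reportError(Addr);
}

static void checkLarge(uintptr_t Addr, unsigned AccessSize, uintptr_t Offset) {
  // 8- and 16-byte accesses cover whole granules: any nonzero shadow is bad.
  bool Bad = AccessSize == 8
                 ? *reinterpret_cast<int8_t *>((Addr >> 3) + Offset) != 0
                 : *reinterpret_cast<int16_t *>((Addr >> 3) + Offset) != 0;
  if (Bad)
    reportError(Addr);
}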
EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EAX)); + EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::ECX)); + EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EDX)); + EmitInstruction(Out, MCInstBuilder(X86::PUSHF32)); + { MCInst Inst; Inst.setOpcode(X86::LEA32r); Inst.addOperand(MCOperand::CreateReg(X86::EAX)); + Op.addMemOperands(Inst, 5); + EmitInstruction(Out, Inst); + } + + EmitInstruction( + Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EAX)); + EmitInstruction(Out, MCInstBuilder(X86::SHR32ri).addReg(X86::ECX) + .addReg(X86::ECX).addImm(3)); + + { + MCInst Inst; + Inst.setOpcode(X86::MOV8rm); + Inst.addOperand(MCOperand::CreateReg(X86::CL)); + const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); + std::unique_ptr<X86Operand> Op( + X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc())); + Op->addMemOperands(Inst, 5); + EmitInstruction(Out, Inst); + } + + EmitInstruction(Out, + MCInstBuilder(X86::TEST8rr).addReg(X86::CL).addReg(X86::CL)); + MCSymbol *DoneSym = Ctx.CreateTempSymbol(); + const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr)); + + EmitInstruction( + Out, MCInstBuilder(X86::MOV32rr).addReg(X86::EDX).addReg(X86::EAX)); + EmitInstruction(Out, MCInstBuilder(X86::AND32ri).addReg(X86::EDX) + .addReg(X86::EDX).addImm(7)); + + switch (AccessSize) { + case 1: + break; + case 2: { + MCInst Inst; + Inst.setOpcode(X86::LEA32r); + Inst.addOperand(MCOperand::CreateReg(X86::EDX)); + + const MCExpr *Disp = MCConstantExpr::Create(1, Ctx); + std::unique_ptr<X86Operand> Op( + X86Operand::CreateMem(0, Disp, X86::EDX, 0, 1, SMLoc(), SMLoc())); Op->addMemOperands(Inst, 5); EmitInstruction(Out, Inst); + break; } + case 4: + EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8).addReg(X86::EDX) + .addReg(X86::EDX).addImm(3)); + break; + default: + assert(false && "Incorrect access size"); + break; + } + + EmitInstruction( + Out, MCInstBuilder(X86::MOVSX32rr8).addReg(X86::ECX).addReg(X86::CL)); + EmitInstruction( + Out, MCInstBuilder(X86::CMP32rr).addReg(X86::EDX).addReg(X86::ECX)); + EmitInstruction(Out, MCInstBuilder(X86::JL_4).addExpr(DoneExpr)); + + EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite, X86::EAX); + EmitLabel(Out, DoneSym); + + EmitInstruction(Out, MCInstBuilder(X86::POPF32)); + EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EDX)); + EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::ECX)); + EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX)); +} + +void X86AddressSanitizer32::InstrumentMemOperandLargeImpl( + X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, + MCStreamer &Out) { EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::EAX)); + EmitInstruction(Out, MCInstBuilder(X86::PUSH32r).addReg(X86::ECX)); + EmitInstruction(Out, MCInstBuilder(X86::PUSHF32)); + { - const std::string Func = FuncName(AccessSize, IsWrite); - const MCSymbol *FuncSym = Ctx.GetOrCreateSymbol(StringRef(Func)); - const MCSymbolRefExpr *FuncExpr = - MCSymbolRefExpr::Create(FuncSym, MCSymbolRefExpr::VK_PLT, Ctx); - EmitInstruction(Out, MCInstBuilder(X86::CALLpcrel32).addExpr(FuncExpr)); + MCInst Inst; + Inst.setOpcode(X86::LEA32r); + Inst.addOperand(MCOperand::CreateReg(X86::EAX)); + Op.addMemOperands(Inst, 5); + EmitInstruction(Out, Inst); } - EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX)); + EmitInstruction( + Out, 
MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EAX)); + EmitInstruction(Out, MCInstBuilder(X86::SHR32ri).addReg(X86::ECX) + .addReg(X86::ECX).addImm(3)); + { + MCInst Inst; + switch (AccessSize) { + case 8: + Inst.setOpcode(X86::CMP8mi); + break; + case 16: + Inst.setOpcode(X86::CMP16mi); + break; + default: + assert(false && "Incorrect access size"); + break; + } + const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); + std::unique_ptr<X86Operand> Op( + X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc())); + Op->addMemOperands(Inst, 5); + Inst.addOperand(MCOperand::CreateImm(0)); + EmitInstruction(Out, Inst); + } + MCSymbol *DoneSym = Ctx.CreateTempSymbol(); + const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr)); + + EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite, X86::EAX); + EmitLabel(Out, DoneSym); + + EmitInstruction(Out, MCInstBuilder(X86::POPF32)); + EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::ECX)); EmitInstruction(Out, MCInstBuilder(X86::POP32r).addReg(X86::EAX)); } class X86AddressSanitizer64 : public X86AddressSanitizer { public: + static const long kShadowOffset = 0x7fff8000; + X86AddressSanitizer64(const MCSubtargetInfo &STI) : X86AddressSanitizer(STI) {} virtual ~X86AddressSanitizer64() {} - virtual void InstrumentMemOperandImpl(X86Operand *Op, unsigned AccessSize, - bool IsWrite, MCContext &Ctx, - MCStreamer &Out) override; -}; + virtual void InstrumentMemOperandSmallImpl( + X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, + MCStreamer &Out) override; + virtual void InstrumentMemOperandLargeImpl( + X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, + MCStreamer &Out) override; -void X86AddressSanitizer64::InstrumentMemOperandImpl(X86Operand *Op, - unsigned AccessSize, - bool IsWrite, - MCContext &Ctx, - MCStreamer &Out) { - // FIXME: emit .cfi directives for correct stack unwinding. - - // Set %rsp below current red zone (128 bytes wide) using LEA instruction to - // preserve flags. 
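// Aside: the comment deleted above still explains what the new EmitAdjustRSP
// helper in the next hunk does. x86-64 leaf code may keep live data in the
// 128-byte red zone below %rsp, so the instrumentation first steps past it,
// and it uses LEA rather than SUB/ADD because LEA does not write EFLAGS --
// the program's flags are only saved later, by PUSHF64. In assembler terms:
//   lea rsp, [rsp - 128]   ; enter: skip the red zone, flags preserved
//   ...push registers, pushfq, do the shadow check...
//   lea rsp, [rsp + 128]   ; leave: restore %rsp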
- { +private: + void EmitAdjustRSP(MCContext &Ctx, MCStreamer &Out, long Offset) { MCInst Inst; Inst.setOpcode(X86::LEA64r); Inst.addOperand(MCOperand::CreateReg(X86::RSP)); - const MCExpr *Disp = MCConstantExpr::Create(-128, Ctx); + const MCExpr *Disp = MCConstantExpr::Create(Offset, Ctx); std::unique_ptr<X86Operand> Op( X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc())); Op->addMemOperands(Inst, 5); EmitInstruction(Out, Inst); } + + void EmitCallAsanReport(MCContext &Ctx, MCStreamer &Out, unsigned AccessSize, + bool IsWrite) { + EmitInstruction(Out, MCInstBuilder(X86::CLD)); + EmitInstruction(Out, MCInstBuilder(X86::MMX_EMMS)); + + EmitInstruction(Out, MCInstBuilder(X86::AND64ri8).addReg(X86::RSP) + .addReg(X86::RSP).addImm(-16)); + + const std::string& Fn = FuncName(AccessSize, IsWrite); + MCSymbol *FnSym = Ctx.GetOrCreateSymbol(StringRef(Fn)); + const MCSymbolRefExpr *FnExpr = + MCSymbolRefExpr::Create(FnSym, MCSymbolRefExpr::VK_PLT, Ctx); + EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FnExpr)); + } +}; + +void X86AddressSanitizer64::InstrumentMemOperandSmallImpl( + X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, + MCStreamer &Out) { + EmitAdjustRSP(Ctx, Out, -128); + EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RAX)); + EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RCX)); EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RDI)); + EmitInstruction(Out, MCInstBuilder(X86::PUSHF64)); { MCInst Inst; Inst.setOpcode(X86::LEA64r); Inst.addOperand(MCOperand::CreateReg(X86::RDI)); - Op->addMemOperands(Inst, 5); + Op.addMemOperands(Inst, 5); EmitInstruction(Out, Inst); } + EmitInstruction( + Out, MCInstBuilder(X86::MOV64rr).addReg(X86::RAX).addReg(X86::RDI)); + EmitInstruction(Out, MCInstBuilder(X86::SHR64ri).addReg(X86::RAX) + .addReg(X86::RAX).addImm(3)); { - const std::string Func = FuncName(AccessSize, IsWrite); - const MCSymbol *FuncSym = Ctx.GetOrCreateSymbol(StringRef(Func)); - const MCSymbolRefExpr *FuncExpr = - MCSymbolRefExpr::Create(FuncSym, MCSymbolRefExpr::VK_PLT, Ctx); - EmitInstruction(Out, MCInstBuilder(X86::CALL64pcrel32).addExpr(FuncExpr)); + MCInst Inst; + Inst.setOpcode(X86::MOV8rm); + Inst.addOperand(MCOperand::CreateReg(X86::AL)); + const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); + std::unique_ptr<X86Operand> Op( + X86Operand::CreateMem(0, Disp, X86::RAX, 0, 1, SMLoc(), SMLoc())); + Op->addMemOperands(Inst, 5); + EmitInstruction(Out, Inst); + } + + EmitInstruction(Out, + MCInstBuilder(X86::TEST8rr).addReg(X86::AL).addReg(X86::AL)); + MCSymbol *DoneSym = Ctx.CreateTempSymbol(); + const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr)); + + EmitInstruction( + Out, MCInstBuilder(X86::MOV32rr).addReg(X86::ECX).addReg(X86::EDI)); + EmitInstruction(Out, MCInstBuilder(X86::AND32ri).addReg(X86::ECX) + .addReg(X86::ECX).addImm(7)); + + switch (AccessSize) { + case 1: + break; + case 2: { + MCInst Inst; + Inst.setOpcode(X86::LEA32r); + Inst.addOperand(MCOperand::CreateReg(X86::ECX)); + + const MCExpr *Disp = MCConstantExpr::Create(1, Ctx); + std::unique_ptr<X86Operand> Op( + X86Operand::CreateMem(0, Disp, X86::ECX, 0, 1, SMLoc(), SMLoc())); + Op->addMemOperands(Inst, 5); + EmitInstruction(Out, Inst); + break; } + case 4: + EmitInstruction(Out, MCInstBuilder(X86::ADD32ri8).addReg(X86::ECX) + .addReg(X86::ECX).addImm(3)); + break; + default: + assert(false && "Incorrect access size"); + break; + } + + 
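// Note: at this point in the emitted 64-bit sequence ECX holds
// (Addr & 7) + AccessSize - 1, the offset of the access's last byte within
// its 8-byte shadow granule; the MOVSX/CMP/JL_4 that follow let the access
// through only while that offset stays below the sign-extended shadow byte
// in AL, mirroring the 32-bit small-access path above.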
EmitInstruction( + Out, MCInstBuilder(X86::MOVSX32rr8).addReg(X86::EAX).addReg(X86::AL)); + EmitInstruction( + Out, MCInstBuilder(X86::CMP32rr).addReg(X86::ECX).addReg(X86::EAX)); + EmitInstruction(Out, MCInstBuilder(X86::JL_4).addExpr(DoneExpr)); + + EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite); + EmitLabel(Out, DoneSym); + + EmitInstruction(Out, MCInstBuilder(X86::POPF64)); EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RDI)); + EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RCX)); + EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RAX)); + EmitAdjustRSP(Ctx, Out, 128); +} + +void X86AddressSanitizer64::InstrumentMemOperandLargeImpl( + X86Operand &Op, unsigned AccessSize, bool IsWrite, MCContext &Ctx, + MCStreamer &Out) { + EmitAdjustRSP(Ctx, Out, -128); + EmitInstruction(Out, MCInstBuilder(X86::PUSH64r).addReg(X86::RAX)); + EmitInstruction(Out, MCInstBuilder(X86::PUSHF64)); - // Restore old %rsp value. { MCInst Inst; Inst.setOpcode(X86::LEA64r); - Inst.addOperand(MCOperand::CreateReg(X86::RSP)); - - const MCExpr *Disp = MCConstantExpr::Create(128, Ctx); + Inst.addOperand(MCOperand::CreateReg(X86::RAX)); + Op.addMemOperands(Inst, 5); + EmitInstruction(Out, Inst); + } + EmitInstruction(Out, MCInstBuilder(X86::SHR64ri).addReg(X86::RAX) + .addReg(X86::RAX).addImm(3)); + { + MCInst Inst; + switch (AccessSize) { + case 8: + Inst.setOpcode(X86::CMP8mi); + break; + case 16: + Inst.setOpcode(X86::CMP16mi); + break; + default: + assert(false && "Incorrect access size"); + break; + } + const MCExpr *Disp = MCConstantExpr::Create(kShadowOffset, Ctx); std::unique_ptr<X86Operand> Op( - X86Operand::CreateMem(0, Disp, X86::RSP, 0, 1, SMLoc(), SMLoc())); + X86Operand::CreateMem(0, Disp, X86::RAX, 0, 1, SMLoc(), SMLoc())); Op->addMemOperands(Inst, 5); + Inst.addOperand(MCOperand::CreateImm(0)); EmitInstruction(Out, Inst); } + + MCSymbol *DoneSym = Ctx.CreateTempSymbol(); + const MCExpr *DoneExpr = MCSymbolRefExpr::Create(DoneSym, Ctx); + EmitInstruction(Out, MCInstBuilder(X86::JE_4).addExpr(DoneExpr)); + + EmitCallAsanReport(Ctx, Out, AccessSize, IsWrite); + EmitLabel(Out, DoneSym); + + EmitInstruction(Out, MCInstBuilder(X86::POPF64)); + EmitInstruction(Out, MCInstBuilder(X86::POP64r).addReg(X86::RAX)); + EmitAdjustRSP(Ctx, Out, 128); } } // End anonymous namespace @@ -233,8 +486,8 @@ X86AsmInstrumentation::X86AsmInstrumentation() {} X86AsmInstrumentation::~X86AsmInstrumentation() {} void X86AsmInstrumentation::InstrumentInstruction( - const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands, - MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out) {} + const MCInst &Inst, OperandVector &Operands, MCContext &Ctx, + const MCInstrInfo &MII, MCStreamer &Out) {} X86AsmInstrumentation * CreateX86AsmInstrumentation(const MCTargetOptions &MCOptions, diff --git a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h index 0369b14..1bc3c09 100644 --- a/lib/Target/X86/AsmParser/X86AsmInstrumentation.h +++ b/lib/Target/X86/AsmParser/X86AsmInstrumentation.h @@ -12,6 +12,8 @@ #include "llvm/ADT/SmallVector.h" +#include <memory> + namespace llvm { class MCContext; @@ -35,10 +37,9 @@ public: // Instruments Inst. Should be called just before the original // instruction is sent to Out. 
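// Note: OperandVector, used throughout the instrumentation changes above,
// comes from the newly included llvm/MC/MCTargetAsmParser.h and is, in this
// release, a typedef along these lines -- which is why the header below can
// equivalently spell the parameter type out in full:
//   typedef SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> OperandVector;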
virtual void InstrumentInstruction( - const MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands, - MCContext &Ctx, - const MCInstrInfo &MII, - MCStreamer &Out); + const MCInst &Inst, + SmallVectorImpl<std::unique_ptr<MCParsedAsmOperand>> &Operands, + MCContext &Ctx, const MCInstrInfo &MII, MCStreamer &Out); protected: friend X86AsmInstrumentation * diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index d3e695e..f0765ed 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -235,6 +235,7 @@ private: IES_RSHIFT, IES_PLUS, IES_MINUS, + IES_NOT, IES_MULTIPLY, IES_DIVIDE, IES_LBRAC, @@ -372,6 +373,7 @@ private: State = IES_ERROR; break; case IES_PLUS: + case IES_NOT: case IES_MULTIPLY: case IES_DIVIDE: case IES_LPAREN: @@ -401,6 +403,19 @@ private: } PrevState = CurrState; } + void onNot() { + IntelExprState CurrState = State; + switch (State) { + default: + State = IES_ERROR; + break; + case IES_PLUS: + case IES_NOT: + State = IES_NOT; + break; + } + PrevState = CurrState; + } void onRegister(unsigned Reg) { IntelExprState CurrState = State; switch (State) { @@ -438,6 +453,7 @@ private: break; case IES_PLUS: case IES_MINUS: + case IES_NOT: State = IES_INTEGER; Sym = SymRef; SymName = SymRefName; @@ -453,6 +469,7 @@ private: break; case IES_PLUS: case IES_MINUS: + case IES_NOT: case IES_OR: case IES_AND: case IES_LSHIFT: @@ -476,11 +493,22 @@ private: PrevState == IES_OR || PrevState == IES_AND || PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || - PrevState == IES_LPAREN || PrevState == IES_LBRAC) && + PrevState == IES_LPAREN || PrevState == IES_LBRAC || + PrevState == IES_NOT) && CurrState == IES_MINUS) { // Unary minus. No need to pop the minus operand because it was never // pushed. IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm. + } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS || + PrevState == IES_OR || PrevState == IES_AND || + PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || + PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || + PrevState == IES_LPAREN || PrevState == IES_LBRAC || + PrevState == IES_NOT) && + CurrState == IES_NOT) { + // Unary not. No need to pop the not operand because it was never + // pushed. + IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm. } else { IC.pushOperand(IC_IMM, TmpInt); } @@ -561,6 +589,7 @@ private: break; case IES_PLUS: case IES_MINUS: + case IES_NOT: case IES_OR: case IES_AND: case IES_LSHIFT: @@ -568,13 +597,14 @@ private: case IES_MULTIPLY: case IES_DIVIDE: case IES_LPAREN: - // FIXME: We don't handle this type of unary minus, yet. + // FIXME: We don't handle this type of unary minus or not, yet. 
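// Aside: the net effect of the IES_NOT plumbing above is that '~' acts as a
// unary prefix which is folded away as soon as the integer literal arrives
// (the "IC.pushOperand(IC_IMM, ~TmpInt)" case), so no operator node is ever
// pushed; e.g. "mov eax, ~15" should now parse with an immediate of -16.
// Reduced model of the folding step:
#include <cassert>
#include <cstdint>

static int64_t onInteger(bool PrevWasNot, int64_t TmpInt) {
  return PrevWasNot ? ~TmpInt : TmpInt; // fold at the literal
}

int main() {
  assert(onInteger(true, 15) == -16);
  assert(onInteger(false, 15) == 15);
  return 0;
}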
if ((PrevState == IES_PLUS || PrevState == IES_MINUS || PrevState == IES_OR || PrevState == IES_AND || PrevState == IES_LSHIFT || PrevState == IES_RSHIFT || PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE || - PrevState == IES_LPAREN || PrevState == IES_LBRAC) && - CurrState == IES_MINUS) { + PrevState == IES_LPAREN || PrevState == IES_LBRAC || + PrevState == IES_NOT) && + (CurrState == IES_MINUS || CurrState == IES_NOT)) { State = IES_ERROR; break; } @@ -618,52 +648,52 @@ private: return Error(L, Msg, Ranges, MatchingInlineAsm); } - X86Operand *ErrorOperand(SMLoc Loc, StringRef Msg) { + std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) { Error(Loc, Msg); return nullptr; } - X86Operand *DefaultMemSIOperand(SMLoc Loc); - X86Operand *DefaultMemDIOperand(SMLoc Loc); - X86Operand *ParseOperand(); - X86Operand *ParseATTOperand(); - X86Operand *ParseIntelOperand(); - X86Operand *ParseIntelOffsetOfOperator(); + std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc); + std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc); + std::unique_ptr<X86Operand> ParseOperand(); + std::unique_ptr<X86Operand> ParseATTOperand(); + std::unique_ptr<X86Operand> ParseIntelOperand(); + std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator(); bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp); - X86Operand *ParseIntelOperator(unsigned OpKind); - X86Operand *ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size); - X86Operand *ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, - unsigned Size); + std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind); + std::unique_ptr<X86Operand> + ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size); + std::unique_ptr<X86Operand> + ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size); bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End); - X86Operand *ParseIntelBracExpression(unsigned SegReg, SMLoc Start, - int64_t ImmDisp, unsigned Size); + std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg, + SMLoc Start, + int64_t ImmDisp, + unsigned Size); bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info, bool IsUnevaluatedOperand, SMLoc &End); - X86Operand *ParseMemOperand(unsigned SegReg, SMLoc StartLoc); + std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc); - X86Operand *CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, - unsigned BaseReg, unsigned IndexReg, - unsigned Scale, SMLoc Start, SMLoc End, - unsigned Size, StringRef Identifier, - InlineAsmIdentifierInfo &Info); + std::unique_ptr<X86Operand> + CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, + unsigned IndexReg, unsigned Scale, SMLoc Start, + SMLoc End, unsigned Size, StringRef Identifier, + InlineAsmIdentifierInfo &Info); bool ParseDirectiveWord(unsigned Size, SMLoc L); bool ParseDirectiveCode(StringRef IDVal, SMLoc L); - bool processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops); + bool processInstruction(MCInst &Inst, const OperandVector &Ops); /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds /// instrumentation around Inst. 
- void EmitInstruction(MCInst &Inst, - SmallVectorImpl<MCParsedAsmOperand *> &Operands, - MCStreamer &Out); + void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out); bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, + OperandVector &Operands, MCStreamer &Out, + unsigned &ErrorInfo, bool MatchingInlineAsm) override; /// doSrcDstMatch - Returns true if operands are matching in their @@ -674,8 +704,8 @@ private: /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z}) /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required. /// \return \c true if no parsing errors occurred, \c false otherwise. - bool HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - const MCParsedAsmOperand &Op); + bool HandleAVX512Operand(OperandVector &Operands, + const MCParsedAsmOperand &Op); bool is64BitMode() const { // FIXME: Can tablegen auto-generate this? @@ -725,9 +755,8 @@ public: bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; - bool - ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) override; + bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) override; bool ParseDirective(AsmToken DirectiveID) override; }; @@ -908,7 +937,7 @@ bool X86AsmParser::ParseRegister(unsigned &RegNo, return false; } -X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { +std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { unsigned basereg = is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI); const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); @@ -916,7 +945,7 @@ X86Operand *X86AsmParser::DefaultMemSIOperand(SMLoc Loc) { /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0); } -X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { +std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { unsigned basereg = is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI); const MCExpr *Disp = MCConstantExpr::Create(0, getContext()); @@ -924,7 +953,7 @@ X86Operand *X86AsmParser::DefaultMemDIOperand(SMLoc Loc) { /*IndexReg=*/0, /*Scale=*/1, Loc, Loc, 0); } -X86Operand *X86AsmParser::ParseOperand() { +std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() { if (isParsingIntelSyntax()) return ParseIntelOperand(); return ParseATTOperand(); @@ -946,12 +975,10 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { return Size; } -X86Operand * -X86AsmParser::CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, - unsigned BaseReg, unsigned IndexReg, - unsigned Scale, SMLoc Start, SMLoc End, - unsigned Size, StringRef Identifier, - InlineAsmIdentifierInfo &Info){ +std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm( + unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg, + unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier, + InlineAsmIdentifierInfo &Info) { // If this is not a VarDecl then assume it is a FuncDecl or some other label // reference. We need an 'r' constraint here, so we need to create register // operand to ensure proper matching. 
Just pick a GPR based on the size of @@ -1064,7 +1091,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac) break; - switch (getLexer().getKind()) { + AsmToken::TokenKind TK = getLexer().getKind(); + switch (TK) { default: { if (SM.isValidEndState()) { Done = true; @@ -1076,13 +1104,14 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { Done = true; break; } + case AsmToken::String: case AsmToken::Identifier: { // This could be a register or a symbolic displacement. unsigned TmpReg; const MCExpr *Val; SMLoc IdentLoc = Tok.getLoc(); StringRef Identifier = Tok.getString(); - if(!ParseRegister(TmpReg, IdentLoc, End)) { + if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) { SM.onRegister(TmpReg); UpdateLocLex = false; break; @@ -1142,6 +1171,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { } case AsmToken::Plus: SM.onPlus(); break; case AsmToken::Minus: SM.onMinus(); break; + case AsmToken::Tilde: SM.onNot(); break; case AsmToken::Star: SM.onStar(); break; case AsmToken::Slash: SM.onDivide(); break; case AsmToken::Pipe: SM.onOr(); break; @@ -1164,9 +1194,9 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) { return false; } -X86Operand *X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, - int64_t ImmDisp, - unsigned Size) { +std::unique_ptr<X86Operand> +X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start, + int64_t ImmDisp, unsigned Size) { const AsmToken &Tok = Parser.getTok(); SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc(); if (getLexer().isNot(AsmToken::LBrac)) @@ -1270,9 +1300,9 @@ bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val, } /// \brief Parse intel style segment override. -X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, - SMLoc Start, - unsigned Size) { +std::unique_ptr<X86Operand> +X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, + unsigned Size) { assert(SegReg != 0 && "Tried to parse a segment override without a segment!"); const AsmToken &Tok = Parser.getTok(); // Eat colon. if (Tok.isNot(AsmToken::Colon)) @@ -1321,8 +1351,9 @@ X86Operand *X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, } /// ParseIntelMemOperand - Parse intel style memory operand. -X86Operand *X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, SMLoc Start, - unsigned Size) { +std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp, + SMLoc Start, + unsigned Size) { const AsmToken &Tok = Parser.getTok(); SMLoc End; @@ -1425,7 +1456,7 @@ bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp, /// Parse the 'offset' operator. This operator is used to specify the /// location rather then the content of a variable. -X86Operand *X86AsmParser::ParseIntelOffsetOfOperator() { +std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() { const AsmToken &Tok = Parser.getTok(); SMLoc OffsetOfLoc = Tok.getLoc(); Parser.Lex(); // Eat offset. @@ -1462,7 +1493,7 @@ enum IntelOperatorKind { /// variable. A variable's size is the product of its LENGTH and TYPE. The /// TYPE operator returns the size of a C or C++ type or variable. If the /// variable is an array, TYPE returns the size of a single element. 
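// Aside: the relationship described in the comment above, restated for a C
// array visible to MS-style inline asm: with "int Arr[10]", LENGTH Arr is
// 10, TYPE Arr is sizeof(int), and SIZE Arr is their product. A compilable
// analogue (function names invented):
#include <cassert>
#include <cstddef>

template <typename T, std::size_t N>
constexpr std::size_t msLength(T (&)[N]) { return N; }        // LENGTH
template <typename T, std::size_t N>
constexpr std::size_t msType(T (&)[N]) { return sizeof(T); }  // TYPE
template <typename T, std::size_t N>
constexpr std::size_t msSize(T (&A)[N]) { return msLength(A) * msType(A); }

int main() {
  int Arr[10];
  assert(msLength(Arr) == 10 && msType(Arr) == sizeof(int));
  assert(msSize(Arr) == 10 * sizeof(int));
  return 0;
}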
-X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) { +std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) { const AsmToken &Tok = Parser.getTok(); SMLoc TypeLoc = Tok.getLoc(); Parser.Lex(); // Eat operator. @@ -1495,7 +1526,7 @@ X86Operand *X86AsmParser::ParseIntelOperator(unsigned OpKind) { return X86Operand::CreateImm(Imm, Start, End); } -X86Operand *X86AsmParser::ParseIntelOperand() { +std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() { const AsmToken &Tok = Parser.getTok(); SMLoc Start, End; @@ -1523,7 +1554,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { // Immediate. if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) || - getLexer().is(AsmToken::LParen)) { + getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) { AsmToken StartTok = Tok; IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true, /*AddImmPrefix=*/false); @@ -1577,7 +1608,7 @@ X86Operand *X86AsmParser::ParseIntelOperand() { return ParseIntelMemOperand(/*Disp=*/0, Start, Size); } -X86Operand *X86AsmParser::ParseATTOperand() { +std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() { switch (getLexer().getKind()) { default: // Parse a memory operand with no segment register. @@ -1613,9 +1644,8 @@ X86Operand *X86AsmParser::ParseATTOperand() { } } -bool -X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands, - const MCParsedAsmOperand &Op) { +bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands, + const MCParsedAsmOperand &Op) { if(STI.getFeatureBits() & X86::FeatureAVX512) { if (getLexer().is(AsmToken::LCurly)) { // Eat "{" and mark the current place. @@ -1653,8 +1683,8 @@ X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands } else { // Parse mask register {%k1} Operands.push_back(X86Operand::CreateToken("{", consumedToken)); - if (X86Operand *Op = ParseOperand()) { - Operands.push_back(Op); + if (std::unique_ptr<X86Operand> Op = ParseOperand()) { + Operands.push_back(std::move(Op)); if (!getLexer().is(AsmToken::RCurly)) return !ErrorAndEatStatement(getLexer().getLoc(), "Expected } at this point"); @@ -1682,7 +1712,8 @@ X86AsmParser::HandleAVX512Operand(SmallVectorImpl<MCParsedAsmOperand*> &Operands /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix /// has already been parsed if present. -X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { +std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg, + SMLoc MemStart) { // We have to disambiguate a parenthesized expression "(4+5)" from the start // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The @@ -1845,9 +1876,8 @@ X86Operand *X86AsmParser::ParseMemOperand(unsigned SegReg, SMLoc MemStart) { MemStart, MemEnd); } -bool X86AsmParser:: -ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, - SmallVectorImpl<MCParsedAsmOperand*> &Operands) { +bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name, + SMLoc NameLoc, OperandVector &Operands) { InstInfo = &Info; StringRef PatchedName = Name; @@ -1940,9 +1970,9 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, // Read the operands. 
while(1) { - if (X86Operand *Op = ParseOperand()) { - Operands.push_back(Op); - if (!HandleAVX512Operand(Operands, *Op)) + if (std::unique_ptr<X86Operand> Op = ParseOperand()) { + Operands.push_back(std::move(Op)); + if (!HandleAVX512Operand(Operands, *Operands.back())) return true; } else { Parser.eatToEndOfStatement(); @@ -1973,27 +2003,25 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, // documented form in various unofficial manuals, so a lot of code uses it. if ((Name == "outb" || Name == "outw" || Name == "outl" || Name == "out") && Operands.size() == 3) { - X86Operand &Op = *(X86Operand*)Operands.back(); + X86Operand &Op = (X86Operand &)*Operands.back(); if (Op.isMem() && Op.Mem.SegReg == 0 && isa<MCConstantExpr>(Op.Mem.Disp) && cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { SMLoc Loc = Op.getEndLoc(); Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); - delete &Op; } } // Same hack for "in[bwl]? (%dx), %al" -> "inb %dx, %al". if ((Name == "inb" || Name == "inw" || Name == "inl" || Name == "in") && Operands.size() == 3) { - X86Operand &Op = *(X86Operand*)Operands.begin()[1]; + X86Operand &Op = (X86Operand &)*Operands[1]; if (Op.isMem() && Op.Mem.SegReg == 0 && isa<MCConstantExpr>(Op.Mem.Disp) && cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 && Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) { SMLoc Loc = Op.getEndLoc(); - Operands.begin()[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); - delete &Op; + Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc); } } @@ -2060,8 +2088,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, Operands.push_back(DefaultMemSIOperand(NameLoc)); } } else if (Operands.size() == 3) { - X86Operand &Op = *(X86Operand*)Operands.begin()[1]; - X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + X86Operand &Op = (X86Operand &)*Operands[1]; + X86Operand &Op2 = (X86Operand &)*Operands[2]; if (!doSrcDstMatch(Op, Op2)) return Error(Op.getStartLoc(), "mismatching source and destination index registers"); @@ -2076,10 +2104,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, (Name == "smov" || Name == "smovb" || Name == "smovw" || Name == "smovl" || Name == "smovd" || Name == "smovq"))) { if (Operands.size() == 1) { - if (Name == "movsd") { - delete Operands.back(); + if (Name == "movsd") Operands.back() = X86Operand::CreateToken("movsl", NameLoc); - } if (isParsingIntelSyntax()) { Operands.push_back(DefaultMemDIOperand(NameLoc)); Operands.push_back(DefaultMemSIOperand(NameLoc)); @@ -2088,8 +2114,8 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, Operands.push_back(DefaultMemDIOperand(NameLoc)); } } else if (Operands.size() == 3) { - X86Operand &Op = *(X86Operand*)Operands.begin()[1]; - X86Operand &Op2 = *(X86Operand*)Operands.begin()[2]; + X86Operand &Op = (X86Operand &)*Operands[1]; + X86Operand &Op2 = (X86Operand &)*Operands[2]; if (!doSrcDstMatch(Op, Op2)) return Error(Op.getStartLoc(), "mismatching source and destination index registers"); @@ -2105,31 +2131,26 @@ ParseInstruction(ParseInstructionInfo &Info, StringRef Name, SMLoc NameLoc, Operands.size() == 3) { if (isParsingIntelSyntax()) { // Intel syntax - X86Operand *Op1 = static_cast<X86Operand*>(Operands[2]); - if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && - cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { - delete Operands[2]; + X86Operand 
&Op1 = static_cast<X86Operand &>(*Operands[2]); + if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) && + cast<MCConstantExpr>(Op1.getImm())->getValue() == 1) Operands.pop_back(); - } } else { - X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); - if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && - cast<MCConstantExpr>(Op1->getImm())->getValue() == 1) { - delete Operands[1]; + X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]); + if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) && + cast<MCConstantExpr>(Op1.getImm())->getValue() == 1) Operands.erase(Operands.begin() + 1); - } } } // Transforms "int $3" into "int3" as a size optimization. We can't write an // instalias with an immediate operand yet. if (Name == "int" && Operands.size() == 2) { - X86Operand *Op1 = static_cast<X86Operand*>(Operands[1]); - if (Op1->isImm() && isa<MCConstantExpr>(Op1->getImm()) && - cast<MCConstantExpr>(Op1->getImm())->getValue() == 3) { - delete Operands[1]; + X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]); + if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) && + cast<MCConstantExpr>(Op1.getImm())->getValue() == 3) { Operands.erase(Operands.begin() + 1); - static_cast<X86Operand*>(Operands[0])->setTokenValue("int3"); + static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3"); } } @@ -2175,9 +2196,7 @@ static bool convert64i32to64ri8(MCInst &Inst, unsigned Opcode, return convertToSExti8(Inst, Opcode, X86::RAX, isCmp); } -bool X86AsmParser:: -processInstruction(MCInst &Inst, - const SmallVectorImpl<MCParsedAsmOperand*> &Ops) { +bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) { switch (Inst.getOpcode()) { default: return false; case X86::AND16i16: return convert16i16to16ri8(Inst, X86::AND16ri8); @@ -2258,51 +2277,47 @@ processInstruction(MCInst &Inst, static const char *getSubtargetFeatureName(unsigned Val); -void X86AsmParser::EmitInstruction( - MCInst &Inst, SmallVectorImpl<MCParsedAsmOperand *> &Operands, - MCStreamer &Out) { +void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands, + MCStreamer &Out) { Instrumentation->InstrumentInstruction(Inst, Operands, getContext(), MII, Out); Out.EmitInstruction(Inst, STI); } -bool X86AsmParser:: -MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, - SmallVectorImpl<MCParsedAsmOperand*> &Operands, - MCStreamer &Out, unsigned &ErrorInfo, - bool MatchingInlineAsm) { +bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, + OperandVector &Operands, + MCStreamer &Out, unsigned &ErrorInfo, + bool MatchingInlineAsm) { assert(!Operands.empty() && "Unexpect empty operand list!"); - X86Operand *Op = static_cast<X86Operand*>(Operands[0]); - assert(Op->isToken() && "Leading operand should always be a mnemonic!"); + X86Operand &Op = static_cast<X86Operand &>(*Operands[0]); + assert(Op.isToken() && "Leading operand should always be a mnemonic!"); ArrayRef<SMRange> EmptyRanges = None; // First, handle aliases that expand to multiple instructions. // FIXME: This should be replaced with a real .td file alias mechanism. // Also, MatchInstructionImpl should actually *do* the EmitInstruction // call. 
- if (Op->getToken() == "fstsw" || Op->getToken() == "fstcw" || - Op->getToken() == "fstsww" || Op->getToken() == "fstcww" || - Op->getToken() == "finit" || Op->getToken() == "fsave" || - Op->getToken() == "fstenv" || Op->getToken() == "fclex") { + if (Op.getToken() == "fstsw" || Op.getToken() == "fstcw" || + Op.getToken() == "fstsww" || Op.getToken() == "fstcww" || + Op.getToken() == "finit" || Op.getToken() == "fsave" || + Op.getToken() == "fstenv" || Op.getToken() == "fclex") { MCInst Inst; Inst.setOpcode(X86::WAIT); Inst.setLoc(IDLoc); if (!MatchingInlineAsm) EmitInstruction(Inst, Operands, Out); - const char *Repl = - StringSwitch<const char*>(Op->getToken()) - .Case("finit", "fninit") - .Case("fsave", "fnsave") - .Case("fstcw", "fnstcw") - .Case("fstcww", "fnstcw") - .Case("fstenv", "fnstenv") - .Case("fstsw", "fnstsw") - .Case("fstsww", "fnstsw") - .Case("fclex", "fnclex") - .Default(nullptr); + const char *Repl = StringSwitch<const char *>(Op.getToken()) + .Case("finit", "fninit") + .Case("fsave", "fnsave") + .Case("fstcw", "fnstcw") + .Case("fstcww", "fnstcw") + .Case("fstenv", "fnstenv") + .Case("fstsw", "fnstsw") + .Case("fstsww", "fnstsw") + .Case("fclex", "fnclex") + .Default(nullptr); assert(Repl && "Unknown wait-prefixed instruction"); - delete Operands[0]; Operands[0] = X86Operand::CreateToken(Repl, IDLoc); } @@ -2355,11 +2370,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, // following hack. // Change the operand to point to a temporary token. - StringRef Base = Op->getToken(); + StringRef Base = Op.getToken(); SmallString<16> Tmp; Tmp += Base; Tmp += ' '; - Op->setTokenValue(Tmp.str()); + Op.setTokenValue(Tmp.str()); // If this instruction starts with an 'f', then it is a floating point stack // instruction. These come in up to three forms for 32-bit, 64-bit, and @@ -2400,7 +2415,7 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, ErrorInfoMissingFeature = ErrorInfoIgnore; // Restore the old token. - Op->setTokenValue(Base); + Op.setTokenValue(Base); // If exactly one matched, then we treat that as a successful match (and the // instruction will already have been filled in correctly, since the failing @@ -2450,8 +2465,8 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, if ((Match1 == Match_MnemonicFail) && (Match2 == Match_MnemonicFail) && (Match3 == Match_MnemonicFail) && (Match4 == Match_MnemonicFail)) { if (!WasOriginallyInvalidOperand) { - ArrayRef<SMRange> Ranges = MatchingInlineAsm ? EmptyRanges : - Op->getLocRange(); + ArrayRef<SMRange> Ranges = + MatchingInlineAsm ? 
EmptyRanges : Op.getLocRange(); return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'", Ranges, MatchingInlineAsm); } @@ -2462,10 +2477,10 @@ MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, return Error(IDLoc, "too few operands for instruction", EmptyRanges, MatchingInlineAsm); - X86Operand *Operand = (X86Operand*)Operands[ErrorInfo]; - if (Operand->getStartLoc().isValid()) { - SMRange OperandRange = Operand->getLocRange(); - return Error(Operand->getStartLoc(), "invalid operand for instruction", + X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo]; + if (Operand.getStartLoc().isValid()) { + SMRange OperandRange = Operand.getLocRange(); + return Error(Operand.getStartLoc(), "invalid operand for instruction", OperandRange, MatchingInlineAsm); } } diff --git a/lib/Target/X86/AsmParser/X86Operand.h b/lib/Target/X86/AsmParser/X86Operand.h index de3be38..1bbfc11 100644 --- a/lib/Target/X86/AsmParser/X86Operand.h +++ b/lib/Target/X86/AsmParser/X86Operand.h @@ -13,6 +13,7 @@ #include "X86AsmParserCommon.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCParser/MCParsedAsmOperand.h" +#include "llvm/ADT/STLExtras.h" namespace llvm { @@ -410,20 +411,19 @@ struct X86Operand : public MCParsedAsmOperand { Inst.addOperand(MCOperand::CreateReg(getMemSegReg())); } - static X86Operand *CreateToken(StringRef Str, SMLoc Loc) { + static std::unique_ptr<X86Operand> CreateToken(StringRef Str, SMLoc Loc) { SMLoc EndLoc = SMLoc::getFromPointer(Loc.getPointer() + Str.size()); - X86Operand *Res = new X86Operand(Token, Loc, EndLoc); + auto Res = llvm::make_unique<X86Operand>(Token, Loc, EndLoc); Res->Tok.Data = Str.data(); Res->Tok.Length = Str.size(); return Res; } - static X86Operand *CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, - bool AddressOf = false, - SMLoc OffsetOfLoc = SMLoc(), - StringRef SymName = StringRef(), - void *OpDecl = nullptr) { - X86Operand *Res = new X86Operand(Register, StartLoc, EndLoc); + static std::unique_ptr<X86Operand> + CreateReg(unsigned RegNo, SMLoc StartLoc, SMLoc EndLoc, + bool AddressOf = false, SMLoc OffsetOfLoc = SMLoc(), + StringRef SymName = StringRef(), void *OpDecl = nullptr) { + auto Res = llvm::make_unique<X86Operand>(Register, StartLoc, EndLoc); Res->Reg.RegNo = RegNo; Res->AddressOf = AddressOf; Res->OffsetOfLoc = OffsetOfLoc; @@ -432,17 +432,18 @@ struct X86Operand : public MCParsedAsmOperand { return Res; } - static X86Operand *CreateImm(const MCExpr *Val, SMLoc StartLoc, SMLoc EndLoc){ - X86Operand *Res = new X86Operand(Immediate, StartLoc, EndLoc); + static std::unique_ptr<X86Operand> CreateImm(const MCExpr *Val, + SMLoc StartLoc, SMLoc EndLoc) { + auto Res = llvm::make_unique<X86Operand>(Immediate, StartLoc, EndLoc); Res->Imm.Val = Val; return Res; } /// Create an absolute memory operand. - static X86Operand *CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, StringRef SymName = StringRef(), - void *OpDecl = nullptr) { - X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); + static std::unique_ptr<X86Operand> + CreateMem(const MCExpr *Disp, SMLoc StartLoc, SMLoc EndLoc, unsigned Size = 0, + StringRef SymName = StringRef(), void *OpDecl = nullptr) { + auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc); Res->Mem.SegReg = 0; Res->Mem.Disp = Disp; Res->Mem.BaseReg = 0; @@ -456,12 +457,11 @@ struct X86Operand : public MCParsedAsmOperand { } /// Create a generalized memory operand. 
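// Aside: the pattern behind this file's changes and the matching parser
// changes above: the Create* factories now return
// std::unique_ptr<X86Operand> built with llvm::make_unique (LLVM's pre-C++14
// stand-in for std::make_unique), callers std::move the result into the
// operand vector, and the scattered manual deletes disappear because
// overwriting or erasing a slot destroys the old operand automatically.
// A minimal sketch with stand-in types:
#include <memory>
#include <vector>

struct Operand {
  virtual ~Operand() = default;
};

static std::unique_ptr<Operand> createToken() {
  return std::make_unique<Operand>(); // cf. llvm::make_unique<X86Operand>(...)
}

static void rewriteFirstOperand(std::vector<std::unique_ptr<Operand>> &Ops) {
  Ops[0] = createToken(); // old Ops[0] is destroyed here; no manual delete
}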
- static X86Operand *CreateMem(unsigned SegReg, const MCExpr *Disp, - unsigned BaseReg, unsigned IndexReg, - unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, - unsigned Size = 0, - StringRef SymName = StringRef(), - void *OpDecl = nullptr) { + static std::unique_ptr<X86Operand> + CreateMem(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, + unsigned IndexReg, unsigned Scale, SMLoc StartLoc, SMLoc EndLoc, + unsigned Size = 0, StringRef SymName = StringRef(), + void *OpDecl = nullptr) { // We should never just have a displacement, that should be parsed as an // absolute memory operand. assert((SegReg || BaseReg || IndexReg) && "Invalid memory operand!"); @@ -469,7 +469,7 @@ struct X86Operand : public MCParsedAsmOperand { // The scale should always be one of {1,2,4,8}. assert(((Scale == 1 || Scale == 2 || Scale == 4 || Scale == 8)) && "Invalid scale!"); - X86Operand *Res = new X86Operand(Memory, StartLoc, EndLoc); + auto Res = llvm::make_unique<X86Operand>(Memory, StartLoc, EndLoc); Res->Mem.SegReg = SegReg; Res->Mem.Disp = Disp; Res->Mem.BaseReg = BaseReg; diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index c54fbc1..a09767e 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -14,6 +14,7 @@ add_public_tablegen_target(X86CommonTableGen) set(sources X86AsmPrinter.cpp + X86AtomicExpandPass.cpp X86CodeEmitter.cpp X86FastISel.cpp X86FloatingPoint.cpp diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index 804606d..55587d4 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -1620,7 +1620,8 @@ static int readVVVV(struct InternalInstruction* insn) { int vvvv; if (insn->vectorExtensionType == TYPE_EVEX) - vvvv = vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2]); + vvvv = (v2FromEVEX4of4(insn->vectorExtensionPrefix[3]) << 4 | + vvvvFromEVEX3of4(insn->vectorExtensionPrefix[2])); else if (insn->vectorExtensionType == TYPE_VEX_3B) vvvv = vvvvFromVEX3of3(insn->vectorExtensionPrefix[2]); else if (insn->vectorExtensionType == TYPE_VEX_2B) diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index bf30a8e..23bca0d 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -73,11 +73,12 @@ public: }; class X86AsmBackend : public MCAsmBackend { - StringRef CPU; + const StringRef CPU; bool HasNopl; + const uint64_t MaxNopLength; public: X86AsmBackend(const Target &T, StringRef _CPU) - : MCAsmBackend(), CPU(_CPU) { + : MCAsmBackend(), CPU(_CPU), MaxNopLength(_CPU == "slm" ? 7 : 15) { HasNopl = CPU != "generic" && CPU != "i386" && CPU != "i486" && CPU != "i586" && CPU != "pentium" && CPU != "pentium-mmx" && CPU != "i686" && CPU != "k6" && CPU != "k6-2" && CPU != "k6-3" && @@ -331,7 +332,7 @@ bool X86AsmBackend::writeNopData(uint64_t Count, MCObjectWriter *OW) const { // 15 is the longest single nop instruction. Emit as many 15-byte nops as // needed, then emit a nop of the remaining length. do { - const uint8_t ThisNopLength = (uint8_t) std::min(Count, (uint64_t) 15); + const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength); const uint8_t Prefixes = ThisNopLength <= 10 ? 
0 : ThisNopLength - 10; for (uint8_t i = 0; i < Prefixes; i++) OW->Write8(0x66); @@ -365,6 +366,17 @@ public: } }; +class ELFX86_X32AsmBackend : public ELFX86AsmBackend { +public: + ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) + : ELFX86AsmBackend(T, OSABI, CPU) {} + + MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { + return createX86ELFObjectWriter(OS, /*IsELF64*/ false, OSABI, + ELF::EM_X86_64); + } +}; + class ELFX86_64AsmBackend : public ELFX86AsmBackend { public: ELFX86_64AsmBackend(const Target &T, uint8_t OSABI, StringRef CPU) @@ -717,11 +729,10 @@ public: }; class DarwinX86_32AsmBackend : public DarwinX86AsmBackend { - bool SupportsCU; public: DarwinX86_32AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef CPU, bool SupportsCU) - : DarwinX86AsmBackend(T, MRI, CPU, false), SupportsCU(SupportsCU) {} + StringRef CPU) + : DarwinX86AsmBackend(T, MRI, CPU, false) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/false, @@ -732,20 +743,16 @@ public: /// \brief Generate the compact unwind encoding for the CFI instructions. uint32_t generateCompactUnwindEncoding( ArrayRef<MCCFIInstruction> Instrs) const override { - return SupportsCU ? generateCompactUnwindEncodingImpl(Instrs) : 0; + return generateCompactUnwindEncodingImpl(Instrs); } }; class DarwinX86_64AsmBackend : public DarwinX86AsmBackend { - bool SupportsCU; const MachO::CPUSubTypeX86 Subtype; public: DarwinX86_64AsmBackend(const Target &T, const MCRegisterInfo &MRI, - StringRef CPU, bool SupportsCU, - MachO::CPUSubTypeX86 st) - : DarwinX86AsmBackend(T, MRI, CPU, true), SupportsCU(SupportsCU), - Subtype(st) { - } + StringRef CPU, MachO::CPUSubTypeX86 st) + : DarwinX86AsmBackend(T, MRI, CPU, true), Subtype(st) {} MCObjectWriter *createObjectWriter(raw_ostream &OS) const override { return createX86MachObjectWriter(OS, /*Is64Bit=*/true, @@ -788,7 +795,7 @@ public: /// \brief Generate the compact unwind encoding for the CFI instructions. uint32_t generateCompactUnwindEncoding( ArrayRef<MCCFIInstruction> Instrs) const override { - return SupportsCU ? 
generateCompactUnwindEncodingImpl(Instrs) : 0; + return generateCompactUnwindEncodingImpl(Instrs); } }; @@ -801,9 +808,7 @@ MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T, Triple TheTriple(TT); if (TheTriple.isOSBinFormatMachO()) - return new DarwinX86_32AsmBackend(T, MRI, CPU, - TheTriple.isMacOSX() && - !TheTriple.isMacOSXVersionLT(10, 7)); + return new DarwinX86_32AsmBackend(T, MRI, CPU); if (TheTriple.isOSWindows() && !TheTriple.isOSBinFormatELF()) return new WindowsX86AsmBackend(T, false, CPU); @@ -823,14 +828,15 @@ MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T, StringSwitch<MachO::CPUSubTypeX86>(TheTriple.getArchName()) .Case("x86_64h", MachO::CPU_SUBTYPE_X86_64_H) .Default(MachO::CPU_SUBTYPE_X86_64_ALL); - return new DarwinX86_64AsmBackend(T, MRI, CPU, - TheTriple.isMacOSX() && - !TheTriple.isMacOSXVersionLT(10, 7), CS); + return new DarwinX86_64AsmBackend(T, MRI, CPU, CS); } if (TheTriple.isOSWindows() && !TheTriple.isOSBinFormatELF()) return new WindowsX86AsmBackend(T, true, CPU); uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS()); + + if (TheTriple.getEnvironment() == Triple::GNUX32) + return new ELFX86_X32AsmBackend(T, OSABI, CPU); return new ELFX86_64AsmBackend(T, OSABI, CPU); } diff --git a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp index 39480ea..83b2777 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp @@ -74,8 +74,9 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &T) { // FIXME: this should not depend on the target OS version, but on the ld64 // version in use. From at least >= ld64-97.17 (Xcode 3.2.6) the abs-ified - // FDE relocs may be used. - DwarfFDESymbolsUseAbsDiff = T.isMacOSX() && !T.isMacOSXVersionLT(10, 6); + // FDE relocs may be used. We also use them for the ios simulator. 
+ DwarfFDESymbolsUseAbsDiff = (T.isMacOSX() && !T.isMacOSXVersionLT(10, 6)) + || T.isiOS(); UseIntegratedAssembler = true; } @@ -142,8 +143,11 @@ getNonexecutableStackSection(MCContext &Ctx) const { void X86MCAsmInfoMicrosoft::anchor() { } X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { - if (Triple.getArch() == Triple::x86_64) + if (Triple.getArch() == Triple::x86_64) { PrivateGlobalPrefix = ".L"; + PointerSize = 8; + ExceptionsType = ExceptionHandling::WinEH; + } AssemblerDialect = AsmWriterFlavor; @@ -157,17 +161,18 @@ X86MCAsmInfoMicrosoft::X86MCAsmInfoMicrosoft(const Triple &Triple) { void X86MCAsmInfoGNUCOFF::anchor() { } X86MCAsmInfoGNUCOFF::X86MCAsmInfoGNUCOFF(const Triple &Triple) { + assert(Triple.isOSWindows() && "Windows is the only supported COFF target"); if (Triple.getArch() == Triple::x86_64) { PrivateGlobalPrefix = ".L"; PointerSize = 8; + ExceptionsType = ExceptionHandling::WinEH; + } else { + ExceptionsType = ExceptionHandling::DwarfCFI; } AssemblerDialect = AsmWriterFlavor; TextAlignFillValue = 0x90; - // Exceptions handling - ExceptionsType = ExceptionHandling::DwarfCFI; - UseIntegratedAssembler = true; } diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp index e63036c..5e29e5c 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp @@ -197,14 +197,13 @@ void X86_MC::DetectFamilyModel(unsigned EAX, unsigned &Family, } } -unsigned X86_MC::getDwarfRegFlavour(StringRef TT, bool isEH) { - Triple TheTriple(TT); - if (TheTriple.getArch() == Triple::x86_64) +unsigned X86_MC::getDwarfRegFlavour(Triple TT, bool isEH) { + if (TT.getArch() == Triple::x86_64) return DWARFFlavour::X86_64; - if (TheTriple.isOSDarwin()) + if (TT.isOSDarwin()) return isEH ? 
DWARFFlavour::X86_32_DarwinEH : DWARFFlavour::X86_32_Generic; - if (TheTriple.isOSCygMing()) + if (TT.isOSCygMing()) // Unsupported by now, just quick fallback return DWARFFlavour::X86_32_Generic; return DWARFFlavour::X86_32_Generic; @@ -251,8 +250,8 @@ static MCRegisterInfo *createX86MCRegisterInfo(StringRef TT) { MCRegisterInfo *X = new MCRegisterInfo(); InitX86MCRegisterInfo(X, RA, - X86_MC::getDwarfRegFlavour(TT, false), - X86_MC::getDwarfRegFlavour(TT, true), + X86_MC::getDwarfRegFlavour(TheTriple, false), + X86_MC::getDwarfRegFlavour(TheTriple, true), RA); X86_MC::InitLLVM2SEHRegisterMapping(X); return X; diff --git a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h index 8fe40fd..ebe74cf 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h +++ b/lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h @@ -28,6 +28,7 @@ class MCSubtargetInfo; class MCRelocationInfo; class MCStreamer; class Target; +class Triple; class StringRef; class raw_ostream; @@ -64,7 +65,7 @@ namespace X86_MC { void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model); - unsigned getDwarfRegFlavour(StringRef TT, bool isEH); + unsigned getDwarfRegFlavour(Triple TT, bool isEH); void InitLLVM2SEHRegisterMapping(MCRegisterInfo *MRI); diff --git a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp index c62fd0a..7fa4180 100644 --- a/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp +++ b/lib/Target/X86/MCTargetDesc/X86WinCOFFStreamer.cpp @@ -19,12 +19,12 @@ public: raw_ostream &OS) : MCWinCOFFStreamer(C, AB, *CE, OS) { } - void EmitWin64EHHandlerData() override; + void EmitWinEHHandlerData() override; void FinishImpl() override; }; -void X86WinCOFFStreamer::EmitWin64EHHandlerData() { - MCStreamer::EmitWin64EHHandlerData(); +void X86WinCOFFStreamer::EmitWinEHHandlerData() { + MCStreamer::EmitWinEHHandlerData(); // We have to emit the unwind info now, because this directive // actually switches to the .xdata section! diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index 64e8ea8..d5522ed 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -24,6 +24,10 @@ class ImmutablePass; class JITCodeEmitter; class X86TargetMachine; +/// createX86AtomicExpandPass - This pass expands atomic operations that cannot +/// be handled natively in terms of a loop using cmpxchg. +FunctionPass *createX86AtomicExpandPass(const X86TargetMachine *TM); + /// createX86ISelDag - This pass converts a legalized DAG into a /// X86-specific DAG, ready for instruction scheduling. /// diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 6912b57..93f516a 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -168,6 +168,8 @@ def FeatureLEAUsesAG : SubtargetFeature<"lea-uses-ag", "LEAUsesAG", "true", "LEA instruction needs inputs at AG stage">; def FeatureSlowLEA : SubtargetFeature<"slow-lea", "SlowLEA", "true", "LEA instruction with certain arguments is slow">; +def FeatureSlowIncDec : SubtargetFeature<"slow-incdec", "SlowIncDec", "true", + "INC and DEC instructions are slower than ADD and SUB">; //===----------------------------------------------------------------------===// // X86 processors supported. 
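A feature flag like slow-incdec above surfaces as a boolean on X86Subtarget that lowering code can query. A minimal sketch of the usual tablegen-generated shape (the names follow convention and are assumed here, not taken from this commit):

class X86SubtargetSketch {
  bool SlowIncDec = false;  // set when "slow-incdec" is in the CPU feature string
public:
  bool slowIncDec() const { return SlowIncDec; }
};
// With this set (slm and knl below), selection prefers ADD reg,1 / SUB reg,1
// over INC/DEC, whose partial EFLAGS update is slow on those cores.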
@@ -228,7 +230,7 @@ def : ProcessorModel<"slm", SLMModel, [ProcIntelSLM, FeaturePCLMUL, FeatureAES, FeatureCallRegIndirect, FeaturePRFCHW, - FeatureSlowLEA, + FeatureSlowLEA, FeatureSlowIncDec, FeatureSlowBTMem, FeatureFastUAMem]>; // "Arrandale" along with corei3 and corei5 def : ProcessorModel<"corei7", SandyBridgeModel, @@ -271,7 +273,8 @@ def : ProcessorModel<"knl", HaswellModel, FeatureCMPXCHG16B, FeatureFastUAMem, FeaturePOPCNT, FeatureAES, FeaturePCLMUL, FeatureRDRAND, FeatureF16C, FeatureFSGSBase, FeatureMOVBE, FeatureLZCNT, FeatureBMI, - FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE]>; + FeatureBMI2, FeatureFMA, FeatureRTM, FeatureHLE, + FeatureSlowIncDec]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; diff --git a/lib/Target/X86/X86AtomicExpandPass.cpp b/lib/Target/X86/X86AtomicExpandPass.cpp new file mode 100644 index 0000000..61eefbb --- /dev/null +++ b/lib/Target/X86/X86AtomicExpandPass.cpp @@ -0,0 +1,287 @@ +//===-- X86AtomicExpandPass.cpp - Expand illegal atomic instructions -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains a pass (at IR level) to replace atomic instructions which +// cannot be implemented as a single instruction with cmpxchg-based loops. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86TargetMachine.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +#define DEBUG_TYPE "x86-atomic-expand" + +namespace { + class X86AtomicExpandPass : public FunctionPass { + const X86TargetMachine *TM; + public: + static char ID; // Pass identification, replacement for typeid + explicit X86AtomicExpandPass(const X86TargetMachine *TM) + : FunctionPass(ID), TM(TM) {} + + bool runOnFunction(Function &F) override; + bool expandAtomicInsts(Function &F); + + bool needsCmpXchgNb(Type *MemType); + + /// There are four kinds of atomic operations. Two never need expanding: + /// cmpxchg is what we expand the others *to*, and loads are easily handled + /// by ISelLowering. Atomicrmw and store can need expanding in some + /// circumstances. + bool shouldExpand(Instruction *Inst); + + /// 128-bit atomic stores (64-bit on i686) need to be implemented in terms + /// of trivial cmpxchg16b loops. A simple store isn't necessarily atomic. + bool shouldExpandStore(StoreInst *SI); + + /// Only some atomicrmw instructions need expanding -- some operations + /// (e.g. max) have absolutely no architectural support; some (e.g. or) have + /// limited support but can't return the previous value; some (e.g. add) + /// have complete support in the instruction set. + /// + /// Also, naturally, 128-bit operations always need to be expanded.
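To make the last point above concrete: an operation like max has no single lock-prefixed x86 instruction, so the pass turns it into a compare-exchange retry loop. A user-level C++ equivalent (an illustrative sketch, not code from this commit):

#include <atomic>
#include <algorithm>

// What `atomicrmw max` amounts to after expansion: retry cmpxchg until our
// candidate sticks, and hand back the previous value, as atomicrmw requires.
int atomicMax(std::atomic<int> &A, int V) {
  int Old = A.load(std::memory_order_relaxed);
  while (!A.compare_exchange_weak(Old, std::max(Old, V)))
    ; // on failure, Old is refreshed with the current memory contents
  return Old;
}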
+ bool shouldExpandAtomicRMW(AtomicRMWInst *AI); + + bool expandAtomicRMW(AtomicRMWInst *AI); + bool expandAtomicStore(StoreInst *SI); + }; +} + +char X86AtomicExpandPass::ID = 0; + +FunctionPass *llvm::createX86AtomicExpandPass(const X86TargetMachine *TM) { + return new X86AtomicExpandPass(TM); +} + +bool X86AtomicExpandPass::runOnFunction(Function &F) { + SmallVector<Instruction *, 1> AtomicInsts; + + // Changing control-flow while iterating through it is a bad idea, so gather a + // list of all atomic instructions before we start. + for (BasicBlock &BB : F) + for (Instruction &Inst : BB) { + if (isa<AtomicRMWInst>(&Inst) || + (isa<StoreInst>(&Inst) && cast<StoreInst>(&Inst)->isAtomic())) + AtomicInsts.push_back(&Inst); + } + + bool MadeChange = false; + for (Instruction *Inst : AtomicInsts) { + if (!shouldExpand(Inst)) + continue; + + if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) + MadeChange |= expandAtomicRMW(AI); + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + MadeChange |= expandAtomicStore(SI); + + assert(MadeChange && "Atomic inst not expanded when it should be?"); + Inst->eraseFromParent(); + } + + return MadeChange; +} + +/// Returns true if operations on the given type will need to use either +/// cmpxchg8b or cmpxchg16b. This occurs if the type is 1 step up from the +/// native width, and the instructions are available (otherwise we leave them +/// alone to become __sync_fetch_and_... calls). +bool X86AtomicExpandPass::needsCmpXchgNb(llvm::Type *MemType) { + const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>(); + if (!Subtarget.hasCmpxchg16b()) + return false; + + unsigned CmpXchgNbWidth = Subtarget.is64Bit() ? 128 : 64; + + unsigned OpWidth = MemType->getPrimitiveSizeInBits(); + if (OpWidth == CmpXchgNbWidth) + return true; + + return false; +} + + +bool X86AtomicExpandPass::shouldExpandAtomicRMW(AtomicRMWInst *AI) { + const X86Subtarget &Subtarget = TM->getSubtarget<X86Subtarget>(); + unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; + + if (needsCmpXchgNb(AI->getType())) + return true; + + if (AI->getType()->getPrimitiveSizeInBits() > NativeWidth) + return false; + + AtomicRMWInst::BinOp Op = AI->getOperation(); + switch (Op) { + default: + llvm_unreachable("Unknown atomic operation"); + case AtomicRMWInst::Xchg: + case AtomicRMWInst::Add: + case AtomicRMWInst::Sub: + // It's better to use xadd, xsub or xchg for these in all cases. + return false; + case AtomicRMWInst::Or: + case AtomicRMWInst::And: + case AtomicRMWInst::Xor: + // If the atomicrmw's result isn't actually used, we can just add a "lock" + // prefix to a normal instruction for these operations. + return !AI->use_empty(); + case AtomicRMWInst::Nand: + case AtomicRMWInst::Max: + case AtomicRMWInst::Min: + case AtomicRMWInst::UMax: + case AtomicRMWInst::UMin: + // These always require a non-trivial set of data operations on x86. We must + // use a cmpxchg loop. + return true; + } +} + +bool X86AtomicExpandPass::shouldExpandStore(StoreInst *SI) { + if (needsCmpXchgNb(SI->getValueOperand()->getType())) + return true; + + return false; +} + +bool X86AtomicExpandPass::shouldExpand(Instruction *Inst) { + if (AtomicRMWInst *AI = dyn_cast<AtomicRMWInst>(Inst)) + return shouldExpandAtomicRMW(AI); + if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) + return shouldExpandStore(SI); + return false; +} + +/// Emit IR to implement the given atomicrmw operation on values in registers, +/// returning the new value. 
+static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder, + Value *Loaded, Value *Inc) { + Value *NewVal; + switch (Op) { + case AtomicRMWInst::Xchg: + return Inc; + case AtomicRMWInst::Add: + return Builder.CreateAdd(Loaded, Inc, "new"); + case AtomicRMWInst::Sub: + return Builder.CreateSub(Loaded, Inc, "new"); + case AtomicRMWInst::And: + return Builder.CreateAnd(Loaded, Inc, "new"); + case AtomicRMWInst::Nand: + return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new"); + case AtomicRMWInst::Or: + return Builder.CreateOr(Loaded, Inc, "new"); + case AtomicRMWInst::Xor: + return Builder.CreateXor(Loaded, Inc, "new"); + case AtomicRMWInst::Max: + NewVal = Builder.CreateICmpSGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::Min: + NewVal = Builder.CreateICmpSLE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMax: + NewVal = Builder.CreateICmpUGT(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + case AtomicRMWInst::UMin: + NewVal = Builder.CreateICmpULE(Loaded, Inc); + return Builder.CreateSelect(NewVal, Loaded, Inc, "new"); + default: + break; + } + llvm_unreachable("Unknown atomic op"); +} + +bool X86AtomicExpandPass::expandAtomicRMW(AtomicRMWInst *AI) { + AtomicOrdering Order = + AI->getOrdering() == Unordered ? Monotonic : AI->getOrdering(); + Value *Addr = AI->getPointerOperand(); + BasicBlock *BB = AI->getParent(); + Function *F = BB->getParent(); + LLVMContext &Ctx = F->getContext(); + + // Given: atomicrmw some_op iN* %addr, iN %incr ordering + // + // The standard expansion we produce is: + // [...] + // %init_loaded = load atomic iN* %addr + // br label %loop + // loop: + // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ] + // %new = some_op iN %loaded, %incr + // %pair = cmpxchg iN* %addr, iN %loaded, iN %new + // %new_loaded = extractvalue { iN, i1 } %pair, 0 + // %success = extractvalue { iN, i1 } %pair, 1 + // br i1 %success, label %atomicrmw.end, label %loop + // atomicrmw.end: + // [...] + BasicBlock *ExitBB = BB->splitBasicBlock(AI, "atomicrmw.end"); + BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB); + + // This grabs the DebugLoc from AI. + IRBuilder<> Builder(AI); + + // The split call above "helpfully" added a branch at the end of BB (to the + // wrong place), but we want a load. It's easiest to just remove + // the branch entirely. + std::prev(BB->end())->eraseFromParent(); + Builder.SetInsertPoint(BB); + LoadInst *InitLoaded = Builder.CreateLoad(Addr); + InitLoaded->setAlignment(AI->getType()->getPrimitiveSizeInBits()); + Builder.CreateBr(LoopBB); + + // Start the main loop block now that we've taken care of the preliminaries. 
+ Builder.SetInsertPoint(LoopBB); + PHINode *Loaded = Builder.CreatePHI(AI->getType(), 2, "loaded"); + Loaded->addIncoming(InitLoaded, BB); + + Value *NewVal = + performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand()); + + Value *Pair = Builder.CreateAtomicCmpXchg( + Addr, Loaded, NewVal, Order, + AtomicCmpXchgInst::getStrongestFailureOrdering(Order)); + Value *NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded"); + Loaded->addIncoming(NewLoaded, LoopBB); + + Value *Success = Builder.CreateExtractValue(Pair, 1, "success"); + Builder.CreateCondBr(Success, ExitBB, LoopBB); + + AI->replaceAllUsesWith(NewLoaded); + + return true; +} + +bool X86AtomicExpandPass::expandAtomicStore(StoreInst *SI) { + // An atomic store might need cmpxchg16b (or 8b on x86) to execute. Express + // this in terms of the usual expansion to "atomicrmw xchg". + IRBuilder<> Builder(SI); + AtomicOrdering Order = + SI->getOrdering() == Unordered ? Monotonic : SI->getOrdering(); + AtomicRMWInst *AI = + Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(), + SI->getValueOperand(), Order); + + // Now we have an appropriate swap instruction, lower it as usual. + if (shouldExpandAtomicRMW(AI)) { + expandAtomicRMW(AI); + AI->eraseFromParent(); + return true; + } + + return AI; +} diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index 76718d0..a3ae7ee 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -1113,9 +1113,14 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case TargetOpcode::INLINEASM: // We allow inline assembler nodes with empty bodies - they can // implicitly define registers, which is ok for JIT. - if (MI.getOperand(0).getSymbolName()[0]) + if (MI.getOperand(0).getSymbolName()[0]) { + DebugLoc DL = MI.getDebugLoc(); + DL.print(MI.getParent()->getParent()->getFunction()->getContext(), + llvm::errs()); report_fatal_error("JIT does not support inline asm!"); + } break; + case TargetOpcode::DBG_VALUE: case TargetOpcode::CFI_INSTRUCTION: break; case TargetOpcode::GC_LABEL: @@ -1126,6 +1131,16 @@ void Emitter<CodeEmitter>::emitInstruction(MachineInstr &MI, case TargetOpcode::IMPLICIT_DEF: case TargetOpcode::KILL: break; + + case X86::SEH_PushReg: + case X86::SEH_SaveReg: + case X86::SEH_SaveXMM: + case X86::SEH_StackAlloc: + case X86::SEH_SetFrame: + case X86::SEH_PushFrame: + case X86::SEH_EndPrologue: + break; + case X86::MOVPC32r: { // This emits the "call" portion of this pseudo instruction. 
MCE.emitByte(BaseOpcode); diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index 56bcfa3..ce554ba 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -16,10 +16,12 @@ #include "X86.h" #include "X86CallingConv.h" #include "X86InstrBuilder.h" +#include "X86InstrInfo.h" #include "X86MachineFunctionInfo.h" #include "X86RegisterInfo.h" #include "X86Subtarget.h" #include "X86TargetMachine.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -78,12 +80,14 @@ public: private: bool X86FastEmitCompare(const Value *LHS, const Value *RHS, EVT VT); - bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR); + bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, MachineMemOperand *MMO, + unsigned &ResultReg); bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM, - bool Aligned = false); - bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM, - bool Aligned = false); + MachineMemOperand *MMO = nullptr, bool Aligned = false); + bool X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, + const X86AddressMode &AM, + MachineMemOperand *MMO = nullptr, bool Aligned = false); bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT, unsigned Src, EVT SrcVT, unsigned &ResultReg); @@ -107,6 +111,12 @@ private: bool X86SelectDivRem(const Instruction *I); + bool X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I); + + bool X86FastEmitSSESelect(MVT RetVT, const Instruction *I); + + bool X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I); + bool X86SelectSelect(const Instruction *I); bool X86SelectTrunc(const Instruction *I); @@ -147,10 +157,182 @@ private: bool TryEmitSmallMemcpy(X86AddressMode DestAM, X86AddressMode SrcAM, uint64_t Len); + + bool foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, + const Value *Cond); }; } // end anonymous namespace. +static CmpInst::Predicate optimizeCmpPredicate(const CmpInst *CI) { + // If both operands are the same, then try to optimize or fold the cmp. 
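Self-comparisons fold because identical operands make every integer predicate a constant and reduce the floating-point ones to an ordered/unordered test on the single value, which is what the mapping that follows encodes. A standalone sketch of the semantics (illustrative only):

// fcmp oeq %x, %x is true unless %x is NaN (-> "ord"); une is its dual (-> "uno");
// the integer predicates collapse to constants outright.
bool oeqSelf(float X) { return X == X; } // false only when X is NaN
bool uneSelf(float X) { return X != X; } // true only when X is NaN
bool sgtSelf(int X)   { return X > X;  } // always false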
+ CmpInst::Predicate Predicate = CI->getPredicate(); + if (CI->getOperand(0) != CI->getOperand(1)) + return Predicate; + + switch (Predicate) { + default: llvm_unreachable("Invalid predicate!"); + case CmpInst::FCMP_FALSE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OEQ: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OGE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_OLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_OLE: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_ONE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::FCMP_ORD: Predicate = CmpInst::FCMP_ORD; break; + case CmpInst::FCMP_UNO: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UEQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UGT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_ULT: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::FCMP_UNE: Predicate = CmpInst::FCMP_UNO; break; + case CmpInst::FCMP_TRUE: Predicate = CmpInst::FCMP_TRUE; break; + + case CmpInst::ICMP_EQ: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_NE: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_UGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_ULT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_ULE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SGT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SGE: Predicate = CmpInst::FCMP_TRUE; break; + case CmpInst::ICMP_SLT: Predicate = CmpInst::FCMP_FALSE; break; + case CmpInst::ICMP_SLE: Predicate = CmpInst::FCMP_TRUE; break; + } + + return Predicate; +} + +static std::pair<X86::CondCode, bool> +getX86ConditionCode(CmpInst::Predicate Predicate) { + X86::CondCode CC = X86::COND_INVALID; + bool NeedSwap = false; + switch (Predicate) { + default: break; + // Floating-point Predicates + case CmpInst::FCMP_UEQ: CC = X86::COND_E; break; + case CmpInst::FCMP_OLT: NeedSwap = true; // fall-through + case CmpInst::FCMP_OGT: CC = X86::COND_A; break; + case CmpInst::FCMP_OLE: NeedSwap = true; // fall-through + case CmpInst::FCMP_OGE: CC = X86::COND_AE; break; + case CmpInst::FCMP_UGT: NeedSwap = true; // fall-through + case CmpInst::FCMP_ULT: CC = X86::COND_B; break; + case CmpInst::FCMP_UGE: NeedSwap = true; // fall-through + case CmpInst::FCMP_ULE: CC = X86::COND_BE; break; + case CmpInst::FCMP_ONE: CC = X86::COND_NE; break; + case CmpInst::FCMP_UNO: CC = X86::COND_P; break; + case CmpInst::FCMP_ORD: CC = X86::COND_NP; break; + case CmpInst::FCMP_OEQ: // fall-through + case CmpInst::FCMP_UNE: CC = X86::COND_INVALID; break; + + // Integer Predicates + case CmpInst::ICMP_EQ: CC = X86::COND_E; break; + case CmpInst::ICMP_NE: CC = X86::COND_NE; break; + case CmpInst::ICMP_UGT: CC = X86::COND_A; break; + case CmpInst::ICMP_UGE: CC = X86::COND_AE; break; + case CmpInst::ICMP_ULT: CC = X86::COND_B; break; + case CmpInst::ICMP_ULE: CC = X86::COND_BE; break; + case CmpInst::ICMP_SGT: CC = X86::COND_G; break; + case CmpInst::ICMP_SGE: CC = X86::COND_GE; break; + case CmpInst::ICMP_SLT: CC = X86::COND_L; break; + case CmpInst::ICMP_SLE: CC = X86::COND_LE; break; + } + + return std::make_pair(CC, NeedSwap); +} + +static std::pair<unsigned, bool> 
+getX86SSEConditionCode(CmpInst::Predicate Predicate) { + unsigned CC; + bool NeedSwap = false; + + // SSE Condition code mapping: + // 0 - EQ + // 1 - LT + // 2 - LE + // 3 - UNORD + // 4 - NEQ + // 5 - NLT + // 6 - NLE + // 7 - ORD + switch (Predicate) { + default: llvm_unreachable("Unexpected predicate"); + case CmpInst::FCMP_OEQ: CC = 0; break; + case CmpInst::FCMP_OGT: NeedSwap = true; // fall-through + case CmpInst::FCMP_OLT: CC = 1; break; + case CmpInst::FCMP_OGE: NeedSwap = true; // fall-through + case CmpInst::FCMP_OLE: CC = 2; break; + case CmpInst::FCMP_UNO: CC = 3; break; + case CmpInst::FCMP_UNE: CC = 4; break; + case CmpInst::FCMP_ULE: NeedSwap = true; // fall-through + case CmpInst::FCMP_UGE: CC = 5; break; + case CmpInst::FCMP_ULT: NeedSwap = true; // fall-through + case CmpInst::FCMP_UGT: CC = 6; break; + case CmpInst::FCMP_ORD: CC = 7; break; + case CmpInst::FCMP_UEQ: + case CmpInst::FCMP_ONE: CC = 8; break; + } + + return std::make_pair(CC, NeedSwap); +} + +/// \brief Check if it is possible to fold the condition from the XALU intrinsic +/// into the user. The condition code will only be updated on success. +bool X86FastISel::foldX86XALUIntrinsic(X86::CondCode &CC, const Instruction *I, + const Value *Cond) { + if (!isa<ExtractValueInst>(Cond)) + return false; + + const auto *EV = cast<ExtractValueInst>(Cond); + if (!isa<IntrinsicInst>(EV->getAggregateOperand())) + return false; + + const auto *II = cast<IntrinsicInst>(EV->getAggregateOperand()); + MVT RetVT; + const Function *Callee = II->getCalledFunction(); + Type *RetTy = + cast<StructType>(Callee->getReturnType())->getTypeAtIndex(0U); + if (!isTypeLegal(RetTy, RetVT)) + return false; + + if (RetVT != MVT::i32 && RetVT != MVT::i64) + return false; + + X86::CondCode TmpCC; + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::sadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: TmpCC = X86::COND_O; break; + case Intrinsic::uadd_with_overflow: + case Intrinsic::usub_with_overflow: TmpCC = X86::COND_B; break; + } + + // Check if both instructions are in the same basic block. + if (II->getParent() != I->getParent()) + return false; + + // Make sure nothing is in the way + BasicBlock::const_iterator Start = I; + BasicBlock::const_iterator End = II; + for (auto Itr = std::prev(Start); Itr != End; --Itr) { + // We only expect extractvalue instructions between the intrinsic and the + // instruction to be selected. + if (!isa<ExtractValueInst>(Itr)) + return false; + + // Check that the extractvalue operand comes from the intrinsic. + const auto *EVI = cast<ExtractValueInst>(Itr); + if (EVI->getAggregateOperand() != II) + return false; + } + + CC = TmpCC; + return true; +} + bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { EVT evt = TLI.getValueType(Ty, /*HandleUnknown=*/true); if (evt == MVT::Other || !evt.isSimple()) @@ -180,7 +362,7 @@ bool X86FastISel::isTypeLegal(Type *Ty, MVT &VT, bool AllowI1) { /// The address is either pre-computed, i.e. Ptr, or a GlobalAddress, i.e. GV. /// Return true and the result register by reference if it is possible. bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, - unsigned &ResultReg) { + MachineMemOperand *MMO, unsigned &ResultReg) { // Get opcode and regclass of the output for the given load instruction. 
unsigned Opc = 0; const TargetRegisterClass *RC = nullptr; @@ -228,8 +410,11 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, } ResultReg = createResultReg(RC); - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DbgLoc, TII.get(Opc), ResultReg), AM); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); + addFullAddress(MIB, AM); + if (MMO) + MIB->addMemOperand(*FuncInfo.MF, MMO); return true; } @@ -237,9 +422,9 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM, /// type VT. The address is either pre-computed, consisted of a base ptr, Ptr /// and a displacement offset, or a GlobalAddress, /// i.e. V. Return true if it is possible. -bool -X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, - const X86AddressMode &AM, bool Aligned) { +bool X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, bool ValIsKill, + const X86AddressMode &AM, + MachineMemOperand *MMO, bool Aligned) { // Get opcode and regclass of the output for the given store instruction. unsigned Opc = 0; switch (VT.getSimpleVT().SimpleTy) { @@ -249,7 +434,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, // Mask out all but lowest bit. unsigned AndResult = createResultReg(&X86::GR8RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1); + TII.get(X86::AND8ri), AndResult) + .addReg(ValReg, getKillRegState(ValIsKill)).addImm(1); ValReg = AndResult; } // FALLTHROUGH, handling i1 as i8. @@ -288,13 +474,18 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg, break; } - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DbgLoc, TII.get(Opc)), AM).addReg(ValReg); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); + addFullAddress(MIB, AM).addReg(ValReg, getKillRegState(ValIsKill)); + if (MMO) + MIB->addMemOperand(*FuncInfo.MF, MMO); + return true; } bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, - const X86AddressMode &AM, bool Aligned) { + const X86AddressMode &AM, + MachineMemOperand *MMO, bool Aligned) { // Handle 'null' like i32/i64 0. if (isa<ConstantPointerNull>(Val)) Val = Constant::getNullValue(DL.getIntPtrType(Val->getContext())); @@ -317,10 +508,12 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, } if (Opc) { - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, - DbgLoc, TII.get(Opc)), AM) - .addImm(Signed ? (uint64_t) CI->getSExtValue() : - CI->getZExtValue()); + MachineInstrBuilder MIB = + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc)); + addFullAddress(MIB, AM).addImm(Signed ? (uint64_t) CI->getSExtValue() + : CI->getZExtValue()); + if (MMO) + MIB->addMemOperand(*FuncInfo.MF, MMO); return true; } } @@ -329,7 +522,8 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val, if (ValReg == 0) return false; - return X86FastEmitStore(VT, ValReg, AM, Aligned); + bool ValKill = hasTrivialKill(Val); + return X86FastEmitStore(VT, ValReg, ValKill, AM, MMO, Aligned); } /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of @@ -355,17 +549,8 @@ bool X86FastISel::handleConstantAddresses(const Value *V, X86AddressMode &AM) { return false; // Can't handle TLS yet. - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV)) - if (GVar->isThreadLocal()) - return false; - - // Can't handle TLS yet, part 2 (this is slightly crazy, but this is how - // it works...). 
- if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - if (const GlobalVariable *GVar = - dyn_cast_or_null<GlobalVariable>(GA->getAliasee())) - if (GVar->isThreadLocal()) - return false; + if (GV->isThreadLocal()) + return false; // RIP-relative addresses can't have additional register operands, so if // we've already folded stuff into the addressing mode, just force the @@ -696,7 +881,7 @@ bool X86FastISel::X86SelectCallAddress(const Value *V, X86AddressMode &AM) { (AM.Base.Reg != 0 || AM.IndexReg != 0)) return false; - // Can't handle DbgLocLImport. + // Can't handle DLL Import. if (GV->hasDLLImportStorageClass()) return false; @@ -749,19 +934,24 @@ bool X86FastISel::X86SelectStore(const Instruction *I) { if (S->isAtomic()) return false; - unsigned SABIAlignment = - DL.getABITypeAlignment(S->getValueOperand()->getType()); - bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment; + const Value *Val = S->getValueOperand(); + const Value *Ptr = S->getPointerOperand(); MVT VT; - if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true)) + if (!isTypeLegal(Val->getType(), VT, /*AllowI1=*/true)) return false; + unsigned Alignment = S->getAlignment(); + unsigned ABIAlignment = DL.getABITypeAlignment(Val->getType()); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = ABIAlignment; + bool Aligned = Alignment >= ABIAlignment; + X86AddressMode AM; - if (!X86SelectAddress(I->getOperand(1), AM)) + if (!X86SelectAddress(Ptr, AM)) return false; - return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned); + return X86FastEmitStore(VT, Val, AM, createMachineMemOperandFor(I), Aligned); } /// X86SelectRet - Select and emit code to implement ret instructions. @@ -896,25 +1086,29 @@ bool X86FastISel::X86SelectRet(const Instruction *I) { /// X86SelectLoad - Select and emit code to implement load instructions. /// -bool X86FastISel::X86SelectLoad(const Instruction *I) { +bool X86FastISel::X86SelectLoad(const Instruction *I) { + const LoadInst *LI = cast<LoadInst>(I); + // Atomic loads need special handling. - if (cast<LoadInst>(I)->isAtomic()) + if (LI->isAtomic()) return false; MVT VT; - if (!isTypeLegal(I->getType(), VT, /*AllowI1=*/true)) + if (!isTypeLegal(LI->getType(), VT, /*AllowI1=*/true)) return false; + const Value *Ptr = LI->getPointerOperand(); + X86AddressMode AM; - if (!X86SelectAddress(I->getOperand(0), AM)) + if (!X86SelectAddress(Ptr, AM)) return false; unsigned ResultReg = 0; - if (X86FastEmitLoad(VT, AM, ResultReg)) { - UpdateValueMap(I, ResultReg); - return true; - } - return false; + if (!X86FastEmitLoad(VT, AM, createMachineMemOperandFor(LI), ResultReg)) + return false; + + UpdateValueMap(I, ResultReg); + return true; } static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) { @@ -994,73 +1188,89 @@ bool X86FastISel::X86SelectCmp(const Instruction *I) { if (!isTypeLegal(I->getOperand(0)->getType(), VT)) return false; - unsigned ResultReg = createResultReg(&X86::GR8RegClass); - unsigned SetCCOpc; - bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. - switch (CI->getPredicate()) { - case CmpInst::FCMP_OEQ: { - if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) + // Try to optimize or fold the cmp. 
+ CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + unsigned ResultReg = 0; + switch (Predicate) { + default: break; + case CmpInst::FCMP_FALSE: { + ResultReg = createResultReg(&X86::GR32RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV32r0), + ResultReg); + ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultReg, /*Kill=*/true, + X86::sub_8bit); + if (!ResultReg) return false; + break; + } + case CmpInst::FCMP_TRUE: { + ResultReg = createResultReg(&X86::GR8RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::MOV8ri), + ResultReg).addImm(1); + break; + } + } - unsigned EReg = createResultReg(&X86::GR8RegClass); - unsigned NPReg = createResultReg(&X86::GR8RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETEr), EReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(X86::SETNPr), NPReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(X86::AND8rr), ResultReg).addReg(NPReg).addReg(EReg); + if (ResultReg) { UpdateValueMap(I, ResultReg); return true; } - case CmpInst::FCMP_UNE: { - if (!X86FastEmitCompare(CI->getOperand(0), CI->getOperand(1), VT)) + + const Value *LHS = CI->getOperand(0); + const Value *RHS = CI->getOperand(1); + + // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0. + // We don't have to materialize a zero constant for this case and can just use + // %x again on the RHS. + if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { + const auto *RHSC = dyn_cast<ConstantFP>(RHS); + if (RHSC && RHSC->isNullValue()) + RHS = LHS; + } + + // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. + static unsigned SETFOpcTable[2][3] = { + { X86::SETEr, X86::SETNPr, X86::AND8rr }, + { X86::SETNEr, X86::SETPr, X86::OR8rr } + }; + unsigned *SETFOpc = nullptr; + switch (Predicate) { + default: break; + case CmpInst::FCMP_OEQ: SETFOpc = &SETFOpcTable[0][0]; break; + case CmpInst::FCMP_UNE: SETFOpc = &SETFOpcTable[1][0]; break; + } + + ResultReg = createResultReg(&X86::GR8RegClass); + if (SETFOpc) { + if (!X86FastEmitCompare(LHS, RHS, VT)) return false; - unsigned NEReg = createResultReg(&X86::GR8RegClass); - unsigned PReg = createResultReg(&X86::GR8RegClass); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETNEr), NEReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::SETPr), PReg); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::OR8rr),ResultReg) - .addReg(PReg).addReg(NEReg); + unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); + unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), + FlagReg1); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), + FlagReg2); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[2]), + ResultReg).addReg(FlagReg1).addReg(FlagReg2); UpdateValueMap(I, ResultReg); return true; } - case CmpInst::FCMP_OGT: SwapArgs = false; SetCCOpc = X86::SETAr; break; - case CmpInst::FCMP_OGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break; - case CmpInst::FCMP_OLT: SwapArgs = true; SetCCOpc = X86::SETAr; break; - case CmpInst::FCMP_OLE: SwapArgs = true; SetCCOpc = X86::SETAEr; break; - case CmpInst::FCMP_ONE: SwapArgs = false; SetCCOpc = X86::SETNEr; break; - case CmpInst::FCMP_ORD: SwapArgs = false; SetCCOpc = X86::SETNPr; break; - case CmpInst::FCMP_UNO: SwapArgs = false; SetCCOpc = X86::SETPr; break; - case CmpInst::FCMP_UEQ: SwapArgs = 
false; SetCCOpc = X86::SETEr; break; - case CmpInst::FCMP_UGT: SwapArgs = true; SetCCOpc = X86::SETBr; break; - case CmpInst::FCMP_UGE: SwapArgs = true; SetCCOpc = X86::SETBEr; break; - case CmpInst::FCMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break; - case CmpInst::FCMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break; - - case CmpInst::ICMP_EQ: SwapArgs = false; SetCCOpc = X86::SETEr; break; - case CmpInst::ICMP_NE: SwapArgs = false; SetCCOpc = X86::SETNEr; break; - case CmpInst::ICMP_UGT: SwapArgs = false; SetCCOpc = X86::SETAr; break; - case CmpInst::ICMP_UGE: SwapArgs = false; SetCCOpc = X86::SETAEr; break; - case CmpInst::ICMP_ULT: SwapArgs = false; SetCCOpc = X86::SETBr; break; - case CmpInst::ICMP_ULE: SwapArgs = false; SetCCOpc = X86::SETBEr; break; - case CmpInst::ICMP_SGT: SwapArgs = false; SetCCOpc = X86::SETGr; break; - case CmpInst::ICMP_SGE: SwapArgs = false; SetCCOpc = X86::SETGEr; break; - case CmpInst::ICMP_SLT: SwapArgs = false; SetCCOpc = X86::SETLr; break; - case CmpInst::ICMP_SLE: SwapArgs = false; SetCCOpc = X86::SETLEr; break; - default: - return false; - } - const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); + X86::CondCode CC; + bool SwapArgs; + std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); + assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); + unsigned Opc = X86::getSETFromCond(CC); + if (SwapArgs) - std::swap(Op0, Op1); + std::swap(LHS, RHS); - // Emit a compare of Op0/Op1. - if (!X86FastEmitCompare(Op0, Op1, VT)) + // Emit a compare of LHS/RHS. + if (!X86FastEmitCompare(LHS, RHS, VT)) return false; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SetCCOpc), ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg); UpdateValueMap(I, ResultReg); return true; } @@ -1126,73 +1336,88 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { // Fold the common case of a conditional branch with a comparison // in the same block (values defined on other blocks may not have // initialized registers). + X86::CondCode CC; if (const CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition())) { if (CI->hasOneUse() && CI->getParent() == I->getParent()) { EVT VT = TLI.getValueType(CI->getOperand(0)->getType()); + // Try to optimize or fold the cmp. + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + switch (Predicate) { + default: break; + case CmpInst::FCMP_FALSE: FastEmitBranch(FalseMBB, DbgLoc); return true; + case CmpInst::FCMP_TRUE: FastEmitBranch(TrueMBB, DbgLoc); return true; + } + + const Value *CmpLHS = CI->getOperand(0); + const Value *CmpRHS = CI->getOperand(1); + + // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, + // 0.0. + // We don't have to materialize a zero constant for this case and can just + // use %x again on the RHS. + if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { + const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS); + if (CmpRHSC && CmpRHSC->isNullValue()) + CmpRHS = CmpLHS; + } + // Try to take advantage of fallthrough opportunities. - CmpInst::Predicate Predicate = CI->getPredicate(); if (FuncInfo.MBB->isLayoutSuccessor(TrueMBB)) { std::swap(TrueMBB, FalseMBB); Predicate = CmpInst::getInversePredicate(Predicate); } - bool SwapArgs; // false -> compare Op0, Op1. true -> compare Op1, Op0. - unsigned BranchOpc; // Opcode to jump on, e.g. "X86::JA" - + // FCMP_OEQ and FCMP_UNE cannot be expressed with a single flag/condition + // code check. 
Instead two branch instructions are required to check all + // the flags. First we change the predicate to a supported condition code, + // which will be the first branch. Later on we will emit the second + // branch. + bool NeedExtraBranch = false; switch (Predicate) { + default: break; case CmpInst::FCMP_OEQ: - std::swap(TrueMBB, FalseMBB); - Predicate = CmpInst::FCMP_UNE; - // FALL THROUGH - case CmpInst::FCMP_UNE: SwapArgs = false; BranchOpc = X86::JNE_4; break; - case CmpInst::FCMP_OGT: SwapArgs = false; BranchOpc = X86::JA_4; break; - case CmpInst::FCMP_OGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; - case CmpInst::FCMP_OLT: SwapArgs = true; BranchOpc = X86::JA_4; break; - case CmpInst::FCMP_OLE: SwapArgs = true; BranchOpc = X86::JAE_4; break; - case CmpInst::FCMP_ONE: SwapArgs = false; BranchOpc = X86::JNE_4; break; - case CmpInst::FCMP_ORD: SwapArgs = false; BranchOpc = X86::JNP_4; break; - case CmpInst::FCMP_UNO: SwapArgs = false; BranchOpc = X86::JP_4; break; - case CmpInst::FCMP_UEQ: SwapArgs = false; BranchOpc = X86::JE_4; break; - case CmpInst::FCMP_UGT: SwapArgs = true; BranchOpc = X86::JB_4; break; - case CmpInst::FCMP_UGE: SwapArgs = true; BranchOpc = X86::JBE_4; break; - case CmpInst::FCMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; - case CmpInst::FCMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; - - case CmpInst::ICMP_EQ: SwapArgs = false; BranchOpc = X86::JE_4; break; - case CmpInst::ICMP_NE: SwapArgs = false; BranchOpc = X86::JNE_4; break; - case CmpInst::ICMP_UGT: SwapArgs = false; BranchOpc = X86::JA_4; break; - case CmpInst::ICMP_UGE: SwapArgs = false; BranchOpc = X86::JAE_4; break; - case CmpInst::ICMP_ULT: SwapArgs = false; BranchOpc = X86::JB_4; break; - case CmpInst::ICMP_ULE: SwapArgs = false; BranchOpc = X86::JBE_4; break; - case CmpInst::ICMP_SGT: SwapArgs = false; BranchOpc = X86::JG_4; break; - case CmpInst::ICMP_SGE: SwapArgs = false; BranchOpc = X86::JGE_4; break; - case CmpInst::ICMP_SLT: SwapArgs = false; BranchOpc = X86::JL_4; break; - case CmpInst::ICMP_SLE: SwapArgs = false; BranchOpc = X86::JLE_4; break; - default: - return false; + std::swap(TrueMBB, FalseMBB); // fall-through + case CmpInst::FCMP_UNE: + NeedExtraBranch = true; + Predicate = CmpInst::FCMP_ONE; + break; } - const Value *Op0 = CI->getOperand(0), *Op1 = CI->getOperand(1); + bool SwapArgs; + unsigned BranchOpc; + std::tie(CC, SwapArgs) = getX86ConditionCode(Predicate); + assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); + + BranchOpc = X86::GetCondBranchFromCond(CC); if (SwapArgs) - std::swap(Op0, Op1); + std::swap(CmpLHS, CmpRHS); // Emit a compare of the LHS and RHS, setting the flags. - if (!X86FastEmitCompare(Op0, Op1, VT)) + if (!X86FastEmitCompare(CmpLHS, CmpRHS, VT)) return false; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc)) .addMBB(TrueMBB); - if (Predicate == CmpInst::FCMP_UNE) { - // X86 requires a second branch to handle UNE (and OEQ, - // which is mapped to UNE above). + // X86 requires a second branch to handle UNE (and OEQ, which is mapped + // to UNE above). + if (NeedExtraBranch) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JP_4)) .addMBB(TrueMBB); } + // Obtain the branch weight and add the TrueBB to the successor list.
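The extra branch handled above exists because of how UCOMISS/UCOMISD report their result in EFLAGS: equal gives ZF=1,PF=0 while unordered gives ZF=1,PF=1, so `oeq` is ZF && !PF and `une` is !ZF || PF, and neither fits a single jcc. A sketch of the flag logic (from the architecture manual, not this commit):

// After ucomiss:  greater ZF=0,PF=0,CF=0   less      ZF=0,PF=0,CF=1
//                 equal   ZF=1,PF=0,CF=0   unordered ZF=1,PF=1,CF=1
bool fcmpOEQ(bool ZF, bool PF) { return ZF && !PF; } // SETE+SETNP+AND, per the table above
bool fcmpUNE(bool ZF, bool PF) { return !ZF || PF; } // SETNE+SETP+OR, or JNE plus JP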
+ uint32_t BranchWeight = 0; + if (FuncInfo.BPI) + BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), + TrueMBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); + + // Emits an unconditional branch to the FalseBB, obtains the branch + // weight, and adds it to the successor list. FastEmitBranch(FalseMBB, DbgLoc); - FuncInfo.MBB->addSuccessor(TrueMBB); + return true; } } else if (TruncInst *TI = dyn_cast<TruncInst>(BI->getCondition())) { @@ -1224,10 +1449,32 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(JmpOpc)) .addMBB(TrueMBB); FastEmitBranch(FalseMBB, DbgLoc); - FuncInfo.MBB->addSuccessor(TrueMBB); + uint32_t BranchWeight = 0; + if (FuncInfo.BPI) + BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), + TrueMBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); return true; } } + } else if (foldX86XALUIntrinsic(CC, BI, BI->getCondition())) { + // Fake request the condition, otherwise the intrinsic might be completely + // optimized away. + unsigned TmpReg = getRegForValue(BI->getCondition()); + if (TmpReg == 0) + return false; + + unsigned BranchOpc = X86::GetCondBranchFromCond(CC); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(BranchOpc)) + .addMBB(TrueMBB); + FastEmitBranch(FalseMBB, DbgLoc); + uint32_t BranchWeight = 0; + if (FuncInfo.BPI) + BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), + TrueMBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); + return true; } // Otherwise do a clumsy setcc and re-test it. @@ -1241,7 +1488,11 @@ bool X86FastISel::X86SelectBranch(const Instruction *I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::JNE_4)) .addMBB(TrueMBB); FastEmitBranch(FalseMBB, DbgLoc); - FuncInfo.MBB->addSuccessor(TrueMBB); + uint32_t BranchWeight = 0; + if (FuncInfo.BPI) + BranchWeight = FuncInfo.BPI->getEdgeWeight(BI->getParent(), + TrueMBB->getBasicBlock()); + FuncInfo.MBB->addSuccessor(TrueMBB, BranchWeight); return true; } @@ -1478,50 +1729,319 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { return true; } -bool X86FastISel::X86SelectSelect(const Instruction *I) { - MVT VT; - if (!isTypeLegal(I->getType(), VT)) +/// \brief Emit a conditional move instruction (if they are supported) to lower +/// the select. +bool X86FastISel::X86FastEmitCMoveSelect(MVT RetVT, const Instruction *I) { + // Check if the subtarget supports these instructions. + if (!Subtarget->hasCMov()) return false; - // We only use cmov here, if we don't have a cmov instruction bail. - if (!Subtarget->hasCMov()) return false; + // FIXME: Add support for i8. + if (RetVT < MVT::i16 || RetVT > MVT::i64) + return false; - unsigned Opc = 0; - const TargetRegisterClass *RC = nullptr; - if (VT == MVT::i16) { - Opc = X86::CMOVE16rr; - RC = &X86::GR16RegClass; - } else if (VT == MVT::i32) { - Opc = X86::CMOVE32rr; - RC = &X86::GR32RegClass; - } else if (VT == MVT::i64) { - Opc = X86::CMOVE64rr; - RC = &X86::GR64RegClass; - } else { + const Value *Cond = I->getOperand(0); + const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); + bool NeedTest = true; + X86::CondCode CC = X86::COND_NE; + + // Optimize conditions coming from a compare if both instructions are in the + // same basic block (values defined in other basic blocks may not have + // initialized registers).
+ const auto *CI = dyn_cast<CmpInst>(Cond); + if (CI && (CI->getParent() == I->getParent())) { + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + + // FCMP_OEQ and FCMP_UNE cannot be checked with a single instruction. + static unsigned SETFOpcTable[2][3] = { + { X86::SETNPr, X86::SETEr , X86::TEST8rr }, + { X86::SETPr, X86::SETNEr, X86::OR8rr } + }; + unsigned *SETFOpc = nullptr; + switch (Predicate) { + default: break; + case CmpInst::FCMP_OEQ: + SETFOpc = &SETFOpcTable[0][0]; + Predicate = CmpInst::ICMP_NE; + break; + case CmpInst::FCMP_UNE: + SETFOpc = &SETFOpcTable[1][0]; + Predicate = CmpInst::ICMP_NE; + break; + } + + bool NeedSwap; + std::tie(CC, NeedSwap) = getX86ConditionCode(Predicate); + assert(CC <= X86::LAST_VALID_COND && "Unexpected condition code."); + + const Value *CmpLHS = CI->getOperand(0); + const Value *CmpRHS = CI->getOperand(1); + if (NeedSwap) + std::swap(CmpLHS, CmpRHS); + + EVT CmpVT = TLI.getValueType(CmpLHS->getType()); + // Emit a compare of the LHS and RHS, setting the flags. + if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT)) + return false; + + if (SETFOpc) { + unsigned FlagReg1 = createResultReg(&X86::GR8RegClass); + unsigned FlagReg2 = createResultReg(&X86::GR8RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[0]), + FlagReg1); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(SETFOpc[1]), + FlagReg2); + auto const &II = TII.get(SETFOpc[2]); + if (II.getNumDefs()) { + unsigned TmpReg = createResultReg(&X86::GR8RegClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II, TmpReg) + .addReg(FlagReg2).addReg(FlagReg1); + } else { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(FlagReg2).addReg(FlagReg1); + } + } + NeedTest = false; + } else if (foldX86XALUIntrinsic(CC, I, Cond)) { + // Fake request the condition, otherwise the intrinsic might be completely + // optimized away. + unsigned TmpReg = getRegForValue(Cond); + if (TmpReg == 0) + return false; + + NeedTest = false; + } + + if (NeedTest) { + // Selects operate on i1, however, CondReg is 8 bits width and may contain + // garbage. Indeed, only the less significant bit is supposed to be + // accurate. If we read more than the lsb, we may see non-zero values + // whereas lsb is zero. Therefore, we have to truncate Op0Reg to i1 for + // the select. This is achieved by performing TEST against 1. + unsigned CondReg = getRegForValue(Cond); + if (CondReg == 0) + return false; + bool CondIsKill = hasTrivialKill(Cond); + + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) + .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1); + } + + const Value *LHS = I->getOperand(1); + const Value *RHS = I->getOperand(2); + + unsigned RHSReg = getRegForValue(RHS); + bool RHSIsKill = hasTrivialKill(RHS); + + unsigned LHSReg = getRegForValue(LHS); + bool LHSIsKill = hasTrivialKill(LHS); + + if (!LHSReg || !RHSReg) + return false; + + unsigned Opc = X86::getCMovFromCond(CC, RC->getSize()); + unsigned ResultReg = FastEmitInst_rr(Opc, RC, RHSReg, RHSIsKill, + LHSReg, LHSIsKill); + UpdateValueMap(I, ResultReg); + return true; +} + +/// \brief Emit SSE instructions to lower the select. +/// +/// Try to use SSE1/SSE2 instructions to simulate a select without branches. +/// This lowers fp selects into a CMP/AND/ANDN/OR sequence when the necessary +/// SSE instructions are available. 
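The CMP/AND/ANDN/OR sequence named just above can be modeled in scalar C++ as a mask select (an illustrative sketch, not code from this commit):

#include <cstdint>
#include <cstring>

// Branchless select(a < b, x, y): CMPSS yields an all-ones/all-zeros mask,
// ANDPS keeps x where the mask is set, ANDNPS keeps y elsewhere, ORPS merges.
float sseStyleSelect(float a, float b, float x, float y) {
  uint32_t Mask = (a < b) ? 0xFFFFFFFFu : 0u;
  uint32_t XI, YI;
  std::memcpy(&XI, &x, sizeof XI);
  std::memcpy(&YI, &y, sizeof YI);
  uint32_t RI = (Mask & XI) | (~Mask & YI);
  float R;
  std::memcpy(&R, &RI, sizeof R);
  return R;
}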
+bool X86FastISel::X86FastEmitSSESelect(MVT RetVT, const Instruction *I) { + // Optimize conditions coming from a compare if both instructions are in the + // same basic block (values defined in other basic blocks may not have + // initialized registers). + const auto *CI = dyn_cast<FCmpInst>(I->getOperand(0)); + if (!CI || (CI->getParent() != I->getParent())) return false; + + if (I->getType() != CI->getOperand(0)->getType() || + !((Subtarget->hasSSE1() && RetVT == MVT::f32) || + (Subtarget->hasSSE2() && RetVT == MVT::f64) )) + return false; + + const Value *CmpLHS = CI->getOperand(0); + const Value *CmpRHS = CI->getOperand(1); + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + + // The optimizer might have replaced fcmp oeq %x, %x with fcmp ord %x, 0.0. + // We don't have to materialize a zero constant for this case and can just use + // %x again on the RHS. + if (Predicate == CmpInst::FCMP_ORD || Predicate == CmpInst::FCMP_UNO) { + const auto *CmpRHSC = dyn_cast<ConstantFP>(CmpRHS); + if (CmpRHSC && CmpRHSC->isNullValue()) + CmpRHS = CmpLHS; } - unsigned Op0Reg = getRegForValue(I->getOperand(0)); - if (Op0Reg == 0) return false; - unsigned Op1Reg = getRegForValue(I->getOperand(1)); - if (Op1Reg == 0) return false; - unsigned Op2Reg = getRegForValue(I->getOperand(2)); - if (Op2Reg == 0) return false; - - // Selects operate on i1, however, Op0Reg is 8 bits width and may contain - // garbage. Indeed, only the less significant bit is supposed to be accurate. - // If we read more than the lsb, we may see non-zero values whereas lsb - // is zero. Therefore, we have to truncate Op0Reg to i1 for the select. - // This is achieved by performing TEST against 1. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) - .addReg(Op0Reg).addImm(1); - unsigned ResultReg = createResultReg(RC); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) - .addReg(Op1Reg).addReg(Op2Reg); + unsigned CC; + bool NeedSwap; + std::tie(CC, NeedSwap) = getX86SSEConditionCode(Predicate); + if (CC > 7) + return false; + + if (NeedSwap) + std::swap(CmpLHS, CmpRHS); + + static unsigned OpcTable[2][2][4] = { + { { X86::CMPSSrr, X86::FsANDPSrr, X86::FsANDNPSrr, X86::FsORPSrr }, + { X86::VCMPSSrr, X86::VFsANDPSrr, X86::VFsANDNPSrr, X86::VFsORPSrr } }, + { { X86::CMPSDrr, X86::FsANDPDrr, X86::FsANDNPDrr, X86::FsORPDrr }, + { X86::VCMPSDrr, X86::VFsANDPDrr, X86::VFsANDNPDrr, X86::VFsORPDrr } } + }; + + bool HasAVX = Subtarget->hasAVX(); + unsigned *Opc = nullptr; + switch (RetVT.SimpleTy) { + default: return false; + case MVT::f32: Opc = &OpcTable[0][HasAVX][0]; break; + case MVT::f64: Opc = &OpcTable[1][HasAVX][0]; break; + } + + const Value *LHS = I->getOperand(1); + const Value *RHS = I->getOperand(2); + + unsigned LHSReg = getRegForValue(LHS); + bool LHSIsKill = hasTrivialKill(LHS); + + unsigned RHSReg = getRegForValue(RHS); + bool RHSIsKill = hasTrivialKill(RHS); + + unsigned CmpLHSReg = getRegForValue(CmpLHS); + bool CmpLHSIsKill = hasTrivialKill(CmpLHS); + + unsigned CmpRHSReg = getRegForValue(CmpRHS); + bool CmpRHSIsKill = hasTrivialKill(CmpRHS); + + if (!LHSReg || !RHSReg || !CmpLHS || !CmpRHS) + return false; + + const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); + unsigned CmpReg = FastEmitInst_rri(Opc[0], RC, CmpLHSReg, CmpLHSIsKill, + CmpRHSReg, CmpRHSIsKill, CC); + unsigned AndReg = FastEmitInst_rr(Opc[1], RC, CmpReg, /*IsKill=*/false, + LHSReg, LHSIsKill); + unsigned AndNReg = FastEmitInst_rr(Opc[2], RC, CmpReg, /*IsKill=*/true, + RHSReg, RHSIsKill); + 
unsigned ResultReg = FastEmitInst_rr(Opc[3], RC, AndNReg, /*IsKill=*/true, + AndReg, /*IsKill=*/true); + UpdateValueMap(I, ResultReg); + return true; +} + +bool X86FastISel::X86FastEmitPseudoSelect(MVT RetVT, const Instruction *I) { + // These are pseudo CMOV instructions and will be later expanded into control- + // flow. + unsigned Opc; + switch (RetVT.SimpleTy) { + default: return false; + case MVT::i8: Opc = X86::CMOV_GR8; break; + case MVT::i16: Opc = X86::CMOV_GR16; break; + case MVT::i32: Opc = X86::CMOV_GR32; break; + case MVT::f32: Opc = X86::CMOV_FR32; break; + case MVT::f64: Opc = X86::CMOV_FR64; break; + } + + const Value *Cond = I->getOperand(0); + X86::CondCode CC = X86::COND_NE; + + // Optimize conditions coming from a compare if both instructions are in the + // same basic block (values defined in other basic blocks may not have + // initialized registers). + const auto *CI = dyn_cast<CmpInst>(Cond); + if (CI && (CI->getParent() == I->getParent())) { + bool NeedSwap; + std::tie(CC, NeedSwap) = getX86ConditionCode(CI->getPredicate()); + if (CC > X86::LAST_VALID_COND) + return false; + + const Value *CmpLHS = CI->getOperand(0); + const Value *CmpRHS = CI->getOperand(1); + + if (NeedSwap) + std::swap(CmpLHS, CmpRHS); + + EVT CmpVT = TLI.getValueType(CmpLHS->getType()); + if (!X86FastEmitCompare(CmpLHS, CmpRHS, CmpVT)) + return false; + } else { + unsigned CondReg = getRegForValue(Cond); + if (CondReg == 0) + return false; + bool CondIsKill = hasTrivialKill(Cond); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TEST8ri)) + .addReg(CondReg, getKillRegState(CondIsKill)).addImm(1); + } + + const Value *LHS = I->getOperand(1); + const Value *RHS = I->getOperand(2); + + unsigned LHSReg = getRegForValue(LHS); + bool LHSIsKill = hasTrivialKill(LHS); + + unsigned RHSReg = getRegForValue(RHS); + bool RHSIsKill = hasTrivialKill(RHS); + + if (!LHSReg || !RHSReg) + return false; + + const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); + + unsigned ResultReg = + FastEmitInst_rri(Opc, RC, RHSReg, RHSIsKill, LHSReg, LHSIsKill, CC); UpdateValueMap(I, ResultReg); return true; } +bool X86FastISel::X86SelectSelect(const Instruction *I) { + MVT RetVT; + if (!isTypeLegal(I->getType(), RetVT)) + return false; + + // Check if we can fold the select. + if (const auto *CI = dyn_cast<CmpInst>(I->getOperand(0))) { + CmpInst::Predicate Predicate = optimizeCmpPredicate(CI); + const Value *Opnd = nullptr; + switch (Predicate) { + default: break; + case CmpInst::FCMP_FALSE: Opnd = I->getOperand(2); break; + case CmpInst::FCMP_TRUE: Opnd = I->getOperand(1); break; + } + // No need for a select anymore - this is an unconditional move. + if (Opnd) { + unsigned OpReg = getRegForValue(Opnd); + if (OpReg == 0) + return false; + bool OpIsKill = hasTrivialKill(Opnd); + const TargetRegisterClass *RC = TLI.getRegClassFor(RetVT); + unsigned ResultReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(OpReg, getKillRegState(OpIsKill)); + UpdateValueMap(I, ResultReg); + return true; + } + } + + // First try to use real conditional move instructions. + if (X86FastEmitCMoveSelect(RetVT, I)) + return true; + + // Try to use a sequence of SSE instructions to simulate a conditional move. + if (X86FastEmitSSESelect(RetVT, I)) + return true; + + // Fall-back to pseudo conditional move instructions, which will be later + // converted to control-flow. 
+ if (X86FastEmitPseudoSelect(RetVT, I)) + return true; + + return false; +} + bool X86FastISel::X86SelectFPExt(const Instruction *I) { // fpext from float to double. if (X86ScalarSSEf64 && @@ -1633,8 +2153,8 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, } unsigned Reg; - bool RV = X86FastEmitLoad(VT, SrcAM, Reg); - RV &= X86FastEmitStore(VT, Reg, DestAM); + bool RV = X86FastEmitLoad(VT, SrcAM, nullptr, Reg); + RV &= X86FastEmitStore(VT, Reg, /*Kill=*/true, DestAM); assert(RV && "Failed to emit load or store??"); unsigned Size = VT.getSizeInBits()/8; @@ -1646,10 +2166,74 @@ bool X86FastISel::TryEmitSmallMemcpy(X86AddressMode DestAM, return true; } +static bool isCommutativeIntrinsic(IntrinsicInst const &I) { + switch (I.getIntrinsicID()) { + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + return true; + default: + return false; + } +} + bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; + case Intrinsic::frameaddress: { + Type *RetTy = I.getCalledFunction()->getReturnType(); + + MVT VT; + if (!isTypeLegal(RetTy, VT)) + return false; + + unsigned Opc; + const TargetRegisterClass *RC = nullptr; + + switch (VT.SimpleTy) { + default: llvm_unreachable("Invalid result type for frameaddress."); + case MVT::i32: Opc = X86::MOV32rm; RC = &X86::GR32RegClass; break; + case MVT::i64: Opc = X86::MOV64rm; RC = &X86::GR64RegClass; break; + } + + // This needs to be set before we call getFrameRegister, otherwise we get + // the wrong frame register. + MachineFrameInfo *MFI = FuncInfo.MF->getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo*>(TM.getRegisterInfo()); + unsigned FrameReg = RegInfo->getFrameRegister(*(FuncInfo.MF)); + assert(((FrameReg == X86::RBP && VT == MVT::i64) || + (FrameReg == X86::EBP && VT == MVT::i32)) && + "Invalid Frame Register!"); + + // Always make a copy of the frame register to a vreg first, so that we + // never directly reference the frame register (the TwoAddressInstruction- + // Pass doesn't like that). + unsigned SrcReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), SrcReg).addReg(FrameReg); + + // Now recursively load from the frame address. + // movq (%rbp), %rax + // movq (%rax), %rax + // movq (%rax), %rax + // ... + unsigned DestReg; + unsigned Depth = cast<ConstantInt>(I.getOperand(0))->getZExtValue(); + while (Depth--) { + DestReg = createResultReg(RC); + addDirectMem(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Opc), DestReg), SrcReg); + SrcReg = DestReg; + } + + UpdateValueMap(&I, SrcReg); + return true; + } case Intrinsic::memcpy: { const MemCpyInst &MCI = cast<MemCpyInst>(I); // Don't handle volatile or variable length memcpys. @@ -1726,52 +2310,233 @@ bool X86FastISel::X86VisitIntrinsicCall(const IntrinsicInst &I) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(X86::TRAP)); return true; } - case Intrinsic::sadd_with_overflow: - case Intrinsic::uadd_with_overflow: { - // FIXME: Should fold immediates. + case Intrinsic::sqrt: { + if (!Subtarget->hasSSE1()) + return false; - // Replace "add with overflow" intrinsics with an "add" instruction followed - by a seto/setc instruction.
- const Function *Callee = I.getCalledFunction(); - Type *RetTy = - cast<StructType>(Callee->getReturnType())->getTypeAtIndex(unsigned(0)); + Type *RetTy = I.getCalledFunction()->getReturnType(); MVT VT; if (!isTypeLegal(RetTy, VT)) return false; - const Value *Op1 = I.getArgOperand(0); - const Value *Op2 = I.getArgOperand(1); - unsigned Reg1 = getRegForValue(Op1); - unsigned Reg2 = getRegForValue(Op2); + // Unfortunately we can't use FastEmit_r, because the AVX version of FSQRT + // is not generated by FastISel yet. + // FIXME: Update this code once tablegen can handle it. + static const unsigned SqrtOpc[2][2] = { + {X86::SQRTSSr, X86::VSQRTSSr}, + {X86::SQRTSDr, X86::VSQRTSDr} + }; + bool HasAVX = Subtarget->hasAVX(); + unsigned Opc; + const TargetRegisterClass *RC; + switch (VT.SimpleTy) { + default: return false; + case MVT::f32: Opc = SqrtOpc[0][HasAVX]; RC = &X86::FR32RegClass; break; + case MVT::f64: Opc = SqrtOpc[1][HasAVX]; RC = &X86::FR64RegClass; break; + } + + const Value *SrcVal = I.getArgOperand(0); + unsigned SrcReg = getRegForValue(SrcVal); - if (Reg1 == 0 || Reg2 == 0) - // FIXME: Handle values *not* in registers. + if (SrcReg == 0) return false; - unsigned OpC = 0; - if (VT == MVT::i32) - OpC = X86::ADD32rr; - else if (VT == MVT::i64) - OpC = X86::ADD64rr; - else + unsigned ImplicitDefReg = 0; + if (HasAVX) { + ImplicitDefReg = createResultReg(RC); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::IMPLICIT_DEF), ImplicitDefReg); + } + + unsigned ResultReg = createResultReg(RC); + MachineInstrBuilder MIB; + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), + ResultReg); + + if (ImplicitDefReg) + MIB.addReg(ImplicitDefReg); + + MIB.addReg(SrcReg); + + UpdateValueMap(&I, ResultReg); + return true; + } + case Intrinsic::sadd_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: { + // This implements the basic lowering of the xalu with overflow intrinsics + // into add/sub/mul followed by either seto or setb. + const Function *Callee = I.getCalledFunction(); + auto *Ty = cast<StructType>(Callee->getReturnType()); + Type *RetTy = Ty->getTypeAtIndex(0U); + Type *CondTy = Ty->getTypeAtIndex(1); + + MVT VT; + if (!isTypeLegal(RetTy, VT)) + return false; + + if (VT < MVT::i8 || VT > MVT::i64) + return false; + + const Value *LHS = I.getArgOperand(0); + const Value *RHS = I.getArgOperand(1); + + // Canonicalize immediate to the RHS. 
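// [Editor's note] The swap below is only legal for the commutative cases
// enumerated in isCommutativeIntrinsic() earlier in this patch; for example
// sadd.with.overflow(2, %x) equals sadd.with.overflow(%x, 2), but
// usub.with.overflow(2, %x) does not equal usub.with.overflow(%x, 2).
// Putting the constant on the RHS lets the FastEmit_ri immediate form a few
// lines down fire instead of first materializing the constant into a
// register.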
+ if (isa<ConstantInt>(LHS) && !isa<ConstantInt>(RHS) && + isCommutativeIntrinsic(I)) + std::swap(LHS, RHS); + + unsigned BaseOpc, CondOpc; + switch (I.getIntrinsicID()) { + default: llvm_unreachable("Unexpected intrinsic!"); + case Intrinsic::sadd_with_overflow: + BaseOpc = ISD::ADD; CondOpc = X86::SETOr; break; + case Intrinsic::uadd_with_overflow: + BaseOpc = ISD::ADD; CondOpc = X86::SETBr; break; + case Intrinsic::ssub_with_overflow: + BaseOpc = ISD::SUB; CondOpc = X86::SETOr; break; + case Intrinsic::usub_with_overflow: + BaseOpc = ISD::SUB; CondOpc = X86::SETBr; break; + case Intrinsic::smul_with_overflow: + BaseOpc = X86ISD::SMUL; CondOpc = X86::SETOr; break; + case Intrinsic::umul_with_overflow: + BaseOpc = X86ISD::UMUL; CondOpc = X86::SETOr; break; + } + + unsigned LHSReg = getRegForValue(LHS); + if (LHSReg == 0) return false; + bool LHSIsKill = hasTrivialKill(LHS); - // The call to CreateRegs builds two sequential registers, to store the - // both the returned values. - unsigned ResultReg = FuncInfo.CreateRegs(I.getType()); - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(OpC), ResultReg) - .addReg(Reg1).addReg(Reg2); + unsigned ResultReg = 0; + // Check if we have an immediate version. + if (auto const *C = dyn_cast<ConstantInt>(RHS)) { + ResultReg = FastEmit_ri(VT, VT, BaseOpc, LHSReg, LHSIsKill, + C->getZExtValue()); + } - unsigned Opc = X86::SETBr; - if (I.getIntrinsicID() == Intrinsic::sadd_with_overflow) - Opc = X86::SETOr; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), - ResultReg + 1); + unsigned RHSReg; + bool RHSIsKill; + if (!ResultReg) { + RHSReg = getRegForValue(RHS); + if (RHSReg == 0) + return false; + RHSIsKill = hasTrivialKill(RHS); + ResultReg = FastEmit_rr(VT, VT, BaseOpc, LHSReg, LHSIsKill, RHSReg, + RHSIsKill); + } + + // FastISel doesn't have a pattern for all X86::MUL*r and X86::IMUL*r. Emit + // it manually. + if (BaseOpc == X86ISD::UMUL && !ResultReg) { + static const unsigned MULOpc[] = + { X86::MUL8r, X86::MUL16r, X86::MUL32r, X86::MUL64r }; + static const unsigned Reg[] = { X86::AL, X86::AX, X86::EAX, X86::RAX }; + // First copy the first operand into RAX, which is an implicit input to + // the X86::MUL*r instruction. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), Reg[VT.SimpleTy-MVT::i8]) + .addReg(LHSReg, getKillRegState(LHSIsKill)); + ResultReg = FastEmitInst_r(MULOpc[VT.SimpleTy-MVT::i8], + TLI.getRegClassFor(VT), RHSReg, RHSIsKill); + } else if (BaseOpc == X86ISD::SMUL && !ResultReg) { + static const unsigned MULOpc[] = + { X86::IMUL8r, X86::IMUL16rr, X86::IMUL32rr, X86::IMUL64rr }; + if (VT == MVT::i8) { + // Copy the first operand into AL, which is an implicit input to the + // X86::IMUL8r instruction. 
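// [Editor's note] MUL8r above and IMUL8r here take a single explicit
// operand and use AL/AX implicitly (AX = AL * r/m8, with OF/CF signalling
// overflow), so the LHS has to be staged through a physical-register copy:
//
//   movb  %lhs, %al     ; the COPY built below
//   imulb %rhs          ; result in AX, overflow in OF
//
// The 16/32/64-bit IMUL forms are ordinary two-register instructions, which
// is why the else branch can use FastEmitInst_rr directly.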
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(TargetOpcode::COPY), X86::AL) + .addReg(LHSReg, getKillRegState(LHSIsKill)); + ResultReg = FastEmitInst_r(MULOpc[0], TLI.getRegClassFor(VT), RHSReg, + RHSIsKill); + } else + ResultReg = FastEmitInst_rr(MULOpc[VT.SimpleTy-MVT::i8], + TLI.getRegClassFor(VT), LHSReg, LHSIsKill, + RHSReg, RHSIsKill); + } + + if (!ResultReg) + return false; + + unsigned ResultReg2 = FuncInfo.CreateRegs(CondTy); + assert((ResultReg+1) == ResultReg2 && "Nonconsecutive result registers."); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(CondOpc), + ResultReg2); UpdateValueMap(&I, ResultReg, 2); return true; } + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: { + bool IsInputDouble; + switch (I.getIntrinsicID()) { + default: llvm_unreachable("Unexpected intrinsic."); + case Intrinsic::x86_sse_cvttss2si: + case Intrinsic::x86_sse_cvttss2si64: + if (!Subtarget->hasSSE1()) + return false; + IsInputDouble = false; + break; + case Intrinsic::x86_sse2_cvttsd2si: + case Intrinsic::x86_sse2_cvttsd2si64: + if (!Subtarget->hasSSE2()) + return false; + IsInputDouble = true; + break; + } + + Type *RetTy = I.getCalledFunction()->getReturnType(); + MVT VT; + if (!isTypeLegal(RetTy, VT)) + return false; + + static const unsigned CvtOpc[2][2][2] = { + { { X86::CVTTSS2SIrr, X86::VCVTTSS2SIrr }, + { X86::CVTTSS2SI64rr, X86::VCVTTSS2SI64rr } }, + { { X86::CVTTSD2SIrr, X86::VCVTTSD2SIrr }, + { X86::CVTTSD2SI64rr, X86::VCVTTSD2SI64rr } } + }; + bool HasAVX = Subtarget->hasAVX(); + unsigned Opc; + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected result type."); + case MVT::i32: Opc = CvtOpc[IsInputDouble][0][HasAVX]; break; + case MVT::i64: Opc = CvtOpc[IsInputDouble][1][HasAVX]; break; + } + + // Check if we can fold insertelement instructions into the convert. + const Value *Op = I.getArgOperand(0); + while (auto *IE = dyn_cast<InsertElementInst>(Op)) { + const Value *Index = IE->getOperand(2); + if (!isa<ConstantInt>(Index)) + break; + unsigned Idx = cast<ConstantInt>(Index)->getZExtValue(); + + if (Idx == 0) { + Op = IE->getOperand(1); + break; + } + Op = IE->getOperand(0); + } + + unsigned Reg = getRegForValue(Op); + if (Reg == 0) + return false; + + unsigned ResultReg = createResultReg(TLI.getRegClassFor(VT)); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg) + .addReg(Reg); + + UpdateValueMap(&I, ResultReg); + return true; + } } } @@ -1794,31 +2559,43 @@ bool X86FastISel::FastLowerArguments() { return false; // Only handle simple cases. i.e. Up to 6 i32/i64 scalar arguments. - unsigned Idx = 1; - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I, ++Idx) { - if (Idx > 6) - return false; - + unsigned GPRCnt = 0; + unsigned FPRCnt = 0; + unsigned Idx = 0; + for (auto const &Arg : F->args()) { + // The first argument is at index 1. 
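// [Editor's note] In the attribute-list numbering used by
// F->getAttributes(), index 0 refers to the function's return value and
// parameters start at index 1, so Idx is pre-incremented at the top of the
// loop before the hasAttribute() queries below check ByVal/InReg/StructRet/
// Nest on each parameter.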
+ ++Idx; if (F->getAttributes().hasAttribute(Idx, Attribute::ByVal) || F->getAttributes().hasAttribute(Idx, Attribute::InReg) || F->getAttributes().hasAttribute(Idx, Attribute::StructRet) || F->getAttributes().hasAttribute(Idx, Attribute::Nest)) return false; - Type *ArgTy = I->getType(); + Type *ArgTy = Arg.getType(); if (ArgTy->isStructTy() || ArgTy->isArrayTy() || ArgTy->isVectorTy()) return false; EVT ArgVT = TLI.getValueType(ArgTy); if (!ArgVT.isSimple()) return false; switch (ArgVT.getSimpleVT().SimpleTy) { + default: return false; case MVT::i32: case MVT::i64: + ++GPRCnt; + break; + case MVT::f32: + case MVT::f64: + if (!Subtarget->hasSSE1()) + return false; + ++FPRCnt; break; - default: - return false; } + + if (GPRCnt > 6) + return false; + + if (FPRCnt > 8) + return false; } static const MCPhysReg GPR32ArgRegs[] = { @@ -1827,24 +2604,33 @@ bool X86FastISel::FastLowerArguments() { static const MCPhysReg GPR64ArgRegs[] = { X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8 , X86::R9 }; + static const MCPhysReg XMMArgRegs[] = { + X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, + X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 + }; - Idx = 0; - const TargetRegisterClass *RC32 = TLI.getRegClassFor(MVT::i32); - const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64); - for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); - I != E; ++I, ++Idx) { - bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32; - const TargetRegisterClass *RC = is32Bit ? RC32 : RC64; - unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx]; + unsigned GPRIdx = 0; + unsigned FPRIdx = 0; + for (auto const &Arg : F->args()) { + MVT VT = TLI.getSimpleValueType(Arg.getType()); + const TargetRegisterClass *RC = TLI.getRegClassFor(VT); + unsigned SrcReg; + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type."); + case MVT::i32: SrcReg = GPR32ArgRegs[GPRIdx++]; break; + case MVT::i64: SrcReg = GPR64ArgRegs[GPRIdx++]; break; + case MVT::f32: // fall-through + case MVT::f64: SrcReg = XMMArgRegs[FPRIdx++]; break; + } unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. // Without this, EmitLiveInCopies may eliminate the livein if its only // use is a bitcast (which isn't turned into an instruction). unsigned ResultReg = createResultReg(RC); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, - TII.get(TargetOpcode::COPY), - ResultReg).addReg(DstReg, getKillRegState(true)); - UpdateValueMap(I, ResultReg); + TII.get(TargetOpcode::COPY), ResultReg) + .addReg(DstReg, getKillRegState(true)); + UpdateValueMap(&Arg, ResultReg); } return true; } @@ -2147,7 +2933,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) { if (!X86FastEmitStore(ArgVT, ArgVal, AM)) return false; } else { - if (!X86FastEmitStore(ArgVT, Arg, AM)) + if (!X86FastEmitStore(ArgVT, Arg, /*ValIsKill=*/false, AM)) return false; } } @@ -2430,7 +3216,7 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { return 0; } - // Materialize addresses with LEA instructions. + // Materialize addresses with LEA/MOV instructions. if (isa<GlobalValue>(C)) { X86AddressMode AM; if (X86SelectAddress(C, AM)) { @@ -2440,10 +3226,19 @@ unsigned X86FastISel::TargetMaterializeConstant(const Constant *C) { AM.IndexReg == 0 && AM.Disp == 0 && AM.GV == nullptr) return AM.Base.Reg; - Opc = TLI.getPointerTy() == MVT::i32 ? 
X86::LEA32r : X86::LEA64r; unsigned ResultReg = createResultReg(RC); - addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + if (TM.getRelocationModel() == Reloc::Static && + TLI.getPointerTy() == MVT::i64) { + // The displacement could be more than 32 bits away, so we need to use + // an instruction with a 64-bit immediate. + Opc = X86::MOV64ri; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, + TII.get(Opc), ResultReg).addGlobalAddress(cast<GlobalValue>(C)); + } else { + Opc = TLI.getPointerTy() == MVT::i32 ? X86::LEA32r : X86::LEA64r; + addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(Opc), ResultReg), AM); + } return ResultReg; } return 0; @@ -2544,8 +3339,9 @@ unsigned X86FastISel::TargetMaterializeFloatZero(const ConstantFP *CF) { bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, const LoadInst *LI) { + const Value *Ptr = LI->getPointerOperand(); X86AddressMode AM; - if (!X86SelectAddress(LI->getOperand(0), AM)) + if (!X86SelectAddress(Ptr, AM)) return false; const X86InstrInfo &XII = (const X86InstrInfo&)TII; @@ -2553,13 +3349,18 @@ bool X86FastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, unsigned Size = DL.getTypeAllocSize(LI->getType()); unsigned Alignment = LI->getAlignment(); + if (Alignment == 0) // Ensure that codegen never sees alignment 0 + Alignment = DL.getABITypeAlignment(LI->getType()); + SmallVector<MachineOperand, 8> AddrOps; AM.getFullAddress(AddrOps); MachineInstr *Result = XII.foldMemoryOperandImpl(*FuncInfo.MF, MI, OpNo, AddrOps, Size, Alignment); - if (!Result) return false; + if (!Result) + return false; + Result->addMemOperand(*FuncInfo.MF, createMachineMemOperandFor(LI)); FuncInfo.MBB->insert(FuncInfo.InsertPt, Result); MI->eraseFromParent(); return true; diff --git a/lib/Target/X86/X86FixupLEAs.cpp b/lib/Target/X86/X86FixupLEAs.cpp index 6c5b86f..4be766a 100644 --- a/lib/Target/X86/X86FixupLEAs.cpp +++ b/lib/Target/X86/X86FixupLEAs.cpp @@ -32,86 +32,89 @@ using namespace llvm; STATISTIC(NumLEAs, "Number of LEA instructions created"); namespace { - class FixupLEAPass : public MachineFunctionPass { - enum RegUsageState { RU_NotUsed, RU_Write, RU_Read }; - static char ID; - /// \brief Loop over all of the instructions in the basic block - /// replacing applicable instructions with LEA instructions, - /// where appropriate. - bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI); +class FixupLEAPass : public MachineFunctionPass { + enum RegUsageState { RU_NotUsed, RU_Write, RU_Read }; + static char ID; + /// \brief Loop over all of the instructions in the basic block + /// replacing applicable instructions with LEA instructions, + /// where appropriate. + bool processBasicBlock(MachineFunction &MF, MachineFunction::iterator MFI); - const char *getPassName() const override { return "X86 Atom LEA Fixup";} + const char *getPassName() const override { return "X86 Atom LEA Fixup"; } - /// \brief Given a machine register, look for the instruction - /// which writes it in the current basic block. If found, - /// try to replace it with an equivalent LEA instruction. - /// If replacement succeeds, then also process the the newly created - /// instruction. - void seekLEAFixup(MachineOperand& p, MachineBasicBlock::iterator& I, - MachineFunction::iterator MFI); + /// \brief Given a machine register, look for the instruction + /// which writes it in the current basic block. If found, + /// try to replace it with an equivalent LEA instruction.
+ /// If replacement succeeds, then also process the newly created + /// instruction. + void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I, + MachineFunction::iterator MFI); - /// \brief Given a memory access or LEA instruction - /// whose address mode uses a base and/or index register, look for - /// an opportunity to replace the instruction which sets the base or index - /// register with an equivalent LEA instruction. - void processInstruction(MachineBasicBlock::iterator& I, - MachineFunction::iterator MFI); + /// \brief Given a memory access or LEA instruction + /// whose address mode uses a base and/or index register, look for + /// an opportunity to replace the instruction which sets the base or index + /// register with an equivalent LEA instruction. + void processInstruction(MachineBasicBlock::iterator &I, + MachineFunction::iterator MFI); - /// \brief Given a LEA instruction which is unprofitable - /// on Silvermont try to replace it with an equivalent ADD instruction - void processInstructionForSLM(MachineBasicBlock::iterator& I, - MachineFunction::iterator MFI); + /// \brief Given a LEA instruction which is unprofitable + /// on Silvermont try to replace it with an equivalent ADD instruction + void processInstructionForSLM(MachineBasicBlock::iterator &I, + MachineFunction::iterator MFI); - /// \brief Determine if an instruction references a machine register - /// and, if so, whether it reads or writes the register. - RegUsageState usesRegister(MachineOperand& p, - MachineBasicBlock::iterator I); + /// \brief Determine if an instruction references a machine register + /// and, if so, whether it reads or writes the register. + RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I); - /// \brief Step backwards through a basic block, looking - /// for an instruction which writes a register within - /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles. - MachineBasicBlock::iterator searchBackwards(MachineOperand& p, - MachineBasicBlock::iterator& I, - MachineFunction::iterator MFI); + /// \brief Step backwards through a basic block, looking + /// for an instruction which writes a register within + /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles. + MachineBasicBlock::iterator searchBackwards(MachineOperand &p, + MachineBasicBlock::iterator &I, + MachineFunction::iterator MFI); - /// \brief if an instruction can be converted to an - /// equivalent LEA, insert the new instruction into the basic block - /// and return a pointer to it. Otherwise, return zero. - MachineInstr* postRAConvertToLEA(MachineFunction::iterator &MFI, - MachineBasicBlock::iterator &MBBI) const; + /// \brief if an instruction can be converted to an + /// equivalent LEA, insert the new instruction into the basic block + /// and return a pointer to it. Otherwise, return zero. + MachineInstr *postRAConvertToLEA(MachineFunction::iterator &MFI, + MachineBasicBlock::iterator &MBBI) const; - public: - FixupLEAPass() : MachineFunctionPass(ID) {} +public: + FixupLEAPass() : MachineFunctionPass(ID) {} - /// \brief Loop over all of the basic blocks, - /// replacing instructions by equivalent LEA instructions - /// if needed and when possible. - bool runOnMachineFunction(MachineFunction &MF) override; + /// \brief Loop over all of the basic blocks, + /// replacing instructions by equivalent LEA instructions + /// if needed and when possible.
+ bool runOnMachineFunction(MachineFunction &MF) override; - private: - MachineFunction *MF; - const TargetMachine *TM; - const X86InstrInfo *TII; // Machine instruction info. - - }; - char FixupLEAPass::ID = 0; +private: + MachineFunction *MF; + const TargetMachine *TM; + const X86InstrInfo *TII; // Machine instruction info. +}; +char FixupLEAPass::ID = 0; } MachineInstr * FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, MachineBasicBlock::iterator &MBBI) const { - MachineInstr* MI = MBBI; - MachineInstr* NewMI; + MachineInstr *MI = MBBI; + MachineInstr *NewMI; switch (MI->getOpcode()) { case X86::MOV32rr: case X86::MOV64rr: { - const MachineOperand& Src = MI->getOperand(1); - const MachineOperand& Dest = MI->getOperand(0); + const MachineOperand &Src = MI->getOperand(1); + const MachineOperand &Dest = MI->getOperand(0); NewMI = BuildMI(*MF, MI->getDebugLoc(), - TII->get( MI->getOpcode() == X86::MOV32rr ? X86::LEA32r : X86::LEA64r)) - .addOperand(Dest) - .addOperand(Src).addImm(1).addReg(0).addImm(0).addReg(0); - MFI->insert(MBBI, NewMI); // Insert the new inst + TII->get(MI->getOpcode() == X86::MOV32rr ? X86::LEA32r + : X86::LEA64r)) + .addOperand(Dest) + .addOperand(Src) + .addImm(1) + .addReg(0) + .addImm(0) + .addReg(0); + MFI->insert(MBBI, NewMI); // Insert the new inst return NewMI; } case X86::ADD64ri32: @@ -144,17 +147,16 @@ FixupLEAPass::postRAConvertToLEA(MachineFunction::iterator &MFI, return TII->convertToThreeAddress(MFI, MBBI, nullptr); } -FunctionPass *llvm::createX86FixupLEAs() { - return new FixupLEAPass(); -} +FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); } bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { + MF = &Func; TM = &Func.getTarget(); const X86Subtarget &ST = TM->getSubtarget<X86Subtarget>(); if (!ST.LEAusesAG() && !ST.slowLEA()) return false; - TII = static_cast<const X86InstrInfo*>(TM->getInstrInfo()); + TII = static_cast<const X86InstrInfo *>(TM->getInstrInfo()); DEBUG(dbgs() << "Start X86FixupLEAs\n";); // Process all basic blocks. @@ -165,14 +167,14 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &Func) { return true; } -FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p, - MachineBasicBlock::iterator I) { +FixupLEAPass::RegUsageState +FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) { RegUsageState RegUsage = RU_NotUsed; - MachineInstr* MI = I; + MachineInstr *MI = I; for (unsigned int i = 0; i < MI->getNumOperands(); ++i) { - MachineOperand& opnd = MI->getOperand(i); - if (opnd.isReg() && opnd.getReg() == p.getReg()){ + MachineOperand &opnd = MI->getOperand(i); + if (opnd.isReg() && opnd.getReg() == p.getReg()) { if (opnd.isDef()) return RU_Write; RegUsage = RU_Read; @@ -185,23 +187,22 @@ FixupLEAPass::RegUsageState FixupLEAPass::usesRegister(MachineOperand& p, /// block, return a reference to the previous instruction in the block, /// wrapping around to the last instruction of the block if the block /// branches to itself. 
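// [Editor's note] The wrap-around case documented above covers
// single-block loops, e.g.
//
//   LBB0_1:
//     ...
//     lea (%rdi,%rcx), %rax
//     jmp LBB0_1            ; the block is its own predecessor
//
// so a backwards search that reaches begin() may resume from the last
// instruction of the same block instead of giving up.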
-static inline bool getPreviousInstr(MachineBasicBlock::iterator& I, +static inline bool getPreviousInstr(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI) { if (I == MFI->begin()) { if (MFI->isPredecessor(MFI)) { I = --MFI->end(); return true; - } - else + } else return false; } --I; return true; } -MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p, - MachineBasicBlock::iterator& I, - MachineFunction::iterator MFI) { +MachineBasicBlock::iterator +FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I, + MachineFunction::iterator MFI) { int InstrDistance = 1; MachineBasicBlock::iterator CurInst; static const int INSTR_DISTANCE_THRESHOLD = 5; @@ -209,12 +210,12 @@ MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p, CurInst = I; bool Found; Found = getPreviousInstr(CurInst, MFI); - while( Found && I != CurInst) { + while (Found && I != CurInst) { if (CurInst->isCall() || CurInst->isInlineAsm()) break; if (InstrDistance > INSTR_DISTANCE_THRESHOLD) break; // too far back to make a difference - if (usesRegister(p, CurInst) == RU_Write){ + if (usesRegister(p, CurInst) == RU_Write) { return CurInst; } InstrDistance += TII->getInstrLatency(TM->getInstrItineraryData(), CurInst); @@ -223,32 +224,32 @@ MachineBasicBlock::iterator FixupLEAPass::searchBackwards(MachineOperand& p, return nullptr; } -void FixupLEAPass::processInstruction(MachineBasicBlock::iterator& I, +void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I, MachineFunction::iterator MFI) { // Process a load, store, or LEA instruction. MachineInstr *MI = I; int opcode = MI->getOpcode(); - const MCInstrDesc& Desc = MI->getDesc(); + const MCInstrDesc &Desc = MI->getDesc(); int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags, opcode); if (AddrOffset >= 0) { AddrOffset += X86II::getOperandBias(Desc); - MachineOperand& p = MI->getOperand(AddrOffset + X86::AddrBaseReg); + MachineOperand &p = MI->getOperand(AddrOffset + X86::AddrBaseReg); if (p.isReg() && p.getReg() != X86::ESP) { seekLEAFixup(p, I, MFI); } - MachineOperand& q = MI->getOperand(AddrOffset + X86::AddrIndexReg); + MachineOperand &q = MI->getOperand(AddrOffset + X86::AddrIndexReg); if (q.isReg() && q.getReg() != X86::ESP) { seekLEAFixup(q, I, MFI); } } } -void FixupLEAPass::seekLEAFixup(MachineOperand& p, - MachineBasicBlock::iterator& I, +void FixupLEAPass::seekLEAFixup(MachineOperand &p, + MachineBasicBlock::iterator &I, MachineFunction::iterator MFI) { MachineBasicBlock::iterator MBI = searchBackwards(p, I, MFI); if (MBI) { - MachineInstr* NewMI = postRAConvertToLEA(MFI, MBI); + MachineInstr *NewMI = postRAConvertToLEA(MFI, MBI); if (NewMI) { ++NumLEAs; DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump();); @@ -256,7 +257,7 @@ void FixupLEAPass::seekLEAFixup(MachineOperand& p, DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump();); MFI->erase(MBI); MachineBasicBlock::iterator J = - static_cast<MachineBasicBlock::iterator> (NewMI); + static_cast<MachineBasicBlock::iterator>(NewMI); processInstruction(J, MFI); } } @@ -299,7 +300,7 @@ void FixupLEAPass::processInstructionForSLM(MachineBasicBlock::iterator &I, } DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump();); DEBUG(dbgs() << "FixLEA: Replaced by: ";); - MachineInstr *NewMI = 0; + MachineInstr *NewMI = nullptr; const MachineOperand &Dst = MI->getOperand(0); // Make ADD instruction for two registers writing to LEA's destination if (SrcR1 != 0 && SrcR2 != 0) { diff --git 
a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 4c1374f..8c029a8 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -29,6 +29,7 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Support/Debug.h" using namespace llvm; @@ -45,7 +46,7 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { bool X86FrameLowering::hasFP(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineModuleInfo &MMI = MF.getMMI(); - const TargetRegisterInfo *RegInfo = TM.getRegisterInfo(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); return (MF.getTarget().Options.DisableFramePointerElim(MF) || RegInfo->needsStackRealignment(MF) || @@ -305,65 +306,25 @@ static bool isEAXLiveIn(MachineFunction &MF) { return false; } -void X86FrameLowering::emitCalleeSavedFrameMoves( - MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, - unsigned FramePtr) const { +void +X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI, + DebugLoc DL) const { MachineFunction &MF = *MBB.getParent(); MachineFrameInfo *MFI = MF.getFrameInfo(); MachineModuleInfo &MMI = MF.getMMI(); const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); - const X86InstrInfo &TII = *TM.getInstrInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); // Add callee saved registers to move list. const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); if (CSI.empty()) return; - const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); - bool HasFP = hasFP(MF); - - // Calculate amount of bytes used for return address storing. - int stackGrowth = -RegInfo->getSlotSize(); - - // FIXME: This is dirty hack. The code itself is pretty mess right now. - // It should be rewritten from scratch and generalized sometimes. - - // Determine maximum offset (minimum due to stack growth). - int64_t MaxOffset = 0; - for (std::vector<CalleeSavedInfo>::const_iterator - I = CSI.begin(), E = CSI.end(); I != E; ++I) - MaxOffset = std::min(MaxOffset, - MFI->getObjectOffset(I->getFrameIdx())); - // Calculate offsets. - int64_t saveAreaOffset = (HasFP ? 3 : 2) * stackGrowth; for (std::vector<CalleeSavedInfo>::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { int64_t Offset = MFI->getObjectOffset(I->getFrameIdx()); unsigned Reg = I->getReg(); - Offset = MaxOffset - Offset + saveAreaOffset; - - // Don't output a new machine move if we're re-saving the frame - // pointer. This happens when the PrologEpilogInserter has inserted an extra - // "PUSH" of the frame pointer -- the "emitPrologue" method automatically - // generates one when frame pointers are used. If we generate a "machine - // move" for this extra "PUSH", the linker will lose track of the fact that - // the frame pointer should have the value of the first "PUSH" when it's - // trying to unwind. - // - // FIXME: This looks inelegant. It's possibly correct, but it's covering up - // another bug. I.e., one where we generate a prolog like this: - // - // pushl %ebp - // movl %esp, %ebp - // pushl %ebp - // pushl %esi - // ... - // - // The immediate re-push of EBP is unnecessary. At the least, it's an - // optimization bug. EBP can be used as a scratch register in certain - // cases, but probably not when we have a frame pointer. 
- if (HasFP && FramePtr == Reg) - continue; unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); unsigned CFIIndex = @@ -395,23 +356,107 @@ static bool usesTheStack(const MachineFunction &MF) { /// automatically adjust the stack pointer. Adjust the stack pointer to allocate /// space for local variables. Also emit labels used by the exception handler to /// generate the exception handling frames. + +/* + Here's a gist of what gets emitted: + + ; Establish frame pointer, if needed + [if needs FP] + push %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + .seh_pushreg %rbp + mov %rsp, %rbp + .cfi_def_cfa_register %rbp + + ; Spill general-purpose registers + [for all callee-saved GPRs] + pushq %<reg> + [if not needs FP] + .cfi_def_cfa_offset (offset from RETADDR) + .seh_pushreg %<reg> + + ; If the required stack alignment > default stack alignment + ; rsp needs to be re-aligned. This creates a "re-alignment gap" + ; of unknown size in the stack frame. + [if stack needs re-alignment] + and $MASK, %rsp + + ; Allocate space for locals + [if target is Windows and allocated space > 4096 bytes] + ; Windows needs special care for allocations larger + ; than one page. + mov $NNN, %rax + call ___chkstk_ms/___chkstk + sub %rax, %rsp + [else] + sub $NNN, %rsp + + [if needs FP] + .seh_stackalloc (size of XMM spill slots) + .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots + [else] + .seh_stackalloc NNN + + ; Spill XMMs + ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved, + ; they may get spilled on any platform, if the current function + ; calls @llvm.eh.unwind.init + [if needs FP] + [for all callee-saved XMM registers] + movaps %<xmm reg>, -MMM(%rbp) + [for all callee-saved XMM registers] + .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) + ; i.e. the offset relative to (%rbp - SEHFrameOffset) + [else] + [for all callee-saved XMM registers] + movaps %<xmm reg>, KKK(%rsp) + [for all callee-saved XMM registers] + .seh_savexmm %<xmm reg>, KKK + + .seh_endprologue + + [if needs base pointer] + mov %rsp, %rbx + + ; Emit CFI info + [if needs FP] + [for all callee-saved registers] + .cfi_offset %<reg>, (offset from %rbp) + [else] + .cfi_def_cfa_offset (offset from RETADDR) + [for all callee-saved registers] + .cfi_offset %<reg>, (offset from %rsp) + + Notes: + - .seh directives are emitted only for Windows 64 ABI + - .cfi directives are emitted for all other ABIs + - for 32-bit code, substitute %e?? registers for %r?? +*/ + void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); // Prologue goes in entry BB. MachineBasicBlock::iterator MBBI = MBB.begin(); MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *Fn = MF.getFunction(); - const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); - const X86InstrInfo &TII = *TM.getInstrInfo(); + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineModuleInfo &MMI = MF.getMMI(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - bool needsFrameMoves = MMI.hasDebugInfo() || - Fn->needsUnwindTableEntry(); uint64_t MaxAlign = MFI->getMaxAlignment(); // Desired stack alignment. uint64_t StackSize = MFI->getStackSize(); // Number of bytes to allocate.
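// [Editor's sketch, hypothetical example not taken from the patch] For a
// Win64 function with a frame pointer, one saved GPR (RSI), one saved XMM
// (XMM6), and some locals, the gist above instantiates to roughly:
//
//   pushq  %rbp
//   .seh_pushreg %rbp
//   movq   %rsp, %rbp
//   pushq  %rsi
//   .seh_pushreg %rsi
//   subq   $NN, %rsp                  ; XMM slot + locals
//   .seh_stackalloc MM                ; MM covers the XMM spill area only
//   .seh_setframe %rbp, SEHFrameOffset
//   movaps %xmm6, KK(%rsp)
//   .seh_savexmm %xmm6, KK
//   .seh_endprologue
//
// NN/MM/KK/SEHFrameOffset are placeholders; the exact values fall out of
// assignCalleeSavedSpillSlots() and the SEHFrameOffset computation later in
// this file.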
bool HasFP = hasFP(MF); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); bool Is64Bit = STI.is64Bit(); bool IsLP64 = STI.isTarget64BitLP64(); bool IsWin64 = STI.isTargetWin64(); + bool IsWinEH = + MF.getTarget().getMCAsmInfo()->getExceptionHandlingType() == + ExceptionHandling::WinEH; // Not necessarily synonymous with IsWin64. + bool NeedsWinEH = IsWinEH && Fn->needsUnwindTableEntry(); + bool NeedsDwarfCFI = + !IsWinEH && (MMI.hasDebugInfo() || Fn->needsUnwindTableEntry()); bool UseLEA = STI.useLeaForSP(); unsigned StackAlign = getStackAlignment(); unsigned SlotSize = RegInfo->getSlotSize(); @@ -509,7 +554,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addReg(FramePtr, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); - if (needsFrameMoves) { + if (NeedsDwarfCFI) { // Mark the place where EBP/RBP was saved. // Define the current CFA rule to use the provided offset. assert(StackSize); @@ -527,13 +572,19 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); } + if (NeedsWinEH) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) + .addImm(FramePtr) + .setMIFlag(MachineInstr::FrameSetup); + } + // Update EBP with the new base value. BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr), FramePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); - if (needsFrameMoves) { + if (NeedsDwarfCFI) { // Mark effective beginning of when frame pointer becomes valid. // Define the current CFA to use the EBP/RBP register. unsigned DwarfFramePtr = RegInfo->getDwarfRegNum(FramePtr, true); @@ -543,9 +594,8 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); } - // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = std::next(MF.begin()), E = MF.end(); - I != E; ++I) + // Mark the FramePtr as live-in in every block. + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) I->addLiveIn(FramePtr); } else { NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); @@ -559,10 +609,10 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { (MBBI->getOpcode() == X86::PUSH32r || MBBI->getOpcode() == X86::PUSH64r)) { PushedRegs = true; - MBBI->setFlag(MachineInstr::FrameSetup); + unsigned Reg = MBBI->getOperand(0).getReg(); ++MBBI; - if (!HasFP && needsFrameMoves) { + if (!HasFP && NeedsDwarfCFI) { // Mark callee-saved push instruction. // Define the current CFA rule to use the provided offset. assert(StackSize); @@ -572,16 +622,15 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { .addCFIIndex(CFIIndex); StackOffset += stackGrowth; } + + if (NeedsWinEH) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)).addImm(Reg).setMIFlag( + MachineInstr::FrameSetup); + } } // Realign stack after we pushed callee-saved registers (so that we'll be // able to calculate their offsets from the frame pointer). - - // NOTE: We push the registers before realigning the stack, so - // vector callee-saved (xmm) registers may be saved w/o proper - // alignment in this way. However, currently these regs are saved in - // stack slots (see X86FrameLowering::spillCalleeSavedRegisters()), so - // this shouldn't be a problem. 
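// [Editor's note] When the required alignment exceeds the ABI stack
// alignment, the realignment performed in the block below is a single AND
// of the stack pointer, e.g. for a 32-byte-aligned frame:
//
//   andq $-32, %rsp        ; clear the low five bits
//
// This must happen after the frame pointer is established: once RSP has
// been realigned, the RBP-to-RSP distance is no longer a compile-time
// constant, so locals below the gap are addressed off RSP (or the RBX base
// pointer from the gist above) instead.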
if (RegInfo->needsStackRealignment(MF)) { assert(HasFP && "There should be a frame pointer if stack is realigned."); MachineInstr *MI = @@ -680,23 +729,88 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { MI->setFlag(MachineInstr::FrameSetup); MBB.insert(MBBI, MI); } - } else if (NumBytes) + } else if (NumBytes) { emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit, IsLP64, UseLEA, TII, *RegInfo); + } + + int SEHFrameOffset = 0; + if (NeedsWinEH) { + if (HasFP) { + // We need to set frame base offset low enough such that all saved + // register offsets would be positive relative to it, but we can't + // just use NumBytes, because .seh_setframe offset must be <=240. + // So we pretend to have only allocated enough space to spill the + // non-volatile registers. + // We don't care about the rest of the stack allocation, because the + // unwinder will restore SP to (BP - SEHFrameOffset) + for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { + int offset = MFI->getObjectOffset(Info.getFrameIdx()); + SEHFrameOffset = std::max(SEHFrameOffset, abs(offset)); + } + SEHFrameOffset += SEHFrameOffset % 16; // ensure alignment + + // This only needs to account for XMM spill slots; GPR slots + // are covered by the .seh_pushreg's emitted above. + unsigned Size = SEHFrameOffset - X86FI->getCalleeSavedFrameSize(); + if (Size) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(Size) + .setMIFlag(MachineInstr::FrameSetup); + } + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) + .addImm(FramePtr) + .addImm(SEHFrameOffset) + .setMIFlag(MachineInstr::FrameSetup); + } else { + // SP will be the base register for restoring XMMs + if (NumBytes) { + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) + .addImm(NumBytes) + .setMIFlag(MachineInstr::FrameSetup); + } + } + } + + // Skip the rest of the register spilling code + while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) + ++MBBI; + + // Emit SEH info for non-GPRs + if (NeedsWinEH) { + for (const CalleeSavedInfo &Info : MFI->getCalleeSavedInfo()) { + unsigned Reg = Info.getReg(); + if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) + continue; + assert(X86::FR64RegClass.contains(Reg) && "Unexpected register class"); + + int Offset = getFrameIndexOffset(MF, Info.getFrameIdx()); + Offset += SEHFrameOffset; + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) + .addImm(Reg) + .addImm(Offset) + .setMIFlag(MachineInstr::FrameSetup); + } + + BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) + .setMIFlag(MachineInstr::FrameSetup); + } // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects // will be allocated after this, so we can still use the base pointer // to reference locals. if (RegInfo->hasBasePointer(MF)) { - // Update the frame pointer with the current stack pointer. + // Update the base pointer with the current stack pointer. unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr; BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) .addReg(StackPtr) .setMIFlag(MachineInstr::FrameSetup); } - if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) { + if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { // Mark end of stack pointer adjustment. if (!HasFP && NumBytes) { // Define the current CFA rule to use the provided offset. @@ -711,7 +825,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const { // Emit DWARF info specifying the offsets of the callee-saved registers.
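// [Editor's note] With the fixed spill slots now created up front by
// assignCalleeSavedSpillSlots() (later in this patch), the rewritten
// emitCalleeSavedFrameMoves() can read each register's
// MFI->getObjectOffset() directly instead of reconstructing the save-area
// layout by hand, emitting directives such as:
//
//   .cfi_offset %rbp, -16
//   .cfi_offset %rsi, -24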
if (PushedRegs) - emitCalleeSavedFrameMoves(MBB, MBBI, DL, HasFP ? FramePtr : StackPtr); + emitCalleeSavedFrameMoves(MBB, MBBI, DL); } } @@ -719,12 +833,14 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); - const X86InstrInfo &TII = *TM.getInstrInfo(); + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); assert(MBBI != MBB.end() && "Returning block has no instructions"); unsigned RetOpcode = MBBI->getOpcode(); DebugLoc DL = MBBI->getDebugLoc(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); bool Is64Bit = STI.is64Bit(); bool IsLP64 = STI.isTarget64BitLP64(); bool UseLEA = STI.useLeaForSP(); @@ -969,46 +1085,97 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, return getFrameIndexOffset(MF, FI); } -bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector<CalleeSavedInfo> &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; +bool X86FrameLowering::assignCalleeSavedSpillSlots( + MachineFunction &MF, const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const { + MachineFrameInfo *MFI = MF.getFrameInfo(); + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()); + unsigned SlotSize = RegInfo->getSlotSize(); + X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); - DebugLoc DL = MBB.findDebugLoc(MI); + unsigned CalleeSavedFrameSize = 0; + int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); - MachineFunction &MF = *MBB.getParent(); + if (hasFP(MF)) { + // emitPrologue always spills the frame register first. + SpillSlotOffset -= SlotSize; + MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); + + // Since emitPrologue and emitEpilogue will handle spilling and restoring of + // the frame register, we can delete it from the CSI list and not have to + // worry about avoiding it later. + unsigned FPReg = RegInfo->getFrameRegister(MF); + for (unsigned i = 0; i < CSI.size(); ++i) { + if (CSI[i].getReg() == FPReg) { + CSI.erase(CSI.begin() + i); + break; + } + } + } + + // Assign slots for GPRs. It increases frame size. + for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i - 1].getReg(); + + if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) + continue; - unsigned SlotSize = STI.is64Bit() ? 8 : 4; - unsigned FPReg = TRI->getFrameRegister(MF); - unsigned CalleeFrameSize = 0; + SpillSlotOffset -= SlotSize; + CalleeSavedFrameSize += SlotSize; + + int SlotIndex = MFI->CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); + CSI[i - 1].setFrameIdx(SlotIndex); + } + + X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize); + + // Assign slots for XMMs.
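// [Editor's note] The loop below walks the (negative) SpillSlotOffset
// downward and rounds it to the register class's alignment before carving
// out each slot. For a 16-byte XMM slot with SpillSlotOffset currently at
// -24:
//
//   SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment();
//                      // -24 - (24 % 16) = -32, now 16-byte aligned
//   SpillSlotOffset -= RC->getSize();
//                      // -48; the slot occupies [-48, -32)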
+ for (unsigned i = CSI.size(); i != 0; --i) { + unsigned Reg = CSI[i - 1].getReg(); + if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) + continue; + + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); + // ensure alignment + SpillSlotOffset -= abs(SpillSlotOffset) % RC->getAlignment(); + // spill into slot + SpillSlotOffset -= RC->getSize(); + int SlotIndex = + MFI->CreateFixedSpillStackObject(RC->getSize(), SpillSlotOffset); + CSI[i - 1].setFrameIdx(SlotIndex); + MFI->ensureMaxAlignment(RC->getAlignment()); + } + + return true; +} +bool X86FrameLowering::spillCalleeSavedRegisters( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + const std::vector<CalleeSavedInfo> &CSI, + const TargetRegisterInfo *TRI) const { + DebugLoc DL = MBB.findDebugLoc(MI); + + MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); // Push GPRs. It increases frame size. unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; for (unsigned i = CSI.size(); i != 0; --i) { - unsigned Reg = CSI[i-1].getReg(); - if (!X86::GR64RegClass.contains(Reg) && - !X86::GR32RegClass.contains(Reg)) + unsigned Reg = CSI[i - 1].getReg(); + + if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); - if (Reg == FPReg) - // X86RegisterInfo::emitPrologue will handle spilling of frame register. - continue; - CalleeFrameSize += SlotSize; + BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, RegState::Kill) .setMIFlag(MachineInstr::FrameSetup); } - X86FI->setCalleeSavedFrameSize(CalleeFrameSize); - // Make XMM regs spilled. X86 does not have ability of push/pop XMM. // It can be done by spilling XMMs to stack frame. - // Note that only Win64 ABI might spill XMMs. for (unsigned i = CSI.size(); i != 0; --i) { unsigned Reg = CSI[i-1].getReg(); if (X86::GR64RegClass.contains(Reg) || @@ -1017,8 +1184,12 @@ bool X86FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, // Add the callee-saved register as live-in. It's killed at the spill. MBB.addLiveIn(Reg); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i-1].getFrameIdx(), - RC, TRI); + + TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC, + TRI); + --MI; + MI->setFlag(MachineInstr::FrameSetup); + ++MI; } return true; @@ -1035,6 +1206,7 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineFunction &MF = *MBB.getParent(); const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); // Reload XMMs from stack frame. for (unsigned i = 0, e = CSI.size(); i != e; ++i) { @@ -1042,22 +1214,19 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) continue; + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - RC, TRI); + TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); } // POP GPRs. - unsigned FPReg = TRI->getFrameRegister(MF); unsigned Opc = STI.is64Bit() ? 
X86::POP64r : X86::POP32r; for (unsigned i = 0, e = CSI.size(); i != e; ++i) { unsigned Reg = CSI[i].getReg(); if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) continue; - if (Reg == FPReg) - // X86RegisterInfo::emitEpilogue will handle restoring of frame register. - continue; + BuildMI(MBB, MI, DL, TII.get(Opc), Reg); } return true; @@ -1065,9 +1234,10 @@ bool X86FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, void X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { + RegScavenger *RS) const { MachineFrameInfo *MFI = MF.getFrameInfo(); - const X86RegisterInfo *RegInfo = TM.getRegisterInfo(); + const X86RegisterInfo *RegInfo = + static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()); unsigned SlotSize = RegInfo->getSlotSize(); X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); @@ -1087,22 +1257,6 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, TailCallReturnAddrDelta - SlotSize, true); } - if (hasFP(MF)) { - assert((TailCallReturnAddrDelta <= 0) && - "The Delta should always be zero or negative"); - const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); - - // Create a frame entry for the EBP register that must be saved. - int FrameIdx = MFI->CreateFixedObject(SlotSize, - -(int)SlotSize + - TFI.getOffsetOfLocalArea() + - TailCallReturnAddrDelta, - true); - assert(FrameIdx == MFI->getObjectIndexBegin() && - "Slot for EBP register must be last in order to be found!"); - (void)FrameIdx; - } - // Spill the BasePtr if it's used. if (RegInfo->hasBasePointer(MF)) MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); @@ -1160,8 +1314,9 @@ void X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { MachineBasicBlock &prologueMBB = MF.front(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const X86InstrInfo &TII = *TM.getInstrInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); uint64_t StackSize; + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); bool Is64Bit = STI.is64Bit(); unsigned TlsReg, TlsOffset; DebugLoc DL; @@ -1368,9 +1523,12 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { /// temp0 = sp - MaxStack /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { - const X86InstrInfo &TII = *TM.getInstrInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineFrameInfo *MFI = MF.getFrameInfo(); - const unsigned SlotSize = TM.getRegisterInfo()->getSlotSize(); + const unsigned SlotSize = + static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()) + ->getSlotSize(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); const bool Is64Bit = STI.is64Bit(); DebugLoc DL; // HiPE-specific values @@ -1499,12 +1657,14 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const { void X86FrameLowering:: eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const { - const X86InstrInfo &TII = *TM.getInstrInfo(); - const X86RegisterInfo &RegInfo = *TM.getRegisterInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const X86RegisterInfo &RegInfo = + *static_cast<const X86RegisterInfo *>(MF.getTarget().getRegisterInfo()); unsigned StackPtr = RegInfo.getStackRegister(); bool reseveCallFrame = hasReservedCallFrame(MF); int Opcode = 
I->getOpcode(); bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); + const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>(); bool IsLP64 = STI.isTarget64BitLP64(); DebugLoc DL = I->getDebugLoc(); uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0; @@ -1522,7 +1682,8 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // We need to keep the stack aligned properly. To do this, we round the // amount of space needed for the outgoing arguments up to the next // alignment boundary. - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = + MF.getTarget().getFrameLowering()->getStackAlignment(); Amount = (Amount + StackAlign - 1) / StackAlign * StackAlign; MachineInstr *New = nullptr; diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 208bb8b..5ad3d4d 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -14,7 +14,6 @@ #ifndef X86_FRAMELOWERING_H #define X86_FRAMELOWERING_H -#include "X86Subtarget.h" #include "llvm/Target/TargetFrameLowering.h" namespace llvm { @@ -23,19 +22,13 @@ class MCSymbol; class X86TargetMachine; class X86FrameLowering : public TargetFrameLowering { - const X86TargetMachine &TM; - const X86Subtarget &STI; public: - explicit X86FrameLowering(const X86TargetMachine &tm, const X86Subtarget &sti) - : TargetFrameLowering(StackGrowsDown, - sti.getStackAlignment(), - (sti.is64Bit() ? -8 : -4)), - TM(tm), STI(sti) { - } + explicit X86FrameLowering(StackDirection D, unsigned StackAl, int LAO) + : TargetFrameLowering(StackGrowsDown, StackAl, LAO) {} void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, DebugLoc DL, - unsigned FramePtr) const; + MachineBasicBlock::iterator MBBI, + DebugLoc DL) const; /// emitProlog/emitEpilog - These methods insert prolog and epilog code into /// the function. 
@@ -49,6 +42,11 @@ public: void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, RegScavenger *RS = nullptr) const override; + bool + assignCalleeSavedSpillSlots(MachineFunction &MF, + const TargetRegisterInfo *TRI, + std::vector<CalleeSavedInfo> &CSI) const override; + bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const std::vector<CalleeSavedInfo> &CSI, diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 74386d3..ba2f5f6 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2126,38 +2126,6 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { return getGlobalBaseReg(); - case X86ISD::ATOMOR64_DAG: - case X86ISD::ATOMXOR64_DAG: - case X86ISD::ATOMADD64_DAG: - case X86ISD::ATOMSUB64_DAG: - case X86ISD::ATOMNAND64_DAG: - case X86ISD::ATOMAND64_DAG: - case X86ISD::ATOMMAX64_DAG: - case X86ISD::ATOMMIN64_DAG: - case X86ISD::ATOMUMAX64_DAG: - case X86ISD::ATOMUMIN64_DAG: - case X86ISD::ATOMSWAP64_DAG: { - unsigned Opc; - switch (Opcode) { - default: llvm_unreachable("Impossible opcode"); - case X86ISD::ATOMOR64_DAG: Opc = X86::ATOMOR6432; break; - case X86ISD::ATOMXOR64_DAG: Opc = X86::ATOMXOR6432; break; - case X86ISD::ATOMADD64_DAG: Opc = X86::ATOMADD6432; break; - case X86ISD::ATOMSUB64_DAG: Opc = X86::ATOMSUB6432; break; - case X86ISD::ATOMNAND64_DAG: Opc = X86::ATOMNAND6432; break; - case X86ISD::ATOMAND64_DAG: Opc = X86::ATOMAND6432; break; - case X86ISD::ATOMMAX64_DAG: Opc = X86::ATOMMAX6432; break; - case X86ISD::ATOMMIN64_DAG: Opc = X86::ATOMMIN6432; break; - case X86ISD::ATOMUMAX64_DAG: Opc = X86::ATOMUMAX6432; break; - case X86ISD::ATOMUMIN64_DAG: Opc = X86::ATOMUMIN6432; break; - case X86ISD::ATOMSWAP64_DAG: Opc = X86::ATOMSWAP6432; break; - } - SDNode *RetVal = SelectAtomic64(Node, Opc); - if (RetVal) - return RetVal; - break; - } - case ISD::ATOMIC_LOAD_XOR: case ISD::ATOMIC_LOAD_AND: case ISD::ATOMIC_LOAD_OR: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cbaf44e..5ccff20 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -44,11 +44,13 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Target/TargetOptions.h" #include <bitset> +#include <numeric> #include <cctype> using namespace llvm; @@ -56,6 +58,17 @@ using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); +static cl::opt<bool> ExperimentalVectorWideningLegalization( + "x86-experimental-vector-widening-legalization", cl::init(false), + cl::desc("Enable an experimental vector type legalization through widening " + "rather than promotion."), + cl::Hidden); + +static cl::opt<bool> ExperimentalVectorShuffleLowering( + "x86-experimental-vector-shuffle-lowering", cl::init(false), + cl::desc("Enable an experimental vector shuffle lowering code path."), + cl::Hidden); + // Forward declarations. 
static SDValue getMOVL(SelectionDAG &DAG, SDLoc dl, EVT VT, SDValue V1, SDValue V2); @@ -178,29 +191,28 @@ static SDValue Concat256BitVectors(SDValue V1, SDValue V2, EVT VT, return Insert256BitVector(V, V2, NumElems/2, DAG, dl); } -static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) { - const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); - bool is64Bit = Subtarget->is64Bit(); - - if (Subtarget->isTargetMacho()) { - if (is64Bit) +static TargetLoweringObjectFile *createTLOF(const Triple &TT) { + if (TT.isOSBinFormatMachO()) { + if (TT.getArch() == Triple::x86_64) return new X86_64MachoTargetObjectFile(); return new TargetLoweringObjectFileMachO(); } - if (Subtarget->isTargetLinux()) + if (TT.isOSLinux()) return new X86LinuxTargetObjectFile(); - if (Subtarget->isTargetELF()) + if (TT.isOSBinFormatELF()) return new TargetLoweringObjectFileELF(); - if (Subtarget->isTargetKnownWindowsMSVC()) + if (TT.isKnownWindowsMSVCEnvironment()) return new X86WindowsTargetObjectFile(); - if (Subtarget->isTargetCOFF()) + if (TT.isOSBinFormatCOFF()) return new TargetLoweringObjectFileCOFF(); llvm_unreachable("unknown subtarget type"); } +// FIXME: This should stop caching the target machine as soon as +// we can remove resetOperationActions et al. X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)) { + : TargetLowering(TM, createTLOF(Triple(TM.getTargetTriple()))) { Subtarget = &TM.getSubtarget<X86Subtarget>(); X86ScalarSSEf64 = Subtarget->hasSSE2(); X86ScalarSSEf32 = Subtarget->hasSSE1(); @@ -443,7 +455,13 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::BR_CC , MVT::i16, Expand); setOperationAction(ISD::BR_CC , MVT::i32, Expand); setOperationAction(ISD::BR_CC , MVT::i64, Expand); - setOperationAction(ISD::SELECT_CC , MVT::Other, Expand); + setOperationAction(ISD::SELECT_CC , MVT::f32, Expand); + setOperationAction(ISD::SELECT_CC , MVT::f64, Expand); + setOperationAction(ISD::SELECT_CC , MVT::f80, Expand); + setOperationAction(ISD::SELECT_CC , MVT::i8, Expand); + setOperationAction(ISD::SELECT_CC , MVT::i16, Expand); + setOperationAction(ISD::SELECT_CC , MVT::i32, Expand); + setOperationAction(ISD::SELECT_CC , MVT::i64, Expand); if (Subtarget->is64Bit()) setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); @@ -497,6 +515,14 @@ void X86TargetLowering::resetOperationActions() { } } + // Special handling for half-precision floating point conversions. + // If we don't have F16C support, then lower half float conversions + // into library calls. 
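The half-precision handling set up just below marks FP16_TO_FP32/FP32_TO_FP16 as Expand when F16C is unavailable, which turns each conversion into a runtime-library call instead of a vcvtph2ps/vcvtps2ph instruction. As a hedged sketch of what such a conversion routine computes (illustrative only; this is not the compiler-rt implementation the libcalls actually bind to):

#include <cstdint>
#include <cstring>

// Convert an IEEE-754 binary16 bit pattern to float. Handles zeros,
// subnormals, normals, infinities and NaNs.
static float halfToFloat(uint16_t H) {
  uint32_t Sign = uint32_t(H & 0x8000u) << 16; // sign moves to bit 31
  uint32_t Exp = (H >> 10) & 0x1Fu;            // 5-bit exponent
  uint32_t Frac = H & 0x3FFu;                  // 10-bit fraction
  uint32_t Bits;
  if (Exp == 0x1Fu) {
    Bits = Sign | 0x7F800000u | (Frac << 13);  // Inf/NaN: saturate exponent
  } else if (Exp == 0) {
    if (Frac == 0) {
      Bits = Sign;                             // signed zero
    } else {
      // Subnormal half: renormalize into a normal float.
      uint32_t E = 127 - 15 + 1;
      while (!(Frac & 0x400u)) {
        Frac <<= 1;
        --E;
      }
      Bits = Sign | (E << 23) | ((Frac & 0x3FFu) << 13);
    }
  } else {
    Bits = Sign | ((Exp + (127 - 15)) << 23) | (Frac << 13); // rebias exponent
  }
  float F;
  std::memcpy(&F, &Bits, sizeof F);
  return F;
}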
+ if (TM.Options.UseSoftFloat || !Subtarget->hasF16C()) { + setOperationAction(ISD::FP16_TO_FP32, MVT::f32, Expand); + setOperationAction(ISD::FP32_TO_FP16, MVT::i16, Expand); + } + if (Subtarget->hasPOPCNT()) { setOperationAction(ISD::CTPOP , MVT::i8 , Promote); } else { @@ -575,34 +601,18 @@ void X86TargetLowering::resetOperationActions() { // Expand certain atomics for (unsigned i = 0; i != array_lengthof(IntVTs); ++i) { MVT VT = IntVTs[i]; - setOperationAction(ISD::ATOMIC_CMP_SWAP, VT, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); setOperationAction(ISD::ATOMIC_STORE, VT, Custom); } - if (!Subtarget->is64Bit()) { - setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_AND, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_OR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_XOR, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_NAND, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_SWAP, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_MIN, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); - setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); - } - if (Subtarget->hasCmpxchg16b()) { - setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom); + setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); } // FIXME - use subtarget debug flags - if (!Subtarget->isTargetDarwin() && - !Subtarget->isTargetELF() && - !Subtarget->isTargetCygMing()) { + if (!Subtarget->isTargetDarwin() && !Subtarget->isTargetELF() && + !Subtarget->isTargetCygMing() && !Subtarget->isTargetWin64()) { setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); } @@ -861,6 +871,7 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::ZERO_EXTEND, VT, Expand); setOperationAction(ISD::ANY_EXTEND, VT, Expand); setOperationAction(ISD::VSELECT, VT, Expand); + setOperationAction(ISD::SELECT_CC, VT, Expand); for (int InnerVT = MVT::FIRST_VECTOR_VALUETYPE; InnerVT <= MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) setTruncStoreAction(VT, @@ -1433,6 +1444,11 @@ void X86TargetLowering::resetOperationActions() { setOperationAction(ISD::OR, MVT::v16i32, Legal); setOperationAction(ISD::XOR, MVT::v16i32, Legal); + if (Subtarget->hasCDI()) { + setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); + setOperationAction(ISD::CTLZ, MVT::v16i32, Legal); + } + // Custom lower several nodes. for (int i = MVT::FIRST_VECTOR_VALUETYPE; i <= MVT::LAST_VECTOR_VALUETYPE; ++i) { @@ -1563,6 +1579,7 @@ void X86TargetLowering::resetOperationActions() { setTargetDAGCombine(ISD::SINT_TO_FP); setTargetDAGCombine(ISD::SETCC); setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + setTargetDAGCombine(ISD::BUILD_VECTOR); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); setTargetDAGCombine(ISD::XOR); @@ -1585,6 +1602,16 @@ void X86TargetLowering::resetOperationActions() { setPrefFunctionAlignment(4); // 2^4 bytes. 
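The getPreferredVectorAction override that follows opts into widening instead of the default promotion when the experimental flag is set. As a scalar model of how the two strategies legalize an illegal <2 x i32> addition (purely illustrative; zeros stand in for the undef padding lanes):

#include <array>
#include <cstdint>

// Promotion does the arithmetic in widened i64 lanes and truncates back.
static std::array<uint32_t, 2> addByPromotion(std::array<uint32_t, 2> A,
                                              std::array<uint32_t, 2> B) {
  std::array<uint64_t, 2> WA = {{A[0], A[1]}}, WB = {{B[0], B[1]}};
  return {{uint32_t(WA[0] + WB[0]), uint32_t(WA[1] + WB[1])}};
}

// Widening keeps i32 lanes but pads the vector to a legal 4-lane length;
// the extra lanes are undef (modeled as zero here) and simply ignored.
static std::array<uint32_t, 2> addByWidening(std::array<uint32_t, 2> A,
                                             std::array<uint32_t, 2> B) {
  std::array<uint32_t, 4> WA = {{A[0], A[1], 0, 0}}, WB = {{B[0], B[1], 0, 0}};
  std::array<uint32_t, 4> R;
  for (int i = 0; i < 4; ++i)
    R[i] = WA[i] + WB[i];
  return {{R[0], R[1]}};
}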
} +TargetLoweringBase::LegalizeTypeAction +X86TargetLowering::getPreferredVectorAction(EVT VT) const { + if (ExperimentalVectorWideningLegalization && + VT.getVectorNumElements() != 1 && + VT.getVectorElementType().getSimpleVT() != MVT::i1) + return TypeWidenVector; + + return TargetLoweringBase::getPreferredVectorAction(VT); +} + EVT X86TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { if (!VT.isVector()) return Subtarget->hasAVX512() ? MVT::i1: MVT::i8; @@ -1725,7 +1752,7 @@ const MCExpr * X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid,MCContext &Ctx) const{ - assert(getTargetMachine().getRelocationModel() == Reloc::PIC_ && + assert(MBB->getParent()->getTarget().getRelocationModel() == Reloc::PIC_ && Subtarget->isPICStyleGOT()); // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF // entries. @@ -1824,7 +1851,7 @@ X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), + CCState CCInfo(CallConv, isVarArg, MF, MF.getTarget(), RVLocs, Context); return CCInfo.CheckReturn(Outs, RetCC_X86); } @@ -1844,7 +1871,7 @@ X86TargetLowering::LowerReturn(SDValue Chain, X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); SmallVector<CCValAssign, 16> RVLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), + CCState CCInfo(CallConv, isVarArg, MF, DAG.getTarget(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeReturn(Outs, RetCC_X86); @@ -2016,7 +2043,7 @@ X86TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, SmallVector<CCValAssign, 16> RVLocs; bool Is64Bit = Subtarget->is64Bit(); CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + DAG.getTarget(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); // Copy all of the result registers out of their specified physreg. @@ -2166,8 +2193,8 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, unsigned i) const { // Create the nodes corresponding to a load from this parameter slot. ISD::ArgFlagsTy Flags = Ins[i].Flags; - bool AlwaysUseMutable = FuncIsMadeTailCallSafe(CallConv, - getTargetMachine().Options.GuaranteedTailCallOpt); + bool AlwaysUseMutable = FuncIsMadeTailCallSafe( + CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); EVT ValVT; @@ -2224,7 +2251,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // Assign locations to all of the incoming arguments. SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), + CCState CCInfo(CallConv, isVarArg, MF, DAG.getTarget(), ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64 @@ -2388,7 +2415,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, TotalNumXMMRegs = 0; if (IsWin64) { - const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering(); + const TargetFrameLowering &TFI = *MF.getTarget().getFrameLowering(); // Get to the caller-allocated home save location. Add 8 to account // for the return address. int HomeOffset = TFI.getOffsetOfLocalArea() + 8; @@ -2587,7 +2614,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Analyze operands of the call, assigning locations to each operand. 
SmallVector<CCValAssign, 16> ArgLocs; - CCState CCInfo(CallConv, isVarArg, MF, getTargetMachine(), + CCState CCInfo(CallConv, isVarArg, MF, MF.getTarget(), ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64 @@ -2602,7 +2629,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // This is a sibcall. The memory operands are available in caller's // own caller's stack. NumBytes = 0; - else if (getTargetMachine().Options.GuaranteedTailCallOpt && + else if (MF.getTarget().Options.GuaranteedTailCallOpt && IsTailCallConvention(CallConv)) NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); @@ -2649,7 +2676,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Walk the register/memloc assignments, inserting copies/loads. In the case // of tail call optimization arguments are handle later. const X86RegisterInfo *RegInfo = - static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo()); + static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo()); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { // Skip inalloca arguments, they have already been written. ISD::ArgFlagsTy Flags = Outs[i].Flags; @@ -2840,7 +2867,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); } - if (getTargetMachine().getCodeModel() == CodeModel::Large) { + if (DAG.getTarget().getCodeModel() == CodeModel::Large) { assert(Is64Bit && "Large code model is only legal in 64-bit mode."); // In the 64-bit large code model, we have to make all calls // through a register, since the call instruction's 32-bit @@ -2864,7 +2891,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // has hidden or protected visibility, or if it is static or local, then // we don't need to use the PLT - we can directly call it. if (Subtarget->isTargetELF() && - getTargetMachine().getRelocationModel() == Reloc::PIC_ && + DAG.getTarget().getRelocationModel() == Reloc::PIC_ && GV->hasDefaultVisibility() && !GV->hasLocalLinkage()) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && @@ -2906,7 +2933,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // On ELF targets, in either X86-64 or X86-32 mode, direct calls to // external symbols should go through the PLT. if (Subtarget->isTargetELF() && - getTargetMachine().getRelocationModel() == Reloc::PIC_) { + DAG.getTarget().getRelocationModel() == Reloc::PIC_) { OpFlags = X86II::MO_PLT; } else if (Subtarget->isPICStyleStubAny() && (!Subtarget->getTargetTriple().isMacOSX() || @@ -2945,7 +2972,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); const uint32_t *Mask = TRI->getCallPreservedMask(CallConv); assert(Mask && "Missing call preserved mask for calling convention"); Ops.push_back(DAG.getRegisterMask(Mask)); @@ -2969,7 +2996,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Create the CALLSEQ_END node. 
unsigned NumBytesForCalleeToPop; if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, - getTargetMachine().Options.GuaranteedTailCallOpt)) + DAG.getTarget().Options.GuaranteedTailCallOpt)) NumBytesForCalleeToPop = NumBytes; // Callee pops everything else if (!Is64Bit && !IsTailCallConvention(CallConv) && !Subtarget->getTargetTriple().isOSMSVCRT() && @@ -3140,7 +3167,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC); bool IsCallerWin64 = Subtarget->isCallingConvWin64(CallerCC); - if (getTargetMachine().Options.GuaranteedTailCallOpt) { + if (DAG.getTarget().Options.GuaranteedTailCallOpt) { if (IsTailCallConvention(CalleeCC) && CCMatch) return true; return false; @@ -3152,7 +3179,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to // emit a special epilogue. const X86RegisterInfo *RegInfo = - static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo()); + static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo()); if (RegInfo->needsStackRealignment(MF)) return false; @@ -3181,7 +3208,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + DAG.getTarget(), ArgLocs, *DAG.getContext()); CCInfo.AnalyzeCallOperands(Outs, CC_X86); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) @@ -3202,7 +3229,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (Unused) { SmallVector<CCValAssign, 16> RVLocs; CCState CCInfo(CalleeCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); + DAG.getTarget(), RVLocs, *DAG.getContext()); CCInfo.AnalyzeCallResult(Ins, RetCC_X86); for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { CCValAssign &VA = RVLocs[i]; @@ -3216,12 +3243,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (!CCMatch) { SmallVector<CCValAssign, 16> RVLocs1; CCState CCInfo1(CalleeCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs1, *DAG.getContext()); + DAG.getTarget(), RVLocs1, *DAG.getContext()); CCInfo1.AnalyzeCallResult(Ins, RetCC_X86); SmallVector<CCValAssign, 16> RVLocs2; CCState CCInfo2(CallerCC, false, DAG.getMachineFunction(), - getTargetMachine(), RVLocs2, *DAG.getContext()); + DAG.getTarget(), RVLocs2, *DAG.getContext()); CCInfo2.AnalyzeCallResult(Ins, RetCC_X86); if (RVLocs1.size() != RVLocs2.size()) @@ -3248,7 +3275,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, // argument is passed on the stack. 
SmallVector<CCValAssign, 16> ArgLocs; CCState CCInfo(CalleeCC, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); + DAG.getTarget(), ArgLocs, *DAG.getContext()); // Allocate shadow area for Win64 if (IsCalleeWin64) @@ -3265,7 +3292,7 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, MachineFrameInfo *MFI = MF.getFrameInfo(); const MachineRegisterInfo *MRI = &MF.getRegInfo(); const X86InstrInfo *TII = - ((const X86TargetMachine&)getTargetMachine()).getInstrInfo(); + static_cast<const X86InstrInfo *>(DAG.getTarget().getInstrInfo()); for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; SDValue Arg = OutVals[i]; @@ -3288,12 +3315,12 @@ X86TargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (!Subtarget->is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) && !isa<ExternalSymbolSDNode>(Callee)) || - getTargetMachine().getRelocationModel() == Reloc::PIC_)) { + DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { unsigned NumInRegs = 0; // In PIC we need an extra register to formulate the address computation // for the callee. unsigned MaxInRegs = - (getTargetMachine().getRelocationModel() == Reloc::PIC_) ? 2 : 3; + (DAG.getTarget().getRelocationModel() == Reloc::PIC_) ? 2 : 3; for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; @@ -3417,7 +3444,7 @@ static SDValue getTargetShuffleNode(unsigned Opc, SDLoc dl, EVT VT, SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); const X86RegisterInfo *RegInfo = - static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo()); + static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo()); X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); int ReturnAddrIndex = FuncInfo->getRAIndex(); @@ -3967,14 +3994,22 @@ static bool isINSERTPSMask(ArrayRef<int> Mask, MVT VT) { unsigned CorrectPosV1 = 0; unsigned CorrectPosV2 = 0; - for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) + for (int i = 0, e = (int)VT.getVectorNumElements(); i != e; ++i) { + if (Mask[i] == -1) { + ++CorrectPosV1; + ++CorrectPosV2; + continue; + } + if (Mask[i] == i) ++CorrectPosV1; else if (Mask[i] == i + 4) ++CorrectPosV2; + } if (CorrectPosV1 == 3 || CorrectPosV2 == 3) - // We have 3 elements from one vector, and one from another. + // We have 3 elements (undefs count as elements from any vector) from one + // vector, and one from another. return true; return false; @@ -4823,19 +4858,6 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, return true; } -/// isSplatVector - Returns true if N is a BUILD_VECTOR node whose elements are -/// all the same. -static bool isSplatVector(SDNode *N) { - if (N->getOpcode() != ISD::BUILD_VECTOR) - return false; - - SDValue SplatValue = N->getOperand(0); - for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i) != SplatValue) - return false; - return true; -} - /// isZeroShuffle - Returns true if N is a VECTOR_SHUFFLE that can be resolved /// to an zero vector. /// FIXME: move to dag combiner / method on ShuffleVectorSDNode @@ -5744,18 +5766,22 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, return SDValue(); case ISD::BUILD_VECTOR: { - // The BUILD_VECTOR node must be a splat. 
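The hunk continuing below replaces the removed isSplatVector with BuildVectorSDNode::getSplatValue, which tolerates undef lanes instead of requiring every operand to be identical. A scalar model of that undef-tolerant splat test, with -1 standing in for an undef lane (illustrative only):

#include <cassert>
#include <vector>

// Returns true and sets Splat when every defined lane holds the same value;
// a vector whose lanes are all undef has no splat value.
static bool getSplatValue(const std::vector<int> &Lanes, int &Splat) {
  bool Found = false;
  for (int L : Lanes) {
    if (L == -1)
      continue;                 // undef is compatible with any splat
    if (!Found) {
      Splat = L;                // first defined lane picks the candidate
      Found = true;
    } else if (L != Splat) {
      return false;             // two defined lanes disagree
    }
  }
  return Found;
}

int main() {
  int S;
  assert(getSplatValue({7, -1, 7, 7}, S) && S == 7); // undef lanes are fine
  assert(!getSplatValue({7, 7, 7, 8}, S));           // not a splat
  assert(!getSplatValue({-1, -1, -1, -1}, S));       // all-undef: no value
  return 0;
}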
- if (!isSplatVector(Op.getNode())) + auto *BVOp = cast<BuildVectorSDNode>(Op.getNode()); + BitVector UndefElements; + SDValue Splat = BVOp->getSplatValue(&UndefElements); + + // We need a splat of a single value to use broadcast, and it doesn't + // make any sense if the value is only in one element of the vector. + if (!Splat || (VT.getVectorNumElements() - UndefElements.count()) <= 1) return SDValue(); - Ld = Op.getOperand(0); + Ld = Splat; ConstSplatVal = (Ld.getOpcode() == ISD::Constant || - Ld.getOpcode() == ISD::ConstantFP); + Ld.getOpcode() == ISD::ConstantFP); - // The suspected load node has several users. Make sure that all - // of its users are from the BUILD_VECTOR node. - // Constants may have multiple users. - if (!ConstSplatVal && !Ld->hasNUsesOfValue(VT.getVectorNumElements(), 0)) + // Make sure that all of the users of a non-constant load are from the + // BUILD_VECTOR node. + if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode())) return SDValue(); break; } @@ -6042,6 +6068,433 @@ X86TargetLowering::LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::BITCAST, dl, VT, Select); } +/// \brief Return true if \p N implements a horizontal binop and return the +/// operands for the horizontal binop into V0 and V1. +/// +/// This is a helper function of PerformBUILD_VECTORCombine. +/// This function checks that the build_vector \p N in input implements a +/// horizontal operation. Parameter \p Opcode defines the kind of horizontal +/// operation to match. +/// For example, if \p Opcode is equal to ISD::ADD, then this function +/// checks if \p N implements a horizontal arithmetic add; if instead \p Opcode +/// is equal to ISD::SUB, then this function checks if this is a horizontal +/// arithmetic sub. +/// +/// This function only analyzes elements of \p N whose indices are +/// in range [BaseIdx, LastIdx). +static bool isHorizontalBinOp(const BuildVectorSDNode *N, unsigned Opcode, + SelectionDAG &DAG, + unsigned BaseIdx, unsigned LastIdx, + SDValue &V0, SDValue &V1) { + EVT VT = N->getValueType(0); + + assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"); + assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx && + "Invalid Vector in input!"); + + bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD); + bool CanFold = true; + unsigned ExpectedVExtractIdx = BaseIdx; + unsigned NumElts = LastIdx - BaseIdx; + V0 = DAG.getUNDEF(VT); + V1 = DAG.getUNDEF(VT); + + // Check if N implements a horizontal binop. + for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) { + SDValue Op = N->getOperand(i + BaseIdx); + + // Skip UNDEFs. + if (Op->getOpcode() == ISD::UNDEF) { + // Update the expected vector extract index. 
+ if (i * 2 == NumElts) + ExpectedVExtractIdx = BaseIdx; + ExpectedVExtractIdx += 2; + continue; + } + + CanFold = Op->getOpcode() == Opcode && Op->hasOneUse(); + + if (!CanFold) + break; + + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + // Try to match the following pattern: + // (BINOP (extract_vector_elt A, I), (extract_vector_elt A, I+1)) + CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT && + Op0.getOperand(0) == Op1.getOperand(0) && + isa<ConstantSDNode>(Op0.getOperand(1)) && + isa<ConstantSDNode>(Op1.getOperand(1))); + if (!CanFold) + break; + + unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue(); + unsigned I1 = cast<ConstantSDNode>(Op1.getOperand(1))->getZExtValue(); + + if (i * 2 < NumElts) { + if (V0.getOpcode() == ISD::UNDEF) + V0 = Op0.getOperand(0); + } else { + if (V1.getOpcode() == ISD::UNDEF) + V1 = Op0.getOperand(0); + if (i * 2 == NumElts) + ExpectedVExtractIdx = BaseIdx; + } + + SDValue Expected = (i * 2 < NumElts) ? V0 : V1; + if (I0 == ExpectedVExtractIdx) + CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected; + else if (IsCommutable && I1 == ExpectedVExtractIdx) { + // Try to match the following dag sequence: + // (BINOP (extract_vector_elt A, I+1), (extract_vector_elt A, I)) + CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected; + } else + CanFold = false; + + ExpectedVExtractIdx += 2; + } + + return CanFold; +} + +/// \brief Emit a sequence of two 128-bit horizontal add/sub followed by +/// a concat_vector. +/// +/// This is a helper function of PerformBUILD_VECTORCombine. +/// This function expects two 256-bit vectors called V0 and V1. +/// At first, each vector is split into two separate 128-bit vectors. +/// Then, the resulting 128-bit vectors are used to implement two +/// horizontal binary operations. +/// +/// The kind of horizontal binary operation is defined by \p X86Opcode. +/// +/// \p Mode specifies how the 128-bit parts of V0 and V1 are passed to +/// the two new horizontal binops. +/// When Mode is set, the first horizontal binop dag node would take as input +/// the lower 128-bit of V0 and the upper 128-bit of V0. The second +/// horizontal binop dag node would take as input the lower 128-bit of V1 +/// and the upper 128-bit of V1. +/// Example: +/// HADD V0_LO, V0_HI +/// HADD V1_LO, V1_HI +/// +/// Otherwise, the first horizontal binop dag node takes as input the lower +/// 128-bit of V0 and the lower 128-bit of V1, and the second horizontal binop +/// dag node takes the upper 128-bit of V0 and the upper 128-bit of V1. +/// Example: +/// HADD V0_LO, V1_LO +/// HADD V0_HI, V1_HI +/// +/// If \p isUndefLO is set, then the algorithm propagates UNDEF to the lower +/// 128-bits of the result. If \p isUndefHI is set, then UNDEF is propagated to +/// the upper 128-bits of the result. 
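Concretely, a build_vector matched by isHorizontalBinOp above computes one adjacent-pair sum per result lane, which is exactly the 128-bit HADDPS semantics that ExpandHorizontalBinOp (documented above, defined next) stitches together for 256-bit types. A scalar model, illustrative only:

#include <array>

// v4f32 horizontal add: two pair-sums from A, then two from B.
static std::array<float, 4> haddps(const std::array<float, 4> &A,
                                   const std::array<float, 4> &B) {
  return {{A[0] + A[1], A[2] + A[3], B[0] + B[1], B[2] + B[3]}};
}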
+static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, + SDLoc DL, SelectionDAG &DAG, + unsigned X86Opcode, bool Mode, + bool isUndefLO, bool isUndefHI) { + EVT VT = V0.getValueType(); + assert(VT.is256BitVector() && VT == V1.getValueType() && + "Invalid nodes in input!"); + + unsigned NumElts = VT.getVectorNumElements(); + SDValue V0_LO = Extract128BitVector(V0, 0, DAG, DL); + SDValue V0_HI = Extract128BitVector(V0, NumElts/2, DAG, DL); + SDValue V1_LO = Extract128BitVector(V1, 0, DAG, DL); + SDValue V1_HI = Extract128BitVector(V1, NumElts/2, DAG, DL); + EVT NewVT = V0_LO.getValueType(); + + SDValue LO = DAG.getUNDEF(NewVT); + SDValue HI = DAG.getUNDEF(NewVT); + + if (Mode) { + // Don't emit a horizontal binop if the result is expected to be UNDEF. + if (!isUndefLO && V0->getOpcode() != ISD::UNDEF) + LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI); + if (!isUndefHI && V1->getOpcode() != ISD::UNDEF) + HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI); + } else { + // Don't emit a horizontal binop if the result is expected to be UNDEF. + if (!isUndefLO && (V0_LO->getOpcode() != ISD::UNDEF || + V1_LO->getOpcode() != ISD::UNDEF)) + LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO); + + if (!isUndefHI && (V0_HI->getOpcode() != ISD::UNDEF || + V1_HI->getOpcode() != ISD::UNDEF)) + HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI); + } + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); +} + +/// \brief Try to fold a build_vector that performs an 'addsub' into the +/// sequence of 'vadd + vsub + blendi'. +static SDValue matchAddSub(const BuildVectorSDNode *BV, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + SDLoc DL(BV); + EVT VT = BV->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + SDValue InVec0 = DAG.getUNDEF(VT); + SDValue InVec1 = DAG.getUNDEF(VT); + + assert((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v4f32 || + VT == MVT::v2f64) && "build_vector with an invalid type found!"); + + // Don't try to emit a VSELECT that cannot be lowered into a blend. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT)) + return SDValue(); + + // Odd-numbered elements in the input build vector are obtained from + // adding two integer/float elements. + // Even-numbered elements in the input build vector are obtained from + // subtracting two integer/float elements. + unsigned ExpectedOpcode = ISD::FSUB; + unsigned NextExpectedOpcode = ISD::FADD; + bool AddFound = false; + bool SubFound = false; + + for (unsigned i = 0, e = NumElts; i != e; i++) { + SDValue Op = BV->getOperand(i); + + // Skip 'undef' values. + unsigned Opcode = Op.getOpcode(); + if (Opcode == ISD::UNDEF) { + std::swap(ExpectedOpcode, NextExpectedOpcode); + continue; + } + + // Early exit if we found an unexpected opcode. + if (Opcode != ExpectedOpcode) + return SDValue(); + + SDValue Op0 = Op.getOperand(0); + SDValue Op1 = Op.getOperand(1); + + // Try to match the following pattern: + // (BINOP (extract_vector_elt A, i), (extract_vector_elt B, i)) + // Early exit if we cannot match that sequence. + if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !isa<ConstantSDNode>(Op0.getOperand(1)) || + !isa<ConstantSDNode>(Op1.getOperand(1)) || + Op0.getOperand(1) != Op1.getOperand(1)) + return SDValue(); + + unsigned I0 = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue(); + if (I0 != i) + return SDValue(); + + // We found a valid add/sub node. 
Update the information accordingly. + if (i & 1) + AddFound = true; + else + SubFound = true; + + // Update InVec0 and InVec1. + if (InVec0.getOpcode() == ISD::UNDEF) + InVec0 = Op0.getOperand(0); + if (InVec1.getOpcode() == ISD::UNDEF) + InVec1 = Op1.getOperand(0); + + // Make sure that the operands of each add/sub node always + // come from the same pair of vectors. + if (InVec0 != Op0.getOperand(0)) { + if (ExpectedOpcode == ISD::FSUB) + return SDValue(); + + // FADD is commutable. Try to commute the operands + // and then test again. + std::swap(Op0, Op1); + if (InVec0 != Op0.getOperand(0)) + return SDValue(); + } + + if (InVec1 != Op1.getOperand(0)) + return SDValue(); + + // Update the pair of expected opcodes. + std::swap(ExpectedOpcode, NextExpectedOpcode); + } + + // Don't try to fold this build_vector into a VSELECT if it has + // too many UNDEF operands. + if (AddFound && SubFound && InVec0.getOpcode() != ISD::UNDEF && + InVec1.getOpcode() != ISD::UNDEF) { + // Emit a sequence of vector add and sub followed by a VSELECT. + // The new VSELECT will be lowered into a BLENDI. + // At ISel stage, we pattern-match the sequence 'add + sub + BLENDI' + // and emit a single ADDSUB instruction. + SDValue Sub = DAG.getNode(ExpectedOpcode, DL, VT, InVec0, InVec1); + SDValue Add = DAG.getNode(NextExpectedOpcode, DL, VT, InVec0, InVec1); + + // Construct the VSELECT mask. + EVT MaskVT = VT.changeVectorElementTypeToInteger(); + EVT SVT = MaskVT.getVectorElementType(); + unsigned SVTBits = SVT.getSizeInBits(); + SmallVector<SDValue, 8> Ops; + + for (unsigned i = 0, e = NumElts; i != e; ++i) { + APInt Value = i & 1 ? APInt::getNullValue(SVTBits) : + APInt::getAllOnesValue(SVTBits); + SDValue Constant = DAG.getConstant(Value, SVT); + Ops.push_back(Constant); + } + + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, DL, MaskVT, Ops); + return DAG.getSelect(DL, VT, Mask, Sub, Add); + } + + return SDValue(); +} + +static SDValue PerformBUILD_VECTORCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + SDLoc DL(N); + EVT VT = N->getValueType(0); + unsigned NumElts = VT.getVectorNumElements(); + BuildVectorSDNode *BV = cast<BuildVectorSDNode>(N); + SDValue InVec0, InVec1; + + // Try to match an ADDSUB. + if ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) { + SDValue Value = matchAddSub(BV, DAG, Subtarget); + if (Value.getNode()) + return Value; + } + + // Try to match horizontal ADD/SUB. + unsigned NumUndefsLO = 0; + unsigned NumUndefsHI = 0; + unsigned Half = NumElts/2; + + // Count the number of UNDEF operands in the input build_vector. + for (unsigned i = 0, e = Half; i != e; ++i) + if (BV->getOperand(i)->getOpcode() == ISD::UNDEF) + NumUndefsLO++; + + for (unsigned i = Half, e = NumElts; i != e; ++i) + if (BV->getOperand(i)->getOpcode() == ISD::UNDEF) + NumUndefsHI++; + + // Early exit if this is either a build_vector of all UNDEFs or one where + // all operands but one are UNDEF. + if (NumUndefsLO + NumUndefsHI + 1 >= NumElts) + return SDValue(); + + if ((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget->hasSSE3()) { + // Try to match an SSE3 float HADD/HSUB. 
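Before the HADD/HSUB matching just below: the 'addsub' build_vector recognized by matchAddSub above computes a subtract in even lanes and an add in odd lanes, i.e. SSE3 ADDSUBPS semantics. The VSELECT it emits picks the Sub result in even lanes (all-ones mask) and the Add result in odd lanes (zero mask), which ISel can later fold into a single ADDSUB. A scalar model, illustrative only:

#include <array>

// ADDSUBPS semantics: even lanes subtract, odd lanes add.
static std::array<float, 4> addsubps(const std::array<float, 4> &A,
                                     const std::array<float, 4> &B) {
  return {{A[0] - B[0], A[1] + B[1], A[2] - B[2], A[3] + B[3]}};
}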
+ if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) + return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); + + if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) + return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); + } else if ((VT == MVT::v4i32 || VT == MVT::v8i16) && Subtarget->hasSSSE3()) { + // Try to match an SSSE3 integer HADD/HSUB. + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) + return DAG.getNode(X86ISD::HADD, DL, VT, InVec0, InVec1); + + if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) + return DAG.getNode(X86ISD::HSUB, DL, VT, InVec0, InVec1); + } + + if (!Subtarget->hasAVX()) + return SDValue(); + + if ((VT == MVT::v8f32 || VT == MVT::v4f64)) { + // Try to match an AVX horizontal add/sub of packed single/double + // precision floating point values from 256-bit vectors. + SDValue InVec2, InVec3; + if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::FADD, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) + return DAG.getNode(X86ISD::FHADD, DL, VT, InVec0, InVec1); + + if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::FSUB, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) + return DAG.getNode(X86ISD::FHSUB, DL, VT, InVec0, InVec1); + } else if (VT == MVT::v8i32 || VT == MVT::v16i16) { + // Try to match an AVX2 horizontal add/sub of signed integers. + SDValue InVec2, InVec3; + unsigned X86Opcode; + bool CanFold = true; + + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::ADD, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) + X86Opcode = X86ISD::HADD; + else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, Half, InVec0, InVec1) && + isHorizontalBinOp(BV, ISD::SUB, DAG, Half, NumElts, InVec2, InVec3) && + ((InVec0.getOpcode() == ISD::UNDEF || + InVec2.getOpcode() == ISD::UNDEF) || InVec0 == InVec2) && + ((InVec1.getOpcode() == ISD::UNDEF || + InVec3.getOpcode() == ISD::UNDEF) || InVec1 == InVec3)) + X86Opcode = X86ISD::HSUB; + else + CanFold = false; + + if (CanFold) { + // Fold this build_vector into a single horizontal add/sub. + // Do this only if the target has AVX2. + if (Subtarget->hasAVX2()) + return DAG.getNode(X86Opcode, DL, VT, InVec0, InVec1); + + // Do not try to expand this build_vector into a pair of horizontal + // add/sub if we can emit a pair of scalar add/sub. + if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) + return SDValue(); + + // Convert this build_vector into a pair of horizontal binop followed by + // a concat vector. 
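The conversion named in the comment above splits each 256-bit operand into 128-bit halves, emits two 128-bit horizontal ops, and concatenates the results. A scalar model of the Mode == false wiring used by this fallback, shown for a float horizontal add for readability (it also matches the 256-bit VHADDPS lane layout); illustrative only:

#include <array>

// Mode == false: low result half = HOP(V0_LO, V1_LO), high result half =
// HOP(V0_HI, V1_HI), then concatenate the two halves.
static std::array<float, 8> expandHAdd256(const std::array<float, 8> &V0,
                                          const std::array<float, 8> &V1) {
  return {{V0[0] + V0[1], V0[2] + V0[3], V1[0] + V1[1], V1[2] + V1[3],
           V0[4] + V0[5], V0[6] + V0[7], V1[4] + V1[5], V1[6] + V1[7]}};
}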
+ bool isUndefLO = NumUndefsLO == Half; + bool isUndefHI = NumUndefsHI == Half; + return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, false, + isUndefLO, isUndefHI); + } + } + + if ((VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || + VT == MVT::v16i16) && Subtarget->hasAVX()) { + unsigned X86Opcode; + if (isHorizontalBinOp(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) + X86Opcode = X86ISD::HADD; + else if (isHorizontalBinOp(BV, ISD::SUB, DAG, 0, NumElts, InVec0, InVec1)) + X86Opcode = X86ISD::HSUB; + else if (isHorizontalBinOp(BV, ISD::FADD, DAG, 0, NumElts, InVec0, InVec1)) + X86Opcode = X86ISD::FHADD; + else if (isHorizontalBinOp(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, InVec1)) + X86Opcode = X86ISD::FHSUB; + else + return SDValue(); + + // Don't try to expand this build_vector into a pair of horizontal add/sub + // if we can simply emit a pair of scalar add/sub. + if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) + return SDValue(); + + // Convert this build_vector into two horizontal add/sub followed by + // a concat vector. + bool isUndefLO = NumUndefsLO == Half; + bool isUndefHI = NumUndefsHI == Half; + return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true, + isUndefLO, isUndefHI); + } + + return SDValue(); +} + SDValue X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); @@ -6429,38 +6882,1160 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { return LowerAVXCONCAT_VECTORS(Op, DAG); } -// Try to lower a shuffle node into a simple blend instruction. -static SDValue -LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp, - const X86Subtarget *Subtarget, SelectionDAG &DAG) { - SDValue V1 = SVOp->getOperand(0); - SDValue V2 = SVOp->getOperand(1); - SDLoc dl(SVOp); - MVT VT = SVOp->getSimpleValueType(0); + +//===----------------------------------------------------------------------===// +// Vector shuffle lowering +// +// This is an experimental code path for lowering vector shuffles on x86. It is +// designed to handle arbitrary vector shuffles and blends, gracefully +// degrading performance as necessary. It works hard to recognize idiomatic +// shuffles and lower them to optimal instruction patterns without leaving +// a framework that allows reasonably efficient handling of all vector shuffle +// patterns. +//===----------------------------------------------------------------------===// + +/// \brief Tiny helper function to identify a no-op mask. +/// +/// This is a somewhat boring predicate function. It checks whether the mask +/// array input, which is assumed to be a single-input shuffle mask of the kind +/// used by the X86 shuffle instructions (not a fully general +/// ShuffleVectorSDNode mask) requires any shuffles to occur. Both undef and an +/// in-place shuffle are 'no-op's. +static bool isNoopShuffleMask(ArrayRef<int> Mask) { + for (int i = 0, Size = Mask.size(); i < Size; ++i) + if (Mask[i] != -1 && Mask[i] != i) + return false; + return true; +} + +/// \brief Helper function to classify a mask as a single-input mask. +/// +/// This isn't a generic single-input test because in the vector shuffle +/// lowering we canonicalize single inputs to be the first input operand. This +/// means we can more quickly test for a single input by only checking whether +/// an input from the second operand exists. We also assume that the size of +/// mask corresponds to the size of the input vectors which isn't true in the +/// fully general case. 
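A few concrete mask values for the two predicates defined here: -1 marks an undef lane, indices below the mask size select from the first input, and indices at or above it select from the second. Standalone restatements for illustration only:

#include <cassert>
#include <vector>

static bool isNoop(const std::vector<int> &M) {
  for (int i = 0, e = (int)M.size(); i != e; ++i)
    if (M[i] != -1 && M[i] != i)
      return false;
  return true;
}

static bool isSingleInput(const std::vector<int> &M) {
  for (int V : M)
    if (V >= (int)M.size())
      return false;
  return true;
}

int main() {
  assert(isNoop({0, -1, 2, 3}));        // undef lanes keep it a no-op
  assert(!isNoop({1, 0, 2, 3}));        // lanes 0 and 1 swap
  assert(isSingleInput({3, 2, 1, 0}));  // a reverse, first input only
  assert(!isSingleInput({0, 5, 2, 7})); // lanes 5 and 7 read the second input
  return 0;
}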
+static bool isSingleInputShuffleMask(ArrayRef<int> Mask) { + for (int M : Mask) + if (M >= (int)Mask.size()) + return false; + return true; +} + +/// \brief Get a 4-lane 8-bit shuffle immediate for a mask. +/// +/// This helper function produces an 8-bit shuffle immediate corresponding to +/// the ubiquitous shuffle encoding scheme used in x86 instructions for +/// shuffling 4 lanes. It can be used with most of the PSHUF instructions for +/// example. +/// +/// NB: We rely heavily on "undef" masks preserving the input lane. +static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, + SelectionDAG &DAG) { + assert(Mask.size() == 4 && "Only 4-lane shuffle masks"); + assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!"); + assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!"); + assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!"); + assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!"); + + unsigned Imm = 0; + Imm |= (Mask[0] == -1 ? 0 : Mask[0]) << 0; + Imm |= (Mask[1] == -1 ? 1 : Mask[1]) << 2; + Imm |= (Mask[2] == -1 ? 2 : Mask[2]) << 4; + Imm |= (Mask[3] == -1 ? 3 : Mask[3]) << 6; + return DAG.getConstant(Imm, MVT::i8); +} + +/// \brief Handle lowering of 2-lane 64-bit floating point shuffles. +/// +/// This is the basis function for the 2-lane 64-bit shuffles as we have full +/// support for floating point shuffles but not integer shuffles. These +/// instructions will incur a domain crossing penalty on some chips though so +/// it is better to avoid lowering through this for integer vectors where +/// possible. +static SDValue lowerV2F64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + assert(Op.getSimpleValueType() == MVT::v2f64 && "Bad shuffle type!"); + assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!"); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> Mask = SVOp->getMask(); + assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!"); + + if (isSingleInputShuffleMask(Mask)) { + // Straight shuffle of a single input vector. Simulate this by using the + // single input as both of the "inputs" to this instruction. + unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1); + return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V1, + DAG.getConstant(SHUFPDMask, MVT::i8)); + } + assert(Mask[0] >= 0 && Mask[0] < 2 && "Non-canonicalized blend!"); + assert(Mask[1] >= 2 && "Non-canonicalized blend!"); + + unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1); + return DAG.getNode(X86ISD::SHUFP, SDLoc(Op), MVT::v2f64, V1, V2, + DAG.getConstant(SHUFPDMask, MVT::i8)); +} + +/// \brief Handle lowering of 2-lane 64-bit integer shuffles. +/// +/// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided by +/// the integer unit to minimize domain crossing penalties. However, for blends +/// it falls back to the floating point shuffle operation with appropriate bit +/// casting. 
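Worked examples of the immediate encoding produced by getV4X86ShuffleImm8ForMask above: two bits of source-lane index per result lane, lane 0 in the low bits, with undef lanes defaulting to their own index. The helper below is an invented standalone restatement for illustration:

#include <cassert>
#include <cstdint>

// Pack four 2-bit source-lane selectors into the x86 shuffle immediate.
static uint8_t shuffleImm8(int M0, int M1, int M2, int M3) {
  return uint8_t(M0 | (M1 << 2) | (M2 << 4) | (M3 << 6));
}

int main() {
  assert(shuffleImm8(0, 1, 2, 3) == 0xE4); // identity
  assert(shuffleImm8(3, 2, 1, 0) == 0x1B); // lane reverse, e.g. PSHUFD $0x1B
  assert(shuffleImm8(0, 0, 0, 0) == 0x00); // broadcast lane 0
  return 0;
}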
+static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + assert(Op.getSimpleValueType() == MVT::v2i64 && "Bad shuffle type!"); + assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!"); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> Mask = SVOp->getMask(); + assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!"); + + if (isSingleInputShuffleMask(Mask)) { + // Straight shuffle of a single input vector. For everything from SSE2 + // onward this has a single fast instruction with no scary immediates. + // We have to map the mask as it is actually a v4i32 shuffle instruction. + V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V1); + int WidenedMask[4] = { + std::max(Mask[0], 0) * 2, std::max(Mask[0], 0) * 2 + 1, + std::max(Mask[1], 0) * 2, std::max(Mask[1], 0) * 2 + 1}; + return DAG.getNode( + ISD::BITCAST, DL, MVT::v2i64, + DAG.getNode(X86ISD::PSHUFD, SDLoc(Op), MVT::v4i32, V1, + getV4X86ShuffleImm8ForMask(WidenedMask, DAG))); + } + + // We implement this with SHUFPD which is pretty lame because it will likely + // incur 2 cycles of stall for integer vectors on Nehalem and older chips. + // However, all the alternatives are still more cycles and newer chips don't + // have this problem. It would be really nice if x86 had better shuffles here. + V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V1); + V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v2f64, V2); + return DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, + DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask)); +} + +/// \brief Lower 4-lane 32-bit floating point shuffles. +/// +/// Uses instructions exclusively from the floating point unit to minimize +/// domain crossing penalties, as these are sufficient to implement all v4f32 +/// shuffles. +static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + assert(Op.getSimpleValueType() == MVT::v4f32 && "Bad shuffle type!"); + assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> Mask = SVOp->getMask(); + assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); + + SDValue LowV = V1, HighV = V2; + int NewMask[4] = {Mask[0], Mask[1], Mask[2], Mask[3]}; + + int NumV2Elements = + std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }); + + if (NumV2Elements == 0) + // Straight shuffle of a single input vector. We pass the input vector to + // both operands to simulate this with a SHUFPS. + return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1, + getV4X86ShuffleImm8ForMask(Mask, DAG)); + + if (NumV2Elements == 1) { + int V2Index = + std::find_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; }) - + Mask.begin(); + // Compute the index adjacent to V2Index and in the same half by toggling + // the low bit. + int V2AdjIndex = V2Index ^ 1; + + if (Mask[V2AdjIndex] == -1) { + // Handles all the cases where we have a single V2 element and an undef. + // This will only ever happen in the high lanes because we commute the + // vector otherwise. + if (V2Index < 2) + std::swap(LowV, HighV); + NewMask[V2Index] -= 4; + } else { + // Handle the case where the V2 element ends up adjacent to a V1 element. 
+ // To make this work, blend them together as the first step. + int V1Index = V2AdjIndex; + int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0}; + V2 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V2, V1, + getV4X86ShuffleImm8ForMask(BlendMask, DAG)); + + // Now proceed to reconstruct the final blend as we have the necessary + // high or low half formed. + if (V2Index < 2) { + LowV = V2; + HighV = V1; + } else { + HighV = V2; + } + NewMask[V1Index] = 2; // We put the V1 element in V2[2]. + NewMask[V2Index] = 0; // We shifted the V2 element into V2[0]. + } + } else if (NumV2Elements == 2) { + if (Mask[0] < 4 && Mask[1] < 4) { + // Handle the easy case where we have V1 in the low lanes and V2 in the + // high lanes. We never see this reversed because we sort the shuffle. + NewMask[2] -= 4; + NewMask[3] -= 4; + } else { + // We have a mixture of V1 and V2 in both low and high lanes. Rather than + // trying to place elements directly, just blend them and set up the final + // shuffle to place them. + + // The first two blend mask elements are for V1, the second two are for + // V2. + int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1], + Mask[2] < 4 ? Mask[2] : Mask[3], + (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4, + (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4}; + V1 = DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V2, + getV4X86ShuffleImm8ForMask(BlendMask, DAG)); + + // Now we do a normal shuffle of V1 by giving V1 as both operands to + // a blend. + LowV = HighV = V1; + NewMask[0] = Mask[0] < 4 ? 0 : 2; + NewMask[1] = Mask[0] < 4 ? 2 : 0; + NewMask[2] = Mask[2] < 4 ? 1 : 3; + NewMask[3] = Mask[2] < 4 ? 3 : 1; + } + } + return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, LowV, HighV, + getV4X86ShuffleImm8ForMask(NewMask, DAG)); +} + +/// \brief Lower 4-lane i32 vector shuffles. +/// +/// We try to handle these with integer-domain shuffles where we can, but for +/// blends we use the floating point domain blend instructions. +static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + assert(Op.getSimpleValueType() == MVT::v4i32 && "Bad shuffle type!"); + assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!"); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> Mask = SVOp->getMask(); + assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); + + if (isSingleInputShuffleMask(Mask)) + // Straight shuffle of a single input vector. For everything from SSE2 + // onward this has a single fast instruction with no scary immediates. + return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, + getV4X86ShuffleImm8ForMask(Mask, DAG)); + + // We implement this with SHUFPS because it can blend from two vectors. + // Because we're going to eventually use SHUFPS, we use SHUFPS even to build + // up the inputs, bypassing domain shift penalties that we would incur if we + // directly used PSHUFD on Nehalem and older. For newer chips, this isn't + // relevant. + return DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, + DAG.getVectorShuffle( + MVT::v4f32, DL, + DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V1), + DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, V2), Mask)); +} + +/// \brief Lowering of single-input v8i16 shuffles is the cornerstone of SSE2 +/// shuffle lowering, and the most complex part. 
+/// +/// The lowering strategy is to try to form pairs of input lanes which are +/// targeted at the same half of the final vector, and then use a dword shuffle +/// to place them onto the right half, and finally unpack the paired lanes into +/// their final position. +/// +/// The exact breakdown of how to form these dword pairs and align them on the +/// correct sides is really tricky. See the comments within the function for +/// more of the details. +static SDValue lowerV8I16SingleInputVectorShuffle( + SDLoc DL, SDValue V, MutableArrayRef<int> Mask, + const X86Subtarget *Subtarget, SelectionDAG &DAG) { + assert(V.getSimpleValueType() == MVT::v8i16 && "Bad input type!"); + MutableArrayRef<int> LoMask = Mask.slice(0, 4); + MutableArrayRef<int> HiMask = Mask.slice(4, 4); + + SmallVector<int, 4> LoInputs; + std::copy_if(LoMask.begin(), LoMask.end(), std::back_inserter(LoInputs), + [](int M) { return M >= 0; }); + std::sort(LoInputs.begin(), LoInputs.end()); + LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end()); + SmallVector<int, 4> HiInputs; + std::copy_if(HiMask.begin(), HiMask.end(), std::back_inserter(HiInputs), + [](int M) { return M >= 0; }); + std::sort(HiInputs.begin(), HiInputs.end()); + HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end()); + int NumLToL = + std::lower_bound(LoInputs.begin(), LoInputs.end(), 4) - LoInputs.begin(); + int NumHToL = LoInputs.size() - NumLToL; + int NumLToH = + std::lower_bound(HiInputs.begin(), HiInputs.end(), 4) - HiInputs.begin(); + int NumHToH = HiInputs.size() - NumLToH; + MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL); + MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH); + MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL); + MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH); + + // Simplify the 1-into-3 and 3-into-1 cases with a single pshufd. For all + // such inputs we can swap two of the dwords across the half mark and end up + // with <=2 inputs to each half in each half. Once there, we can fall through + // to the generic code below. For example: + // + // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h] + // Mask: [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5] + // + // Before we had 3-1 in the low half and 3-1 in the high half. Afterward, 2-2 + // and 2-2. + auto balanceSides = [&](ArrayRef<int> ThreeInputs, int OneInput, + int ThreeInputHalfSum, int OneInputHalfOffset) { + // Compute the index of dword with only one word among the three inputs in + // a half by taking the sum of the half with three inputs and subtracting + // the sum of the actual three inputs. The difference is the remaining + // slot. + int DWordA = (ThreeInputHalfSum - + std::accumulate(ThreeInputs.begin(), ThreeInputs.end(), 0)) / + 2; + int DWordB = OneInputHalfOffset / 2 + (OneInput / 2 + 1) % 2; + + int PSHUFDMask[] = {0, 1, 2, 3}; + PSHUFDMask[DWordA] = DWordB; + PSHUFDMask[DWordB] = DWordA; + V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, + DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, + DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V), + getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG))); + + // Adjust the mask to match the new locations of A and B. + for (int &M : Mask) + if (M != -1 && M/2 == DWordA) + M = 2 * DWordB + M % 2; + else if (M != -1 && M/2 == DWordB) + M = 2 * DWordA + M % 2; + + // Recurse back into this routine to re-compute state now that this isn't + // a 3 and 1 problem. 
+ return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16), + Mask); + }; + if (NumLToL == 3 && NumHToL == 1) + return balanceSides(LToLInputs, HToLInputs[0], 0 + 1 + 2 + 3, 4); + else if (NumLToL == 1 && NumHToL == 3) + return balanceSides(HToLInputs, LToLInputs[0], 4 + 5 + 6 + 7, 0); + else if (NumLToH == 1 && NumHToH == 3) + return balanceSides(HToHInputs, LToHInputs[0], 4 + 5 + 6 + 7, 0); + else if (NumLToH == 3 && NumHToH == 1) + return balanceSides(LToHInputs, HToHInputs[0], 0 + 1 + 2 + 3, 4); + + // At this point there are at most two inputs to the low and high halves from + // each half. That means the inputs can always be grouped into dwords and + // those dwords can then be moved to the correct half with a dword shuffle. + // We use at most one low and one high word shuffle to collect these paired + // inputs into dwords, and finally a dword shuffle to place them. + int PSHUFLMask[4] = {-1, -1, -1, -1}; + int PSHUFHMask[4] = {-1, -1, -1, -1}; + int PSHUFDMask[4] = {-1, -1, -1, -1}; + + // First fix the masks for all the inputs that are staying in their + // original halves. This will then dictate the targets of the cross-half + // shuffles. + auto fixInPlaceInputs = [&PSHUFDMask]( + ArrayRef<int> InPlaceInputs, MutableArrayRef<int> SourceHalfMask, + MutableArrayRef<int> HalfMask, int HalfOffset) { + if (InPlaceInputs.empty()) + return; + if (InPlaceInputs.size() == 1) { + SourceHalfMask[InPlaceInputs[0] - HalfOffset] = + InPlaceInputs[0] - HalfOffset; + PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2; + return; + } + + assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!"); + SourceHalfMask[InPlaceInputs[0] - HalfOffset] = + InPlaceInputs[0] - HalfOffset; + // Put the second input next to the first so that they are packed into + // a dword. We find the adjacent index by toggling the low bit. + int AdjIndex = InPlaceInputs[0] ^ 1; + SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset; + std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex); + PSHUFDMask[AdjIndex / 2] = AdjIndex / 2; + }; + if (!HToLInputs.empty()) + fixInPlaceInputs(LToLInputs, PSHUFLMask, LoMask, 0); + if (!LToHInputs.empty()) + fixInPlaceInputs(HToHInputs, PSHUFHMask, HiMask, 4); + + // Now gather the cross-half inputs and place them into a free dword of + // their target half. + // FIXME: This operation could almost certainly be simplified dramatically to + // look more like the 3-1 fixing operation. + auto moveInputsToRightHalf = [&PSHUFDMask]( + MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs, + MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask, + int SourceOffset, int DestOffset) { + auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) { + return SourceHalfMask[Word] != -1 && SourceHalfMask[Word] != Word; + }; + auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask, + int Word) { + int LowWord = Word & ~1; + int HighWord = Word | 1; + return isWordClobbered(SourceHalfMask, LowWord) || + isWordClobbered(SourceHalfMask, HighWord); + }; + + if (IncomingInputs.empty()) + return; + + if (ExistingInputs.empty()) { + // Map any dwords with inputs from them into the right half. + for (int Input : IncomingInputs) { + // If the source half mask maps over the inputs, turn those into + // swaps and use the swapped lane. 
+ if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) { + if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] == -1) { + SourceHalfMask[SourceHalfMask[Input - SourceOffset]] = + Input - SourceOffset; + // We have to swap the uses in our half mask in one sweep. + for (int &M : HalfMask) + if (M == SourceHalfMask[Input - SourceOffset]) + M = Input; + else if (M == Input) + M = SourceHalfMask[Input - SourceOffset] + SourceOffset; + } else { + assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] == + Input - SourceOffset && + "Previous placement doesn't match!"); + } + // Note that this correctly re-maps both when we do a swap and when + // we observe the other side of the swap above. We rely on that to + // avoid swapping the members of the input list directly. + Input = SourceHalfMask[Input - SourceOffset] + SourceOffset; + } + + // Map the input's dword into the correct half. + if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] == -1) + PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2; + else + assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] == + Input / 2 && + "Previous placement doesn't match!"); + } + + // And just directly shift any other-half mask elements to be same-half + // as we will have mirrored the dword containing the element into the + // same position within that half. + for (int &M : HalfMask) + if (M >= SourceOffset && M < SourceOffset + 4) { + M = M - SourceOffset + DestOffset; + assert(M >= 0 && "This should never wrap below zero!"); + } + return; + } + + // Ensure we have the input in a viable dword of its current half. This + // is particularly tricky because the original position may be clobbered + // by inputs being moved and *staying* in that half. + if (IncomingInputs.size() == 1) { + if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) { + int InputFixed = std::find(std::begin(SourceHalfMask), + std::end(SourceHalfMask), -1) - + std::begin(SourceHalfMask) + SourceOffset; + SourceHalfMask[InputFixed - SourceOffset] = + IncomingInputs[0] - SourceOffset; + std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0], + InputFixed); + IncomingInputs[0] = InputFixed; + } + } else if (IncomingInputs.size() == 2) { + if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 || + isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) { + int SourceDWordBase = !isDWordClobbered(SourceHalfMask, 0) ? 0 : 2; + assert(!isDWordClobbered(SourceHalfMask, SourceDWordBase) && + "Not all dwords can be clobbered!"); + SourceHalfMask[SourceDWordBase] = IncomingInputs[0] - SourceOffset; + SourceHalfMask[SourceDWordBase + 1] = IncomingInputs[1] - SourceOffset; + for (int &M : HalfMask) + if (M == IncomingInputs[0]) + M = SourceDWordBase + SourceOffset; + else if (M == IncomingInputs[1]) + M = SourceDWordBase + 1 + SourceOffset; + IncomingInputs[0] = SourceDWordBase + SourceOffset; + IncomingInputs[1] = SourceDWordBase + 1 + SourceOffset; + } + } else { + llvm_unreachable("Unhandled input size!"); + } + + // Now hoist the DWord down to the right half. + int FreeDWord = (PSHUFDMask[DestOffset / 2] == -1 ? 
0 : 1) + DestOffset / 2; + assert(PSHUFDMask[FreeDWord] == -1 && "DWord not free"); + PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2; + for (int Input : IncomingInputs) + std::replace(HalfMask.begin(), HalfMask.end(), Input, + FreeDWord * 2 + Input % 2); + }; + moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, + /*SourceOffset*/ 4, /*DestOffset*/ 0); + moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, + /*SourceOffset*/ 0, /*DestOffset*/ 4); + + // Now enact all the shuffles we've computed to move the inputs into their + // target half. + if (!isNoopShuffleMask(PSHUFLMask)) + V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V, + getV4X86ShuffleImm8ForMask(PSHUFLMask, DAG)); + if (!isNoopShuffleMask(PSHUFHMask)) + V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V, + getV4X86ShuffleImm8ForMask(PSHUFHMask, DAG)); + if (!isNoopShuffleMask(PSHUFDMask)) + V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, + DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, + DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V), + getV4X86ShuffleImm8ForMask(PSHUFDMask, DAG))); + + // At this point, each half should contain all its inputs, and we can then + // just shuffle them into their final position. + assert(std::count_if(LoMask.begin(), LoMask.end(), + [](int M) { return M >= 4; }) == 0 && + "Failed to lift all the high half inputs to the low mask!"); + assert(std::count_if(HiMask.begin(), HiMask.end(), + [](int M) { return M >= 0 && M < 4; }) == 0 && + "Failed to lift all the low half inputs to the high mask!"); + + // Do a half shuffle for the low mask. + if (!isNoopShuffleMask(LoMask)) + V = DAG.getNode(X86ISD::PSHUFLW, DL, MVT::v8i16, V, + getV4X86ShuffleImm8ForMask(LoMask, DAG)); + + // Do a half shuffle with the high mask after shifting its values down. + for (int &M : HiMask) + if (M >= 0) + M -= 4; + if (!isNoopShuffleMask(HiMask)) + V = DAG.getNode(X86ISD::PSHUFHW, DL, MVT::v8i16, V, + getV4X86ShuffleImm8ForMask(HiMask, DAG)); + + return V; +} + +/// \brief Detect whether the mask pattern should be lowered through +/// interleaving. +/// +/// This essentially tests whether viewing the mask as an interleaving of two +/// sub-sequences reduces the cross-input traffic of a blend operation. If so, +/// lowering it through interleaving is a significantly better strategy. +static bool shouldLowerAsInterleaving(ArrayRef<int> Mask) { + int NumEvenInputs[2] = {0, 0}; + int NumOddInputs[2] = {0, 0}; + int NumLoInputs[2] = {0, 0}; + int NumHiInputs[2] = {0, 0}; + for (int i = 0, Size = Mask.size(); i < Size; ++i) { + if (Mask[i] < 0) + continue; + + int InputIdx = Mask[i] >= Size; + + if (i < Size / 2) + ++NumLoInputs[InputIdx]; + else + ++NumHiInputs[InputIdx]; + + if ((i % 2) == 0) + ++NumEvenInputs[InputIdx]; + else + ++NumOddInputs[InputIdx]; + } + + // The minimum number of cross-input results for both the interleaved and + // split cases. If interleaving results in fewer cross-input results, return + // true. + int InterleavedCrosses = std::min(NumEvenInputs[1] + NumOddInputs[0], + NumEvenInputs[0] + NumOddInputs[1]); + int SplitCrosses = std::min(NumLoInputs[1] + NumHiInputs[0], + NumLoInputs[0] + NumHiInputs[1]); + return InterleavedCrosses < SplitCrosses; +} + +/// \brief Blend two v8i16 vectors using a naive unpack strategy. +/// +/// This strategy only works when the inputs from each vector fit into a single +/// half of that vector, and generally there are not so many inputs as to leave +/// the in-place shuffles required highly constrained (and thus expensive). 
It
+/// shifts all the inputs into a single side of both input vectors and then
+/// uses an unpack to interleave these inputs in a single vector. At that
+/// point, we will fall back on the generic single input shuffle lowering.
+static SDValue lowerV8I16BasicBlendVectorShuffle(SDLoc DL, SDValue V1,
+                                                 SDValue V2,
+                                                 MutableArrayRef<int> Mask,
+                                                 const X86Subtarget *Subtarget,
+                                                 SelectionDAG &DAG) {
+  assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
+  assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad input type!");
+  SmallVector<int, 3> LoV1Inputs, HiV1Inputs, LoV2Inputs, HiV2Inputs;
+  for (int i = 0; i < 8; ++i)
+    if (Mask[i] >= 0 && Mask[i] < 4)
+      LoV1Inputs.push_back(i);
+    else if (Mask[i] >= 4 && Mask[i] < 8)
+      HiV1Inputs.push_back(i);
+    else if (Mask[i] >= 8 && Mask[i] < 12)
+      LoV2Inputs.push_back(i);
+    else if (Mask[i] >= 12)
+      HiV2Inputs.push_back(i);
+
+  int NumV1Inputs = LoV1Inputs.size() + HiV1Inputs.size();
+  int NumV2Inputs = LoV2Inputs.size() + HiV2Inputs.size();
+  (void)NumV1Inputs;
+  (void)NumV2Inputs;
+  assert(NumV1Inputs > 0 && NumV1Inputs <= 3 && "At most 3 inputs supported");
+  assert(NumV2Inputs > 0 && NumV2Inputs <= 3 && "At most 3 inputs supported");
+  assert(NumV1Inputs + NumV2Inputs <= 4 && "At most 4 combined inputs");
+
+  bool MergeFromLo = LoV1Inputs.size() + LoV2Inputs.size() >=
+                     HiV1Inputs.size() + HiV2Inputs.size();
+
+  auto moveInputsToHalf = [&](SDValue V, ArrayRef<int> LoInputs,
+                              ArrayRef<int> HiInputs, bool MoveToLo,
+                              int MaskOffset) {
+    ArrayRef<int> GoodInputs = MoveToLo ? LoInputs : HiInputs;
+    ArrayRef<int> BadInputs = MoveToLo ? HiInputs : LoInputs;
+    if (BadInputs.empty())
+      return V;
+
+    int MoveMask[] = {-1, -1, -1, -1, -1, -1, -1, -1};
+    int MoveOffset = MoveToLo ? 0 : 4;
+
+    if (GoodInputs.empty()) {
+      for (int BadInput : BadInputs) {
+        MoveMask[Mask[BadInput] % 4 + MoveOffset] = Mask[BadInput] - MaskOffset;
+        Mask[BadInput] = Mask[BadInput] % 4 + MoveOffset + MaskOffset;
+      }
+    } else {
+      if (GoodInputs.size() == 2) {
+        // If the good inputs are spread across two dwords, pack them into
+        // a single dword.
+        MoveMask[Mask[GoodInputs[0]] % 2 + MoveOffset] =
+            Mask[GoodInputs[0]] - MaskOffset;
+        MoveMask[Mask[GoodInputs[1]] % 2 + MoveOffset] =
+            Mask[GoodInputs[1]] - MaskOffset;
+        Mask[GoodInputs[0]] = Mask[GoodInputs[0]] % 2 + MoveOffset + MaskOffset;
+        Mask[GoodInputs[1]] = Mask[GoodInputs[1]] % 2 + MoveOffset + MaskOffset;
+      } else {
+        // Otherwise pin the good inputs.
+ for (int GoodInput : GoodInputs) + MoveMask[Mask[GoodInput] - MaskOffset] = Mask[GoodInput] - MaskOffset; + } + + int MoveMaskIdx = + std::find(std::begin(MoveMask) + MoveOffset, std::end(MoveMask), -1) - + std::begin(MoveMask); + assert(MoveMaskIdx >= MoveOffset && "Established above"); + + if (BadInputs.size() == 2) { + assert(MoveMask[MoveMaskIdx] == -1 && "Expected empty slot"); + assert(MoveMask[MoveMaskIdx + 1] == -1 && "Expected empty slot"); + MoveMask[MoveMaskIdx + Mask[BadInputs[0]] % 2] = + Mask[BadInputs[0]] - MaskOffset; + MoveMask[MoveMaskIdx + Mask[BadInputs[1]] % 2] = + Mask[BadInputs[1]] - MaskOffset; + Mask[BadInputs[0]] = MoveMaskIdx + Mask[BadInputs[0]] % 2 + MaskOffset; + Mask[BadInputs[1]] = MoveMaskIdx + Mask[BadInputs[1]] % 2 + MaskOffset; + } else { + assert(BadInputs.size() == 1 && "All sizes handled"); + MoveMask[MoveMaskIdx] = Mask[BadInputs[0]] - MaskOffset; + Mask[BadInputs[0]] = MoveMaskIdx + MaskOffset; + } + } + + return DAG.getVectorShuffle(MVT::v8i16, DL, V, DAG.getUNDEF(MVT::v8i16), + MoveMask); + }; + V1 = moveInputsToHalf(V1, LoV1Inputs, HiV1Inputs, MergeFromLo, + /*MaskOffset*/ 0); + V2 = moveInputsToHalf(V2, LoV2Inputs, HiV2Inputs, MergeFromLo, + /*MaskOffset*/ 8); + + // FIXME: Select an interleaving of the merge of V1 and V2 that minimizes + // cross-half traffic in the final shuffle. + + // Munge the mask to be a single-input mask after the unpack merges the + // results. + for (int &M : Mask) + if (M != -1) + M = 2 * (M % 4) + (M / 8); + + return DAG.getVectorShuffle( + MVT::v8i16, DL, DAG.getNode(MergeFromLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, + DL, MVT::v8i16, V1, V2), + DAG.getUNDEF(MVT::v8i16), Mask); +} + +/// \brief Generic lowering of 8-lane i16 shuffles. +/// +/// This handles both single-input shuffles and combined shuffle/blends with +/// two inputs. The single input shuffles are immediately delegated to +/// a dedicated lowering routine. +/// +/// The blends are lowered in one of three fundamental ways. If there are few +/// enough inputs, it delegates to a basic UNPCK-based strategy. If the shuffle +/// of the input is significantly cheaper when lowered as an interleaving of +/// the two inputs, try to interleave them. Otherwise, blend the low and high +/// halves of the inputs separately (making them have relatively few inputs) +/// and then concatenate them. 
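
// [Sketch] A worked instance of the shouldLowerAsInterleaving heuristic
// referenced above, run on a hypothetical v8i16 mask with plain arrays; the
// counting mirrors the loop in that function.
#include <algorithm>
#include <cassert>
int main() {
  const int Mask[8] = {0, 8, 1, 9, 2, 10, 3, 11}; // classic unpcklwd pattern
  int Even[2] = {0, 0}, Odd[2] = {0, 0}, Lo[2] = {0, 0}, Hi[2] = {0, 0};
  for (int i = 0; i < 8; ++i) {
    int Input = Mask[i] >= 8; // 0 selects V1, 1 selects V2
    (i < 4 ? Lo : Hi)[Input]++;
    (i % 2 == 0 ? Even : Odd)[Input]++;
  }
  // Viewed as an interleaving, no element crosses between the inputs, but
  // splitting into low/high halves would cross four times.
  int InterleavedCrosses = std::min(Even[1] + Odd[0], Even[0] + Odd[1]);
  int SplitCrosses = std::min(Lo[1] + Hi[0], Lo[0] + Hi[1]);
  assert(InterleavedCrosses == 0 && SplitCrosses == 4);
  return InterleavedCrosses < SplitCrosses ? 0 : 1; // interleaving wins here
}
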
+static SDValue lowerV8I16VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + assert(Op.getSimpleValueType() == MVT::v8i16 && "Bad shuffle type!"); + assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!"); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> OrigMask = SVOp->getMask(); + int MaskStorage[8] = {OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3], + OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7]}; + MutableArrayRef<int> Mask(MaskStorage); + + assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); + + auto isV1 = [](int M) { return M >= 0 && M < 8; }; + auto isV2 = [](int M) { return M >= 8; }; + + int NumV1Inputs = std::count_if(Mask.begin(), Mask.end(), isV1); + int NumV2Inputs = std::count_if(Mask.begin(), Mask.end(), isV2); + + if (NumV2Inputs == 0) + return lowerV8I16SingleInputVectorShuffle(DL, V1, Mask, Subtarget, DAG); + + assert(NumV1Inputs > 0 && "All single-input shuffles should be canonicalized " + "to be V1-input shuffles."); + + if (NumV1Inputs + NumV2Inputs <= 4) + return lowerV8I16BasicBlendVectorShuffle(DL, V1, V2, Mask, Subtarget, DAG); + + // Check whether an interleaving lowering is likely to be more efficient. + // This isn't perfect but it is a strong heuristic that tends to work well on + // the kinds of shuffles that show up in practice. + // + // FIXME: Handle 1x, 2x, and 4x interleaving. + if (shouldLowerAsInterleaving(Mask)) { + // FIXME: Figure out whether we should pack these into the low or high + // halves. + + int EMask[8], OMask[8]; + for (int i = 0; i < 4; ++i) { + EMask[i] = Mask[2*i]; + OMask[i] = Mask[2*i + 1]; + EMask[i + 4] = -1; + OMask[i + 4] = -1; + } + + SDValue Evens = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, EMask); + SDValue Odds = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, OMask); + + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v8i16, Evens, Odds); + } + + int LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; + int HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; + + for (int i = 0; i < 4; ++i) { + LoBlendMask[i] = Mask[i]; + HiBlendMask[i] = Mask[i + 4]; + } + + SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask); + SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask); + LoV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, LoV); + HiV = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, HiV); + + return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, + DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, LoV, HiV)); +} + +/// \brief Generic lowering of v16i8 shuffles. +/// +/// This is a hybrid strategy to lower v16i8 vectors. It first attempts to +/// detect any complexity reducing interleaving. If that doesn't help, it uses +/// UNPCK to spread the i8 elements across two i16-element vectors, and uses +/// the existing lowering for v8i16 blends on each half, finally PACK-ing them +/// back together. 
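
// [Sketch] The round trip behind the v16i8 strategy described above, modeled
// per lane with scalar ops and hypothetical values: zero-unpacking a byte
// into a 16-bit lane and PACKUS-ing it back is lossless because the high
// byte of each widened lane is zero; PACKUS saturates to [0, 255] otherwise.
#include <cassert>
#include <cstdint>
static uint8_t packus(uint16_t Lane) { return Lane > 255 ? 255 : uint8_t(Lane); }
int main() {
  uint8_t B = 0xCD;
  uint16_t Widened = uint16_t(B); // one lane of UNPCKL/H with a zero vector
  assert(packus(Widened) == B);   // PACKUS restores the original byte
  assert(packus(0x1FF) == 0xFF);  // out-of-range lanes saturate instead
  return 0;
}
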
+static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + SDLoc DL(Op); + assert(Op.getSimpleValueType() == MVT::v16i8 && "Bad shuffle type!"); + assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!"); + assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!"); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> OrigMask = SVOp->getMask(); + assert(OrigMask.size() == 16 && "Unexpected mask size for v16 shuffle!"); + int MaskStorage[16] = { + OrigMask[0], OrigMask[1], OrigMask[2], OrigMask[3], + OrigMask[4], OrigMask[5], OrigMask[6], OrigMask[7], + OrigMask[8], OrigMask[9], OrigMask[10], OrigMask[11], + OrigMask[12], OrigMask[13], OrigMask[14], OrigMask[15]}; + MutableArrayRef<int> Mask(MaskStorage); + MutableArrayRef<int> LoMask = Mask.slice(0, 8); + MutableArrayRef<int> HiMask = Mask.slice(8, 8); + + // For single-input shuffles, there are some nicer lowering tricks we can use. + if (isSingleInputShuffleMask(Mask)) { + // Check whether we can widen this to an i16 shuffle by duplicating bytes. + // Notably, this handles splat and partial-splat shuffles more efficiently. + // However, it only makes sense if the pre-duplication shuffle simplifies + // things significantly. Currently, this means we need to be able to + // express the pre-duplication shuffle as an i16 shuffle. + // + // FIXME: We should check for other patterns which can be widened into an + // i16 shuffle as well. + auto canWidenViaDuplication = [](ArrayRef<int> Mask) { + for (int i = 0; i < 16; i += 2) { + if (Mask[i] != Mask[i + 1]) + return false; + } + return true; + }; + auto tryToWidenViaDuplication = [&]() -> SDValue { + if (!canWidenViaDuplication(Mask)) + return SDValue(); + SmallVector<int, 4> LoInputs; + std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(LoInputs), + [](int M) { return M >= 0 && M < 8; }); + std::sort(LoInputs.begin(), LoInputs.end()); + LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), + LoInputs.end()); + SmallVector<int, 4> HiInputs; + std::copy_if(Mask.begin(), Mask.end(), std::back_inserter(HiInputs), + [](int M) { return M >= 8; }); + std::sort(HiInputs.begin(), HiInputs.end()); + HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), + HiInputs.end()); + + bool TargetLo = LoInputs.size() >= HiInputs.size(); + ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs; + ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs; + + int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1}; + SmallDenseMap<int, int, 8> LaneMap; + for (int I : InPlaceInputs) { + PreDupI16Shuffle[I/2] = I/2; + LaneMap[I] = I; + } + int j = TargetLo ? 0 : 4, je = j + 4; + for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) { + // Check if j is already a shuffle of this input. This happens when + // there are two adjacent bytes after we move the low one. + if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) { + // If we haven't yet mapped the input, search for a slot into which + // we can map it. + while (j < je && PreDupI16Shuffle[j] != -1) + ++j; + + if (j == je) + // We can't place the inputs into a single half with a simple i16 shuffle, so bail. + return SDValue(); + + // Map this input with the i16 shuffle. + PreDupI16Shuffle[j] = MovingInputs[i] / 2; + } + + // Update the lane map based on the mapping we ended up with. 
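
// [Sketch] A worked trace of this widening for a hypothetical byte-splat
// mask, Mask[i] == 3 for all i (values not from this patch): every byte pair
// matches, so the mask can widen. LoInputs == {3} beats the empty HiInputs,
// so TargetLo is true; byte 3 is already in the low half, giving
// PreDupI16Shuffle[3 / 2] = 1 and LaneMap[3] = 3. The self-unpack
// UNPCKL(V1, V1) then duplicates byte 3 into both bytes of word 3, and
// PostDupI16Shuffle[i / 2] = LaneMap[3] = 3 splats that word: the v16i8
// splat becomes one v8i16 splat plus a single unpack.
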
+ LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2; + } + V1 = DAG.getNode( + ISD::BITCAST, DL, MVT::v16i8, + DAG.getVectorShuffle(MVT::v8i16, DL, + DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1), + DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle)); + + // Unpack the bytes to form the i16s that will be shuffled into place. + V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, + MVT::v16i8, V1, V1); + + int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; + for (int i = 0; i < 16; i += 2) { + if (Mask[i] != -1) + PostDupI16Shuffle[i / 2] = LaneMap[Mask[i]] - (TargetLo ? 0 : 8); + assert(PostDupI16Shuffle[i / 2] < 8 && "Invalid v8 shuffle mask!"); + } + return DAG.getNode( + ISD::BITCAST, DL, MVT::v16i8, + DAG.getVectorShuffle(MVT::v8i16, DL, + DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V1), + DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle)); + }; + if (SDValue V = tryToWidenViaDuplication()) + return V; + } + + // Check whether an interleaving lowering is likely to be more efficient. + // This isn't perfect but it is a strong heuristic that tends to work well on + // the kinds of shuffles that show up in practice. + // + // FIXME: We need to handle other interleaving widths (i16, i32, ...). + if (shouldLowerAsInterleaving(Mask)) { + // FIXME: Figure out whether we should pack these into the low or high + // halves. + + int EMask[16], OMask[16]; + for (int i = 0; i < 8; ++i) { + EMask[i] = Mask[2*i]; + OMask[i] = Mask[2*i + 1]; + EMask[i + 8] = -1; + OMask[i + 8] = -1; + } + + SDValue Evens = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, EMask); + SDValue Odds = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, OMask); + + return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, Evens, Odds); + } + + int V1LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; + int V1HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; + int V2LoBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; + int V2HiBlendMask[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; + + auto buildBlendMasks = [](MutableArrayRef<int> HalfMask, + MutableArrayRef<int> V1HalfBlendMask, + MutableArrayRef<int> V2HalfBlendMask) { + for (int i = 0; i < 8; ++i) + if (HalfMask[i] >= 0 && HalfMask[i] < 16) { + V1HalfBlendMask[i] = HalfMask[i]; + HalfMask[i] = i; + } else if (HalfMask[i] >= 16) { + V2HalfBlendMask[i] = HalfMask[i] - 16; + HalfMask[i] = i + 8; + } + }; + buildBlendMasks(LoMask, V1LoBlendMask, V2LoBlendMask); + buildBlendMasks(HiMask, V1HiBlendMask, V2HiBlendMask); + + SDValue Zero = getZeroVector(MVT::v8i16, Subtarget, DAG, DL); + + auto buildLoAndHiV8s = [&](SDValue V, MutableArrayRef<int> LoBlendMask, + MutableArrayRef<int> HiBlendMask) { + SDValue V1, V2; + // Check if any of the odd lanes in the v16i8 are used. If not, we can mask + // them out and avoid using UNPCK{L,H} to extract the elements of V as + // i16s. + if (std::none_of(LoBlendMask.begin(), LoBlendMask.end(), + [](int M) { return M >= 0 && M % 2 == 1; }) && + std::none_of(HiBlendMask.begin(), HiBlendMask.end(), + [](int M) { return M >= 0 && M % 2 == 1; })) { + // Use a mask to drop the high bytes. + V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V); + V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, V1, + DAG.getConstant(0x00FF, MVT::v8i16)); + + // This will be a single vector shuffle instead of a blend so nuke V2. + V2 = DAG.getUNDEF(MVT::v8i16); + + // Squash the masks to point directly into V1. 
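
// [Sketch] Why the 0x00FF mask used just below suffices when only even bytes
// are referenced, shown on one hypothetical little-endian lane: byte 2*i is
// the low byte of 16-bit lane i, so clearing each lane's high byte leaves
// exactly the bytes the halved ("squashed") indices select.
#include <cassert>
#include <cstdint>
int main() {
  uint16_t Lane = 0xBBAA;          // byte 2*i = 0xAA, byte 2*i+1 = 0xBB
  uint16_t Masked = Lane & 0x00FF; // drop the odd (high) byte
  assert(Masked == 0x00AA);        // lane i now holds byte 2*i, zero-extended
  return 0;
}
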
+ for (int &M : LoBlendMask) + if (M >= 0) + M /= 2; + for (int &M : HiBlendMask) + if (M >= 0) + M /= 2; + } else { + // Otherwise just unpack the low half of V into V1 and the high half into + // V2 so that we can blend them as i16s. + V1 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, + DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); + V2 = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, + DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); + } + + SDValue BlendedLo = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, LoBlendMask); + SDValue BlendedHi = DAG.getVectorShuffle(MVT::v8i16, DL, V1, V2, HiBlendMask); + return std::make_pair(BlendedLo, BlendedHi); + }; + SDValue V1Lo, V1Hi, V2Lo, V2Hi; + std::tie(V1Lo, V1Hi) = buildLoAndHiV8s(V1, V1LoBlendMask, V1HiBlendMask); + std::tie(V2Lo, V2Hi) = buildLoAndHiV8s(V2, V2LoBlendMask, V2HiBlendMask); + + SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Lo, V2Lo, LoMask); + SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, V1Hi, V2Hi, HiMask); + + return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV); +} + +/// \brief Dispatching routine to lower various 128-bit x86 vector shuffles. +/// +/// This routine breaks down the specific type of 128-bit shuffle and +/// dispatches to the lowering routines accordingly. +static SDValue lower128BitVectorShuffle(SDValue Op, SDValue V1, SDValue V2, + MVT VT, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + switch (VT.SimpleTy) { + case MVT::v2i64: + return lowerV2I64VectorShuffle(Op, V1, V2, Subtarget, DAG); + case MVT::v2f64: + return lowerV2F64VectorShuffle(Op, V1, V2, Subtarget, DAG); + case MVT::v4i32: + return lowerV4I32VectorShuffle(Op, V1, V2, Subtarget, DAG); + case MVT::v4f32: + return lowerV4F32VectorShuffle(Op, V1, V2, Subtarget, DAG); + case MVT::v8i16: + return lowerV8I16VectorShuffle(Op, V1, V2, Subtarget, DAG); + case MVT::v16i8: + return lowerV16I8VectorShuffle(Op, V1, V2, Subtarget, DAG); + + default: + llvm_unreachable("Unimplemented!"); + } +} + +/// \brief Tiny helper function to test whether adjacent masks are sequential. +static bool areAdjacentMasksSequential(ArrayRef<int> Mask) { + for (int i = 0, Size = Mask.size(); i < Size; i += 2) + if (Mask[i] + 1 != Mask[i+1]) + return false; + + return true; +} + +/// \brief Top-level lowering for x86 vector shuffles. +/// +/// This handles decomposition, canonicalization, and lowering of all x86 +/// vector shuffles. Most of the specific lowering strategies are encapsulated +/// above in helper routines. The canonicalization attempts to widen shuffles +/// to involve fewer lanes of wider elements, consolidate symmetric patterns +/// s.t. only one of the two inputs needs to be tested, etc. +static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + ArrayRef<int> Mask = SVOp->getMask(); + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + MVT VT = Op.getSimpleValueType(); + int NumElements = VT.getVectorNumElements(); + SDLoc dl(Op); + + assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); + + bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; + bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; + if (V1IsUndef && V2IsUndef) + return DAG.getUNDEF(VT); + + // When we create a shuffle node we put the UNDEF node to second operand, + // but in some cases the first operand may be transformed to UNDEF. + // In this case we should just commute the node. 
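
// [Sketch] What commuting does to a shuffle mask, on plain ints with a
// hypothetical 4-element example (the real work happens inside
// CommuteVectorShuffle): once the operands swap, indices below NumElements
// name the other vector, so every in-range index moves by +/- NumElements.
#include <cassert>
int main() {
  const int NumElements = 4;
  int Mask[4] = {4, 5, 0, -1}; // lanes 0,1 of V2, lane 0 of V1, one undef
  for (int &M : Mask)
    if (M >= 0)
      M = M < NumElements ? M + NumElements : M - NumElements;
  assert(Mask[0] == 0 && Mask[1] == 1 && Mask[2] == 4 && Mask[3] == -1);
  return 0;
}
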
+ if (V1IsUndef) + return CommuteVectorShuffle(SVOp, DAG); + + // Check for non-undef masks pointing at an undef vector and make the masks + // undef as well. This makes it easier to match the shuffle based solely on + // the mask. + if (V2IsUndef) + for (int M : Mask) + if (M >= NumElements) { + SmallVector<int, 8> NewMask(Mask.begin(), Mask.end()); + for (int &M : NewMask) + if (M >= NumElements) + M = -1; + return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask); + } + + // For integer vector shuffles, try to collapse them into a shuffle of fewer + // lanes but wider integers. We cap this to not form integers larger than i64 + // but it might be interesting to form i128 integers to handle flipping the + // low and high halves of AVX 256-bit vectors. + if (VT.isInteger() && VT.getScalarSizeInBits() < 64 && + areAdjacentMasksSequential(Mask)) { + SmallVector<int, 8> NewMask; + for (int i = 0, Size = Mask.size(); i < Size; i += 2) + NewMask.push_back(Mask[i] / 2); + MVT NewVT = + MVT::getVectorVT(MVT::getIntegerVT(VT.getScalarSizeInBits() * 2), + VT.getVectorNumElements() / 2); + V1 = DAG.getNode(ISD::BITCAST, dl, NewVT, V1); + V2 = DAG.getNode(ISD::BITCAST, dl, NewVT, V2); + return DAG.getNode(ISD::BITCAST, dl, VT, + DAG.getVectorShuffle(NewVT, dl, V1, V2, NewMask)); + } + + int NumV1Elements = 0, NumUndefElements = 0, NumV2Elements = 0; + for (int M : SVOp->getMask()) + if (M < 0) + ++NumUndefElements; + else if (M < NumElements) + ++NumV1Elements; + else + ++NumV2Elements; + + // Commute the shuffle as needed such that more elements come from V1 than + // V2. This allows us to match the shuffle pattern strictly on how many + // elements come from V1 without handling the symmetric cases. + if (NumV2Elements > NumV1Elements) + return CommuteVectorShuffle(SVOp, DAG); + + // When the number of V1 and V2 elements are the same, try to minimize the + // number of uses of V2 in the low half of the vector. + if (NumV1Elements == NumV2Elements) { + int LowV1Elements = 0, LowV2Elements = 0; + for (int M : SVOp->getMask().slice(0, NumElements / 2)) + if (M >= NumElements) + ++LowV2Elements; + else if (M >= 0) + ++LowV1Elements; + if (LowV2Elements > LowV1Elements) + return CommuteVectorShuffle(SVOp, DAG); + } + + // For each vector width, delegate to a specialized lowering routine. + if (VT.getSizeInBits() == 128) + return lower128BitVectorShuffle(Op, V1, V2, VT, Subtarget, DAG); + + llvm_unreachable("Unimplemented!"); +} + + +//===----------------------------------------------------------------------===// +// Legacy vector shuffle lowering +// +// This code is the legacy code handling vector shuffles until the above +// replaces its functionality and performance. +//===----------------------------------------------------------------------===// + +static bool isBlendMask(ArrayRef<int> MaskVals, MVT VT, bool hasSSE41, + bool hasInt256, unsigned *MaskOut = nullptr) { MVT EltVT = VT.getVectorElementType(); - unsigned NumElems = VT.getVectorNumElements(); // There is no blend with immediate in AVX-512. if (VT.is512BitVector()) - return SDValue(); + return false; - if (!Subtarget->hasSSE41() || EltVT == MVT::i8) - return SDValue(); - if (!Subtarget->hasInt256() && VT == MVT::v16i16) - return SDValue(); + if (!hasSSE41 || EltVT == MVT::i8) + return false; + if (!hasInt256 && VT == MVT::v16i16) + return false; - // Check the mask for BLEND and build the value. unsigned MaskValue = 0; + unsigned NumElems = VT.getVectorNumElements(); // There are 2 lanes if (NumElems > 8), and 1 lane otherwise. 
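
// [Sketch] A worked instance of the immediate the lane-by-lane walk below
// computes, for a hypothetical single-lane v8i16 blend mask: bit i of the
// immediate is set exactly when lane i is taken from V2, which is the form a
// PBLENDW-style instruction consumes.
#include <cassert>
int main() {
  const int NumElems = 8;
  const int MaskVals[8] = {0, 9, 2, 11, 4, 13, 6, 15};
  unsigned MaskValue = 0;
  for (int i = 0; i < NumElems; ++i) {
    if (MaskVals[i] < 0 || MaskVals[i] == i)
      continue;               // lane i stays in V1
    if (MaskVals[i] != i + NumElems)
      return 1;               // not a per-lane blend at all
    MaskValue |= 1u << i;     // lane i comes from V2
  }
  assert(MaskValue == 0xAA);  // odd lanes from V2, even lanes from V1
  return 0;
}
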
-  unsigned NumLanes = (NumElems-1)/8 + 1;
+  unsigned NumLanes = (NumElems - 1) / 8 + 1;
 
   unsigned NumElemsInLane = NumElems / NumLanes;
 
   // Blend for v16i16 should be symmetric for both lanes.
   for (unsigned i = 0; i < NumElemsInLane; ++i) {
-    int SndLaneEltIdx = (NumLanes == 2) ?
-      SVOp->getMaskElt(i + NumElemsInLane) : -1;
-    int EltIdx = SVOp->getMaskElt(i);
+    int SndLaneEltIdx = (NumLanes == 2) ? MaskVals[i + NumElemsInLane] : -1;
+    int EltIdx = MaskVals[i];
 
     if ((EltIdx < 0 || EltIdx == (int)i) &&
         (SndLaneEltIdx < 0 || SndLaneEltIdx == (int)(i + NumElemsInLane)))
@@ -6469,11 +8044,34 @@ LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
     if (((unsigned)EltIdx == (i + NumElems)) &&
         (SndLaneEltIdx < 0 ||
          (unsigned)SndLaneEltIdx == i + NumElems + NumElemsInLane))
-      MaskValue |= (1<<i);
+      MaskValue |= (1 << i);
     else
-      return SDValue();
+      return false;
   }
 
+  if (MaskOut)
+    *MaskOut = MaskValue;
+  return true;
+}
+
+// Try to lower a shuffle node into a simple blend instruction.
+// This function assumes isBlendMask returns true for this
+// ShuffleVectorSDNode.
+static SDValue LowerVECTOR_SHUFFLEtoBlend(ShuffleVectorSDNode *SVOp,
+                                          unsigned MaskValue,
+                                          const X86Subtarget *Subtarget,
+                                          SelectionDAG &DAG) {
+  MVT VT = SVOp->getSimpleValueType(0);
+  MVT EltVT = VT.getVectorElementType();
+  assert(isBlendMask(SVOp->getMask(), VT, Subtarget->hasSSE41(),
+                     Subtarget->hasInt256()) &&
+         "Trying to lower a VECTOR_SHUFFLE to a Blend but with the wrong mask");
+  SDValue V1 = SVOp->getOperand(0);
+  SDValue V2 = SVOp->getOperand(1);
+  SDLoc dl(SVOp);
+  unsigned NumElems = VT.getVectorNumElements();
+
   // Convert i32 vectors to floating point if it is not AVX2.
   // AVX2 introduced VPBLENDD instruction for 128 and 256-bit vectors.
   MVT BlendVT = VT;
@@ -7450,8 +9048,9 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
   assert((VT == MVT::v4f32 || VT == MVT::v4i32) &&
          "unsupported vector type for insertps/pinsrd");
 
-  int FromV1 = std::count_if(Mask.begin(), Mask.end(),
-                             [](const int &i) { return i < 4; });
+  auto FromV1Predicate = [](const int &i) { return i < 4 && i > -1; };
+  auto FromV2Predicate = [](const int &i) { return i >= 4; };
+  int FromV1 = std::count_if(Mask.begin(), Mask.end(), FromV1Predicate);
 
   SDValue From;
   SDValue To;
@@ -7459,23 +9058,26 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl,
   if (FromV1 == 1) {
     From = V1;
     To = V2;
-    DestIndex = std::find_if(Mask.begin(), Mask.end(),
-                             [](const int &i) { return i < 4; }) -
+    DestIndex = std::find_if(Mask.begin(), Mask.end(), FromV1Predicate) -
                 Mask.begin();
   } else {
+    assert(std::count_if(Mask.begin(), Mask.end(), FromV2Predicate) == 1 &&
+           "More than one element from V1 and from V2, or no elements from one "
+           "of the vectors. This case should not have returned true from "
+           "isINSERTPSMask");
     From = V2;
     To = V1;
-    DestIndex = std::find_if(Mask.begin(), Mask.end(),
-                             [](const int &i) { return i >= 4; }) -
-                Mask.begin();
+    DestIndex =
+        std::find_if(Mask.begin(), Mask.end(), FromV2Predicate) - Mask.begin();
   }
 
+  unsigned SrcIndex = Mask[DestIndex] % 4;
   if (MayFoldLoad(From)) {
     // Trivial case, when From comes from a load and is only used by the
     // shuffle. Make it use insertps from the vector that we need from that
     // load.
SDValue NewLoad = - NarrowVectorLoadToElement(cast<LoadSDNode>(From), DestIndex, DAG); + NarrowVectorLoadToElement(cast<LoadSDNode>(From), SrcIndex, DAG); if (!NewLoad.getNode()) return SDValue(); @@ -7496,7 +9098,6 @@ static SDValue getINSERTPS(ShuffleVectorSDNode *SVOp, SDLoc &dl, } // Vector-element-to-vector - unsigned SrcIndex = Mask[DestIndex] % 4; SDValue InsertpsMask = DAG.getIntPtrConstant(DestIndex << 4 | SrcIndex << 6); return DAG.getNode(X86ISD::INSERTPS, dl, VT, To, From, InsertpsMask); } @@ -7663,6 +9264,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool OptForSize = MF.getFunction()->getAttributes(). hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + // Check if we should use the experimental vector shuffle lowering. If so, + // delegate completely to that code path. + if (ExperimentalVectorShuffleLowering) + return lowerVectorShuffle(Op, Subtarget, DAG); + assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); if (V1IsUndef && V2IsUndef) @@ -7796,8 +9402,13 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { bool Commuted = false; // FIXME: This should also accept a bitcast of a splat? Be careful, not // 1,1,1,1 -> v8i16 though. - V1IsSplat = isSplatVector(V1.getNode()); - V2IsSplat = isSplatVector(V2.getNode()); + BitVector UndefElements; + if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V1.getNode())) + if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none()) + V1IsSplat = true; + if (auto *BVOp = dyn_cast<BuildVectorSDNode>(V2.getNode())) + if (BVOp->getConstantSplatNode(&UndefElements) && UndefElements.none()) + V2IsSplat = true; // Canonicalize the splat or undef, if present, to be on the RHS. if (!V2IsUndef && V1IsSplat && !V2IsSplat) { @@ -7873,6 +9484,11 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { getShufflePSHUFLWImmediate(SVOp), DAG); + unsigned MaskValue; + if (isBlendMask(M, VT, Subtarget->hasSSE41(), Subtarget->hasInt256(), + &MaskValue)) + return LowerVECTOR_SHUFFLEtoBlend(SVOp, MaskValue, Subtarget, DAG); + if (isSHUFPMask(M, VT)) return getTargetShuffleNode(X86ISD::SHUFP, dl, VT, V1, V2, getShuffleSHUFImmediate(SVOp), DAG); @@ -7910,10 +9526,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { return getTargetShuffleNode(X86ISD::VPERM2X128, dl, VT, V1, V2, getShuffleVPERM2X128Immediate(SVOp), DAG); - SDValue BlendOp = LowerVECTOR_SHUFFLEtoBlend(SVOp, Subtarget, DAG); - if (BlendOp.getNode()) - return BlendOp; - if (Subtarget->hasSSE41() && isINSERTPSMask(M, VT)) return getINSERTPS(SVOp, dl, DAG); @@ -8530,7 +10142,7 @@ X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { // global base reg. unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; - CodeModel::Model M = getTargetMachine().getCodeModel(); + CodeModel::Model M = DAG.getTarget().getCodeModel(); if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) @@ -8563,7 +10175,7 @@ SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // global base reg. unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; - CodeModel::Model M = getTargetMachine().getCodeModel(); + CodeModel::Model M = DAG.getTarget().getCodeModel(); if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) @@ -8596,7 +10208,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { // global base reg. 
unsigned char OpFlag = 0; unsigned WrapperKind = X86ISD::Wrapper; - CodeModel::Model M = getTargetMachine().getCodeModel(); + CodeModel::Model M = DAG.getTarget().getCodeModel(); if (Subtarget->isPICStyleRIPRel() && (M == CodeModel::Small || M == CodeModel::Kernel)) { @@ -8617,7 +10229,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { Result = DAG.getNode(WrapperKind, DL, getPointerTy(), Result); // With PIC, the address is actually $g + Offset. - if (getTargetMachine().getRelocationModel() == Reloc::PIC_ && + if (DAG.getTarget().getRelocationModel() == Reloc::PIC_ && !Subtarget->is64Bit()) { Result = DAG.getNode(ISD::ADD, DL, getPointerTy(), DAG.getNode(X86ISD::GlobalBaseReg, @@ -8639,7 +10251,7 @@ X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { // Create the TargetBlockAddressAddress node. unsigned char OpFlags = Subtarget->ClassifyBlockAddressReference(); - CodeModel::Model M = getTargetMachine().getCodeModel(); + CodeModel::Model M = DAG.getTarget().getCodeModel(); const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset(); SDLoc dl(Op); @@ -8668,8 +10280,8 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, SDLoc dl, // Create the TargetGlobalAddress node, folding in the constant // offset if it is legal. unsigned char OpFlags = - Subtarget->ClassifyGlobalReference(GV, getTargetMachine()); - CodeModel::Model M = getTargetMachine().getCodeModel(); + Subtarget->ClassifyGlobalReference(GV, DAG.getTarget()); + CodeModel::Model M = DAG.getTarget().getCodeModel(); SDValue Result; if (OpFlags == X86II::MO_NO_FLAG && X86::isOffsetSuitableForCodeModel(Offset, M)) { @@ -8868,7 +10480,7 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { const GlobalValue *GV = GA->getGlobal(); if (Subtarget->isTargetELF()) { - TLSModel::Model model = getTargetMachine().getTLSModel(GV); + TLSModel::Model model = DAG.getTarget().getTLSModel(GV); switch (model) { case TLSModel::GeneralDynamic: @@ -8880,9 +10492,9 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { Subtarget->is64Bit()); case TLSModel::InitialExec: case TLSModel::LocalExec: - return LowerToTLSExecModel(GA, DAG, getPointerTy(), model, - Subtarget->is64Bit(), - getTargetMachine().getRelocationModel() == Reloc::PIC_); + return LowerToTLSExecModel( + GA, DAG, getPointerTy(), model, Subtarget->is64Bit(), + DAG.getTarget().getRelocationModel() == Reloc::PIC_); } llvm_unreachable("Unknown TLS model."); } @@ -8895,8 +10507,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // In PIC mode (unless we're in RIPRel PIC mode) we add an offset to the // global base reg. - bool PIC32 = (getTargetMachine().getRelocationModel() == Reloc::PIC_) && - !Subtarget->is64Bit(); + bool PIC32 = (DAG.getTarget().getRelocationModel() == Reloc::PIC_) && + !Subtarget->is64Bit(); if (PIC32) OpFlag = X86II::MO_TLVP_PIC_BASE; else @@ -10050,10 +11662,27 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, break; case X86::COND_G: case X86::COND_GE: case X86::COND_L: case X86::COND_LE: - case X86::COND_O: case X86::COND_NO: - NeedOF = true; + case X86::COND_O: case X86::COND_NO: { + // Check if we really need to set the + // Overflow flag. If NoSignedWrap is present + // that is not actually needed. 
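
// [Sketch] Why the no-signed-wrap escape below is sound, with hypothetical
// values and the wrap simulated in unsigned math: a signed comparison
// lowered through SUB needs OF because the difference can wrap, but nsw
// promises the wrap cannot happen, leaving the sign flag alone sufficient.
#include <cassert>
#include <cstdint>
int main() {
  int32_t A = INT32_MIN, B = 1; // A < B is clearly true
  int32_t Wrapped = int32_t(uint32_t(A) - uint32_t(B)); // what SUB computes
  assert(Wrapped > 0); // the sign bit alone claims A >= B, a lie the CPU
                       // repairs by also setting OF; under nsw this case
                       // is impossible, so OF need not be checked.
  return 0;
}
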
+ switch (Op->getOpcode()) { + case ISD::ADD: + case ISD::SUB: + case ISD::MUL: + case ISD::SHL: { + const BinaryWithFlagsSDNode *BinNode = + cast<BinaryWithFlagsSDNode>(Op.getNode()); + if (BinNode->hasNoSignedWrap()) + break; + } + default: + NeedOF = true; + break; + } break; } + } // See if we can use the EFLAGS value from the operand instead of // doing a separate TEST. TEST always sets OF and CF to 0, so unless // we prove that the arithmetic won't overflow, we can't use OF or CF. @@ -10115,14 +11744,14 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(ArithOp.getNode()->getOperand(1))) { // An add of one will be selected as an INC. - if (C->getAPIntValue() == 1) { + if (C->getAPIntValue() == 1 && !Subtarget->slowIncDec()) { Opcode = X86ISD::INC; NumOperands = 1; break; } // An add of negative one (subtract of one) will be selected as a DEC. - if (C->getAPIntValue().isAllOnesValue()) { + if (C->getAPIntValue().isAllOnesValue() && !Subtarget->slowIncDec()) { Opcode = X86ISD::DEC; NumOperands = 1; break; @@ -10138,7 +11767,7 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, SDLoc dl, // If we have a constant logical shift that's only used in a comparison // against zero turn it into an equivalent AND. This allows turning it into // a TEST instruction later. - if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && + if ((X86CC == X86::COND_E || X86CC == X86::COND_NE) && Op->hasOneUse() && isa<ConstantSDNode>(Op->getOperand(1)) && !hasNonFlagsUse(Op)) { EVT VT = Op.getValueType(); unsigned BitWidth = VT.getSizeInBits(); @@ -11469,8 +13098,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { } if (addTest) { - CC = DAG.getConstant(X86::COND_NE, MVT::i8); - Cond = EmitTest(Cond, X86::COND_NE, dl, DAG); + X86::CondCode X86Cond = Inverted ? X86::COND_E : X86::COND_NE; + CC = DAG.getConstant(X86Cond, MVT::i8); + Cond = EmitTest(Cond, X86Cond, dl, DAG); } Cond = ConvertCmpIfNecessary(Cond, DAG); return DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), @@ -11513,7 +13143,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue(); - const TargetFrameLowering &TFI = *getTargetMachine().getFrameLowering(); + const TargetFrameLowering &TFI = *DAG.getTarget().getFrameLowering(); unsigned StackAlign = TFI.getStackAlignment(); Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) @@ -11572,7 +13202,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Flag); const X86RegisterInfo *RegInfo = - static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo()); + static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo()); unsigned SPReg = RegInfo->getStackRegister(); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy); Chain = SP.getValue(1); @@ -11681,7 +13311,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { if (ArgMode == 2) { // Sanity Check: Make sure using fp_offset makes sense. 
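
// [Sketch] The record the fp_offset probe below is reading, as laid out by
// the SysV x86-64 ABI (field names from the ABI document, not this patch):
struct X86_64VAList {
  unsigned gp_offset;      // next GP register slot; 48 means all six used
  unsigned fp_offset;      // next XMM slot, in 48..176; only valid with SSE
  void *overflow_arg_area; // arguments that spilled to the stack
  void *reg_save_area;     // where the prologue dumped the argument registers
};
// ArgMode == 2 consumes fp_offset, which only means anything when FP
// arguments travel in XMM registers, hence the soft-float assert.
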
- assert(!getTargetMachine().Options.UseSoftFloat && + assert(!DAG.getTarget().Options.UseSoftFloat && !(DAG.getMachineFunction() .getFunction()->getAttributes() .hasAttribute(AttributeSet::FunctionIndex, @@ -12158,11 +13788,37 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) { Op.getOperand(1), Op.getOperand(2)); } + case Intrinsic::x86_sse2_packssdw_128: + case Intrinsic::x86_sse2_packsswb_128: + case Intrinsic::x86_avx2_packssdw: + case Intrinsic::x86_avx2_packsswb: + return DAG.getNode(X86ISD::PACKSS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::x86_sse2_packuswb_128: + case Intrinsic::x86_sse41_packusdw: + case Intrinsic::x86_avx2_packuswb: + case Intrinsic::x86_avx2_packusdw: + return DAG.getNode(X86ISD::PACKUS, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_pshuf_b_128: case Intrinsic::x86_avx2_pshuf_b: return DAG.getNode(X86ISD::PSHUFB, dl, Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_sse2_pshuf_d: + return DAG.getNode(X86ISD::PSHUFD, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::x86_sse2_pshufl_w: + return DAG.getNode(X86ISD::PSHUFLW, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + + case Intrinsic::x86_sse2_pshufh_w: + return DAG.getNode(X86ISD::PSHUFHW, dl, Op.getValueType(), + Op.getOperand(1), Op.getOperand(2)); + case Intrinsic::x86_ssse3_psign_b_128: case Intrinsic::x86_ssse3_psign_w_128: case Intrinsic::x86_ssse3_psign_d_128: @@ -12610,6 +14266,51 @@ static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, return SDValue(Res, 0); } +// getReadPerformanceCounter - Handles the lowering of builtin intrinsics that +// read performance monitor counters (x86_rdpmc). +static void getReadPerformanceCounter(SDNode *N, SDLoc DL, + SelectionDAG &DAG, const X86Subtarget *Subtarget, + SmallVectorImpl<SDValue> &Results) { + assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); + SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); + SDValue LO, HI; + + // The ECX register is used to select the index of the performance counter + // to read. + SDValue Chain = DAG.getCopyToReg(N->getOperand(0), DL, X86::ECX, + N->getOperand(2)); + SDValue rd = DAG.getNode(X86ISD::RDPMC_DAG, DL, Tys, Chain); + + // Reads the content of a 64-bit performance counter and returns it in the + // registers EDX:EAX. + if (Subtarget->is64Bit()) { + LO = DAG.getCopyFromReg(rd, DL, X86::RAX, MVT::i64, rd.getValue(1)); + HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64, + LO.getValue(2)); + } else { + LO = DAG.getCopyFromReg(rd, DL, X86::EAX, MVT::i32, rd.getValue(1)); + HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32, + LO.getValue(2)); + } + Chain = HI.getValue(1); + + if (Subtarget->is64Bit()) { + // The EAX register is loaded with the low-order 32 bits. The EDX register + // is loaded with the supported high-order bits of the counter. + SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, + DAG.getConstant(32, MVT::i8)); + Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp)); + Results.push_back(Chain); + return; + } + + // Use a buildpair to merge the two 32-bit values into a 64-bit one. 
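
// [Sketch] The scalar arithmetic of the SHL/OR merge used on 64-bit targets
// above; the BUILD_PAIR below is the 32-bit equivalent. Hypothetical counter
// value, with RDX:RAX holding the zero-extended 32-bit halves.
#include <cassert>
#include <cstdint>
int main() {
  uint64_t LO = 0x89ABCDEF;           // low half, from RAX
  uint64_t HI = 0x01234567;           // high half, from RDX
  uint64_t Counter = (HI << 32) | LO; // DAG form: OR(SHL(HI, 32), LO)
  assert(Counter == 0x0123456789ABCDEFull);
  return 0;
}
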
+ SDValue Ops[] = { LO, HI }; + SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops); + Results.push_back(Pair); + Results.push_back(Chain); +} + // getReadTimeStampCounter - Handles the lowering of builtin intrinsics that // read the time stamp counter (x86_rdtsc and x86_rdtscp). This function is // also used to custom lower READCYCLECOUNTER nodes. @@ -12674,7 +14375,7 @@ static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget *Subtarget, } enum IntrinsicType { - GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDTSC, XTEST + GATHER, SCATTER, PREFETCH, RDSEED, RDRAND, RDPMC, RDTSC, XTEST }; struct IntrinsicData { @@ -12768,6 +14469,8 @@ static void InitIntinsicsMap() { IntrinsicData(RDTSC, X86ISD::RDTSC_DAG, 0))); IntrMap.insert(std::make_pair(Intrinsic::x86_rdtscp, IntrinsicData(RDTSC, X86ISD::RDTSCP_DAG, 0))); + IntrMap.insert(std::make_pair(Intrinsic::x86_rdpmc, + IntrinsicData(RDPMC, X86ISD::RDPMC_DAG, 0))); Initialized = true; } @@ -12826,7 +14529,7 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, case PREFETCH: { SDValue Hint = Op.getOperand(6); unsigned HintVal; - if (dyn_cast<ConstantSDNode> (Hint) == 0 || + if (dyn_cast<ConstantSDNode> (Hint) == nullptr || (HintVal = dyn_cast<ConstantSDNode> (Hint)->getZExtValue()) > 1) llvm_unreachable("Wrong prefetch hint in intrinsic: should be 0 or 1"); unsigned Opcode = (HintVal ? Intr.Opc1 : Intr.Opc0); @@ -12843,6 +14546,12 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget, getReadTimeStampCounter(Op.getNode(), dl, Intr.Opc0, DAG, Subtarget, Results); return DAG.getMergeValues(Results, dl); } + // Read Performance Monitoring Counters. + case RDPMC: { + SmallVector<SDValue, 2> Results; + getReadPerformanceCounter(Op.getNode(), dl, DAG, Subtarget, Results); + return DAG.getMergeValues(Results, dl); + } // XTEST intrinsics. 
case XTEST: { SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); @@ -12873,7 +14582,7 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, if (Depth > 0) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); const X86RegisterInfo *RegInfo = - static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo()); + static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo()); SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), PtrVT); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, PtrVT, @@ -12895,7 +14604,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue(); const X86RegisterInfo *RegInfo = - static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo()); + static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo()); unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); assert(((FrameReg == X86::RBP && VT == MVT::i64) || (FrameReg == X86::EBP && VT == MVT::i32)) && @@ -12924,7 +14633,7 @@ unsigned X86TargetLowering::getRegisterByName(const char* RegName, SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, SelectionDAG &DAG) const { const X86RegisterInfo *RegInfo = - static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo()); + static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo()); return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize()); } @@ -12936,7 +14645,7 @@ SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); const X86RegisterInfo *RegInfo = - static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo()); + static_cast<const X86RegisterInfo*>(DAG.getTarget().getRegisterInfo()); unsigned FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || (FrameReg == X86::EBP && PtrVT == MVT::i32)) && @@ -12983,7 +14692,7 @@ SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, SDLoc dl (Op); const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); - const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo* TRI = DAG.getTarget().getRegisterInfo(); if (Subtarget->is64Bit()) { SDValue OutChains[6]; @@ -13431,7 +15140,7 @@ SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) cons CLI.setDebugLoc(dl).setChain(InChain) .setCallee(getLibcallCallingConv(LC), static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()), - Callee, &Args, 0) + Callee, std::move(Args), 0) .setInRegister().setSExtResult(isSigned).setZExtResult(!isSigned); std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); @@ -13448,7 +15157,7 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, (VT == MVT::v8i32 && Subtarget->hasInt256())); // Get the high parts. - const int Mask[] = {1, 2, 3, 4, 5, 6, 7, 8}; + const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1}; SDValue Hi0 = DAG.getVectorShuffle(VT, dl, Op0, Op0, Mask); SDValue Hi1 = DAG.getVectorShuffle(VT, dl, Op1, Op1, Mask); @@ -13464,10 +15173,18 @@ static SDValue LowerMUL_LOHI(SDValue Op, const X86Subtarget *Subtarget, DAG.getNode(Opcode, dl, MulVT, Hi0, Hi1)); // Shuffle it back into the right order. 
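
// [Sketch] The per-lane math this lowering leans on, with hypothetical
// inputs: PMULUDQ multiplies the even 32-bit lanes into 64-bit products, the
// corrected {1, -1, 3, -1, ...} shuffle above feeds the odd lanes to a
// second PMULUDQ, and the shuffles below split each product back into its
// high and low 32-bit halves.
#include <cassert>
#include <cstdint>
int main() {
  uint32_t A = 0xDEADBEEF, B = 0x12345678;
  uint64_t Product = uint64_t(A) * uint64_t(B); // one PMULUDQ lane
  uint32_t Lo = uint32_t(Product);              // picked by the "Lows" mask
  uint32_t Hi = uint32_t(Product >> 32);        // picked by the "Highs" mask
  assert(((uint64_t(Hi) << 32) | Lo) == Product);
  return 0;
}
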
- const int HighMask[] = {1, 5, 3, 7, 9, 13, 11, 15}; - SDValue Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); - const int LowMask[] = {0, 4, 2, 6, 8, 12, 10, 14}; - SDValue Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); + SDValue Highs, Lows; + if (VT == MVT::v8i32) { + const int HighMask[] = {1, 9, 3, 11, 5, 13, 7, 15}; + Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); + const int LowMask[] = {0, 8, 2, 10, 4, 12, 6, 14}; + Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); + } else { + const int HighMask[] = {1, 5, 3, 7}; + Highs = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, HighMask); + const int LowMask[] = {0, 4, 2, 6}; + Lows = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, LowMask); + } // If we have a signed multiply but no PMULDQ fix up the high parts of a // unsigned multiply. @@ -13494,10 +15211,9 @@ static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, SDValue Amt = Op.getOperand(1); // Optimize shl/srl/sra with constant shift amount. - if (isSplatVector(Amt.getNode())) { - SDValue SclrAmt = Amt->getOperand(0); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) { - uint64_t ShiftAmt = C->getZExtValue(); + if (auto *BVAmt = dyn_cast<BuildVectorSDNode>(Amt)) { + if (auto *ShiftConst = BVAmt->getConstantSplatNode()) { + uint64_t ShiftAmt = ShiftConst->getZExtValue(); if (VT == MVT::v2i64 || VT == MVT::v4i32 || VT == MVT::v8i16 || (Subtarget->hasInt256() && @@ -13804,15 +15520,14 @@ static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, static SDValue LowerShift(SDValue Op, const X86Subtarget* Subtarget, SelectionDAG &DAG) { - MVT VT = Op.getSimpleValueType(); SDLoc dl(Op); SDValue R = Op.getOperand(0); SDValue Amt = Op.getOperand(1); SDValue V; - if (!Subtarget->hasSSE2()) - return SDValue(); + assert(VT.isVector() && "Custom lowering only for vector shifts!"); + assert(Subtarget->hasSSE2() && "Only custom lower when we have SSE2!"); V = LowerScalarImmediateShift(Op, DAG, Subtarget); if (V.getNode()) @@ -14254,7 +15969,7 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, break; } SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg, - Op.getOperand(2), SDValue()); + Op.getOperand(2), SDValue()); SDValue Ops[] = { cpIn.getValue(0), Op.getOperand(1), Op.getOperand(3), @@ -14264,9 +15979,18 @@ static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget *Subtarget, MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand(); SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, Ops, T, MMO); + SDValue cpOut = DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); - return cpOut; + SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS, + MVT::i32, cpOut.getValue(2)); + SDValue Success = DAG.getNode(X86ISD::SETCC, DL, Op->getValueType(1), + DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS); + + DAG.ReplaceAllUsesOfValueWith(Op.getValue(0), cpOut); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Success); + DAG.ReplaceAllUsesOfValueWith(Op.getValue(2), EFLAGS.getValue(1)); + return SDValue(); } static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget, @@ -14422,7 +16146,7 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(DAG.getEntryNode()) - .setCallee(CallingConv::C, RetTy, Callee, &Args, 0); + .setCallee(CallingConv::C, RetTy, Callee, std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); @@ 
-14446,7 +16170,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { default: llvm_unreachable("Should not custom lower this!"); case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op,DAG); case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG); - case ISD::ATOMIC_CMP_SWAP: return LowerCMP_SWAP(Op, Subtarget, DAG); + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: + return LowerCMP_SWAP(Op, Subtarget, DAG); case ISD::ATOMIC_LOAD_SUB: return LowerLOAD_SUB(Op,DAG); case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op,DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); @@ -14528,8 +16253,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { } static void ReplaceATOMIC_LOAD(SDNode *Node, - SmallVectorImpl<SDValue> &Results, - SelectionDAG &DAG) { + SmallVectorImpl<SDValue> &Results, + SelectionDAG &DAG) { SDLoc dl(Node); EVT VT = cast<AtomicSDNode>(Node)->getMemoryVT(); @@ -14538,38 +16263,16 @@ static void ReplaceATOMIC_LOAD(SDNode *Node, // (The only way to get a 16-byte load is cmpxchg16b) // FIXME: 16-byte ATOMIC_CMP_SWAP isn't actually hooked up at the moment. SDValue Zero = DAG.getConstant(0, VT); - SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, - Node->getOperand(0), - Node->getOperand(1), Zero, Zero, - cast<AtomicSDNode>(Node)->getMemOperand(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getOrdering(), - cast<AtomicSDNode>(Node)->getSynchScope()); + SDVTList VTs = DAG.getVTList(VT, MVT::i1, MVT::Other); + SDValue Swap = + DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, VT, VTs, + Node->getOperand(0), Node->getOperand(1), Zero, Zero, + cast<AtomicSDNode>(Node)->getMemOperand(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getOrdering(), + cast<AtomicSDNode>(Node)->getSynchScope()); Results.push_back(Swap.getValue(0)); - Results.push_back(Swap.getValue(1)); -} - -static void -ReplaceATOMIC_BINARY_64(SDNode *Node, SmallVectorImpl<SDValue>&Results, - SelectionDAG &DAG, unsigned NewOp) { - SDLoc dl(Node); - assert (Node->getValueType(0) == MVT::i64 && - "Only know how to expand i64 atomics"); - - SDValue Chain = Node->getOperand(0); - SDValue In1 = Node->getOperand(1); - SDValue In2L = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(0)); - SDValue In2H = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(1)); - SDValue Ops[] = { Chain, In1, In2L, In2H }; - SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); - SDValue Result = - DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops, MVT::i64, - cast<MemSDNode>(Node)->getMemOperand()); - SDValue OpsF[] = { Result.getValue(0), Result.getValue(1)}; - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF)); - Results.push_back(Result.getValue(2)); + Results.push_back(Swap.getValue(2)); } /// ReplaceNodeResults - Replace a node with an illegal result type @@ -14656,13 +16359,15 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, case Intrinsic::x86_rdtscp: return getReadTimeStampCounter(N, dl, X86ISD::RDTSCP_DAG, DAG, Subtarget, Results); + case Intrinsic::x86_rdpmc: + return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results); } } case ISD::READCYCLECOUNTER: { return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget, Results); } - case ISD::ATOMIC_CMP_SWAP: { + case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { EVT T = N->getValueType(0); assert((T == MVT::i64 || T == 
MVT::i128) && "can only expand cmpxchg pair"); bool Regs64bit = T == MVT::i128; @@ -14704,61 +16409,33 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, Regs64bit ? X86::RDX : X86::EDX, HalfT, cpOutL.getValue(2)); SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; + + SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS, + MVT::i32, cpOutH.getValue(2)); + SDValue Success = + DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(X86::COND_E, MVT::i8), EFLAGS); + Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1)); + Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF)); - Results.push_back(cpOutH.getValue(1)); + Results.push_back(Success); + Results.push_back(EFLAGS.getValue(1)); return; } + case ISD::ATOMIC_SWAP: case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_AND: - case ISD::ATOMIC_LOAD_NAND: case ISD::ATOMIC_LOAD_OR: - case ISD::ATOMIC_LOAD_SUB: case ISD::ATOMIC_LOAD_XOR: - case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_NAND: case ISD::ATOMIC_LOAD_MIN: - case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_LOAD_MAX: case ISD::ATOMIC_LOAD_UMIN: - case ISD::ATOMIC_SWAP: { - unsigned Opc; - switch (N->getOpcode()) { - default: llvm_unreachable("Unexpected opcode"); - case ISD::ATOMIC_LOAD_ADD: - Opc = X86ISD::ATOMADD64_DAG; - break; - case ISD::ATOMIC_LOAD_AND: - Opc = X86ISD::ATOMAND64_DAG; - break; - case ISD::ATOMIC_LOAD_NAND: - Opc = X86ISD::ATOMNAND64_DAG; - break; - case ISD::ATOMIC_LOAD_OR: - Opc = X86ISD::ATOMOR64_DAG; - break; - case ISD::ATOMIC_LOAD_SUB: - Opc = X86ISD::ATOMSUB64_DAG; - break; - case ISD::ATOMIC_LOAD_XOR: - Opc = X86ISD::ATOMXOR64_DAG; - break; - case ISD::ATOMIC_LOAD_MAX: - Opc = X86ISD::ATOMMAX64_DAG; - break; - case ISD::ATOMIC_LOAD_MIN: - Opc = X86ISD::ATOMMIN64_DAG; - break; - case ISD::ATOMIC_LOAD_UMAX: - Opc = X86ISD::ATOMUMAX64_DAG; - break; - case ISD::ATOMIC_LOAD_UMIN: - Opc = X86ISD::ATOMUMIN64_DAG; - break; - case ISD::ATOMIC_SWAP: - Opc = X86ISD::ATOMSWAP64_DAG; - break; - } - ReplaceATOMIC_BINARY_64(N, Results, DAG, Opc); - return; - } + case ISD::ATOMIC_LOAD_UMAX: + // Delegate to generic TypeLegalization. Situations we can really handle + // should have already been dealt with by X86AtomicExpand.cpp. + break; case ISD::ATOMIC_LOAD: { ReplaceATOMIC_LOAD(N, Results, DAG); return; @@ -14779,6 +16456,13 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, MVT::v2f64, N->getOperand(0)); SDValue ToVecInt = DAG.getNode(ISD::BITCAST, dl, WiderVT, Expanded); + if (ExperimentalVectorWideningLegalization) { + // If we are legalizing vectors by widening, we already have the desired + // legal vector type, just return it. 
+ Results.push_back(ToVecInt); + return; + } + SmallVector<SDValue, 8> Elts; for (unsigned i = 0, e = NumElts; i != e; ++i) Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, @@ -14810,6 +16494,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FST: return "X86ISD::FST"; case X86ISD::CALL: return "X86ISD::CALL"; case X86ISD::RDTSC_DAG: return "X86ISD::RDTSC_DAG"; + case X86ISD::RDTSCP_DAG: return "X86ISD::RDTSCP_DAG"; + case X86ISD::RDPMC_DAG: return "X86ISD::RDPMC_DAG"; case X86ISD::BT: return "X86ISD::BT"; case X86ISD::CMP: return "X86ISD::CMP"; case X86ISD::COMI: return "X86ISD::COMI"; @@ -14863,12 +16549,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::FNSTSW16r: return "X86ISD::FNSTSW16r"; case X86ISD::LCMPXCHG_DAG: return "X86ISD::LCMPXCHG_DAG"; case X86ISD::LCMPXCHG8_DAG: return "X86ISD::LCMPXCHG8_DAG"; - case X86ISD::ATOMADD64_DAG: return "X86ISD::ATOMADD64_DAG"; - case X86ISD::ATOMSUB64_DAG: return "X86ISD::ATOMSUB64_DAG"; - case X86ISD::ATOMOR64_DAG: return "X86ISD::ATOMOR64_DAG"; - case X86ISD::ATOMXOR64_DAG: return "X86ISD::ATOMXOR64_DAG"; - case X86ISD::ATOMAND64_DAG: return "X86ISD::ATOMAND64_DAG"; - case X86ISD::ATOMNAND64_DAG: return "X86ISD::ATOMNAND64_DAG"; + case X86ISD::LCMPXCHG16_DAG: return "X86ISD::LCMPXCHG16_DAG"; case X86ISD::VZEXT_MOVL: return "X86ISD::VZEXT_MOVL"; case X86ISD::VZEXT_LOAD: return "X86ISD::VZEXT_LOAD"; case X86ISD::VZEXT: return "X86ISD::VZEXT"; @@ -14909,6 +16590,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::TESTM: return "X86ISD::TESTM"; case X86ISD::TESTNM: return "X86ISD::TESTNM"; case X86ISD::KORTEST: return "X86ISD::KORTEST"; + case X86ISD::PACKSS: return "X86ISD::PACKSS"; + case X86ISD::PACKUS: return "X86ISD::PACKUS"; case X86ISD::PALIGNR: return "X86ISD::PALIGNR"; case X86ISD::PSHUFD: return "X86ISD::PSHUFD"; case X86ISD::PSHUFHW: return "X86ISD::PSHUFHW"; @@ -15173,7 +16856,8 @@ X86TargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M, isUNPCKLMask(M, SVT, Subtarget->hasInt256()) || isUNPCKHMask(M, SVT, Subtarget->hasInt256()) || isUNPCKL_v_undef_Mask(M, SVT, Subtarget->hasInt256()) || - isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256())); + isUNPCKH_v_undef_Mask(M, SVT, Subtarget->hasInt256()) || + isBlendMask(M, SVT, Subtarget->hasSSE41(), Subtarget->hasInt256())); } bool @@ -15256,685 +16940,6 @@ static MachineBasicBlock *EmitXBegin(MachineInstr *MI, MachineBasicBlock *MBB, return sinkMBB; } -// Get CMPXCHG opcode for the specified data type. -static unsigned getCmpXChgOpcode(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - case MVT::i8: return X86::LCMPXCHG8; - case MVT::i16: return X86::LCMPXCHG16; - case MVT::i32: return X86::LCMPXCHG32; - case MVT::i64: return X86::LCMPXCHG64; - default: - break; - } - llvm_unreachable("Invalid operand size!"); -} - -// Get LOAD opcode for the specified data type. -static unsigned getLoadOpcode(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - case MVT::i8: return X86::MOV8rm; - case MVT::i16: return X86::MOV16rm; - case MVT::i32: return X86::MOV32rm; - case MVT::i64: return X86::MOV64rm; - default: - break; - } - llvm_unreachable("Invalid operand size!"); -} - -// Get opcode of the non-atomic one from the specified atomic instruction. 
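A note for context (an illustrative sketch, not part of the patch): ATOMIC_CMP_SWAP_WITH_SUCCESS carries an extra i1 result, which the cmpxchg-pair expansion above derives from EFLAGS via SETCC(X86::COND_E) after LCMPXCHG8B/LCMPXCHG16B. That success bit is exactly what C++'s compare-exchange returns at the source level; a minimal plain-C++ model (cas_with_success is an invented name):

  #include <atomic>
  #include <cstdint>

  // Returns the ZF-derived success flag; on failure, 'expected' is
  // updated with the value actually loaded (the EDX:EAX / RDX:RAX
  // pair in the expansion above).
  bool cas_with_success(std::atomic<std::uint64_t> &loc,
                        std::uint64_t &expected, std::uint64_t desired) {
    return loc.compare_exchange_strong(expected, desired);
  }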
-static unsigned getNonAtomicOpcode(unsigned Opc) { - switch (Opc) { - case X86::ATOMAND8: return X86::AND8rr; - case X86::ATOMAND16: return X86::AND16rr; - case X86::ATOMAND32: return X86::AND32rr; - case X86::ATOMAND64: return X86::AND64rr; - case X86::ATOMOR8: return X86::OR8rr; - case X86::ATOMOR16: return X86::OR16rr; - case X86::ATOMOR32: return X86::OR32rr; - case X86::ATOMOR64: return X86::OR64rr; - case X86::ATOMXOR8: return X86::XOR8rr; - case X86::ATOMXOR16: return X86::XOR16rr; - case X86::ATOMXOR32: return X86::XOR32rr; - case X86::ATOMXOR64: return X86::XOR64rr; - } - llvm_unreachable("Unhandled atomic-load-op opcode!"); -} - -// Get opcode of the non-atomic one from the specified atomic instruction with -// extra opcode. -static unsigned getNonAtomicOpcodeWithExtraOpc(unsigned Opc, - unsigned &ExtraOpc) { - switch (Opc) { - case X86::ATOMNAND8: ExtraOpc = X86::NOT8r; return X86::AND8rr; - case X86::ATOMNAND16: ExtraOpc = X86::NOT16r; return X86::AND16rr; - case X86::ATOMNAND32: ExtraOpc = X86::NOT32r; return X86::AND32rr; - case X86::ATOMNAND64: ExtraOpc = X86::NOT64r; return X86::AND64rr; - case X86::ATOMMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVL32rr; - case X86::ATOMMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVL16rr; - case X86::ATOMMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVL32rr; - case X86::ATOMMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVL64rr; - case X86::ATOMMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVG32rr; - case X86::ATOMMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVG16rr; - case X86::ATOMMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVG32rr; - case X86::ATOMMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVG64rr; - case X86::ATOMUMAX8: ExtraOpc = X86::CMP8rr; return X86::CMOVB32rr; - case X86::ATOMUMAX16: ExtraOpc = X86::CMP16rr; return X86::CMOVB16rr; - case X86::ATOMUMAX32: ExtraOpc = X86::CMP32rr; return X86::CMOVB32rr; - case X86::ATOMUMAX64: ExtraOpc = X86::CMP64rr; return X86::CMOVB64rr; - case X86::ATOMUMIN8: ExtraOpc = X86::CMP8rr; return X86::CMOVA32rr; - case X86::ATOMUMIN16: ExtraOpc = X86::CMP16rr; return X86::CMOVA16rr; - case X86::ATOMUMIN32: ExtraOpc = X86::CMP32rr; return X86::CMOVA32rr; - case X86::ATOMUMIN64: ExtraOpc = X86::CMP64rr; return X86::CMOVA64rr; - } - llvm_unreachable("Unhandled atomic-load-op opcode!"); -} - -// Get opcode of the non-atomic one from the specified atomic instruction for -// 64-bit data type on 32-bit target. -static unsigned getNonAtomic6432Opcode(unsigned Opc, unsigned &HiOpc) { - switch (Opc) { - case X86::ATOMAND6432: HiOpc = X86::AND32rr; return X86::AND32rr; - case X86::ATOMOR6432: HiOpc = X86::OR32rr; return X86::OR32rr; - case X86::ATOMXOR6432: HiOpc = X86::XOR32rr; return X86::XOR32rr; - case X86::ATOMADD6432: HiOpc = X86::ADC32rr; return X86::ADD32rr; - case X86::ATOMSUB6432: HiOpc = X86::SBB32rr; return X86::SUB32rr; - case X86::ATOMSWAP6432: HiOpc = X86::MOV32rr; return X86::MOV32rr; - case X86::ATOMMAX6432: HiOpc = X86::SETLr; return X86::SETLr; - case X86::ATOMMIN6432: HiOpc = X86::SETGr; return X86::SETGr; - case X86::ATOMUMAX6432: HiOpc = X86::SETBr; return X86::SETBr; - case X86::ATOMUMIN6432: HiOpc = X86::SETAr; return X86::SETAr; - } - llvm_unreachable("Unhandled atomic-load-op opcode!"); -} - -// Get opcode of the non-atomic one from the specified atomic instruction for -// 64-bit data type on 32-bit target with extra opcode. 
-static unsigned getNonAtomic6432OpcodeWithExtraOpc(unsigned Opc, - unsigned &HiOpc, - unsigned &ExtraOpc) { - switch (Opc) { - case X86::ATOMNAND6432: - ExtraOpc = X86::NOT32r; - HiOpc = X86::AND32rr; - return X86::AND32rr; - } - llvm_unreachable("Unhandled atomic-load-op opcode!"); -} - -// Get pseudo CMOV opcode from the specified data type. -static unsigned getPseudoCMOVOpc(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - case MVT::i8: return X86::CMOV_GR8; - case MVT::i16: return X86::CMOV_GR16; - case MVT::i32: return X86::CMOV_GR32; - default: - break; - } - llvm_unreachable("Unknown CMOV opcode!"); -} - -// EmitAtomicLoadArith - emit the code sequence for pseudo atomic instructions. -// They will be translated into a spin-loop or compare-exchange loop from -// -// ... -// dst = atomic-fetch-op MI.addr, MI.val -// ... -// -// to -// -// ... -// t1 = LOAD MI.addr -// loop: -// t4 = phi(t1, t3 / loop) -// t2 = OP MI.val, t4 -// EAX = t4 -// LCMPXCHG [MI.addr], t2, [EAX is implicitly used & defined] -// t3 = EAX -// JNE loop -// sink: -// dst = t3 -// ... -MachineBasicBlock * -X86TargetLowering::EmitAtomicLoadArith(MachineInstr *MI, - MachineBasicBlock *MBB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - - MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - - const BasicBlock *BB = MBB->getBasicBlock(); - MachineFunction::iterator I = MBB; - ++I; - - assert(MI->getNumOperands() <= X86::AddrNumOperands + 4 && - "Unexpected number of operands"); - - assert(MI->hasOneMemOperand() && - "Expected atomic-load-op to have one memoperand"); - - // Memory Reference - MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); - MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); - - unsigned DstReg, SrcReg; - unsigned MemOpndSlot; - - unsigned CurOp = 0; - - DstReg = MI->getOperand(CurOp++).getReg(); - MemOpndSlot = CurOp; - CurOp += X86::AddrNumOperands; - SrcReg = MI->getOperand(CurOp++).getReg(); - - const TargetRegisterClass *RC = MRI.getRegClass(DstReg); - MVT::SimpleValueType VT = *RC->vt_begin(); - unsigned t1 = MRI.createVirtualRegister(RC); - unsigned t2 = MRI.createVirtualRegister(RC); - unsigned t3 = MRI.createVirtualRegister(RC); - unsigned t4 = MRI.createVirtualRegister(RC); - unsigned PhyReg = getX86SubSuperRegister(X86::EAX, VT); - - unsigned LCMPXCHGOpc = getCmpXChgOpcode(VT); - unsigned LOADOpc = getLoadOpcode(VT); - - // For the atomic load-arith operator, we generate - // - // thisMBB: - // t1 = LOAD [MI.addr] - // mainMBB: - // t4 = phi(t1 / thisMBB, t3 / mainMBB) - // t1 = OP MI.val, EAX - // EAX = t4 - // LCMPXCHG [MI.addr], t1, [EAX is implicitly used & defined] - // t3 = EAX - // JNE mainMBB - // sinkMBB: - // dst = t3 - - MachineBasicBlock *thisMBB = MBB; - MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); - MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); - MF->insert(I, mainMBB); - MF->insert(I, sinkMBB); - - MachineInstrBuilder MIB; - - // Transfer the remainder of BB and its successor edges to sinkMBB. 
- sinkMBB->splice(sinkMBB->begin(), MBB, - std::next(MachineBasicBlock::iterator(MI)), MBB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); - - // thisMBB: - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); - if (NewMO.isReg()) - NewMO.setIsKill(false); - MIB.addOperand(NewMO); - } - for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) { - unsigned flags = (*MMOI)->getFlags(); - flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad; - MachineMemOperand *MMO = - MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags, - (*MMOI)->getSize(), - (*MMOI)->getBaseAlignment(), - (*MMOI)->getTBAAInfo(), - (*MMOI)->getRanges()); - MIB.addMemOperand(MMO); - } - - thisMBB->addSuccessor(mainMBB); - - // mainMBB: - MachineBasicBlock *origMainMBB = mainMBB; - - // Add a PHI. - MachineInstr *Phi = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4) - .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); - - unsigned Opc = MI->getOpcode(); - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic-load-op opcode!"); - case X86::ATOMAND8: - case X86::ATOMAND16: - case X86::ATOMAND32: - case X86::ATOMAND64: - case X86::ATOMOR8: - case X86::ATOMOR16: - case X86::ATOMOR32: - case X86::ATOMOR64: - case X86::ATOMXOR8: - case X86::ATOMXOR16: - case X86::ATOMXOR32: - case X86::ATOMXOR64: { - unsigned ARITHOpc = getNonAtomicOpcode(Opc); - BuildMI(mainMBB, DL, TII->get(ARITHOpc), t2).addReg(SrcReg) - .addReg(t4); - break; - } - case X86::ATOMNAND8: - case X86::ATOMNAND16: - case X86::ATOMNAND32: - case X86::ATOMNAND64: { - unsigned Tmp = MRI.createVirtualRegister(RC); - unsigned NOTOpc; - unsigned ANDOpc = getNonAtomicOpcodeWithExtraOpc(Opc, NOTOpc); - BuildMI(mainMBB, DL, TII->get(ANDOpc), Tmp).addReg(SrcReg) - .addReg(t4); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t2).addReg(Tmp); - break; - } - case X86::ATOMMAX8: - case X86::ATOMMAX16: - case X86::ATOMMAX32: - case X86::ATOMMAX64: - case X86::ATOMMIN8: - case X86::ATOMMIN16: - case X86::ATOMMIN32: - case X86::ATOMMIN64: - case X86::ATOMUMAX8: - case X86::ATOMUMAX16: - case X86::ATOMUMAX32: - case X86::ATOMUMAX64: - case X86::ATOMUMIN8: - case X86::ATOMUMIN16: - case X86::ATOMUMIN32: - case X86::ATOMUMIN64: { - unsigned CMPOpc; - unsigned CMOVOpc = getNonAtomicOpcodeWithExtraOpc(Opc, CMPOpc); - - BuildMI(mainMBB, DL, TII->get(CMPOpc)) - .addReg(SrcReg) - .addReg(t4); - - if (Subtarget->hasCMov()) { - if (VT != MVT::i8) { - // Native support - BuildMI(mainMBB, DL, TII->get(CMOVOpc), t2) - .addReg(SrcReg) - .addReg(t4); - } else { - // Promote i8 to i32 to use CMOV32 - const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo(); - const TargetRegisterClass *RC32 = - TRI->getSubClassWithSubReg(getRegClassFor(MVT::i32), X86::sub_8bit); - unsigned SrcReg32 = MRI.createVirtualRegister(RC32); - unsigned AccReg32 = MRI.createVirtualRegister(RC32); - unsigned Tmp = MRI.createVirtualRegister(RC32); - - unsigned Undef = MRI.createVirtualRegister(RC32); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Undef); - - BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), SrcReg32) - .addReg(Undef) - .addReg(SrcReg) - .addImm(X86::sub_8bit); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::INSERT_SUBREG), AccReg32) - .addReg(Undef) - .addReg(t4) - .addImm(X86::sub_8bit); - - BuildMI(mainMBB, DL, TII->get(CMOVOpc), Tmp) - .addReg(SrcReg32) - .addReg(AccReg32); - - BuildMI(mainMBB, DL, 
TII->get(TargetOpcode::COPY), t2) - .addReg(Tmp, 0, X86::sub_8bit); - } - } else { - // Use pseudo select and lower them. - assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) && - "Invalid atomic-load-op transformation!"); - unsigned SelOpc = getPseudoCMOVOpc(VT); - X86::CondCode CC = X86::getCondFromCMovOpc(CMOVOpc); - assert(CC != X86::COND_INVALID && "Invalid atomic-load-op transformation!"); - MIB = BuildMI(mainMBB, DL, TII->get(SelOpc), t2) - .addReg(SrcReg).addReg(t4) - .addImm(CC); - mainMBB = EmitLoweredSelect(MIB, mainMBB); - // Replace the original PHI node as mainMBB is changed after CMOV - // lowering. - BuildMI(*origMainMBB, Phi, DL, TII->get(X86::PHI), t4) - .addReg(t1).addMBB(thisMBB).addReg(t3).addMBB(mainMBB); - Phi->eraseFromParent(); - } - break; - } - } - - // Copy PhyReg back from virtual register. - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), PhyReg) - .addReg(t4); - - MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); - if (NewMO.isReg()) - NewMO.setIsKill(false); - MIB.addOperand(NewMO); - } - MIB.addReg(t2); - MIB.setMemRefs(MMOBegin, MMOEnd); - - // Copy PhyReg back to virtual register. - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3) - .addReg(PhyReg); - - BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); - - mainMBB->addSuccessor(origMainMBB); - mainMBB->addSuccessor(sinkMBB); - - // sinkMBB: - BuildMI(*sinkMBB, sinkMBB->begin(), DL, - TII->get(TargetOpcode::COPY), DstReg) - .addReg(t3); - - MI->eraseFromParent(); - return sinkMBB; -} - -// EmitAtomicLoadArith6432 - emit the code sequence for pseudo atomic -// instructions. They will be translated into a spin-loop or compare-exchange -// loop from -// -// ... -// dst = atomic-fetch-op MI.addr, MI.val -// ... -// -// to -// -// ... -// t1L = LOAD [MI.addr + 0] -// t1H = LOAD [MI.addr + 4] -// loop: -// t4L = phi(t1L, t3L / loop) -// t4H = phi(t1H, t3H / loop) -// t2L = OP MI.val.lo, t4L -// t2H = OP MI.val.hi, t4H -// EAX = t4L -// EDX = t4H -// EBX = t2L -// ECX = t2H -// LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] -// t3L = EAX -// t3H = EDX -// JNE loop -// sink: -// dstL = t3L -// dstH = t3H -// ... 
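The pseudo-code comment above is the classic CMPXCHG8B retry loop. The same shape falls out of ordinary C++ on a 32-bit x86 target; a hedged sketch (fetch_add64 is an invented name, and add stands in for any of the OP cases):

  #include <atomic>
  #include <cstdint>

  // On i386 this compiles to: load EDX:EAX, compute the new pair into
  // ECX:EBX, LOCK CMPXCHG8B, and loop while ZF is clear.
  std::uint64_t fetch_add64(std::atomic<std::uint64_t> &v, std::uint64_t x) {
    std::uint64_t old = v.load();
    while (!v.compare_exchange_weak(old, old + x))
      ; // 'old' is refreshed with the observed value on each failure
    return old;
  }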
-MachineBasicBlock * -X86TargetLowering::EmitAtomicLoadArith6432(MachineInstr *MI, - MachineBasicBlock *MBB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); - - MachineFunction *MF = MBB->getParent(); - MachineRegisterInfo &MRI = MF->getRegInfo(); - - const BasicBlock *BB = MBB->getBasicBlock(); - MachineFunction::iterator I = MBB; - ++I; - - assert(MI->getNumOperands() <= X86::AddrNumOperands + 7 && - "Unexpected number of operands"); - - assert(MI->hasOneMemOperand() && - "Expected atomic-load-op32 to have one memoperand"); - - // Memory Reference - MachineInstr::mmo_iterator MMOBegin = MI->memoperands_begin(); - MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); - - unsigned DstLoReg, DstHiReg; - unsigned SrcLoReg, SrcHiReg; - unsigned MemOpndSlot; - - unsigned CurOp = 0; - - DstLoReg = MI->getOperand(CurOp++).getReg(); - DstHiReg = MI->getOperand(CurOp++).getReg(); - MemOpndSlot = CurOp; - CurOp += X86::AddrNumOperands; - SrcLoReg = MI->getOperand(CurOp++).getReg(); - SrcHiReg = MI->getOperand(CurOp++).getReg(); - - const TargetRegisterClass *RC = &X86::GR32RegClass; - const TargetRegisterClass *RC8 = &X86::GR8RegClass; - - unsigned t1L = MRI.createVirtualRegister(RC); - unsigned t1H = MRI.createVirtualRegister(RC); - unsigned t2L = MRI.createVirtualRegister(RC); - unsigned t2H = MRI.createVirtualRegister(RC); - unsigned t3L = MRI.createVirtualRegister(RC); - unsigned t3H = MRI.createVirtualRegister(RC); - unsigned t4L = MRI.createVirtualRegister(RC); - unsigned t4H = MRI.createVirtualRegister(RC); - - unsigned LCMPXCHGOpc = X86::LCMPXCHG8B; - unsigned LOADOpc = X86::MOV32rm; - - // For the atomic load-arith operator, we generate - // - // thisMBB: - // t1L = LOAD [MI.addr + 0] - // t1H = LOAD [MI.addr + 4] - // mainMBB: - // t4L = phi(t1L / thisMBB, t3L / mainMBB) - // t4H = phi(t1H / thisMBB, t3H / mainMBB) - // t2L = OP MI.val.lo, t4L - // t2H = OP MI.val.hi, t4H - // EBX = t2L - // ECX = t2H - // LCMPXCHG8B [MI.addr], [ECX:EBX & EDX:EAX are implicitly used and EDX:EAX is implicitly defined] - // t3L = EAX - // t3H = EDX - // JNE loop - // sinkMBB: - // dstL = t3L - // dstH = t3H - - MachineBasicBlock *thisMBB = MBB; - MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); - MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); - MF->insert(I, mainMBB); - MF->insert(I, sinkMBB); - - MachineInstrBuilder MIB; - - // Transfer the remainder of BB and its successor edges to sinkMBB. 
- sinkMBB->splice(sinkMBB->begin(), MBB, - std::next(MachineBasicBlock::iterator(MI)), MBB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); - - // thisMBB: - // Lo - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1L); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); - if (NewMO.isReg()) - NewMO.setIsKill(false); - MIB.addOperand(NewMO); - } - for (MachineInstr::mmo_iterator MMOI = MMOBegin; MMOI != MMOEnd; ++MMOI) { - unsigned flags = (*MMOI)->getFlags(); - flags = (flags & ~MachineMemOperand::MOStore) | MachineMemOperand::MOLoad; - MachineMemOperand *MMO = - MF->getMachineMemOperand((*MMOI)->getPointerInfo(), flags, - (*MMOI)->getSize(), - (*MMOI)->getBaseAlignment(), - (*MMOI)->getTBAAInfo(), - (*MMOI)->getRanges()); - MIB.addMemOperand(MMO); - }; - MachineInstr *LowMI = MIB; - - // Hi - MIB = BuildMI(thisMBB, DL, TII->get(LOADOpc), t1H); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - if (i == X86::AddrDisp) { - MIB.addDisp(MI->getOperand(MemOpndSlot + i), 4); // 4 == sizeof(i32) - } else { - MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); - if (NewMO.isReg()) - NewMO.setIsKill(false); - MIB.addOperand(NewMO); - } - } - MIB.setMemRefs(LowMI->memoperands_begin(), LowMI->memoperands_end()); - - thisMBB->addSuccessor(mainMBB); - - // mainMBB: - MachineBasicBlock *origMainMBB = mainMBB; - - // Add PHIs. - MachineInstr *PhiL = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4L) - .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); - MachineInstr *PhiH = BuildMI(mainMBB, DL, TII->get(X86::PHI), t4H) - .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); - - unsigned Opc = MI->getOpcode(); - switch (Opc) { - default: - llvm_unreachable("Unhandled atomic-load-op6432 opcode!"); - case X86::ATOMAND6432: - case X86::ATOMOR6432: - case X86::ATOMXOR6432: - case X86::ATOMADD6432: - case X86::ATOMSUB6432: { - unsigned HiOpc; - unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); - BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(t4L) - .addReg(SrcLoReg); - BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(t4H) - .addReg(SrcHiReg); - break; - } - case X86::ATOMNAND6432: { - unsigned HiOpc, NOTOpc; - unsigned LoOpc = getNonAtomic6432OpcodeWithExtraOpc(Opc, HiOpc, NOTOpc); - unsigned TmpL = MRI.createVirtualRegister(RC); - unsigned TmpH = MRI.createVirtualRegister(RC); - BuildMI(mainMBB, DL, TII->get(LoOpc), TmpL).addReg(SrcLoReg) - .addReg(t4L); - BuildMI(mainMBB, DL, TII->get(HiOpc), TmpH).addReg(SrcHiReg) - .addReg(t4H); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t2L).addReg(TmpL); - BuildMI(mainMBB, DL, TII->get(NOTOpc), t2H).addReg(TmpH); - break; - } - case X86::ATOMMAX6432: - case X86::ATOMMIN6432: - case X86::ATOMUMAX6432: - case X86::ATOMUMIN6432: { - unsigned HiOpc; - unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); - unsigned cL = MRI.createVirtualRegister(RC8); - unsigned cH = MRI.createVirtualRegister(RC8); - unsigned cL32 = MRI.createVirtualRegister(RC); - unsigned cH32 = MRI.createVirtualRegister(RC); - unsigned cc = MRI.createVirtualRegister(RC); - // cl := cmp src_lo, lo - BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) - .addReg(SrcLoReg).addReg(t4L); - BuildMI(mainMBB, DL, TII->get(LoOpc), cL); - BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cL32).addReg(cL); - // ch := cmp src_hi, hi - BuildMI(mainMBB, DL, TII->get(X86::CMP32rr)) - .addReg(SrcHiReg).addReg(t4H); - BuildMI(mainMBB, DL, TII->get(HiOpc), cH); - BuildMI(mainMBB, DL, TII->get(X86::MOVZX32rr8), cH32).addReg(cH); - // cc := if 
(src_hi == hi) ? cl : ch; - if (Subtarget->hasCMov()) { - BuildMI(mainMBB, DL, TII->get(X86::CMOVE32rr), cc) - .addReg(cH32).addReg(cL32); - } else { - MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), cc) - .addReg(cH32).addReg(cL32) - .addImm(X86::COND_E); - mainMBB = EmitLoweredSelect(MIB, mainMBB); - } - BuildMI(mainMBB, DL, TII->get(X86::TEST32rr)).addReg(cc).addReg(cc); - if (Subtarget->hasCMov()) { - BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2L) - .addReg(SrcLoReg).addReg(t4L); - BuildMI(mainMBB, DL, TII->get(X86::CMOVNE32rr), t2H) - .addReg(SrcHiReg).addReg(t4H); - } else { - MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2L) - .addReg(SrcLoReg).addReg(t4L) - .addImm(X86::COND_NE); - mainMBB = EmitLoweredSelect(MIB, mainMBB); - // As the lowered CMOV won't clobber EFLAGS, we could reuse it for the - // 2nd CMOV lowering. - mainMBB->addLiveIn(X86::EFLAGS); - MIB = BuildMI(mainMBB, DL, TII->get(X86::CMOV_GR32), t2H) - .addReg(SrcHiReg).addReg(t4H) - .addImm(X86::COND_NE); - mainMBB = EmitLoweredSelect(MIB, mainMBB); - // Replace the original PHI node as mainMBB is changed after CMOV - // lowering. - BuildMI(*origMainMBB, PhiL, DL, TII->get(X86::PHI), t4L) - .addReg(t1L).addMBB(thisMBB).addReg(t3L).addMBB(mainMBB); - BuildMI(*origMainMBB, PhiH, DL, TII->get(X86::PHI), t4H) - .addReg(t1H).addMBB(thisMBB).addReg(t3H).addMBB(mainMBB); - PhiL->eraseFromParent(); - PhiH->eraseFromParent(); - } - break; - } - case X86::ATOMSWAP6432: { - unsigned HiOpc; - unsigned LoOpc = getNonAtomic6432Opcode(Opc, HiOpc); - BuildMI(mainMBB, DL, TII->get(LoOpc), t2L).addReg(SrcLoReg); - BuildMI(mainMBB, DL, TII->get(HiOpc), t2H).addReg(SrcHiReg); - break; - } - } - - // Copy EDX:EAX back from HiReg:LoReg - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EAX).addReg(t4L); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EDX).addReg(t4H); - // Copy ECX:EBX from t1H:t1L - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::EBX).addReg(t2L); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), X86::ECX).addReg(t2H); - - MIB = BuildMI(mainMBB, DL, TII->get(LCMPXCHGOpc)); - for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { - MachineOperand NewMO = MI->getOperand(MemOpndSlot + i); - if (NewMO.isReg()) - NewMO.setIsKill(false); - MIB.addOperand(NewMO); - } - MIB.setMemRefs(MMOBegin, MMOEnd); - - // Copy EDX:EAX back to t3H:t3L - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3L).addReg(X86::EAX); - BuildMI(mainMBB, DL, TII->get(TargetOpcode::COPY), t3H).addReg(X86::EDX); - - BuildMI(mainMBB, DL, TII->get(X86::JNE_4)).addMBB(origMainMBB); - - mainMBB->addSuccessor(origMainMBB); - mainMBB->addSuccessor(sinkMBB); - - // sinkMBB: - BuildMI(*sinkMBB, sinkMBB->begin(), DL, - TII->get(TargetOpcode::COPY), DstLoReg) - .addReg(t3L); - BuildMI(*sinkMBB, sinkMBB->begin(), DL, - TII->get(TargetOpcode::COPY), DstHiReg) - .addReg(t3H); - - MI->eraseFromParent(); - return sinkMBB; -} - // FIXME: When we get size specific XMM0 registers, i.e. XMM0_V16I8 // or XMM0_V32I8 in AVX all of this code can be replaced with that // in the .td file. 
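That removes the last of the hand-rolled MI-level atomic expansions. As the comment added to ReplaceNodeResults earlier in this diff puts it, situations these pseudo-instructions handled "should have already been dealt with by X86AtomicExpand.cpp", which rewrites the RMW at the IR level into the same compare-and-retry loop. A plain C++ sketch of the max flavor (illustrative only; fetch_max is an invented name):

  #include <atomic>
  #include <cstdint>

  // atomicrmw max as a CAS loop: load, pick the larger value, attempt
  // the exchange, retry with the refreshed 'old' on failure. This is
  // the loop the deleted EmitAtomicLoadArith built from CMP/CMOV and
  // LCMPXCHG at the machine level.
  std::uint32_t fetch_max(std::atomic<std::uint32_t> &v, std::uint32_t x) {
    std::uint32_t old = v.load();
    while (!v.compare_exchange_weak(old, old < x ? x : old))
      ;
    return old;
  }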
@@ -16068,7 +17073,7 @@ X86TargetLowering::EmitVAARG64WithCustomInserter( MachineInstr::mmo_iterator MMOEnd = MI->memoperands_end(); // Machine Information - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = MBB->getParent()->getTarget().getInstrInfo(); MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64); const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32); @@ -16324,7 +17329,7 @@ X86TargetLowering::EmitVAStartSaveXMMRegsWithCustomInserter( XMMSaveMBB->addSuccessor(EndMBB); // Now add the instructions. - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = MBB->getParent()->getTarget().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); unsigned CountReg = MI->getOperand(0).getReg(); @@ -16407,7 +17412,7 @@ static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, MachineBasicBlock * X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + const TargetInstrInfo *TII = BB->getParent()->getTarget().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); // To "insert" a SELECT_CC instruction, we actually have to insert the @@ -16433,7 +17438,7 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, // If the EFLAGS register isn't dead in the terminator, then claim that it's // live into the sink and copy blocks. - const TargetRegisterInfo* TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterInfo* TRI = BB->getParent()->getTarget().getRegisterInfo(); if (!MI->killsRegister(X86::EFLAGS) && !checkAndUpdateEFLAGSKill(MI, BB, TRI)) { copy0MBB->addLiveIn(X86::EFLAGS); @@ -16474,9 +17479,9 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI, MachineBasicBlock * X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, bool Is64Bit) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - DebugLoc DL = MI->getDebugLoc(); MachineFunction *MF = BB->getParent(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + DebugLoc DL = MI->getDebugLoc(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); assert(MF->shouldSplitStack()); @@ -16546,7 +17551,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, // Calls into a routine in libgcc to allocate more space from the heap. const uint32_t *RegMask = - getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C); + MF->getTarget().getRegisterInfo()->getCallPreservedMask(CallingConv::C); if (Is64Bit) { BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) .addReg(sizeVReg); @@ -16594,8 +17599,8 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, MachineBasicBlock * X86TargetLowering::EmitLoweredWinAlloca(MachineInstr *MI, - MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineBasicBlock *BB) const { + const TargetInstrInfo *TII = BB->getParent()->getTarget().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); assert(!Subtarget->isTargetMacho()); @@ -16651,10 +17656,10 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, // our load from the relocation, sticking it in either RDI (x86-64) // or EAX and doing an indirect call. The return value will then // be in the normal return register. 
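The hunks above and below repeat one mechanical change: instead of the lowering object's own getTargetMachine(), TII/TRI are now fetched through the MachineFunction being compiled. Both routes reach the same TargetMachine today; going through the function is presumably groundwork for per-function subtargets. The idiom, as a small sketch against this revision's headers (instrInfoFor is an invented helper):

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/Target/TargetMachine.h"
  using namespace llvm;

  // Derive the instruction info from the function under compilation
  // rather than from state cached on X86TargetLowering.
  static const TargetInstrInfo *instrInfoFor(const MachineBasicBlock *MBB) {
    return MBB->getParent()->getTarget().getInstrInfo();
  }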
+ MachineFunction *F = BB->getParent(); const X86InstrInfo *TII - = static_cast<const X86InstrInfo*>(getTargetMachine().getInstrInfo()); + = static_cast<const X86InstrInfo*>(F->getTarget().getInstrInfo()); DebugLoc DL = MI->getDebugLoc(); - MachineFunction *F = BB->getParent(); assert(Subtarget->isTargetDarwin() && "Darwin only instr emitted?"); assert(MI->getOperand(3).isGlobal() && "This should be a global"); @@ -16663,7 +17668,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, // FIXME: The 32-bit calls have non-standard calling conventions. Use a // proper register mask. const uint32_t *RegMask = - getTargetMachine().getRegisterInfo()->getCallPreservedMask(CallingConv::C); + F->getTarget().getRegisterInfo()->getCallPreservedMask(CallingConv::C); if (Subtarget->is64Bit()) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI) @@ -16675,7 +17680,7 @@ X86TargetLowering::EmitLoweredTLSCall(MachineInstr *MI, MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); addDirectMem(MIB, X86::RDI); MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask); - } else if (getTargetMachine().getRelocationModel() != Reloc::PIC_) { + } else if (F->getTarget().getRelocationModel() != Reloc::PIC_) { MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX) .addReg(0) @@ -16707,9 +17712,8 @@ MachineBasicBlock * X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); const BasicBlock *BB = MBB->getBasicBlock(); @@ -16771,8 +17775,8 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, unsigned PtrStoreOpc = 0; unsigned LabelReg = 0; const int64_t LabelOffset = 1 * PVT.getStoreSize(); - Reloc::Model RM = getTargetMachine().getRelocationModel(); - bool UseImmLabel = (getTargetMachine().getCodeModel() == CodeModel::Small) && + Reloc::Model RM = MF->getTarget().getRelocationModel(); + bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) && (RM == Reloc::Static || RM == Reloc::DynamicNoPIC); // Prepare IP either in reg or imm. @@ -16816,7 +17820,7 @@ X86TargetLowering::emitEHSjLjSetJmp(MachineInstr *MI, .addMBB(restoreMBB); const X86RegisterInfo *RegInfo = - static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo()); + static_cast<const X86RegisterInfo*>(MF->getTarget().getRegisterInfo()); MIB.addRegMask(RegInfo->getNoPreservedMask()); thisMBB->addSuccessor(mainMBB); thisMBB->addSuccessor(restoreMBB); @@ -16845,9 +17849,8 @@ MachineBasicBlock * X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, MachineBasicBlock *MBB) const { DebugLoc DL = MI->getDebugLoc(); - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - MachineFunction *MF = MBB->getParent(); + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); MachineRegisterInfo &MRI = MF->getRegInfo(); // Memory Reference @@ -16863,7 +17866,7 @@ X86TargetLowering::emitEHSjLjLongJmp(MachineInstr *MI, unsigned Tmp = MRI.createVirtualRegister(RC); // Since FP is only updated here but NOT referenced, it's treated as GPR. const X86RegisterInfo *RegInfo = - static_cast<const X86RegisterInfo*>(getTargetMachine().getRegisterInfo()); + static_cast<const X86RegisterInfo*>(MF->getTarget().getRegisterInfo()); unsigned FP = (PVT == MVT::i64) ? 
X86::RBP : X86::EBP; unsigned SP = RegInfo->getStackRegister(); @@ -17038,12 +18041,12 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::FP80_TO_INT16_IN_MEM: case X86::FP80_TO_INT32_IN_MEM: case X86::FP80_TO_INT64_IN_MEM: { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + MachineFunction *F = BB->getParent(); + const TargetInstrInfo *TII = F->getTarget().getInstrInfo(); DebugLoc DL = MI->getDebugLoc(); // Change the floating point control register to use "round towards zero" // mode when truncating to an integer value. - MachineFunction *F = BB->getParent(); int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false); addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx); @@ -17123,7 +18126,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::VPCMPESTRM128MEM: assert(Subtarget->hasSSE42() && "Target must have SSE4.2 or AVX features enabled"); - return EmitPCMPSTRM(MI, BB, getTargetMachine().getInstrInfo()); + return EmitPCMPSTRM(MI, BB, BB->getParent()->getTarget().getInstrInfo()); // String/text processing lowering. case X86::PCMPISTRIREG: @@ -17136,71 +18139,15 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case X86::VPCMPESTRIMEM: assert(Subtarget->hasSSE42() && "Target must have SSE4.2 or AVX features enabled"); - return EmitPCMPSTRI(MI, BB, getTargetMachine().getInstrInfo()); + return EmitPCMPSTRI(MI, BB, BB->getParent()->getTarget().getInstrInfo()); // Thread synchronization. case X86::MONITOR: - return EmitMonitor(MI, BB, getTargetMachine().getInstrInfo(), Subtarget); + return EmitMonitor(MI, BB, BB->getParent()->getTarget().getInstrInfo(), Subtarget); // xbegin case X86::XBEGIN: - return EmitXBegin(MI, BB, getTargetMachine().getInstrInfo()); - - // Atomic Lowering. - case X86::ATOMAND8: - case X86::ATOMAND16: - case X86::ATOMAND32: - case X86::ATOMAND64: - // Fall through - case X86::ATOMOR8: - case X86::ATOMOR16: - case X86::ATOMOR32: - case X86::ATOMOR64: - // Fall through - case X86::ATOMXOR16: - case X86::ATOMXOR8: - case X86::ATOMXOR32: - case X86::ATOMXOR64: - // Fall through - case X86::ATOMNAND8: - case X86::ATOMNAND16: - case X86::ATOMNAND32: - case X86::ATOMNAND64: - // Fall through - case X86::ATOMMAX8: - case X86::ATOMMAX16: - case X86::ATOMMAX32: - case X86::ATOMMAX64: - // Fall through - case X86::ATOMMIN8: - case X86::ATOMMIN16: - case X86::ATOMMIN32: - case X86::ATOMMIN64: - // Fall through - case X86::ATOMUMAX8: - case X86::ATOMUMAX16: - case X86::ATOMUMAX32: - case X86::ATOMUMAX64: - // Fall through - case X86::ATOMUMIN8: - case X86::ATOMUMIN16: - case X86::ATOMUMIN32: - case X86::ATOMUMIN64: - return EmitAtomicLoadArith(MI, BB); - - // This group does 64-bit operations on a 32-bit host. - case X86::ATOMAND6432: - case X86::ATOMOR6432: - case X86::ATOMXOR6432: - case X86::ATOMNAND6432: - case X86::ATOMADD6432: - case X86::ATOMSUB6432: - case X86::ATOMMAX6432: - case X86::ATOMMIN6432: - case X86::ATOMUMAX6432: - case X86::ATOMUMIN6432: - case X86::ATOMSWAP6432: - return EmitAtomicLoadArith6432(MI, BB); + return EmitXBegin(MI, BB, BB->getParent()->getTarget().getInstrInfo()); case X86::VASTART_SAVE_XMM_REGS: return EmitVAStartSaveXMMRegsWithCustomInserter(MI, BB); @@ -17473,13 +18420,385 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// \brief Get the PSHUF-style mask from PSHUF node. 
+/// +/// This is a very minor wrapper around getTargetShuffleMask to easy forming v4 +/// PSHUF-style masks that can be reused with such instructions. +static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) { + SmallVector<int, 4> Mask; + bool IsUnary; + bool HaveMask = getTargetShuffleMask(N.getNode(), N.getSimpleValueType(), Mask, IsUnary); + (void)HaveMask; + assert(HaveMask); + + switch (N.getOpcode()) { + case X86ISD::PSHUFD: + return Mask; + case X86ISD::PSHUFLW: + Mask.resize(4); + return Mask; + case X86ISD::PSHUFHW: + Mask.erase(Mask.begin(), Mask.begin() + 4); + for (int &M : Mask) + M -= 4; + return Mask; + default: + llvm_unreachable("No valid shuffle instruction found!"); + } +} + +/// \brief Search for a combinable shuffle across a chain ending in pshufd. +/// +/// We walk up the chain and look for a combinable shuffle, skipping over +/// shuffles that we could hoist this shuffle's transformation past without +/// altering anything. +static bool combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask, + SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + assert(N.getOpcode() == X86ISD::PSHUFD && + "Called with something other than an x86 128-bit half shuffle!"); + SDLoc DL(N); + + // Walk up a single-use chain looking for a combinable shuffle. + SDValue V = N.getOperand(0); + for (; V.hasOneUse(); V = V.getOperand(0)) { + switch (V.getOpcode()) { + default: + return false; // Nothing combined! + + case ISD::BITCAST: + // Skip bitcasts as we always know the type for the target specific + // instructions. + continue; + + case X86ISD::PSHUFD: + // Found another dword shuffle. + break; + + case X86ISD::PSHUFLW: + // Check that the low words (being shuffled) are the identity in the + // dword shuffle, and the high words are self-contained. + if (Mask[0] != 0 || Mask[1] != 1 || + !(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4)) + return false; + + continue; + + case X86ISD::PSHUFHW: + // Check that the high words (being shuffled) are the identity in the + // dword shuffle, and the low words are self-contained. + if (Mask[2] != 2 || Mask[3] != 3 || + !(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2)) + return false; + + continue; + + case X86ISD::UNPCKL: + case X86ISD::UNPCKH: + // For either i8 -> i16 or i16 -> i32 unpacks, we can combine a dword + // shuffle into a preceding word shuffle. + if (V.getValueType() != MVT::v16i8 && V.getValueType() != MVT::v8i16) + return false; + + // Search for a half-shuffle which we can combine with. + unsigned CombineOp = + V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW; + if (V.getOperand(0) != V.getOperand(1) || + !V->isOnlyUserOf(V.getOperand(0).getNode())) + return false; + V = V.getOperand(0); + do { + switch (V.getOpcode()) { + default: + return false; // Nothing to combine. + + case X86ISD::PSHUFLW: + case X86ISD::PSHUFHW: + if (V.getOpcode() == CombineOp) + break; + + // Fallthrough! + case ISD::BITCAST: + V = V.getOperand(0); + continue; + } + break; + } while (V.hasOneUse()); + break; + } + // Break out of the loop if we break out of the switch. + break; + } + + if (!V.hasOneUse()) + // We fell out of the loop without finding a viable combining instruction. + return false; + + // Record the old value to use in RAUW-ing. + SDValue Old = V; + + // Merge this node's mask and our incoming mask. 
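The "merge" that follows is plain shuffle-mask composition: the outer mask indexes the inner shuffle's output, so the fused mask is VMask[Mask[i]]. A self-contained restatement (composeMasks is an invented helper, for illustration):

  #include <array>

  // Applying inner mask A and then outer mask B selects element A[B[i]],
  // which is what the in-place update below folds into one immediate.
  static std::array<int, 4> composeMasks(const std::array<int, 4> &A,
                                         const std::array<int, 4> &B) {
    return {{A[B[0]], A[B[1]], A[B[2]], A[B[3]]}};
  }
  // e.g. composeMasks({{2,3,0,1}}, {{1,0,3,2}}) yields {{3,2,1,0}}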
+ SmallVector<int, 4> VMask = getPSHUFShuffleMask(V); + for (int &M : Mask) + M = VMask[M]; + V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0), + getV4X86ShuffleImm8ForMask(Mask, DAG)); + + // It is possible that one of the combinable shuffles was completely absorbed + // by the other, just replace it and revisit all users in that case. + if (Old.getNode() == V.getNode()) { + DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo=*/true); + return true; + } + + // Replace N with its operand as we're going to combine that shuffle away. + DAG.ReplaceAllUsesWith(N, N.getOperand(0)); + + // Replace the combinable shuffle with the combined one, updating all users + // so that we re-evaluate the chain here. + DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true); + return true; +} + +/// \brief Search for a combinable shuffle across a chain ending in pshuflw or pshufhw. +/// +/// We walk up the chain, skipping shuffles of the other half and looking +/// through shuffles which switch halves trying to find a shuffle of the same +/// pair of dwords. +static bool combineRedundantHalfShuffle(SDValue N, MutableArrayRef<int> Mask, + SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + assert( + (N.getOpcode() == X86ISD::PSHUFLW || N.getOpcode() == X86ISD::PSHUFHW) && + "Called with something other than an x86 128-bit half shuffle!"); + SDLoc DL(N); + unsigned CombineOpcode = N.getOpcode(); + + // Walk up a single-use chain looking for a combinable shuffle. + SDValue V = N.getOperand(0); + for (; V.hasOneUse(); V = V.getOperand(0)) { + switch (V.getOpcode()) { + default: + return false; // Nothing combined! + + case ISD::BITCAST: + // Skip bitcasts as we always know the type for the target specific + // instructions. + continue; + + case X86ISD::PSHUFLW: + case X86ISD::PSHUFHW: + if (V.getOpcode() == CombineOpcode) + break; + + // Other-half shuffles are no-ops. + continue; + + case X86ISD::PSHUFD: { + // We can only handle pshufd if the half we are combining either stays in + // its half, or switches to the other half. Bail if one of these isn't + // true. + SmallVector<int, 4> VMask = getPSHUFShuffleMask(V); + int DOffset = CombineOpcode == X86ISD::PSHUFLW ? 0 : 2; + if (!((VMask[DOffset + 0] < 2 && VMask[DOffset + 1] < 2) || + (VMask[DOffset + 0] >= 2 && VMask[DOffset + 1] >= 2))) + return false; + + // Map the mask through the pshufd and keep walking up the chain. + for (int i = 0; i < 4; ++i) + Mask[i] = 2 * (VMask[DOffset + Mask[i] / 2] % 2) + Mask[i] % 2; + + // Switch halves if the pshufd does. + CombineOpcode = + VMask[DOffset + Mask[0] / 2] < 2 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW; + continue; + } + } + // Break out of the loop if we break out of the switch. + break; + } + + if (!V.hasOneUse()) + // We fell out of the loop without finding a viable combining instruction. + return false; + + // Record the old value to use in RAUW-ing. + SDValue Old = V; + + // Merge this node's mask and our incoming mask (adjusted to account for all + // the pshufd instructions encountered). + SmallVector<int, 4> VMask = getPSHUFShuffleMask(V); + for (int &M : Mask) + M = VMask[M]; + V = DAG.getNode(V.getOpcode(), DL, MVT::v8i16, V.getOperand(0), + getV4X86ShuffleImm8ForMask(Mask, DAG)); + + // Replace N with its operand as we're going to combine that shuffle away. + DAG.ReplaceAllUsesWith(N, N.getOperand(0)); + + // Replace the combinable shuffle with the combined one, updating all users + // so that we re-evaluate the chain here. 
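One step in the PSHUFD case above deserves unpacking: a word index w sits in dword w/2 at parity w%2, so mapping it through a PSHUFD mask moves the dword while preserving the parity — hence Mask[i] = 2 * (VMask[DOffset + Mask[i] / 2] % 2) + Mask[i] % 2. The same formula as a standalone function (invented name, for illustration):

  // Remap a half-shuffle word index through a dword shuffle. The % 2 on
  // VMask keeps the index relative to whichever half the dword landed
  // in; the caller separately tracks which half that is.
  static int mapWordThroughDwords(const int VMask[4], int DOffset, int w) {
    return 2 * (VMask[DOffset + w / 2] % 2) + w % 2;
  }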
+ DCI.CombineTo(Old.getNode(), V, /*AddTo*/ true); + return true; +} + +/// \brief Try to combine x86 target specific shuffles. +static SDValue PerformTargetShuffleCombine(SDValue N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + SDLoc DL(N); + MVT VT = N.getSimpleValueType(); + SmallVector<int, 4> Mask; + + switch (N.getOpcode()) { + case X86ISD::PSHUFD: + case X86ISD::PSHUFLW: + case X86ISD::PSHUFHW: + Mask = getPSHUFShuffleMask(N); + assert(Mask.size() == 4); + break; + default: + return SDValue(); + } + + // Nuke no-op shuffles that show up after combining. + if (isNoopShuffleMask(Mask)) + return DCI.CombineTo(N.getNode(), N.getOperand(0), /*AddTo*/ true); + + // Look for simplifications involving one or two shuffle instructions. + SDValue V = N.getOperand(0); + switch (N.getOpcode()) { + default: + break; + case X86ISD::PSHUFLW: + case X86ISD::PSHUFHW: + assert(VT == MVT::v8i16); + (void)VT; + + if (combineRedundantHalfShuffle(N, Mask, DAG, DCI)) + return SDValue(); // We combined away this shuffle, so we're done. + + // See if this reduces to a PSHUFD which is no more expensive and can + // combine with more operations. + if (Mask[0] % 2 == 0 && Mask[2] % 2 == 0 && + areAdjacentMasksSequential(Mask)) { + int DMask[] = {-1, -1, -1, -1}; + int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2; + DMask[DOffset + 0] = DOffset + Mask[0] / 2; + DMask[DOffset + 1] = DOffset + Mask[2] / 2; + V = DAG.getNode(ISD::BITCAST, DL, MVT::v4i32, V); + DCI.AddToWorklist(V.getNode()); + V = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V, + getV4X86ShuffleImm8ForMask(DMask, DAG)); + DCI.AddToWorklist(V.getNode()); + return DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, V); + } + + // Look for shuffle patterns which can be implemented as a single unpack. + // FIXME: This doesn't handle the location of the PSHUFD generically, and + // only works when we have a PSHUFD followed by two half-shuffles. + if (Mask[0] == Mask[1] && Mask[2] == Mask[3] && + (V.getOpcode() == X86ISD::PSHUFLW || + V.getOpcode() == X86ISD::PSHUFHW) && + V.getOpcode() != N.getOpcode() && + V.hasOneUse()) { + SDValue D = V.getOperand(0); + while (D.getOpcode() == ISD::BITCAST && D.hasOneUse()) + D = D.getOperand(0); + if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) { + SmallVector<int, 4> VMask = getPSHUFShuffleMask(V); + SmallVector<int, 4> DMask = getPSHUFShuffleMask(D); + int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; + int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; + int WordMask[8]; + for (int i = 0; i < 4; ++i) { + WordMask[i + NOffset] = Mask[i] + NOffset; + WordMask[i + VOffset] = VMask[i] + VOffset; + } + // Map the word mask through the DWord mask. + int MappedMask[8]; + for (int i = 0; i < 8; ++i) + MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2; + const int UnpackLoMask[] = {0, 0, 1, 1, 2, 2, 3, 3}; + const int UnpackHiMask[] = {4, 4, 5, 5, 6, 6, 7, 7}; + if (std::equal(std::begin(MappedMask), std::end(MappedMask), + std::begin(UnpackLoMask)) || + std::equal(std::begin(MappedMask), std::end(MappedMask), + std::begin(UnpackHiMask))) { + // We can replace all three shuffles with an unpack. + V = DAG.getNode(ISD::BITCAST, DL, MVT::v8i16, D.getOperand(0)); + DCI.AddToWorklist(V.getNode()); + return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL + : X86ISD::UNPCKH, + DL, MVT::v8i16, V, V); + } + } + } + + break; + + case X86ISD::PSHUFD: + if (combineRedundantDWordShuffle(N, Mask, DAG, DCI)) + return SDValue(); // We combined away this shuffle. 
+ + break; + } + + return SDValue(); +} + /// PerformShuffleCombine - Performs several different shuffle combines. static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget *Subtarget) { SDLoc dl(N); + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); EVT VT = N->getValueType(0); + // Canonicalize shuffles that perform 'addsub' on packed float vectors + // according to the rule: + // (shuffle (FADD A, B), (FSUB A, B), Mask) -> + // (shuffle (FSUB A, -B), (FADD A, -B), Mask) + // + // Where 'Mask' is: + // <0,5,2,7> -- for v4f32 and v4f64 shuffles; + // <0,3> -- for v2f64 shuffles; + // <0,9,2,11,4,13,6,15> -- for v8f32 shuffles. + // + // This helps pattern-matching more SSE3/AVX ADDSUB instructions + // during ISel stage. + if (N->getOpcode() == ISD::VECTOR_SHUFFLE && + ((Subtarget->hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || + (Subtarget->hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) && + N0->getOpcode() == ISD::FADD && N1->getOpcode() == ISD::FSUB && + // Operands to the FADD and FSUB must be the same. + ((N0->getOperand(0) == N1->getOperand(0) && + N0->getOperand(1) == N1->getOperand(1)) || + // FADD is commutable. See if by commuting the operands of the FADD + // we would still be able to match the operands of the FSUB dag node. + (N0->getOperand(1) == N1->getOperand(0) && + N0->getOperand(0) == N1->getOperand(1))) && + N0->getOperand(0)->getOpcode() != ISD::UNDEF && + N0->getOperand(1)->getOpcode() != ISD::UNDEF) { + + ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N); + unsigned NumElts = VT.getVectorNumElements(); + ArrayRef<int> Mask = SV->getMask(); + bool CanFold = true; + + for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) + CanFold = Mask[i] == (int)((i & 1) ? i + NumElts : i); + + if (CanFold) { + SDValue Op0 = N1->getOperand(0); + SDValue Op1 = DAG.getNode(ISD::FNEG, dl, VT, N1->getOperand(1)); + SDValue Sub = DAG.getNode(ISD::FSUB, dl, VT, Op0, Op1); + SDValue Add = DAG.getNode(ISD::FADD, dl, VT, Op0, Op1); + return DAG.getVectorShuffle(VT, dl, Sub, Add, Mask); + } + } + // Don't create instructions with illegal types after legalize types has run. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!DCI.isBeforeLegalize() && !TLI.isTypeLegal(VT.getVectorElementType())) @@ -17490,6 +18809,57 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, N->getOpcode() == ISD::VECTOR_SHUFFLE) return PerformShuffleCombine256(N, DAG, DCI, Subtarget); + // During Type Legalization, when promoting illegal vector types, + // the backend might introduce new shuffle dag nodes and bitcasts. + // + // This code performs the following transformation: + // fold: (shuffle (bitcast (BINOP A, B)), Undef, <Mask>) -> + // (shuffle (BINOP (bitcast A), (bitcast B)), Undef, <Mask>) + // + // We do this only if both the bitcast and the BINOP dag nodes have + // one use. Also, perform this transformation only if the new binary + // operation is legal. This is to avoid introducing dag nodes that + // potentially need to be further expanded (or custom lowered) into a + // less optimal sequence of dag nodes. 
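For the integer cases, the fold described above is sound because the low half of a wide add/sub/mul equals the narrow operation on the low halves — carries propagate upward, never downward — and the even-lane mask selects exactly those low halves on a little-endian target. A small runnable check of that fact (plain C++, illustrative):

  #include <cassert>
  #include <cstdint>

  int main() {
    std::uint32_t a = 0x12345678, b = 0x9abcdef0;
    // Low 16 bits of the 32-bit sum == 16-bit sum of the low 16 bits.
    assert((std::uint16_t)(a + b) ==
           (std::uint16_t)((std::uint16_t)a + (std::uint16_t)b));
    return 0;
  }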
+ if (!DCI.isBeforeLegalize() && DCI.isBeforeLegalizeOps() && + N1.getOpcode() == ISD::UNDEF && N0.hasOneUse() && + N0.getOpcode() == ISD::BITCAST) { + SDValue BC0 = N0.getOperand(0); + EVT SVT = BC0.getValueType(); + unsigned Opcode = BC0.getOpcode(); + unsigned NumElts = VT.getVectorNumElements(); + + if (BC0.hasOneUse() && SVT.isVector() && + SVT.getVectorNumElements() * 2 == NumElts && + TLI.isOperationLegal(Opcode, VT)) { + bool CanFold = false; + switch (Opcode) { + default : break; + case ISD::ADD : + case ISD::FADD : + case ISD::SUB : + case ISD::FSUB : + case ISD::MUL : + case ISD::FMUL : + CanFold = true; + } + + unsigned SVTNumElts = SVT.getVectorNumElements(); + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); + for (unsigned i = 0, e = SVTNumElts; i != e && CanFold; ++i) + CanFold = SVOp->getMaskElt(i) == (int)(i * 2); + for (unsigned i = SVTNumElts, e = NumElts; i != e && CanFold; ++i) + CanFold = SVOp->getMaskElt(i) < 0; + + if (CanFold) { + SDValue BC00 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(0)); + SDValue BC01 = DAG.getNode(ISD::BITCAST, dl, VT, BC0.getOperand(1)); + SDValue NewBinOp = DAG.getNode(BC0.getOpcode(), dl, VT, BC00, BC01); + return DAG.getVectorShuffle(VT, dl, NewBinOp, N1, &SVOp->getMask()[0]); + } + } + } + // Only handle 128 wide vector from here on. if (!VT.is128BitVector()) return SDValue(); @@ -17501,7 +18871,18 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG, for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) Elts.push_back(getShuffleScalarElt(N, i, DAG, 0)); - return EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true); + SDValue LD = EltsFromConsecutiveLoads(VT, Elts, dl, DAG, true); + if (LD.getNode()) + return LD; + + if (isTargetShuffle(N->getOpcode())) { + SDValue Shuffle = + PerformTargetShuffleCombine(SDValue(N, 0), DAG, DCI, Subtarget); + if (Shuffle.getNode()) + return Shuffle; + } + + return SDValue(); } /// PerformTruncateCombine - Converts truncate operation to @@ -18155,28 +19536,34 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG, Other->getOpcode() == ISD::SUB && DAG.isEqualTo(OpRHS, CondRHS)) return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS); - // If the RHS is a constant we have to reverse the const canonicalization. - // x > C-1 ? x+-C : 0 --> subus x, C - if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && - isSplatVector(CondRHS.getNode()) && isSplatVector(OpRHS.getNode())) { - APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue(); - if (CondRHS.getConstantOperandVal(0) == -A-1) - return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, - DAG.getConstant(-A, VT)); - } - - // Another special case: If C was a sign bit, the sub has been - // canonicalized into a xor. - // FIXME: Would it be better to use computeKnownBits to determine whether - // it's safe to decanonicalize the xor? - // x s< 0 ? x^C : 0 --> subus x, C - if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR && - ISD::isBuildVectorAllZeros(CondRHS.getNode()) && - isSplatVector(OpRHS.getNode())) { - APInt A = cast<ConstantSDNode>(OpRHS.getOperand(0))->getAPIntValue(); - if (A.isSignBit()) - return DAG.getNode(X86ISD::SUBUS, DL, VT, OpLHS, OpRHS); - } + if (auto *OpRHSBV = dyn_cast<BuildVectorSDNode>(OpRHS)) + if (auto *OpRHSConst = OpRHSBV->getConstantSplatNode()) { + if (auto *CondRHSBV = dyn_cast<BuildVectorSDNode>(CondRHS)) + if (auto *CondRHSConst = CondRHSBV->getConstantSplatNode()) + // If the RHS is a constant we have to reverse the const + // canonicalization. 
+ // x > C-1 ? x+-C : 0 --> subus x, C + if (CC == ISD::SETUGT && Other->getOpcode() == ISD::ADD && + CondRHSConst->getAPIntValue() == + (-OpRHSConst->getAPIntValue() - 1)) + return DAG.getNode( + X86ISD::SUBUS, DL, VT, OpLHS, + DAG.getConstant(-OpRHSConst->getAPIntValue(), VT)); + + // Another special case: If C was a sign bit, the sub has been + // canonicalized into a xor. + // FIXME: Would it be better to use computeKnownBits to determine + // whether it's safe to decanonicalize the xor? + // x s< 0 ? x^C : 0 --> subus x, C + if (CC == ISD::SETLT && Other->getOpcode() == ISD::XOR && + ISD::isBuildVectorAllZeros(CondRHS.getNode()) && + OpRHSConst->getAPIntValue().isSignBit()) + // Note that we have to rebuild the RHS constant here to ensure we + // don't rely on particular values of undef lanes. + return DAG.getNode( + X86ISD::SUBUS, DL, VT, OpLHS, + DAG.getConstant(OpRHSConst->getAPIntValue(), VT)); + } } } @@ -18743,6 +20130,8 @@ static SDValue PerformINTRINSIC_WO_CHAINCombine(SDNode *N, SelectionDAG &DAG, if (C->isAllOnesValue()) return Op1; } + + return SDValue(); } // Packed SSE2/AVX2 arithmetic shift immediate intrinsics. @@ -18882,16 +20271,15 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { // vector operations in many cases. Also, on sandybridge ADD is faster than // shl. // (shl V, 1) -> add V,V - if (isSplatVector(N1.getNode())) { - assert(N0.getValueType().isVector() && "Invalid vector shift type"); - ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1->getOperand(0)); - // We shift all of the values by one. In many cases we do not have - // hardware support for this operation. This is better expressed as an ADD - // of two values. - if (N1C && (1 == N1C->getZExtValue())) { - return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0); + if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) + if (auto *N1SplatC = N1BV->getConstantSplatNode()) { + assert(N0.getValueType().isVector() && "Invalid vector shift type"); + // We shift all of the values by one. In many cases we do not have + // hardware support for this operation. This is better expressed as an ADD + // of two values. + if (N1SplatC->getZExtValue() == 1) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0); } - } return SDValue(); } @@ -18910,10 +20298,9 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, SDValue Amt = N->getOperand(1); SDLoc DL(N); - if (isSplatVector(Amt.getNode())) { - SDValue SclrAmt = Amt->getOperand(0); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) { - APInt ShiftAmt = C->getAPIntValue(); + if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Amt)) + if (auto *AmtSplat = AmtBV->getConstantSplatNode()) { + APInt ShiftAmt = AmtSplat->getAPIntValue(); unsigned MaxAmount = VT.getVectorElementType().getSizeInBits(); // SSE2/AVX2 logical shifts always return a vector of 0s @@ -18923,7 +20310,6 @@ static SDValue performShiftToAllZeros(SDNode *N, SelectionDAG &DAG, if (ShiftAmt.trunc(8).uge(MaxAmount)) return getZeroVector(VT, Subtarget, DAG, DL); } - } return SDValue(); } @@ -19117,9 +20503,10 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // The right side has to be a 'trunc' or a constant vector. 
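Another pattern repeated throughout these combines (the subus, SHL, and shift-to-zero rewrites above; the mask-widening and OR combines just below): the old isSplatVector + getOperand(0) probe is replaced by BuildVectorSDNode::getConstantSplatNode(), and constants are rebuilt from the splat value so nothing depends on what happens to sit in undef lanes. The new idiom as a sketch (matchConstantSplat is an invented wrapper):

  #include "llvm/CodeGen/SelectionDAGNodes.h"
  using namespace llvm;

  // Null unless V is a BUILD_VECTOR splatting one constant; this is the
  // shape each rewritten combine now open-codes.
  static ConstantSDNode *matchConstantSplat(SDValue V) {
    if (auto *BV = dyn_cast<BuildVectorSDNode>(V))
      return BV->getConstantSplatNode();
    return nullptr;
  }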
bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE; - bool RHSConst = (isSplatVector(N1.getNode()) && - isa<ConstantSDNode>(N1->getOperand(0))); - if (!RHSTrunc && !RHSConst) + ConstantSDNode *RHSConstSplat = nullptr; + if (auto *RHSBV = dyn_cast<BuildVectorSDNode>(N1)) + RHSConstSplat = RHSBV->getConstantSplatNode(); + if (!RHSTrunc && !RHSConstSplat) return SDValue(); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -19129,9 +20516,9 @@ static SDValue WidenMaskArithmetic(SDNode *N, SelectionDAG &DAG, // Set N0 and N1 to hold the inputs to the new wide operation. N0 = N0->getOperand(0); - if (RHSConst) { + if (RHSConstSplat) { N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT.getScalarType(), - N1->getOperand(0)); + SDValue(RHSConstSplat, 0)); SmallVector<SDValue, 8> C(WideVT.getVectorNumElements(), N1); N1 = DAG.getNode(ISD::BUILD_VECTOR, DL, WideVT, C); } else if (RHSTrunc) { @@ -19277,12 +20664,9 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, unsigned EltBits = MaskVT.getVectorElementType().getSizeInBits(); unsigned SraAmt = ~0; if (Mask.getOpcode() == ISD::SRA) { - SDValue Amt = Mask.getOperand(1); - if (isSplatVector(Amt.getNode())) { - SDValue SclrAmt = Amt->getOperand(0); - if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(SclrAmt)) - SraAmt = C->getZExtValue(); - } + if (auto *AmtBV = dyn_cast<BuildVectorSDNode>(Mask.getOperand(1))) + if (auto *AmtConst = AmtBV->getConstantSplatNode()) + SraAmt = AmtConst->getZExtValue(); } else if (Mask.getOpcode() == X86ISD::VSRAI) { SDValue SraC = Mask.getOperand(1); SraAmt = cast<ConstantSDNode>(SraC)->getZExtValue(); @@ -20642,6 +22026,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, return PerformINTRINSIC_WO_CHAINCombine(N, DAG, Subtarget); case X86ISD::INSERTPS: return PerformINSERTPSCombine(N, DAG, Subtarget); + case ISD::BUILD_VECTOR: return PerformBUILD_VECTORCombine(N, DAG, Subtarget); } return SDValue(); @@ -21146,8 +22531,8 @@ void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, const GlobalValue *GV = GA->getGlobal(); // If we require an extra load to get this address, as in PIC mode, we // can't accept it. - if (isGlobalStubReference(Subtarget->ClassifyGlobalReference(GV, - getTargetMachine()))) + if (isGlobalStubReference( + Subtarget->ClassifyGlobalReference(GV, DAG.getTarget()))) return; Result = DAG.getTargetGlobalAddress(GV, SDLoc(Op), @@ -21425,3 +22810,7 @@ int X86TargetLowering::getScalingFactorCost(const AddrMode &AM, return AM.Scale != 0; return -1; } + +bool X86TargetLowering::isTargetFTOL() const { + return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit(); +} diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 9f51b53..c8cdce7 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -15,13 +15,13 @@ #ifndef X86ISELLOWERING_H #define X86ISELLOWERING_H -#include "X86Subtarget.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" namespace llvm { + class X86Subtarget; class X86TargetMachine; namespace X86ISD { @@ -86,6 +86,9 @@ namespace llvm { /// X86 Read Time-Stamp Counter and Processor ID. RDTSCP_DAG, + /// X86 Read Performance Monitoring Counters. + RDPMC_DAG, + /// X86 compare and logical compare instructions. CMP, COMI, UCOMI, @@ -315,6 +318,8 @@ namespace llvm { KORTEST, // Several flavors of instructions with vector shuffle behaviors. 
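The PACKSS/PACKUS entries added just below model the SSE pack instructions as target shuffle nodes: each narrows i16/i32 lanes to half width, with signed or unsigned saturation respectively, and concatenates the two operands. Per-lane scalar models (plain C++, invented names, illustrative):

  #include <cstdint>

  // PACKSSWB lane: i16 -> i8, signed saturation to [-128, 127].
  std::int8_t pack_ss(std::int16_t x) {
    return x > 127 ? 127 : (x < -128 ? -128 : (std::int8_t)x);
  }

  // PACKUSWB lane: signed i16 -> u8, unsigned saturation to [0, 255].
  std::uint8_t pack_us(std::int16_t x) {
    return x > 255 ? 255 : (x < 0 ? 0 : (std::uint8_t)x);
  }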
+ PACKSS, + PACKUS, PALIGNR, PSHUFD, PSHUFHW, @@ -400,23 +405,8 @@ namespace llvm { // XTEST - Test if in transactional execution. XTEST, - // ATOMADD64_DAG, ATOMSUB64_DAG, ATOMOR64_DAG, ATOMAND64_DAG, - // ATOMXOR64_DAG, ATOMNAND64_DAG, ATOMSWAP64_DAG - - // Atomic 64-bit binary operations. - ATOMADD64_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, - ATOMSUB64_DAG, - ATOMOR64_DAG, - ATOMXOR64_DAG, - ATOMAND64_DAG, - ATOMNAND64_DAG, - ATOMMAX64_DAG, - ATOMMIN64_DAG, - ATOMUMAX64_DAG, - ATOMUMIN64_DAG, - ATOMSWAP64_DAG, - // LCMPXCHG_DAG, LCMPXCHG8_DAG, LCMPXCHG16_DAG - Compare and swap. - LCMPXCHG_DAG, + LCMPXCHG_DAG = ISD::FIRST_TARGET_MEMORY_OPCODE, LCMPXCHG8_DAG, LCMPXCHG16_DAG, @@ -766,9 +756,7 @@ namespace llvm { /// isTargetFTOL - Return true if the target uses the MSVC _ftol2 routine /// for fptoui. - bool isTargetFTOL() const { - return Subtarget->isTargetKnownWindowsMSVC() && !Subtarget->is64Bit(); - } + bool isTargetFTOL() const; /// isIntegerTypeFTOL - Return true if the MSVC _ftol2 routine should be /// used for fptoui to the given type. @@ -808,6 +796,9 @@ namespace llvm { /// \brief Reset the operation actions based on target options. void resetOperationActions() override; + /// \brief Customize the preferred legalization strategy for certain types. + LegalizeTypeAction getPreferredVectorAction(EVT VT) const override; + protected: std::pair<const TargetRegisterClass*, uint8_t> findRepresentativeClass(MVT VT) const override; diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 37bcc52..41e900e 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -476,6 +476,28 @@ defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VT1>; +multiclass avx512_int_subvec_broadcast_rm<bits<8> opc, string OpcodeStr, + X86MemOperand x86memop, PatFrag ld_frag, + RegisterClass KRC> { + let mayLoad = 1 in { + def rm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins x86memop:$src), + !strconcat(OpcodeStr, " \t{$src, $dst|$dst, $src}"), + []>, EVEX; + def krm : AVX5128I<opc, MRMSrcMem, (outs VR512:$dst), (ins KRC:$mask, + x86memop:$src), + !strconcat(OpcodeStr, + " \t{$src, ${dst} {${mask}} {z}|${dst} {${mask}} {z}, $src}"), + []>, EVEX, EVEX_KZ; + } +} + +defm VBROADCASTI32X4 : avx512_int_subvec_broadcast_rm<0x5a, "vbroadcasti32x4", + i128mem, loadv2i64, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VT4>; +defm VBROADCASTI64X4 : avx512_int_subvec_broadcast_rm<0x5b, "vbroadcasti64x4", + i256mem, loadv4i64, VK16WM>, VEX_W, + EVEX_V512, EVEX_CD8<64, CD8VT4>; + def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))), (VPBROADCASTDZrr VR128X:$src)>; def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))), @@ -517,10 +539,12 @@ def rr : AVX512XS8I<opc, MRMDestReg, (outs DstRC:$dst), (ins KRC:$src), []>, EVEX; } +let Predicates = [HasCDI] in { defm VPBROADCASTMW2D : avx512_mask_broadcast<0x3A, "vpbroadcastmw2d", VR512, VK16, v16i32, v16i1>, EVEX_V512; defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q", VR512, VK8, v8i64, v8i1>, EVEX_V512, VEX_W; +} //===----------------------------------------------------------------------===// // AVX-512 - VPERM @@ -585,7 +609,7 @@ defm VPERMPDZ : avx512_perm<0x16, "vpermpd", VR512, memopv8f64, f512mem, // -- VPERM2I - 3 source operands form -- multiclass avx512_perm_3src<bits<8> opc, string OpcodeStr, RegisterClass RC, PatFrag mem_frag, X86MemOperand x86memop, - SDNode OpNode, 
ValueType OpVT> { + SDNode OpNode, ValueType OpVT, RegisterClass KRC> { let Constraints = "$src1 = $dst" in { def rr : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2, RC:$src3), @@ -595,48 +619,107 @@ let Constraints = "$src1 = $dst" in { (OpVT (OpNode RC:$src1, RC:$src2, RC:$src3)))]>, EVEX_4V; + def rrk : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3), + !strconcat(OpcodeStr, + " \t{$src3, $src2, $dst {${mask}}|" + "$dst {${mask}}, $src2, $src3}"), + [(set RC:$dst, (OpVT (vselect KRC:$mask, + (OpNode RC:$src1, RC:$src2, + RC:$src3), + RC:$src1)))]>, + EVEX_4V, EVEX_K; + + let AddedComplexity = 30 in // Prefer over VMOV*rrkz Pat<> + def rrkz : AVX5128I<opc, MRMSrcReg, (outs RC:$dst), + (ins RC:$src1, KRC:$mask, RC:$src2, RC:$src3), + !strconcat(OpcodeStr, + " \t{$src3, $src2, $dst {${mask}} {z} |", + "$dst {${mask}} {z}, $src2, $src3}"), + [(set RC:$dst, (OpVT (vselect KRC:$mask, + (OpNode RC:$src1, RC:$src2, + RC:$src3), + (OpVT (bitconvert + (v16i32 immAllZerosV))))))]>, + EVEX_4V, EVEX_KZ; + def rm : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), (ins RC:$src1, RC:$src2, x86memop:$src3), !strconcat(OpcodeStr, " \t{$src3, $src2, $dst|$dst, $src2, $src3}"), [(set RC:$dst, - (OpVT (OpNode RC:$src1, RC:$src2, + (OpVT (OpNode RC:$src1, RC:$src2, (mem_frag addr:$src3))))]>, EVEX_4V; + + def rmk : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3), + !strconcat(OpcodeStr, + " \t{$src3, $src2, $dst {${mask}}|" + "$dst {${mask}}, $src2, $src3}"), + [(set RC:$dst, + (OpVT (vselect KRC:$mask, + (OpNode RC:$src1, RC:$src2, + (mem_frag addr:$src3)), + RC:$src1)))]>, + EVEX_4V, EVEX_K; + + let AddedComplexity = 10 in // Prefer over the rrkz variant + def rmkz : AVX5128I<opc, MRMSrcMem, (outs RC:$dst), + (ins RC:$src1, KRC:$mask, RC:$src2, x86memop:$src3), + !strconcat(OpcodeStr, + " \t{$src3, $src2, $dst {${mask}} {z}|" + "$dst {${mask}} {z}, $src2, $src3}"), + [(set RC:$dst, + (OpVT (vselect KRC:$mask, + (OpNode RC:$src1, RC:$src2, + (mem_frag addr:$src3)), + (OpVT (bitconvert + (v16i32 immAllZerosV))))))]>, + EVEX_4V, EVEX_KZ; } } -defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, i512mem, - X86VPermiv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, i512mem, - X86VPermiv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, i512mem, - X86VPermiv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, i512mem, - X86VPermiv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -defm VPERMT2D : avx512_perm_3src<0x7E, "vpermt2d", VR512, memopv16i32, i512mem, - X86VPermv3, v16i32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2Q : avx512_perm_3src<0x7E, "vpermt2q", VR512, memopv8i64, i512mem, - X86VPermv3, v8i64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VPERMT2PS : avx512_perm_3src<0x7F, "vpermt2ps", VR512, memopv16f32, i512mem, - X86VPermv3, v16f32>, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPERMT2PD : avx512_perm_3src<0x7F, "vpermt2pd", VR512, memopv8f64, i512mem, - X86VPermv3, v8f64>, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -def : Pat<(v16f32 (int_x86_avx512_mask_vpermt_ps_512 (v16i32 VR512:$idx), - (v16f32 VR512:$src1), (v16f32 VR512:$src2), (i16 -1))), - (VPERMT2PSrr VR512:$src1, VR512:$idx, VR512:$src2)>; - -def : Pat<(v16i32 (int_x86_avx512_mask_vpermt_d_512 (v16i32 VR512:$idx), - (v16i32 
VR512:$src1), (v16i32 VR512:$src2), (i16 -1))), - (VPERMT2Drr VR512:$src1, VR512:$idx, VR512:$src2)>; - -def : Pat<(v8f64 (int_x86_avx512_mask_vpermt_pd_512 (v8i64 VR512:$idx), - (v8f64 VR512:$src1), (v8f64 VR512:$src2), (i8 -1))), - (VPERMT2PDrr VR512:$src1, VR512:$idx, VR512:$src2)>; - -def : Pat<(v8i64 (int_x86_avx512_mask_vpermt_q_512 (v8i64 VR512:$idx), - (v8i64 VR512:$src1), (v8i64 VR512:$src2), (i8 -1))), - (VPERMT2Qrr VR512:$src1, VR512:$idx, VR512:$src2)>; +defm VPERMI2D : avx512_perm_3src<0x76, "vpermi2d", VR512, memopv16i32, + i512mem, X86VPermiv3, v16i32, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMI2Q : avx512_perm_3src<0x76, "vpermi2q", VR512, memopv8i64, + i512mem, X86VPermiv3, v8i64, VK8WM>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMI2PS : avx512_perm_3src<0x77, "vpermi2ps", VR512, memopv16f32, + i512mem, X86VPermiv3, v16f32, VK16WM>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMI2PD : avx512_perm_3src<0x77, "vpermi2pd", VR512, memopv8f64, + i512mem, X86VPermiv3, v8f64, VK8WM>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +multiclass avx512_perm_table_3src<bits<8> opc, string Suffix, RegisterClass RC, + PatFrag mem_frag, X86MemOperand x86memop, + SDNode OpNode, ValueType OpVT, RegisterClass KRC, + ValueType MaskVT, RegisterClass MRC> : + avx512_perm_3src<opc, "vpermt2"##Suffix, RC, mem_frag, x86memop, OpNode, + OpVT, KRC> { + def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512") + VR512:$idx, VR512:$src1, VR512:$src2, -1)), + (!cast<Instruction>(NAME#rr) VR512:$src1, VR512:$idx, VR512:$src2)>; + + def : Pat<(OpVT (!cast<Intrinsic>("int_x86_avx512_mask_vpermt_"##Suffix##"_512") + VR512:$idx, VR512:$src1, VR512:$src2, MRC:$mask)), + (!cast<Instruction>(NAME#rrk) VR512:$src1, + (MaskVT (COPY_TO_REGCLASS MRC:$mask, KRC)), VR512:$idx, VR512:$src2)>; +} + +defm VPERMT2D : avx512_perm_table_3src<0x7E, "d", VR512, memopv16i32, i512mem, + X86VPermv3, v16i32, VK16WM, v16i1, GR16>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMT2Q : avx512_perm_table_3src<0x7E, "q", VR512, memopv8i64, i512mem, + X86VPermv3, v8i64, VK8WM, v8i1, GR8>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; +defm VPERMT2PS : avx512_perm_table_3src<0x7F, "ps", VR512, memopv16f32, i512mem, + X86VPermv3, v16f32, VK16WM, v16i1, GR16>, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPERMT2PD : avx512_perm_table_3src<0x7F, "pd", VR512, memopv8f64, i512mem, + X86VPermv3, v8f64, VK8WM, v8i1, GR8>, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + //===----------------------------------------------------------------------===// // AVX-512 - BLEND using mask // @@ -790,52 +873,61 @@ def : Pat<(v8i1 (X86pcmpeqm (v8i32 VR256X:$src1), (v8i32 VR256X:$src2))), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src1, sub_ymm)), (v16i32 (SUBREG_TO_REG (i32 0), VR256X:$src2, sub_ymm))), VK8)>; -multiclass avx512_icmp_cc<bits<8> opc, RegisterClass KRC, +multiclass avx512_icmp_cc<bits<8> opc, RegisterClass WMRC, RegisterClass KRC, RegisterClass RC, X86MemOperand x86memop, PatFrag memop_frag, - SDNode OpNode, ValueType vt, Operand CC, string asm, - string asm_alt> { + SDNode OpNode, ValueType vt, Operand CC, string Suffix> { def rri : AVX512AIi8<opc, MRMSrcReg, - (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), asm, + (outs KRC:$dst), (ins RC:$src1, RC:$src2, CC:$cc), + !strconcat("vpcmp${cc}", Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set KRC:$dst, (OpNode (vt RC:$src1), (vt RC:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RR>, EVEX_4V; def rmi : AVX512AIi8<opc, MRMSrcMem, - (outs KRC:$dst), (ins RC:$src1, 
x86memop:$src2, CC:$cc), asm, + (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, CC:$cc), + !strconcat("vpcmp${cc}", Suffix, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set KRC:$dst, (OpNode (vt RC:$src1), (memop_frag addr:$src2), imm:$cc))], IIC_SSE_ALU_F32P_RM>, EVEX_4V; // Accept explicit immediate argument form instead of comparison code. let isAsmParserOnly = 1, hasSideEffects = 0 in { def rri_alt : AVX512AIi8<opc, MRMSrcReg, (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_ALU_F32P_RR>, EVEX_4V; + !strconcat("vpcmp", Suffix, + "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), + [], IIC_SSE_ALU_F32P_RR>, EVEX_4V; + def rrik_alt : AVX512AIi8<opc, MRMSrcReg, + (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, RC:$src2, i8imm:$cc), + !strconcat("vpcmp", Suffix, + "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"), + [], IIC_SSE_ALU_F32P_RR>, EVEX_4V, EVEX_K; def rmi_alt : AVX512AIi8<opc, MRMSrcMem, (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), - asm_alt, [], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + !strconcat("vpcmp", Suffix, + "\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), + [], IIC_SSE_ALU_F32P_RM>, EVEX_4V; + def rmik_alt : AVX512AIi8<opc, MRMSrcMem, + (outs KRC:$dst), (ins WMRC:$mask, RC:$src1, x86memop:$src2, i8imm:$cc), + !strconcat("vpcmp", Suffix, + "\t{$cc, $src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2, $cc}"), + [], IIC_SSE_ALU_F32P_RM>, EVEX_4V, EVEX_K; } } -defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16, VR512, i512mem, memopv16i32, - X86cmpm, v16i32, AVXCC, - "vpcmp${cc}d\t{$src2, $src1, $dst|$dst, $src1, $src2}", - "vpcmpd\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16, VR512, i512mem, memopv16i32, - X86cmpmu, v16i32, AVXCC, - "vpcmp${cc}ud\t{$src2, $src1, $dst|$dst, $src1, $src2}", - "vpcmpud\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; - -defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8, VR512, i512mem, memopv8i64, - X86cmpm, v8i64, AVXCC, - "vpcmp${cc}q\t{$src2, $src1, $dst|$dst, $src1, $src2}", - "vpcmpq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, - VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; -defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8, VR512, i512mem, memopv8i64, - X86cmpmu, v8i64, AVXCC, - "vpcmp${cc}uq\t{$src2, $src1, $dst|$dst, $src1, $src2}", - "vpcmpuq\t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}">, - VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; - -// avx512_cmp_packed - sse 1 & 2 compare packed instructions +defm VPCMPDZ : avx512_icmp_cc<0x1F, VK16WM, VK16, VR512, i512mem, memopv16i32, + X86cmpm, v16i32, AVXCC, "d">, + EVEX_V512, EVEX_CD8<32, CD8VF>; +defm VPCMPUDZ : avx512_icmp_cc<0x1E, VK16WM, VK16, VR512, i512mem, memopv16i32, + X86cmpmu, v16i32, AVXCC, "ud">, + EVEX_V512, EVEX_CD8<32, CD8VF>; + +defm VPCMPQZ : avx512_icmp_cc<0x1F, VK8WM, VK8, VR512, i512mem, memopv8i64, + X86cmpm, v8i64, AVXCC, "q">, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; +defm VPCMPUQZ : avx512_icmp_cc<0x1E, VK8WM, VK8, VR512, i512mem, memopv8i64, + X86cmpmu, v8i64, AVXCC, "uq">, + VEX_W, EVEX_V512, EVEX_CD8<64, CD8VF>; + +// avx512_cmp_packed - compare packed instructions multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC, X86MemOperand x86memop, ValueType vt, string suffix, Domain d> { @@ -859,11 +951,11 @@ multiclass avx512_cmp_packed<RegisterClass KRC, RegisterClass RC, // Accept explicit immediate argument form instead of comparison code. 
let isAsmParserOnly = 1, hasSideEffects = 0 in { def rri_alt : AVX512PIi8<0xC2, MRMSrcReg, - (outs RC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), + (outs KRC:$dst), (ins RC:$src1, RC:$src2, i8imm:$cc), !strconcat("vcmp", suffix, " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; def rmi_alt : AVX512PIi8<0xC2, MRMSrcMem, - (outs RC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), + (outs KRC:$dst), (ins RC:$src1, x86memop:$src2, i8imm:$cc), !strconcat("vcmp", suffix, " \t{$cc, $src2, $src1, $dst|$dst, $src1, $src2, $cc}"), [], d>; } @@ -1788,6 +1880,46 @@ def : Pat<(v8i64 (X86Vinsert undef, GR64:$src2, (iPTR 0))), (SUBREG_TO_REG (i32 0), (VMOV64toPQIZrr GR64:$src2), sub_xmm)>; //===----------------------------------------------------------------------===// +// AVX-512 - Non-temporals +//===----------------------------------------------------------------------===// + +def VMOVNTDQAZrm : AVX5128I<0x2A, MRMSrcMem, (outs VR512:$dst), + (ins i512mem:$src), + "vmovntdqa\t{$src, $dst|$dst, $src}", + [(set VR512:$dst, + (int_x86_avx512_movntdqa addr:$src))]>, + EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; + +// Prefer non-temporal over temporal versions +let AddedComplexity = 400, SchedRW = [WriteStore] in { + +def VMOVNTPSZmr : AVX512PSI<0x2B, MRMDestMem, (outs), + (ins f512mem:$dst, VR512:$src), + "vmovntps\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v16f32 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, EVEX_CD8<32, CD8VF>; + +def VMOVNTPDZmr : AVX512PDI<0x2B, MRMDestMem, (outs), + (ins f512mem:$dst, VR512:$src), + "vmovntpd\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8f64 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + + +def VMOVNTDQZmr : AVX512BI<0xE7, MRMDestMem, (outs), + (ins i512mem:$dst, VR512:$src), + "vmovntdq\t{$src, $dst|$dst, $src}", + [(alignednontemporalstore (v8i64 VR512:$src), + addr:$dst)], + IIC_SSE_MOVNT>, + EVEX, EVEX_V512, EVEX_CD8<64, CD8VF>; +} + +//===----------------------------------------------------------------------===// // AVX-512 - Integer arithmetic // multiclass avx512_binop_rm<bits<8> opc, string OpcodeStr, SDNode OpNode, @@ -3161,6 +3293,10 @@ def : Pat<(v4f32 (uint_to_fp (v4i32 VR128X:$src1))), (EXTRACT_SUBREG (v16f32 (VCVTUDQ2PSZrr (v16i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_xmm)>; +def : Pat<(v4f64 (uint_to_fp (v4i32 VR128X:$src1))), + (EXTRACT_SUBREG (v8f64 (VCVTUDQ2PDZrr + (v8i32 (SUBREG_TO_REG (i32 0), VR128X:$src1, sub_xmm)))), sub_ymm)>; + def : Pat<(v16f32 (int_x86_avx512_mask_cvtdq2ps_512 (v16i32 VR512:$src), (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1), imm:$rc)), (VCVTDQ2PSZrrb VR512:$src, imm:$rc)>; @@ -4343,6 +4479,37 @@ def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1, (VPCONFLICTQrrk VR512:$src1, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; +let Predicates = [HasCDI] in { +defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM, + i512mem, i32mem, "{1to16}">, + EVEX_V512, EVEX_CD8<32, CD8VF>; + + +defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM, + i512mem, i64mem, "{1to8}">, + EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; + +} + +def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1, + GR16:$mask), + (VPLZCNTDrrk VR512:$src1, + (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; + +def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1, + GR8:$mask), + (VPLZCNTQrrk VR512:$src1, + (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; + +def 
: Pat<(v16i32 (ctlz (memopv16i32 addr:$src))), + (VPLZCNTDrm addr:$src)>; +def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))), + (VPLZCNTDrr VR512:$src)>; +def : Pat<(v8i64 (ctlz (memopv8i64 addr:$src))), + (VPLZCNTQrm addr:$src)>; +def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))), + (VPLZCNTQrr VR512:$src)>; + def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>; diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 368e14b..f2574cc 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1278,8 +1278,10 @@ let isCompare = 1 in { def TEST64mi32 : BinOpMI_F<"test", Xi64, X86testpat, MRM0m, 0xF6>; // When testing the result of EXTRACT_SUBREG sub_8bit_hi, make sure the - // register class is constrained to GR8_NOREX. - let isPseudo = 1 in + // register class is constrained to GR8_NOREX. This pseudo is explicitly + // marked side-effect free, since it doesn't have an isel pattern like + // other test instructions. + let isPseudo = 1, hasSideEffects = 0 in def TEST8ri_NOREX : I<0, Pseudo, (outs), (ins GR8_NOREX:$src, i8imm:$mask), "", [], IIC_BIN_NONMEM>, Sched<[WriteALU]>; } // Defs = [EFLAGS] diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 34d8fb9..ca4f608 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -110,7 +110,7 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in // When using segmented stacks these are lowered into instructions which first // check if the current stacklet has enough free memory. If it does, memory is -// allocated by bumping the stack pointer. Otherwise memory is allocated from +// allocated by bumping the stack pointer. Otherwise memory is allocated from // the heap. let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in @@ -197,6 +197,26 @@ let isBranch = 1, isTerminator = 1, isCodeGenOnly = 1 in { } //===----------------------------------------------------------------------===// +// Pseudo instructions used by unwind info. +// +let isPseudo = 1 in { + def SEH_PushReg : I<0, Pseudo, (outs), (ins i32imm:$reg), + "#SEH_PushReg $reg", []>; + def SEH_SaveReg : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), + "#SEH_SaveReg $reg, $dst", []>; + def SEH_SaveXMM : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$dst), + "#SEH_SaveXMM $reg, $dst", []>; + def SEH_StackAlloc : I<0, Pseudo, (outs), (ins i32imm:$size), + "#SEH_StackAlloc $size", []>; + def SEH_SetFrame : I<0, Pseudo, (outs), (ins i32imm:$reg, i32imm:$offset), + "#SEH_SetFrame $reg, $offset", []>; + def SEH_PushFrame : I<0, Pseudo, (outs), (ins i1imm:$mode), + "#SEH_PushFrame $mode", []>; + def SEH_EndPrologue : I<0, Pseudo, (outs), (ins), + "#SEH_EndPrologue", []>; +} + +//===----------------------------------------------------------------------===// // Pseudo instructions used by segmented stacks. 
// @@ -371,7 +391,7 @@ let Defs = [RCX,RDI], isCodeGenOnly = 1 in { def REP_STOSD_64 : I<0xAB, RawFrm, (outs), (ins), "{rep;stosl|rep stosd}", [(X86rep_stos i32)], IIC_REP_STOS>, REP, OpSize32, Requires<[In64BitMode]>; - + let Uses = [RAX,RCX,RDI] in def REP_STOSQ_64 : RI<0xAB, RawFrm, (outs), (ins), "{rep;stosq|rep stosq}", [(X86rep_stos i64)], IIC_REP_STOS>, REP, @@ -502,83 +522,6 @@ def CMOV_RFP80 : I<0, Pseudo, //===----------------------------------------------------------------------===// -// Atomic Instruction Pseudo Instructions -//===----------------------------------------------------------------------===// - -// Pseudo atomic instructions - -multiclass PSEUDO_ATOMIC_LOAD_BINOP<string mnemonic> { - let usesCustomInserter = 1, mayLoad = 1, mayStore = 1 in { - let Defs = [EFLAGS, AL] in - def NAME#8 : I<0, Pseudo, (outs GR8:$dst), - (ins i8mem:$ptr, GR8:$val), - !strconcat(mnemonic, "8 PSEUDO!"), []>; - let Defs = [EFLAGS, AX] in - def NAME#16 : I<0, Pseudo,(outs GR16:$dst), - (ins i16mem:$ptr, GR16:$val), - !strconcat(mnemonic, "16 PSEUDO!"), []>; - let Defs = [EFLAGS, EAX] in - def NAME#32 : I<0, Pseudo, (outs GR32:$dst), - (ins i32mem:$ptr, GR32:$val), - !strconcat(mnemonic, "32 PSEUDO!"), []>; - let Defs = [EFLAGS, RAX] in - def NAME#64 : I<0, Pseudo, (outs GR64:$dst), - (ins i64mem:$ptr, GR64:$val), - !strconcat(mnemonic, "64 PSEUDO!"), []>; - } -} - -multiclass PSEUDO_ATOMIC_LOAD_BINOP_PATS<string name, string frag> { - def : Pat<(!cast<PatFrag>(frag # "_8") addr:$ptr, GR8:$val), - (!cast<Instruction>(name # "8") addr:$ptr, GR8:$val)>; - def : Pat<(!cast<PatFrag>(frag # "_16") addr:$ptr, GR16:$val), - (!cast<Instruction>(name # "16") addr:$ptr, GR16:$val)>; - def : Pat<(!cast<PatFrag>(frag # "_32") addr:$ptr, GR32:$val), - (!cast<Instruction>(name # "32") addr:$ptr, GR32:$val)>; - def : Pat<(!cast<PatFrag>(frag # "_64") addr:$ptr, GR64:$val), - (!cast<Instruction>(name # "64") addr:$ptr, GR64:$val)>; -} - -// Atomic exchange, and, or, xor -defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMAND">; -defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMOR">; -defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMXOR">; -defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMNAND">; -defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMAX">; -defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMMIN">; -defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMAX">; -defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP<"#ATOMUMIN">; - -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMAND", "atomic_load_and">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMOR", "atomic_load_or">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMXOR", "atomic_load_xor">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMNAND", "atomic_load_nand">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMAX", "atomic_load_max">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMMIN", "atomic_load_min">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMAX", "atomic_load_umax">; -defm : PSEUDO_ATOMIC_LOAD_BINOP_PATS<"ATOMUMIN", "atomic_load_umin">; - -multiclass PSEUDO_ATOMIC_LOAD_BINOP6432<string mnemonic> { - let usesCustomInserter = 1, Defs = [EFLAGS, EAX, EDX], - mayLoad = 1, mayStore = 1, hasSideEffects = 0 in - def NAME#6432 : I<0, Pseudo, (outs GR32:$dst1, GR32:$dst2), - (ins i64mem:$ptr, GR32:$val1, GR32:$val2), - !strconcat(mnemonic, "6432 PSEUDO!"), []>; -} - -defm ATOMAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMAND">; -defm ATOMOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMOR">; -defm ATOMXOR : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMXOR">; -defm ATOMNAND : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMNAND">; -defm ATOMADD 
: PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMADD">; -defm ATOMSUB : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSUB">; -defm ATOMMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMAX">; -defm ATOMMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMMIN">; -defm ATOMUMAX : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMAX">; -defm ATOMUMIN : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMUMIN">; -defm ATOMSWAP : PSEUDO_ATOMIC_LOAD_BINOP6432<"#ATOMSWAP">; - -//===----------------------------------------------------------------------===// // Normal-Instructions-With-Lock-Prefix Pseudo Instructions //===----------------------------------------------------------------------===// @@ -1696,20 +1639,34 @@ def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2), (IMUL64rmi32 addr:$src1, i64immSExt32:$src2)>; // Increment reg. -def : Pat<(add GR8 :$src, 1), (INC8r GR8 :$src)>; -def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>, Requires<[Not64BitMode]>; -def : Pat<(add GR16:$src, 1), (INC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>, Requires<[Not64BitMode]>; -def : Pat<(add GR32:$src, 1), (INC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR64:$src, 1), (INC64r GR64:$src)>; +// Do not make INC if it is slow +def : Pat<(add GR8:$src, 1), + (INC8r GR8:$src)>, Requires<[NotSlowIncDec]>; +def : Pat<(add GR16:$src, 1), + (INC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>; +def : Pat<(add GR16:$src, 1), + (INC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>; +def : Pat<(add GR32:$src, 1), + (INC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>; +def : Pat<(add GR32:$src, 1), + (INC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>; +def : Pat<(add GR64:$src, 1), + (INC64r GR64:$src)>, Requires<[NotSlowIncDec]>; // Decrement reg. -def : Pat<(add GR8 :$src, -1), (DEC8r GR8 :$src)>; -def : Pat<(add GR16:$src, -1), (DEC16r GR16:$src)>, Requires<[Not64BitMode]>; -def : Pat<(add GR16:$src, -1), (DEC64_16r GR16:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR32:$src, -1), (DEC32r GR32:$src)>, Requires<[Not64BitMode]>; -def : Pat<(add GR32:$src, -1), (DEC64_32r GR32:$src)>, Requires<[In64BitMode]>; -def : Pat<(add GR64:$src, -1), (DEC64r GR64:$src)>; +// Do not make DEC if it is slow +def : Pat<(add GR8:$src, -1), + (DEC8r GR8:$src)>, Requires<[NotSlowIncDec]>; +def : Pat<(add GR16:$src, -1), + (DEC16r GR16:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>; +def : Pat<(add GR16:$src, -1), + (DEC64_16r GR16:$src)>, Requires<[NotSlowIncDec, In64BitMode]>; +def : Pat<(add GR32:$src, -1), + (DEC32r GR32:$src)>, Requires<[NotSlowIncDec, Not64BitMode]>; +def : Pat<(add GR32:$src, -1), + (DEC64_32r GR32:$src)>, Requires<[NotSlowIncDec, In64BitMode]>; +def : Pat<(add GR64:$src, -1), + (DEC64r GR64:$src)>, Requires<[NotSlowIncDec]>; // or reg/reg. 
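The NotSlowIncDec guards just added to the INC/DEC patterns above encode a per-subtarget policy worth spelling out. INC and DEC leave CF unmodified, so on some microarchitectures they force a partial-EFLAGS merge; such targets set the new slowIncDec() feature and fall back to the generic ADD/SUB-by-1 patterns. A hypothetical C++ reduction of that guard (the enum and helper names are illustrative, not LLVM API):

    // Mirrors the effect of Requires<[NotSlowIncDec]> on the patterns above:
    // when the subtarget flags INC/DEC as slow, the add/sub-with-immediate-1
    // forms win instead, even though they encode one byte longer.
    enum IncChoice { UseIncDec, UseAddSubImm1 };

    IncChoice selectIncrementStrategy(bool SubtargetSlowIncDec) {
      return SubtargetSlowIncDec ? UseAddSubImm1 : UseIncDec;
    }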
def : Pat<(or GR8 :$src1, GR8 :$src2), (OR8rr GR8 :$src1, GR8 :$src2)>; diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index 1582f43..6f0fa94 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -224,6 +224,10 @@ def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>; def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>; def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>; +def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisSameAs<2, 1>]>; +def X86Packss : SDNode<"X86ISD::PACKSS", SDTPack>; +def X86Packus : SDNode<"X86ISD::PACKUS", SDTPack>; + def X86Unpckl : SDNode<"X86ISD::UNPCKL", SDTShuff2Op>; def X86Unpckh : SDNode<"X86ISD::UNPCKH", SDTShuff2Op>; diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 6993577..0d3afc4 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -28,6 +28,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/LLVMContext.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -97,14 +98,11 @@ struct X86OpTblEntry { // Pin the vtable to this file. void X86InstrInfo::anchor() {} -X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) - : X86GenInstrInfo((tm.getSubtarget<X86Subtarget>().is64Bit() - ? X86::ADJCALLSTACKDOWN64 - : X86::ADJCALLSTACKDOWN32), - (tm.getSubtarget<X86Subtarget>().is64Bit() - ? X86::ADJCALLSTACKUP64 - : X86::ADJCALLSTACKUP32)), - TM(tm), RI(tm) { +X86InstrInfo::X86InstrInfo(X86Subtarget &STI) + : X86GenInstrInfo( + (STI.is64Bit() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32), + (STI.is64Bit() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)), + Subtarget(STI), RI(STI) { static const X86OpTblEntry OpTbl2Addr[] = { { X86::ADC32ri, X86::ADC32mi, 0 }, @@ -1472,7 +1470,7 @@ X86InstrInfo::isCoalescableExtInstr(const MachineInstr &MI, case X86::MOVSX32rr8: case X86::MOVZX32rr8: case X86::MOVSX64rr8: - if (!TM.getSubtarget<X86Subtarget>().is64Bit()) + if (!Subtarget.is64Bit()) // It's not always legal to reference the low 8-bit of the larger // register in 32-bit mode. return false; @@ -1950,7 +1948,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo(); unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass); unsigned Opc, leaInReg; - if (TM.getSubtarget<X86Subtarget>().is64Bit()) { + if (Subtarget.is64Bit()) { Opc = X86::LEA64_32r; leaInReg = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); } else { @@ -2006,7 +2004,7 @@ X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc, // just a single insert_subreg. addRegReg(MIB, leaInReg, true, leaInReg, false); } else { - if (TM.getSubtarget<X86Subtarget>().is64Bit()) + if (Subtarget.is64Bit()) leaInReg2 = RegInfo.createVirtualRegister(&X86::GR64_NOSPRegClass); else leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32_NOSPRegClass); @@ -2076,13 +2074,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, // we have better subtarget support, enable the 16-bit LEA generation here. // 16-bit LEA is also slow on Core2. 
bool DisableLEA16 = true; - bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit(); + bool is64Bit = Subtarget.is64Bit(); unsigned MIOpc = MI->getOpcode(); switch (MIOpc) { case X86::SHUFPSrri: { assert(MI->getNumOperands() == 4 && "Unknown shufps instruction!"); - if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return nullptr; + if (!Subtarget.hasSSE2()) return nullptr; unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); @@ -2094,7 +2092,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } case X86::SHUFPDrri: { assert(MI->getNumOperands() == 4 && "Unknown shufpd instruction!"); - if (!TM.getSubtarget<X86Subtarget>().hasSSE2()) return nullptr; + if (!Subtarget.hasSSE2()) return nullptr; unsigned B = MI->getOperand(1).getReg(); unsigned C = MI->getOperand(2).getReg(); @@ -2672,8 +2670,7 @@ static X86::CondCode getSwappedCondition(X86::CondCode CC) { /// getSETFromCond - Return a set opcode for the given condition and /// whether it has memory operand. -static unsigned getSETFromCond(X86::CondCode CC, - bool HasMemoryOperand) { +unsigned X86::getSETFromCond(CondCode CC, bool HasMemoryOperand) { static const uint16_t Opc[16][2] = { { X86::SETAr, X86::SETAm }, { X86::SETAEr, X86::SETAEm }, @@ -2693,14 +2690,14 @@ static unsigned getSETFromCond(X86::CondCode CC, { X86::SETSr, X86::SETSm } }; - assert(CC < 16 && "Can only handle standard cond codes"); + assert(CC <= LAST_VALID_COND && "Can only handle standard cond codes"); return Opc[CC][HasMemoryOperand ? 1 : 0]; } /// getCMovFromCond - Return a cmov opcode for the given condition, /// register size in bytes, and operand type. -static unsigned getCMovFromCond(X86::CondCode CC, unsigned RegBytes, - bool HasMemoryOperand) { +unsigned X86::getCMovFromCond(CondCode CC, unsigned RegBytes, + bool HasMemoryOperand) { static const uint16_t Opc[32][3] = { { X86::CMOVA16rr, X86::CMOVA32rr, X86::CMOVA64rr }, { X86::CMOVAE16rr, X86::CMOVAE32rr, X86::CMOVAE64rr }, @@ -2976,7 +2973,7 @@ canInsertSelect(const MachineBasicBlock &MBB, unsigned TrueReg, unsigned FalseReg, int &CondCycles, int &TrueCycles, int &FalseCycles) const { // Not all subtargets have cmov instructions. - if (!TM.getSubtarget<X86Subtarget>().hasCMov()) + if (!Subtarget.hasCMov()) return false; if (Cond.size() != 1) return false; @@ -3027,8 +3024,7 @@ static bool isHReg(unsigned Reg) { // Try and copy between VR128/VR64 and GR64 registers. static unsigned CopyToFromAsymmetricReg(unsigned DestReg, unsigned SrcReg, - const X86Subtarget& Subtarget) { - + const X86Subtarget &Subtarget) { // SrcReg(VR128) -> DestReg(GR64) // SrcReg(VR64) -> DestReg(GR64) @@ -3107,8 +3103,8 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { // First deal with the normal symmetric copies. - bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); - bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512(); + bool HasAVX = Subtarget.hasAVX(); + bool HasAVX512 = Subtarget.hasAVX512(); unsigned Opc = 0; if (X86::GR64RegClass.contains(DestReg, SrcReg)) Opc = X86::MOV64rr; @@ -3120,7 +3116,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. if ((isHReg(DestReg) || isHReg(SrcReg)) && - TM.getSubtarget<X86Subtarget>().is64Bit()) { + Subtarget.is64Bit()) { Opc = X86::MOV8rr_NOREX; // Both operands must be encodable without an REX prefix. 
assert(X86::GR8_NOREXRegClass.contains(SrcReg, DestReg) && @@ -3137,7 +3133,7 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, else if (X86::VR256RegClass.contains(DestReg, SrcReg)) Opc = X86::VMOVAPSYrr; if (!Opc) - Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, TM.getSubtarget<X86Subtarget>()); + Opc = CopyToFromAsymmetricReg(DestReg, SrcReg, Subtarget); if (Opc) { BuildMI(MBB, MI, DL, get(Opc), DestReg) @@ -3183,9 +3179,9 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, static unsigned getLoadStoreRegOpcode(unsigned Reg, const TargetRegisterClass *RC, bool isStackAligned, - const TargetMachine &TM, + const X86Subtarget &STI, bool load) { - if (TM.getSubtarget<X86Subtarget>().hasAVX512()) { + if (STI.hasAVX512()) { if (X86::VK8RegClass.hasSubClassEq(RC) || X86::VK16RegClass.hasSubClassEq(RC)) return load ? X86::KMOVWkm : X86::KMOVWmk; @@ -3197,13 +3193,13 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, return load ? X86::VMOVUPSZrm : X86::VMOVUPSZmr; } - bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); + bool HasAVX = STI.hasAVX(); switch (RC->getSize()) { default: llvm_unreachable("Unknown spill size"); case 1: assert(X86::GR8RegClass.hasSubClassEq(RC) && "Unknown 1-byte regclass"); - if (TM.getSubtarget<X86Subtarget>().is64Bit()) + if (STI.is64Bit()) // Copying to or from a physical H register on x86-64 requires a NOREX // move. Otherwise use a normal move. if (isHReg(Reg) || X86::GR8_ABCD_HRegClass.hasSubClassEq(RC)) @@ -3270,16 +3266,16 @@ static unsigned getLoadStoreRegOpcode(unsigned Reg, static unsigned getStoreRegOpcode(unsigned SrcReg, const TargetRegisterClass *RC, bool isStackAligned, - TargetMachine &TM) { - return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, TM, false); + const X86Subtarget &STI) { + return getLoadStoreRegOpcode(SrcReg, RC, isStackAligned, STI, false); } static unsigned getLoadRegOpcode(unsigned DestReg, const TargetRegisterClass *RC, bool isStackAligned, - const TargetMachine &TM) { - return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, TM, true); + const X86Subtarget &STI) { + return getLoadStoreRegOpcode(DestReg, RC, isStackAligned, STI, true); } void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, @@ -3291,9 +3287,10 @@ void X86InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, assert(MF.getFrameInfo()->getObjectSize(FrameIdx) >= RC->getSize() && "Stack slot too small for store"); unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); - bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || - RI.canRealignStack(MF); - unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); + bool isAligned = + (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) || + RI.canRealignStack(MF); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) .addReg(SrcReg, getKillRegState(isKill)); @@ -3309,7 +3306,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg, unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; - unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM); + unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, Subtarget); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc)); for (unsigned i = 0, e = Addr.size(); i != e; ++i) @@ -3327,9 +3324,10 @@ void X86InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const 
TargetRegisterInfo *TRI) const { const MachineFunction &MF = *MBB.getParent(); unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); - bool isAligned = (TM.getFrameLowering()->getStackAlignment() >= Alignment) || - RI.canRealignStack(MF); - unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); + bool isAligned = + (MF.getTarget().getFrameLowering()->getStackAlignment() >= Alignment) || + RI.canRealignStack(MF); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); DebugLoc DL = MBB.findDebugLoc(MI); addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); } @@ -3343,7 +3341,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg, unsigned Alignment = std::max<uint32_t>(RC->getSize(), 16); bool isAligned = MMOBegin != MMOEnd && (*MMOBegin)->getAlignment() >= Alignment; - unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM); + unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, Subtarget); DebugLoc DL; MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg); for (unsigned i = 0, e = Addr.size(); i != e; ++i) @@ -3741,7 +3739,7 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, continue; // EFLAGS is used by this instruction. - X86::CondCode OldCC; + X86::CondCode OldCC = X86::COND_INVALID; bool OpcIsSET = false; if (IsCmpZero || IsSwapped) { // We decode the condition code from opcode. @@ -3964,7 +3962,7 @@ static bool Expand2AddrUndef(MachineInstrBuilder &MIB, } bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { - bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); + bool HasAVX = Subtarget.hasAVX(); MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); switch (MI->getOpcode()) { case X86::MOV32r0: @@ -4075,7 +4073,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, unsigned Size, unsigned Align) const { const DenseMap<unsigned, std::pair<unsigned,unsigned> > *OpcodeTablePtr = nullptr; - bool isCallRegIndirect = TM.getSubtarget<X86Subtarget>().callRegIndirect(); + bool isCallRegIndirect = Subtarget.callRegIndirect(); bool isTwoAddrFold = false; // Atom favors register form of call. So, we do not fold loads into calls @@ -4316,7 +4314,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, if (X86::VR128RegClass.contains(Reg)) { // These instructions are all floating point domain, so xorps is the best // choice. - bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); + bool HasAVX = Subtarget.hasAVX(); unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr; BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg) .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); @@ -4352,7 +4350,8 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, // If the function stack isn't realigned we don't want to fold instructions // that need increased alignment. if (!RI.needsStackRealignment(MF)) - Alignment = std::min(Alignment, TM.getFrameLowering()->getStackAlignment()); + Alignment = std::min( + Alignment, MF.getTarget().getFrameLowering()->getStackAlignment()); if (Ops.size() == 2 && Ops[0] == 0 && Ops[1] == 1) { unsigned NewOpc = 0; unsigned RCSize = 0; @@ -4453,14 +4452,14 @@ MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, // Create a constant-pool entry and operands to load from it. // Medium and large mode can't fold loads this way. 
- if (TM.getCodeModel() != CodeModel::Small && - TM.getCodeModel() != CodeModel::Kernel) + if (MF.getTarget().getCodeModel() != CodeModel::Small && + MF.getTarget().getCodeModel() != CodeModel::Kernel) return nullptr; // x86-32 PIC requires a PIC base register for constant pools. unsigned PICBase = 0; - if (TM.getRelocationModel() == Reloc::PIC_) { - if (TM.getSubtarget<X86Subtarget>().is64Bit()) + if (MF.getTarget().getRelocationModel() == Reloc::PIC_) { + if (Subtarget.is64Bit()) PICBase = X86::RIP; else // FIXME: PICBase = getGlobalBaseReg(&MF); @@ -4600,7 +4599,7 @@ bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, const TargetRegisterClass *RC = getRegClass(MCID, Index, &RI, MF); if (!MI->hasOneMemOperand() && RC == &X86::VR128RegClass && - !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + !Subtarget.isUnalignedMemAccessFast()) // Without memoperands, loadRegFromAddr and storeRegToStackSlot will // conservatively assume the address is unaligned. That's bad for // performance. @@ -4748,13 +4747,13 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, cast<MachineSDNode>(N)->memoperands_end()); if (!(*MMOs.first) && RC == &X86::VR128RegClass && - !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + !Subtarget.isUnalignedMemAccessFast()) // Do not introduce a slow unaligned load. return false; unsigned Alignment = RC->getSize() == 32 ? 32 : 16; bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; - Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl, + Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, Subtarget), dl, VT, MVT::Other, AddrOps); NewNodes.push_back(Load); @@ -4791,15 +4790,15 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, cast<MachineSDNode>(N)->memoperands_end()); if (!(*MMOs.first) && RC == &X86::VR128RegClass && - !TM.getSubtarget<X86Subtarget>().isUnalignedMemAccessFast()) + !Subtarget.isUnalignedMemAccessFast()) // Do not introduce a slow unaligned store. return false; unsigned Alignment = RC->getSize() == 32 ? 32 : 16; bool isAligned = (*MMOs.first) && (*MMOs.first)->getAlignment() >= Alignment; - SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC, - isAligned, TM), - dl, MVT::Other, AddrOps); + SDNode *Store = + DAG.getMachineNode(getStoreRegOpcode(0, DstRC, isAligned, Subtarget), + dl, MVT::Other, AddrOps); NewNodes.push_back(Store); // Preserve memory reference information. @@ -4960,7 +4959,7 @@ bool X86InstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, default: // XMM registers. In 64-bit mode we can be a bit more aggressive since we // have 16 of them to play with. - if (TM.getSubtargetImpl()->is64Bit()) { + if (Subtarget.is64Bit()) { if (NumLoads >= 3) return false; } else if (NumLoads) { @@ -4986,7 +4985,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First, // Check if this processor supports macro-fusion. Since this is a minor // heuristic, we haven't specifically reserved a feature. hasAVX is a decent // proxy for SandyBridge+. 
- if (!TM.getSubtarget<X86Subtarget>().hasAVX()) + if (!Subtarget.hasAVX()) return false; enum { @@ -5038,6 +5037,7 @@ bool X86InstrInfo::shouldScheduleAdjacent(MachineInstr* First, case X86::TEST16rm: case X86::TEST32rm: case X86::TEST64rm: + case X86::TEST8ri_NOREX: case X86::AND16i16: case X86::AND16ri: case X86::AND16ri8: @@ -5168,7 +5168,7 @@ isSafeToMoveRegClassDefs(const TargetRegisterClass *RC) const { /// TODO: Eliminate this and move the code to X86MachineFunctionInfo. /// unsigned X86InstrInfo::getGlobalBaseReg(MachineFunction *MF) const { - assert(!TM.getSubtarget<X86Subtarget>().is64Bit() && + assert(!Subtarget.is64Bit() && "X86-64 PIC uses RIP relative addressing"); X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); @@ -5271,7 +5271,7 @@ static const uint16_t *lookupAVX2(unsigned opcode, unsigned domain) { std::pair<uint16_t, uint16_t> X86InstrInfo::getExecutionDomain(const MachineInstr *MI) const { uint16_t domain = (MI->getDesc().TSFlags >> X86II::SSEDomainShift) & 3; - bool hasAVX2 = TM.getSubtarget<X86Subtarget>().hasAVX2(); + bool hasAVX2 = Subtarget.hasAVX2(); uint16_t validDomains = 0; if (domain && lookup(MI->getOpcode(), domain)) validDomains = 0xe; @@ -5286,7 +5286,7 @@ void X86InstrInfo::setExecutionDomain(MachineInstr *MI, unsigned Domain) const { assert(dom && "Not an SSE instruction"); const uint16_t *table = lookup(MI->getOpcode(), dom); if (!table) { // try the other table - assert((TM.getSubtarget<X86Subtarget>().hasAVX2() || Domain < 3) && + assert((Subtarget.hasAVX2() || Domain < 3) && "256-bit vector operations only available in AVX2"); table = lookupAVX2(MI->getOpcode(), dom); } @@ -5299,6 +5299,16 @@ void X86InstrInfo::getNoopForMachoTarget(MCInst &NopInst) const { NopInst.setOpcode(X86::NOOP); } +void X86InstrInfo::getUnconditionalBranch( + MCInst &Branch, const MCSymbolRefExpr *BranchTarget) const { + Branch.setOpcode(X86::JMP_4); + Branch.addOperand(MCOperand::CreateExpr(BranchTarget)); +} + +void X86InstrInfo::getTrap(MCInst &MI) const { + MI.setOpcode(X86::TRAP); +} + bool X86InstrInfo::isHighLatencyDef(int opc) const { switch (opc) { default: return false; diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 5f34915..c177e3a 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -24,7 +24,7 @@ namespace llvm { class X86RegisterInfo; - class X86TargetMachine; + class X86Subtarget; namespace X86 { // X86 specific condition code. These correspond to X86_*_COND in @@ -46,6 +46,7 @@ namespace X86 { COND_O = 13, COND_P = 14, COND_S = 15, + LAST_VALID_COND = COND_S, // Artificial condition codes. These are used by AnalyzeBranch // to indicate a block terminated with two conditional branches to @@ -61,12 +62,21 @@ namespace X86 { // Turn condition code into conditional branch opcode. unsigned GetCondBranchFromCond(CondCode CC); + /// \brief Return a set opcode for the given condition and whether it has + /// a memory operand. + unsigned getSETFromCond(CondCode CC, bool HasMemoryOperand = false); + + /// \brief Return a cmov opcode for the given condition, register size in + /// bytes, and operand type. + unsigned getCMovFromCond(CondCode CC, unsigned RegBytes, + bool HasMemoryOperand = false); + // Turn CMov opcode into condition code. CondCode getCondFromCMovOpc(unsigned Opc); /// GetOppositeBranchCondition - Return the inverse of the specified cond, /// e.g. turning COND_E to COND_NE. 
- CondCode GetOppositeBranchCondition(X86::CondCode CC); + CondCode GetOppositeBranchCondition(CondCode CC); } // end namespace X86; @@ -129,7 +139,7 @@ inline static bool isMem(const MachineInstr *MI, unsigned Op) { } class X86InstrInfo final : public X86GenInstrInfo { - X86TargetMachine &TM; + X86Subtarget &Subtarget; const X86RegisterInfo RI; /// RegOp2MemOpTable3Addr, RegOp2MemOpTable0, RegOp2MemOpTable1, @@ -156,7 +166,7 @@ class X86InstrInfo final : public X86GenInstrInfo { virtual void anchor(); public: - explicit X86InstrInfo(X86TargetMachine &tm); + explicit X86InstrInfo(X86Subtarget &STI); /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As /// such, whenever a client has an instance of instruction info, it should @@ -396,6 +406,12 @@ public: const SmallVectorImpl<MachineOperand> &MOs, unsigned Size, unsigned Alignment) const; + void + getUnconditionalBranch(MCInst &Branch, + const MCSymbolRefExpr *BranchTarget) const override; + + void getTrap(MCInst &MI) const override; + bool isHighLatencyDef(int opc) const override; bool hasHighOperandLatency(const InstrItineraryData *ItinData, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0d97669..e7b532c 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -155,27 +155,6 @@ def X86cas16 : SDNode<"X86ISD::LCMPXCHG16_DAG", SDTX86caspair, [SDNPHasChain, SDNPInGlue, SDNPOutGlue, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -def X86AtomAdd64 : SDNode<"X86ISD::ATOMADD64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomSub64 : SDNode<"X86ISD::ATOMSUB64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomOr64 : SDNode<"X86ISD::ATOMOR64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomXor64 : SDNode<"X86ISD::ATOMXOR64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomAnd64 : SDNode<"X86ISD::ATOMAND64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomNand64 : SDNode<"X86ISD::ATOMNAND64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; -def X86AtomSwap64 : SDNode<"X86ISD::ATOMSWAP64_DAG", SDTX86atomicBinary, - [SDNPHasChain, SDNPMayStore, - SDNPMayLoad, SDNPMemOperand]>; def X86retflag : SDNode<"X86ISD::RET_FLAG", SDTX86Ret, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; @@ -208,6 +187,8 @@ def X86rdtsc : SDNode<"X86ISD::RDTSC_DAG", SDTX86Void, [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; def X86rdtscp : SDNode<"X86ISD::RDTSCP_DAG", SDTX86Void, [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; +def X86rdpmc : SDNode<"X86ISD::RDPMC_DAG", SDTX86Void, + [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; def X86Wrapper : SDNode<"X86ISD::Wrapper", SDTX86Wrapper>; def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>; @@ -795,6 +776,7 @@ def OptForSpeed : Predicate<"!OptForSize">; def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">; def CallImmAddr : Predicate<"Subtarget->IsLegalToCallImmediateAddr(TM)">; def FavorMemIndirectCall : Predicate<"!Subtarget->callRegIndirect()">; +def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">; //===----------------------------------------------------------------------===// // X86 Instruction Format Definitions. 
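One practical effect of the X86InstrInfo.h hunk above is that getSETFromCond and getCMovFromCond, previously file-static helpers in X86InstrInfo.cpp, are now callable from the rest of the backend through the X86 namespace. A sketch of the intended call pattern, assuming the usual machine-IR scaffolding is in scope (emitCondOps and its parameters are illustrative, not part of the commit):

    #include "X86InstrInfo.h"
    using namespace llvm;

    static void emitCondOps(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator I, DebugLoc DL,
                            const X86InstrInfo *TII, unsigned ByteReg,
                            unsigned DstReg, unsigned TrueReg,
                            unsigned FalseReg) {
      // SETEr: materialize ZF into a 0/1 byte register.
      BuildMI(MBB, I, DL, TII->get(X86::getSETFromCond(X86::COND_E)), ByteReg);
      // CMOVB32rr: 32-bit conditional move on CF; the first source is tied
      // to the destination, so DstReg = CF ? TrueReg : FalseReg.
      BuildMI(MBB, I, DL,
              TII->get(X86::getCMovFromCond(X86::COND_B, /*RegBytes=*/4)),
              DstReg)
          .addReg(FalseReg)
          .addReg(TrueReg);
    }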
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 1eb0485..f9a5ae1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4337,20 +4337,6 @@ defm PCMPGTD : PDI_binop_all<0x66, "pcmpgtd", X86pcmpgt, v4i32, v8i32, SSE_INTALU_ITINS_P, 0>; //===---------------------------------------------------------------------===// -// SSE2 - Packed Integer Pack Instructions -//===---------------------------------------------------------------------===// - -defm PACKSSWB : PDI_binop_all_int<0x63, "packsswb", int_x86_sse2_packsswb_128, - int_x86_avx2_packsswb, - SSE_INTALU_ITINS_SHUFF_P, 0>; -defm PACKSSDW : PDI_binop_all_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, - int_x86_avx2_packssdw, - SSE_INTALU_ITINS_SHUFF_P, 0>; -defm PACKUSWB : PDI_binop_all_int<0x67, "packuswb", int_x86_sse2_packuswb_128, - int_x86_avx2_packuswb, - SSE_INTALU_ITINS_SHUFF_P, 0>; - -//===---------------------------------------------------------------------===// // SSE2 - Packed Integer Shuffle Instructions //===---------------------------------------------------------------------===// @@ -4432,6 +4418,136 @@ let Predicates = [UseSSE2] in { } //===---------------------------------------------------------------------===// +// Packed Integer Pack Instructions (SSE & AVX) +//===---------------------------------------------------------------------===// + +let ExeDomain = SSEPackedInt in { +multiclass sse2_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag, + bit Is2Addr = 1> { + def rr : PDI<opc, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>, + Sched<[WriteShuffle]>; + def rm : PDI<opc, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (OutVT (OpNode VR128:$src1, + (bc_frag (memopv2i64 addr:$src2)))))]>, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +multiclass sse2_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> { + def Yrr : PDI<opc, MRMSrcReg, + (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, + (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>, + Sched<[WriteShuffle]>; + def Yrm : PDI<opc, MRMSrcMem, + (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, + (OutVT (OpNode VR256:$src1, + (bc_frag (memopv4i64 addr:$src2)))))]>, + Sched<[WriteShuffleLd, ReadAfterLd]>; +} + +multiclass sse4_pack<bits<8> opc, string OpcodeStr, ValueType OutVT, + ValueType ArgVT, SDNode OpNode, PatFrag bc_frag, + bit Is2Addr = 1> { + def rr : SS48I<opc, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), + [(set VR128:$dst, + (OutVT (OpNode (ArgVT VR128:$src1), VR128:$src2)))]>, + Sched<[WriteShuffle]>; + def rm : SS48I<opc, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + !if(Is2Addr, + !strconcat(OpcodeStr, "\t{$src2, 
$dst|$dst, $src2}"),
+                    !strconcat(OpcodeStr,
+                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}")),
+                  [(set VR128:$dst,
+                        (OutVT (OpNode VR128:$src1,
+                                       (bc_frag (memopv2i64 addr:$src2)))))]>,
+                  Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+multiclass sse4_pack_y<bits<8> opc, string OpcodeStr, ValueType OutVT,
+                       ValueType ArgVT, SDNode OpNode, PatFrag bc_frag> {
+  def Yrr : SS48I<opc, MRMSrcReg,
+                  (outs VR256:$dst), (ins VR256:$src1, VR256:$src2),
+                  !strconcat(OpcodeStr,
+                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  [(set VR256:$dst,
+                        (OutVT (OpNode (ArgVT VR256:$src1), VR256:$src2)))]>,
+                  Sched<[WriteShuffle]>;
+  def Yrm : SS48I<opc, MRMSrcMem,
+                  (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2),
+                  !strconcat(OpcodeStr,
+                             "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
+                  [(set VR256:$dst,
+                        (OutVT (OpNode VR256:$src1,
+                                       (bc_frag (memopv4i64 addr:$src2)))))]>,
+                  Sched<[WriteShuffleLd, ReadAfterLd]>;
+}
+
+let Predicates = [HasAVX] in {
+  defm VPACKSSWB : sse2_pack<0x63, "vpacksswb", v16i8, v8i16, X86Packss,
+                             bc_v8i16, 0>, VEX_4V;
+  defm VPACKSSDW : sse2_pack<0x6B, "vpackssdw", v8i16, v4i32, X86Packss,
+                             bc_v4i32, 0>, VEX_4V;
+
+  defm VPACKUSWB : sse2_pack<0x67, "vpackuswb", v16i8, v8i16, X86Packus,
+                             bc_v8i16, 0>, VEX_4V;
+  defm VPACKUSDW : sse4_pack<0x2B, "vpackusdw", v8i16, v4i32, X86Packus,
+                             bc_v4i32, 0>, VEX_4V;
+}
+
+let Predicates = [HasAVX2] in {
+  defm VPACKSSWB : sse2_pack_y<0x63, "vpacksswb", v32i8, v16i16, X86Packss,
+                               bc_v16i16>, VEX_4V, VEX_L;
+  defm VPACKSSDW : sse2_pack_y<0x6B, "vpackssdw", v16i16, v8i32, X86Packss,
+                               bc_v8i32>, VEX_4V, VEX_L;
+
+  defm VPACKUSWB : sse2_pack_y<0x67, "vpackuswb", v32i8, v16i16, X86Packus,
+                               bc_v16i16>, VEX_4V, VEX_L;
+  defm VPACKUSDW : sse4_pack_y<0x2B, "vpackusdw", v16i16, v8i32, X86Packus,
+                               bc_v8i32>, VEX_4V, VEX_L;
+}
+
+let Constraints = "$src1 = $dst" in {
+  defm PACKSSWB : sse2_pack<0x63, "packsswb", v16i8, v8i16, X86Packss,
+                            bc_v8i16>;
+  defm PACKSSDW : sse2_pack<0x6B, "packssdw", v8i16, v4i32, X86Packss,
+                            bc_v4i32>;
+
+  defm PACKUSWB : sse2_pack<0x67, "packuswb", v16i8, v8i16, X86Packus,
+                            bc_v8i16>;
+
+  let Predicates = [HasSSE41] in
+  defm PACKUSDW : sse4_pack<0x2B, "packusdw", v8i16, v4i32, X86Packus,
+                            bc_v4i32>;
+}
+} // ExeDomain = SSEPackedInt
+
+//===---------------------------------------------------------------------===//
 // SSE2 - Packed Integer Unpack Instructions
 //===---------------------------------------------------------------------===//
@@ -5239,6 +5355,60 @@ let Constraints = "$src1 = $dst", Predicates = [UseSSE3] in {
                                  f128mem, SSE_ALU_F64P>, PD;
 }
+// Patterns used to select 'addsub' instructions.
+let Predicates = [HasAVX] in {
+  // Constant 170 corresponds to the binary mask '10101010'.
+  // When used as a blend mask, it allows selecting eight elements from two
+  // input vectors as follows:
+  // - Even-numbered values in the destination are copied from
+  //   the corresponding elements in the first input vector;
+  // - Odd-numbered values in the destination are copied from
+  //   the corresponding elements in the second input vector.
+
+  def : Pat<(v8f32 (X86Blendi (v8f32 (fsub VR256:$lhs, VR256:$rhs)),
+                    (v8f32 (fadd VR256:$lhs, VR256:$rhs)), (i32 170))),
+            (VADDSUBPSYrr VR256:$lhs, VR256:$rhs)>;
+
+  // Constant 10 corresponds to the binary mask '1010'.
+  // In the two patterns below, constant 10 is used as a blend mask to select
+  // - the 1st and 3rd element from the first input vector (the 'fsub' node);
+  // - the 2nd and 4th element from the second input vector (the 'fadd' node).
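A self-contained scalar model of the X86Blendi immediates these comments describe may help before the patterns that follow: bit i of the mask picks element i from the second operand, so mask '1010' stitches an fsub result and an fadd result into exactly what VADDSUBPD computes in a single instruction. Illustrative C++ only; blendi here is a stand-in for the BLENDP* semantics, not an LLVM API:

    #include <array>
    #include <cassert>
    #include <cstddef>

    // BLENDPS/BLENDPD immediate convention: bit I of Imm selects element I
    // from the second operand B, otherwise from the first operand A.
    template <typename T, std::size_t N>
    std::array<T, N> blendi(const std::array<T, N> &A,
                            const std::array<T, N> &B, unsigned Imm) {
      std::array<T, N> R{};
      for (std::size_t I = 0; I != N; ++I)
        R[I] = ((Imm >> I) & 1) ? B[I] : A[I];
      return R;
    }

    int main() {
      std::array<double, 4> Lhs{1, 2, 3, 4}, Rhs{10, 20, 30, 40}, Sub{}, Add{};
      for (std::size_t I = 0; I != 4; ++I) {
        Sub[I] = Lhs[I] - Rhs[I]; // the 'fsub' node
        Add[I] = Lhs[I] + Rhs[I]; // the 'fadd' node
      }
      // Mask 10 == 0b1010: lanes 0 and 2 come from the fsub result, lanes 1
      // and 3 from the fadd result, the lane pattern ADDSUB implements.
      std::array<double, 4> Blend = blendi(Sub, Add, 10);
      for (std::size_t I = 0; I != 4; ++I)
        assert(Blend[I] == ((I % 2) ? Add[I] : Sub[I]));
      return 0;
    }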
+
+ def : Pat<(v4f64 (X86Blendi (v4f64 (fsub VR256:$lhs, VR256:$rhs)),
+ (v4f64 (fadd VR256:$lhs, VR256:$rhs)), (i32 10))),
+ (VADDSUBPDYrr VR256:$lhs, VR256:$rhs)>;
+ def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (VADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
+ (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ (VADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+}
+
+let Predicates = [UseSSE3] in {
+ // Constant 10 corresponds to the binary mask '1010'.
+ // In the pattern below, it is used as a blend mask to select:
+ // - the 1st and 3rd element from the first input vector (the fsub node);
+ // - the 2nd and 4th element from the second input vector (the fadd node).
+
+ def : Pat<(v4f32 (X86Blendi (v4f32 (fsub VR128:$lhs, VR128:$rhs)),
+ (v4f32 (fadd VR128:$lhs, VR128:$rhs)), (i32 10))),
+ (ADDSUBPSrr VR128:$lhs, VR128:$rhs)>;
+
+ def : Pat<(v2f64 (X86Blendi (v2f64 (fsub VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fadd VR128:$lhs, VR128:$rhs)), (i32 2))),
+ (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+ def : Pat<(v2f64 (X86Movsd (v2f64 (fadd VR128:$lhs, VR128:$rhs)),
+ (v2f64 (fsub VR128:$lhs, VR128:$rhs)))),
+ (ADDSUBPDrr VR128:$lhs, VR128:$rhs)>;
+}
+
//===---------------------------------------------------------------------===//
// SSE3 Instructions
//===---------------------------------------------------------------------===//
@@ -7053,8 +7223,6 @@ multiclass SS48I_binop_rm2<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [HasAVX] in {
let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw,
- 0, DEFAULT_ITINS_SHUFFLESCHED>, VEX_4V;
defm VPMINSB : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V;
@@ -7086,9 +7254,6 @@ let Predicates = [HasAVX] in {
let Predicates = [HasAVX2] in {
let isCommutable = 0 in
- defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw",
- int_x86_avx2_packusdw, WriteShuffle>,
- VEX_4V, VEX_L;
defm VPMINSBY : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,
VEX_4V, VEX_L;
@@ -7120,8 +7285,6 @@ let Predicates = [HasAVX2] in {
let Constraints = "$src1 = $dst" in {
let isCommutable = 0 in
- defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw,
- 1, DEFAULT_ITINS_SHUFFLESCHED>;
defm PMINSB : SS48I_binop_rm<0x38, "pminsb", X86smin, v16i8, VR128,
memopv2i64, i128mem, 1, SSE_INTALU_ITINS_P>;
defm PMINSD : SS48I_binop_rm<0x39, "pminsd", X86smin, v4i32, VR128,
@@ -7969,6 +8132,16 @@ class avx_broadcast<bits<8> opc, string OpcodeStr, RegisterClass RC,
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (Int addr:$src))]>, Sched<[Sched]>, VEX;
+class avx_broadcast_no_int<bits<8> opc, string OpcodeStr, RegisterClass RC,
+ X86MemOperand x86memop, ValueType VT,
+ PatFrag ld_frag, SchedWrite Sched> :
+ AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
+ !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
+ [(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
+ Sched<[Sched]>, VEX {
+ let mayLoad = 1;
+}
+
// AVX2 adds register forms
class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC, Intrinsic Int, SchedWrite Sched> : @@ -7977,16 +8150,15 @@ class avx2_broadcast_reg<bits<8> opc, string OpcodeStr, RegisterClass RC, [(set RC:$dst, (Int VR128:$src))]>, Sched<[Sched]>, VEX; let ExeDomain = SSEPackedSingle in { - def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, - int_x86_avx_vbroadcast_ss, WriteLoad>; - def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, - int_x86_avx_vbroadcast_ss_256, - WriteFShuffleLd>, VEX_L; + def VBROADCASTSSrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR128, + f32mem, v4f32, loadf32, WriteLoad>; + def VBROADCASTSSYrm : avx_broadcast_no_int<0x18, "vbroadcastss", VR256, + f32mem, v8f32, loadf32, + WriteFShuffleLd>, VEX_L; } let ExeDomain = SSEPackedDouble in -def VBROADCASTSDYrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, - int_x86_avx_vbroadcast_sd_256, - WriteFShuffleLd>, VEX_L; +def VBROADCASTSDYrm : avx_broadcast_no_int<0x19, "vbroadcastsd", VR256, f64mem, + v4f64, loadf64, WriteFShuffleLd>, VEX_L; def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, int_x86_avx_vbroadcastf128_pd_256, WriteFShuffleLd>, VEX_L; @@ -8366,6 +8538,21 @@ let Predicates = [HasF16C] in { (VCVTPH2PSrm addr:$src)>; } +// Patterns for matching conversions from float to half-float and vice versa. +let Predicates = [HasF16C] in { + def : Pat<(f32_to_f16 FR32:$src), + (i16 (EXTRACT_SUBREG (VMOVPDI2DIrr (VCVTPS2PHrr + (COPY_TO_REGCLASS FR32:$src, VR128), 0)), sub_16bit))>; + + def : Pat<(f16_to_f32 GR16:$src), + (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr + (COPY_TO_REGCLASS (MOVSX32rr16 GR16:$src), VR128)), FR32)) >; + + def : Pat<(f16_to_f32 (i16 (f32_to_f16 FR32:$src))), + (f32 (COPY_TO_REGCLASS (VCVTPH2PSrr + (VCVTPS2PHrr (COPY_TO_REGCLASS FR32:$src, VR128), 0)), FR32)) >; +} + //===----------------------------------------------------------------------===// // AVX2 Instructions //===----------------------------------------------------------------------===// @@ -8543,13 +8730,6 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), } let Predicates = [HasAVX] in { -def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSSYrm addr:$src)>; -def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), - (VBROADCASTSDYrm addr:$src)>; -def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSSrm addr:$src)>; - // Provide fallback in case the load node that is used in the patterns above // is used by additional users, which prevents the pattern selection. 
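// For instance, when the scalar load feeding the broadcast also has a
// separate scalar use, the load node ends up with more than one user, so
// the load-folding patterns attached to avx_broadcast_no_int above cannot
// match; the register-form fallback patterns below broadcast from the
// value after it has been loaded instead.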
let AddedComplexity = 20 in { diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index b5595cb..5402780 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -439,7 +439,10 @@ def LLDT16m : I<0x00, MRM2m, (outs), (ins i16mem:$src), let SchedRW = [WriteSystem] in { def WRMSR : I<0x30, RawFrm, (outs), (ins), "wrmsr", [], IIC_WRMSR>, TB; def RDMSR : I<0x32, RawFrm, (outs), (ins), "rdmsr", [], IIC_RDMSR>, TB; -def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [], IIC_RDPMC>, TB; + +let Defs = [RAX, RDX], Uses = [ECX] in + def RDPMC : I<0x33, RawFrm, (outs), (ins), "rdpmc", [(X86rdpmc)], IIC_RDPMC>, + TB; def SMSW16r : I<0x01, MRM4r, (outs GR16:$dst), (ins), "smsw{w}\t$dst", [], IIC_SMSW>, OpSize16, TB; diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp index e969ef2..a082c4f 100644 --- a/lib/Target/X86/X86JITInfo.cpp +++ b/lib/Target/X86/X86JITInfo.cpp @@ -432,7 +432,7 @@ X86JITInfo::getLazyResolverFunction(JITCompilerFn F) { // SSE Callback should be called for SSE-enabled LLVM. return X86CompilationCallback_SSE; #else - if (Subtarget->hasSSE1()) + if (useSSE) return X86CompilationCallback_SSE; #endif #endif @@ -440,8 +440,8 @@ X86JITInfo::getLazyResolverFunction(JITCompilerFn F) { return X86CompilationCallback; } -X86JITInfo::X86JITInfo(X86TargetMachine &tm) : TM(tm) { - Subtarget = &TM.getSubtarget<X86Subtarget>(); +X86JITInfo::X86JITInfo(bool UseSSE) { + useSSE = UseSSE; useGOT = 0; TLSOffset = nullptr; } diff --git a/lib/Target/X86/X86JITInfo.h b/lib/Target/X86/X86JITInfo.h index 4d279de..564343f 100644 --- a/lib/Target/X86/X86JITInfo.h +++ b/lib/Target/X86/X86JITInfo.h @@ -19,16 +19,14 @@ #include "llvm/Target/TargetJITInfo.h" namespace llvm { - class X86TargetMachine; class X86Subtarget; class X86JITInfo : public TargetJITInfo { - X86TargetMachine &TM; - const X86Subtarget *Subtarget; uintptr_t PICBase; - char* TLSOffset; + char *TLSOffset; + bool useSSE; public: - explicit X86JITInfo(X86TargetMachine &tm); + explicit X86JITInfo(bool UseSSE); /// replaceMachineCodeForFunction - Make it so that calling the function /// whose machine code is at OLD turns into a call to NEW, perhaps by diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 0190080..2bd70a9 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "X86AsmPrinter.h" +#include "X86RegisterInfo.h" #include "InstPrinter/X86ATTInstPrinter.h" #include "MCTargetDesc/X86BaseInfo.h" #include "llvm/ADT/SmallString.h" @@ -779,6 +780,9 @@ static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM, void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { X86MCInstLower MCInstLowering(*MF, *this); + const X86RegisterInfo *RI = + static_cast<const X86RegisterInfo *>(TM.getRegisterInfo()); + switch (MI->getOpcode()) { case TargetOpcode::DBG_VALUE: llvm_unreachable("Should be handled target independently"); @@ -883,6 +887,37 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { .addReg(X86::R10) .addReg(X86::RAX)); return; + + case X86::SEH_PushReg: + OutStreamer.EmitWinCFIPushReg(RI->getSEHRegNum(MI->getOperand(0).getImm())); + return; + + case X86::SEH_SaveReg: + OutStreamer.EmitWinCFISaveReg(RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + return; + + case X86::SEH_SaveXMM: + 
OutStreamer.EmitWinCFISaveXMM(RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + return; + + case X86::SEH_StackAlloc: + OutStreamer.EmitWinCFIAllocStack(MI->getOperand(0).getImm()); + return; + + case X86::SEH_SetFrame: + OutStreamer.EmitWinCFISetFrame(RI->getSEHRegNum(MI->getOperand(0).getImm()), + MI->getOperand(1).getImm()); + return; + + case X86::SEH_PushFrame: + OutStreamer.EmitWinCFIPushFrame(MI->getOperand(0).getImm()); + return; + + case X86::SEH_EndPrologue: + OutStreamer.EmitWinCFIEndProlog(); + return; } MCInst TmpInst; diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index a83e1e4..e8a7e84 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -53,20 +53,18 @@ static cl::opt<bool> EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true), cl::desc("Enable use of a base pointer for complex stack frames")); -X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm) - : X86GenRegisterInfo((tm.getSubtarget<X86Subtarget>().is64Bit() - ? X86::RIP : X86::EIP), - X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), false), - X86_MC::getDwarfRegFlavour(tm.getTargetTriple(), true), - (tm.getSubtarget<X86Subtarget>().is64Bit() - ? X86::RIP : X86::EIP)), - TM(tm) { +X86RegisterInfo::X86RegisterInfo(const X86Subtarget &STI) + : X86GenRegisterInfo( + (STI.is64Bit() ? X86::RIP : X86::EIP), + X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), false), + X86_MC::getDwarfRegFlavour(STI.getTargetTriple(), true), + (STI.is64Bit() ? X86::RIP : X86::EIP)), + Subtarget(STI) { X86_MC::InitLLVM2SEHRegisterMapping(this); // Cache some information. - const X86Subtarget *Subtarget = &TM.getSubtarget<X86Subtarget>(); - Is64Bit = Subtarget->is64Bit(); - IsWin64 = Subtarget->isTargetWin64(); + Is64Bit = Subtarget.is64Bit(); + IsWin64 = Subtarget.isTargetWin64(); if (Is64Bit) { SlotSize = 8; @@ -83,21 +81,6 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm) BasePtr = Is64Bit ? X86::RBX : X86::ESI; } -/// getCompactUnwindRegNum - This function maps the register to the number for -/// compact unwind encoding. Return -1 if the register isn't valid. -int X86RegisterInfo::getCompactUnwindRegNum(unsigned RegNum, bool isEH) const { - switch (getLLVMRegNum(RegNum, isEH)) { - case X86::EBX: case X86::RBX: return 1; - case X86::ECX: case X86::R12: return 2; - case X86::EDX: case X86::R13: return 3; - case X86::EDI: case X86::R14: return 4; - case X86::ESI: case X86::R15: return 5; - case X86::EBP: case X86::RBP: return 6; - } - - return -1; -} - bool X86RegisterInfo::trackLivenessAfterRegAlloc(const MachineFunction &MF) const { // ExeDepsFixer and PostRAScheduler require liveness. @@ -173,9 +156,8 @@ X86RegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC) const{ } const TargetRegisterClass * -X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) - const { - const X86Subtarget &Subtarget = TM.getSubtarget<X86Subtarget>(); +X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { switch (Kind) { default: llvm_unreachable("Unexpected Kind in getPointerRegClass!"); case 0: // Normal GPRs. @@ -225,7 +207,7 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case X86::GR64RegClassID: return 12 - FPDiff; case X86::VR128RegClassID: - return TM.getSubtarget<X86Subtarget>().is64Bit() ? 10 : 4; + return Subtarget.is64Bit() ? 
10 : 4; case X86::VR64RegClassID: return 4; } @@ -233,8 +215,8 @@ X86RegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, const MCPhysReg * X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); - bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512(); + bool HasAVX = Subtarget.hasAVX(); + bool HasAVX512 = Subtarget.hasAVX512(); assert(MF && "MachineFunction required"); switch (MF->getFunction()->getCallingConv()) { @@ -287,8 +269,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const uint32_t* X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { - bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX(); - bool HasAVX512 = TM.getSubtarget<X86Subtarget>().hasAVX512(); + bool HasAVX = Subtarget.hasAVX(); + bool HasAVX512 = Subtarget.hasAVX512(); switch (CC) { case CallingConv::GHC: @@ -406,7 +388,7 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(*AI); } } - if (!Is64Bit || !TM.getSubtarget<X86Subtarget>().hasAVX512()) { + if (!Is64Bit || !Subtarget.hasAVX512()) { for (unsigned n = 16; n != 32; ++n) { for (MCRegAliasIterator AI(X86::XMM0 + n, this, true); AI.isValid(); ++AI) Reserved.set(*AI); @@ -459,7 +441,7 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const { bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); const Function *F = MF.getFunction(); - unsigned StackAlign = TM.getFrameLowering()->getStackAlignment(); + unsigned StackAlign = MF.getTarget().getFrameLowering()->getStackAlignment(); bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) || F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index 2289d91..74efd1f 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -22,11 +22,11 @@ namespace llvm { class Type; class TargetInstrInfo; - class X86TargetMachine; + class X86Subtarget; class X86RegisterInfo final : public X86GenRegisterInfo { public: - X86TargetMachine &TM; + const X86Subtarget &Subtarget; private: /// Is64Bit - Is the target 64-bits. @@ -55,15 +55,11 @@ private: unsigned BasePtr; public: - X86RegisterInfo(X86TargetMachine &tm); + X86RegisterInfo(const X86Subtarget &STI); // FIXME: This should be tablegen'd like getDwarfRegNum is int getSEHRegNum(unsigned i) const; - /// getCompactUnwindRegNum - This function maps the register to the number for - /// compact unwind encoding. Return -1 if the register isn't valid. - int getCompactUnwindRegNum(unsigned RegNum, bool isEH) const override; - /// Code Generation virtual methods... 
/// bool trackLivenessAfterRegAlloc(const MachineFunction &MF) const override; diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index 744890d..a83dd9b 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -11,21 +11,23 @@ // //===----------------------------------------------------------------------===// -#include "X86TargetMachine.h" +#include "X86InstrInfo.h" +#include "X86ISelLowering.h" +#include "X86RegisterInfo.h" +#include "X86Subtarget.h" +#include "X86SelectionDAGInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/Target/TargetLowering.h" + using namespace llvm; #define DEBUG_TYPE "x86-selectiondag-info" -X86SelectionDAGInfo::X86SelectionDAGInfo(const X86TargetMachine &TM) : - TargetSelectionDAGInfo(TM), - Subtarget(&TM.getSubtarget<X86Subtarget>()), - TLI(*TM.getTargetLowering()) { -} +X86SelectionDAGInfo::X86SelectionDAGInfo(const DataLayout &DL) + : TargetSelectionDAGInfo(&DL) {} -X86SelectionDAGInfo::~X86SelectionDAGInfo() { -} +X86SelectionDAGInfo::~X86SelectionDAGInfo() {} SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, @@ -35,6 +37,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, bool isVolatile, MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + const X86Subtarget &Subtarget = DAG.getTarget().getSubtarget<X86Subtarget>(); // If to a segment-relative address space, use the default lowering. if (DstPtrInfo.getAddrSpace() >= 256) @@ -43,16 +46,14 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, // If not DWORD aligned or size is more than the threshold, call the library. // The libc version is likely to be faster for these cases. It can use the // address value and run time information about the CPU. - if ((Align & 3) != 0 || - !ConstantSize || - ConstantSize->getZExtValue() > - Subtarget->getMaxInlineSizeThreshold()) { + if ((Align & 3) != 0 || !ConstantSize || + ConstantSize->getZExtValue() > Subtarget.getMaxInlineSizeThreshold()) { // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast<ConstantSDNode>(Src); if (const char *bzeroEntry = V && - V->isNullValue() ? Subtarget->getBZeroEntry() : nullptr) { - EVT IntPtr = TLI.getPointerTy(); + V->isNullValue() ? 
Subtarget.getBZeroEntry() : nullptr) { + EVT IntPtr = DAG.getTargetLoweringInfo().getPointerTy(); Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -65,10 +66,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, TargetLowering::CallLoweringInfo CLI(DAG); CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), - DAG.getExternalSymbol(bzeroEntry, IntPtr), &Args, 0) + DAG.getExternalSymbol(bzeroEntry, IntPtr), std::move(Args), + 0) .setDiscardResult(); - std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); + std::pair<SDValue,SDValue> CallResult = DAG.getTargetLoweringInfo().LowerCallTo(CLI); return CallResult.second; } @@ -99,7 +101,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, ValReg = X86::EAX; Val = (Val << 8) | Val; Val = (Val << 16) | Val; - if (Subtarget->is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned + if (Subtarget.is64Bit() && ((Align & 0x7) == 0)) { // QWORD aligned AVT = MVT::i64; ValReg = X86::RAX; Val = (Val << 32) | Val; @@ -128,13 +130,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, InFlag = Chain.getValue(1); } - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : - X86::ECX, - Count, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, + Count, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, + Dst, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); @@ -182,10 +182,11 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, // This requires the copy size to be a constant, preferably // within a subtarget-specific limit. ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size); + const X86Subtarget &Subtarget = DAG.getTarget().getSubtarget<X86Subtarget>(); if (!ConstantSize) return SDValue(); uint64_t SizeVal = ConstantSize->getZExtValue(); - if (!AlwaysInline && SizeVal > Subtarget->getMaxInlineSizeThreshold()) + if (!AlwaysInline && SizeVal > Subtarget.getMaxInlineSizeThreshold()) return SDValue(); /// If not DWORD aligned, it is more efficient to call the library. However @@ -218,7 +219,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, AVT = MVT::i32; else // QWORD aligned - AVT = Subtarget->is64Bit() ? MVT::i64 : MVT::i32; + AVT = Subtarget.is64Bit() ? MVT::i64 : MVT::i32; unsigned UBytes = AVT.getSizeInBits() / 8; unsigned CountVal = SizeVal / UBytes; @@ -226,15 +227,15 @@ X86SelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, unsigned BytesLeft = SizeVal % UBytes; SDValue InFlag; - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RCX : + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, Count, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RDI : + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, Dst, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget->is64Bit() ? X86::RSI : + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? 
X86::RSI : X86::ESI, Src, InFlag); InFlag = Chain.getValue(1); diff --git a/lib/Target/X86/X86SelectionDAGInfo.h b/lib/Target/X86/X86SelectionDAGInfo.h index 0d5dc38..c12555a 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.h +++ b/lib/Target/X86/X86SelectionDAGInfo.h @@ -23,14 +23,8 @@ class X86TargetMachine; class X86Subtarget; class X86SelectionDAGInfo : public TargetSelectionDAGInfo { - /// Subtarget - Keep a pointer to the X86Subtarget around so that we can - /// make the right decision when generating code for different targets. - const X86Subtarget *Subtarget; - - const X86TargetLowering &TLI; - public: - explicit X86SelectionDAGInfo(const X86TargetMachine &TM); + explicit X86SelectionDAGInfo(const DataLayout &DL); ~X86SelectionDAGInfo(); SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 989e0d6..79b7e68 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -291,13 +291,60 @@ void X86Subtarget::initializeEnvironment() { CallRegIndirect = false; LEAUsesAG = false; SlowLEA = false; + SlowIncDec = false; stackAlignment = 4; // FIXME: this is a known good value for Yonah. How about others? MaxInlineSizeThreshold = 128; } +static std::string computeDataLayout(const X86Subtarget &ST) { + // X86 is little endian + std::string Ret = "e"; + + Ret += DataLayout::getManglingComponent(ST.getTargetTriple()); + // X86 and x32 have 32 bit pointers. + if (ST.isTarget64BitILP32() || !ST.is64Bit()) + Ret += "-p:32:32"; + + // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32. + if (ST.is64Bit() || ST.isOSWindows() || ST.isTargetNaCl()) + Ret += "-i64:64"; + else + Ret += "-f64:32:64"; + + // Some ABIs align long double to 128 bits, others to 32. + if (ST.isTargetNaCl()) + ; // No f80 + else if (ST.is64Bit() || ST.isTargetDarwin()) + Ret += "-f80:128"; + else + Ret += "-f80:32"; + + // The registers can hold 8, 16, 32 or, in x86-64, 64 bits. + if (ST.is64Bit()) + Ret += "-n8:16:32:64"; + else + Ret += "-n8:16:32"; + + // The stack is aligned to 32 bits on some ABIs and 128 bits on others. + if (!ST.is64Bit() && ST.isOSWindows()) + Ret += "-S32"; + else + Ret += "-S128"; + + return Ret; +} + +X86Subtarget &X86Subtarget::initializeSubtargetDependencies(StringRef CPU, + StringRef FS) { + initializeEnvironment(); + resetSubtargetFeatures(CPU, FS); + return *this; +} + X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, unsigned StackAlignOverride) + const std::string &FS, X86TargetMachine &TM, + unsigned StackAlignOverride) : X86GenSubtargetInfo(TT, CPU, FS), X86ProcFamily(Others), PICStyle(PICStyles::None), TargetTriple(TT), StackAlignOverride(StackAlignOverride), @@ -305,10 +352,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, In32BitMode(TargetTriple.getArch() == Triple::x86 && TargetTriple.getEnvironment() != Triple::CODE16), In16BitMode(TargetTriple.getArch() == Triple::x86 && - TargetTriple.getEnvironment() == Triple::CODE16) { - initializeEnvironment(); - resetSubtargetFeatures(CPU, FS); -} + TargetTriple.getEnvironment() == Triple::CODE16), + DL(computeDataLayout(*this)), TSInfo(DL), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM), + FrameLowering(TargetFrameLowering::StackGrowsDown, getStackAlignment(), + is64Bit() ? 
-8 : -4), + JITInfo(hasSSE1()) {} bool X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel, diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 703559a..09db0eb 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -14,6 +14,11 @@ #ifndef X86SUBTARGET_H #define X86SUBTARGET_H +#include "X86FrameLowering.h" +#include "X86ISelLowering.h" +#include "X86InstrInfo.h" +#include "X86JITInfo.h" +#include "X86SelectionDAGInfo.h" #include "llvm/ADT/Triple.h" #include "llvm/IR/CallingConv.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -40,6 +45,7 @@ enum Style { } class X86Subtarget final : public X86GenSubtargetInfo { + protected: enum X86SSEEnum { NoMMXSSE, MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512F @@ -181,6 +187,9 @@ protected: /// SlowLEA - True if the LEA instruction with certain arguments is slow bool SlowLEA; + /// SlowIncDec - True if INC and DEC instructions are slow when writing to flags + bool SlowIncDec; + /// Processor has AVX-512 PreFetch Instructions bool HasPFI; @@ -217,14 +226,31 @@ private: /// In16BitMode - True if compiling for 16-bit, false for 32-bit or 64-bit. bool In16BitMode; + // Calculates type size & alignment + const DataLayout DL; + X86SelectionDAGInfo TSInfo; + // Ordering here is important. X86InstrInfo initializes X86RegisterInfo which + // X86TargetLowering needs. + X86InstrInfo InstrInfo; + X86TargetLowering TLInfo; + X86FrameLowering FrameLowering; + X86JITInfo JITInfo; + public: /// This constructor initializes the data members to match that /// of the specified triple. /// X86Subtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, + const std::string &FS, X86TargetMachine &TM, unsigned StackAlignOverride); + const X86TargetLowering *getTargetLowering() const { return &TLInfo; } + const X86InstrInfo *getInstrInfo() const { return &InstrInfo; } + const DataLayout *getDataLayout() const { return &DL; } + const X86FrameLowering *getFrameLowering() const { return &FrameLowering; } + const X86SelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + X86JITInfo *getJITInfo() { return &JITInfo; } + /// getStackAlignment - Returns the minimum alignment known to hold of the /// stack frame on entry to the function and which must be maintained by every /// function for this subtarget. @@ -241,6 +267,9 @@ public: /// \brief Reset the features for the X86 target. void resetSubtargetFeatures(const MachineFunction *MF) override; private: + /// \brief Initialize the full set of dependencies so we can use an initializer + /// list for X86Subtarget. 
+ X86Subtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS); void initializeEnvironment(); void resetSubtargetFeatures(StringRef CPU, StringRef FS); public: @@ -319,6 +348,7 @@ public: bool callRegIndirect() const { return CallRegIndirect; } bool LEAusesAG() const { return LEAUsesAG; } bool slowLEA() const { return SlowLEA; } + bool slowIncDec() const { return SlowIncDec; } bool hasCDI() const { return HasCDI; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 93760ef..f12140f 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -29,61 +29,14 @@ extern "C" void LLVMInitializeX86Target() { void X86TargetMachine::anchor() { } -static std::string computeDataLayout(const X86Subtarget &ST) { - // X86 is little endian - std::string Ret = "e"; - - Ret += DataLayout::getManglingComponent(ST.getTargetTriple()); - // X86 and x32 have 32 bit pointers. - if (ST.isTarget64BitILP32() || !ST.is64Bit()) - Ret += "-p:32:32"; - - // Some ABIs align 64 bit integers and doubles to 64 bits, others to 32. - if (ST.is64Bit() || ST.isTargetCygMing() || ST.isTargetKnownWindowsMSVC() || - ST.isTargetNaCl()) - Ret += "-i64:64"; - else - Ret += "-f64:32:64"; - - // Some ABIs align long double to 128 bits, others to 32. - if (ST.isTargetNaCl()) - ; // No f80 - else if (ST.is64Bit() || ST.isTargetDarwin()) - Ret += "-f80:128"; - else - Ret += "-f80:32"; - - // The registers can hold 8, 16, 32 or, in x86-64, 64 bits. - if (ST.is64Bit()) - Ret += "-n8:16:32:64"; - else - Ret += "-n8:16:32"; - - // The stack is aligned to 32 bits on some ABIs and 128 bits on others. - if (!ST.is64Bit() && (ST.isTargetCygMing() || ST.isTargetKnownWindowsMSVC())) - Ret += "-S32"; - else - Ret += "-S128"; - - return Ret; -} - /// X86TargetMachine ctor - Create an X86 target. /// -X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - const TargetOptions &Options, +X86TargetMachine::X86TargetMachine(const Target &T, StringRef TT, StringRef CPU, + StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS, Options.StackAlignmentOverride), - FrameLowering(*this, Subtarget), - InstrItins(Subtarget.getInstrItineraryData()), - DL(computeDataLayout(*getSubtargetImpl())), - InstrInfo(*this), - TLInfo(*this), - TSInfo(*this), - JITInfo(*this) { + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, *this, Options.StackAlignmentOverride) { // Determine the PICStyle based on the target selected. if (getRelocationModel() == Reloc::Static) { // Unless we're in PIC or DynamicNoPIC mode, set the PIC style to None. @@ -158,6 +111,7 @@ public: return *getX86TargetMachine().getSubtargetImpl(); } + void addIRPasses() override; bool addInstSelector() override; bool addILPOpts() override; bool addPreRegAlloc() override; @@ -170,6 +124,12 @@ TargetPassConfig *X86TargetMachine::createPassConfig(PassManagerBase &PM) { return new X86PassConfig(this, PM); } +void X86PassConfig::addIRPasses() { + addPass(createX86AtomicExpandPass(&getX86TargetMachine())); + + TargetPassConfig::addIRPasses(); +} + bool X86PassConfig::addInstSelector() { // Install an instruction selector. 
addPass(createX86ISelDag(getX86TargetMachine(), getOptLevel())); diff --git a/lib/Target/X86/X86TargetMachine.h b/lib/Target/X86/X86TargetMachine.h index 57e6eda..41d5157 100644 --- a/lib/Target/X86/X86TargetMachine.h +++ b/lib/Target/X86/X86TargetMachine.h @@ -13,12 +13,7 @@ #ifndef X86TARGETMACHINE_H #define X86TARGETMACHINE_H - -#include "X86FrameLowering.h" -#include "X86ISelLowering.h" #include "X86InstrInfo.h" -#include "X86JITInfo.h" -#include "X86SelectionDAGInfo.h" #include "X86Subtarget.h" #include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" @@ -30,13 +25,6 @@ class StringRef; class X86TargetMachine final : public LLVMTargetMachine { virtual void anchor(); X86Subtarget Subtarget; - X86FrameLowering FrameLowering; - InstrItineraryData InstrItins; - const DataLayout DL; // Calculates type size & alignment - X86InstrInfo InstrInfo; - X86TargetLowering TLInfo; - X86SelectionDAGInfo TSInfo; - X86JITInfo JITInfo; public: X86TargetMachine(const Target &T, StringRef TT, @@ -44,28 +32,28 @@ public: Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); - const DataLayout *getDataLayout() const override { return &DL; } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); + } const X86InstrInfo *getInstrInfo() const override { - return &InstrInfo; + return getSubtargetImpl()->getInstrInfo(); } const TargetFrameLowering *getFrameLowering() const override { - return &FrameLowering; - } - X86JITInfo *getJITInfo() override { - return &JITInfo; + return getSubtargetImpl()->getFrameLowering(); } + X86JITInfo *getJITInfo() override { return Subtarget.getJITInfo(); } const X86Subtarget *getSubtargetImpl() const override { return &Subtarget; } const X86TargetLowering *getTargetLowering() const override { - return &TLInfo; + return getSubtargetImpl()->getTargetLowering(); } const X86SelectionDAGInfo *getSelectionDAGInfo() const override { - return &TSInfo; + return getSubtargetImpl()->getSelectionDAGInfo(); } const X86RegisterInfo *getRegisterInfo() const override { return &getInstrInfo()->getRegisterInfo(); } const InstrItineraryData *getInstrItineraryData() const override { - return &InstrItins; + return &getSubtargetImpl()->getInstrItineraryData(); } /// \brief Register X86 analysis passes with a pass manager. diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index 91b9d40..c961e2f 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -102,6 +102,8 @@ public: unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwiseForm) const override; + unsigned getIntImmCost(int64_t) const; + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const override; unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, @@ -142,13 +144,17 @@ unsigned X86TTI::getNumberOfRegisters(bool Vector) const { if (Vector && !ST->hasSSE1()) return 0; - if (ST->is64Bit()) + if (ST->is64Bit()) { + if (Vector && ST->hasAVX512()) + return 32; return 16; + } return 8; } unsigned X86TTI::getRegisterBitWidth(bool Vector) const { if (Vector) { + if (ST->hasAVX512()) return 512; if (ST->hasAVX()) return 256; if (ST->hasSSE1()) return 128; return 0; @@ -400,17 +406,117 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, Type *SubTp) const { - // We only estimate the cost of reverse shuffles. 
- if (Kind != SK_Reverse)
+ // We only estimate the cost of reverse and alternate shuffles.
+ if (Kind != SK_Reverse && Kind != SK_Alternate)
return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
- std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
- unsigned Cost = 1;
- if (LT.second.getSizeInBits() > 128)
- Cost = 3; // Extract + insert + copy.
+ if (Kind == SK_Reverse) {
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+ unsigned Cost = 1;
+ if (LT.second.getSizeInBits() > 128)
+ Cost = 3; // Extract + insert + copy.
+
+ // Multiply by the number of parts.
+ return Cost * LT.first;
+ }
+
+ if (Kind == SK_Alternate) {
+ // 64-bit packed float vectors (v2f32) are widened to type v4f32.
+ // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
+ std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp);
+
+ // The backend knows how to generate a single VEX.256 version of
+ // instruction VPBLENDW if the target supports AVX2.
+ if (ST->hasAVX2() && LT.second == MVT::v16i16)
+ return LT.first;
+
+ static const CostTblEntry<MVT::SimpleValueType> AVXAltShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
+ {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
+
+ {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
+ {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps
+
+ // This shuffle is custom lowered into a sequence of:
+ // 2x vextractf128 , 2x vpblendw , 1x vinsertf128
+ {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
+
+ // This shuffle is custom lowered into a long sequence of:
+ // 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128
+ {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
+ };
+
+ if (ST->hasAVX()) {
+ int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * AVXAltShuffleTbl[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT::SimpleValueType> SSE41AltShuffleTbl[] = {
+ // These are lowered into movsd.
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
+
+ // packed float vectors with four elements are lowered into BLENDI dag
+ // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
+
+ // This shuffle generates a single pshufw.
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
+
+ // There is no instruction that matches a v16i8 alternate shuffle.
+ // The backend will expand it into the sequence 'pshufb + pshufb + or'.
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
+ };
+
+ if (ST->hasSSE41()) {
+ int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * SSE41AltShuffleTbl[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT::SimpleValueType> SSSE3AltShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
+
+ // SSE3 doesn't have 'blendps'. The following shuffles are expanded into
+ // the sequence 'shufps + pshufd'
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
- // Multiple by the number of parts.
- return Cost * LT.first;
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
+ };
+
+ if (ST->hasSSSE3()) {
+ int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * SSSE3AltShuffleTbl[Idx].Cost;
+ }
+
+ static const CostTblEntry<MVT::SimpleValueType> SSEAltShuffleTbl[] = {
+ {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
+ {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
+
+ {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
+ {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
+
+ // This is expanded into a long sequence of four extract + four insert.
+ {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
+
+ // 8 x (pinsrw + pextrw + and + movb + movzb + or)
+ {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
+ };
+
+ // Fall-back (SSE3 and SSE2).
+ int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
+ if (Idx != -1)
+ return LT.first * SSEAltShuffleTbl[Idx].Cost;
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
+ }
+
+ return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp);
}
unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const {
@@ -808,6 +914,19 @@ unsigned X86TTI::getReductionCost(unsigned Opcode, Type *ValTy,
return TargetTransformInfo::getReductionCost(Opcode, ValTy, IsPairwise);
}
+/// \brief Calculate the cost of materializing a 64-bit value. This helper
+/// method might only calculate a fraction of a larger immediate. Therefore it
+/// is valid to return a cost of ZERO.
+unsigned X86TTI::getIntImmCost(int64_t Val) const {
+ if (Val == 0)
+ return TCC_Free;
+
+ if (isInt<32>(Val))
+ return TCC_Basic;
+
+ return 2 * TCC_Basic;
+}
+
unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
assert(Ty->isIntegerTy());
@@ -825,11 +944,21 @@ unsigned X86TTI::getIntImmCost(const APInt &Imm, Type *Ty) const {
if (Imm == 0)
return TCC_Free;
- if (Imm.getBitWidth() <= 64 &&
- (isInt<32>(Imm.getSExtValue()) || isUInt<32>(Imm.getZExtValue())))
- return TCC_Basic;
- else
- return 2 * TCC_Basic;
+ // Sign-extend all constants to a multiple of 64 bits.
+ APInt ImmVal = Imm;
+ if (BitSize & 0x3f)
+ ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
+
+ // Split the constant into 64-bit chunks and calculate the cost for each
+ // chunk.
+ unsigned Cost = 0;
+ for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
+ APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
+ int64_t Val = Tmp.getSExtValue();
+ Cost += getIntImmCost(Val);
+ }
+ // We need at least one instruction to materialize the constant.
+ return std::max(1U, Cost);
}
unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
@@ -889,9 +1018,13 @@ unsigned X86TTI::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
break;
}
- if ((Idx == ImmIdx) &&
- Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
- return TCC_Free;
+ if (Idx == ImmIdx) {
+ unsigned NumConstants = (BitSize + 63) / 64;
+ unsigned Cost = X86TTI::getIntImmCost(Imm, Ty);
+ return (Cost <= NumConstants * TCC_Basic)
+ ?
static_cast<unsigned>(TCC_Free) + : Cost; + } return X86TTI::getIntImmCost(Imm, Ty); } diff --git a/lib/Target/XCore/XCoreFrameLowering.cpp b/lib/Target/XCore/XCoreFrameLowering.cpp index 5499aba..e694736 100644 --- a/lib/Target/XCore/XCoreFrameLowering.cpp +++ b/lib/Target/XCore/XCoreFrameLowering.cpp @@ -228,7 +228,9 @@ void XCoreFrameLowering::emitPrologue(MachineFunction &MF) const { const XCoreInstrInfo &TII = *static_cast<const XCoreInstrInfo*>(MF.getTarget().getInstrInfo()); XCoreFunctionInfo *XFI = MF.getInfo<XCoreFunctionInfo>(); - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); + // Debug location must be unknown since the first debug location is used + // to determine the end of the prologue. + DebugLoc dl; if (MFI->getMaxAlignment() > getStackAlignment()) report_fatal_error("emitPrologue unsupported alignment: " @@ -416,7 +418,7 @@ spillCalleeSavedRegisters(MachineBasicBlock &MBB, bool emitFrameMoves = XCoreRegisterInfo::needsFrameMoves(*MF); DebugLoc DL; - if (MI != MBB.end()) + if (MI != MBB.end() && !MI->isDebugValue()) DL = MI->getDebugLoc(); for (std::vector<CalleeSavedInfo>::const_iterator it = CSI.begin(); diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 9d78586..be7ef64 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -68,10 +68,9 @@ getTargetNodeName(unsigned Opcode) const } } -XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) - : TargetLowering(XTM, new XCoreTargetObjectFile()), - TM(XTM), - Subtarget(*XTM.getSubtargetImpl()) { +XCoreTargetLowering::XCoreTargetLowering(const TargetMachine &TM) + : TargetLowering(TM, new XCoreTargetObjectFile()), TM(TM), + Subtarget(TM.getSubtarget<XCoreSubtarget>()) { // Set up the register classes. addRegisterClass(MVT::i32, &XCore::GRRegsRegClass); @@ -92,15 +91,12 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM) // XCore does not have the NodeTypes below. 
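// (With SELECT_CC marked Expand below, the generic legalizer lowers it to
// a SETCC feeding a SELECT, which is the same sequence the removed custom
// LowerSELECT_CC hook used to build by hand.)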
setOperationAction(ISD::BR_CC, MVT::i32, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); + setOperationAction(ISD::SELECT_CC, MVT::i32, Expand); setOperationAction(ISD::ADDC, MVT::i32, Expand); setOperationAction(ISD::ADDE, MVT::i32, Expand); setOperationAction(ISD::SUBC, MVT::i32, Expand); setOperationAction(ISD::SUBE, MVT::i32, Expand); - // Stop the combiner recombining select and set_cc - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - // 64bit setOperationAction(ISD::ADD, MVT::i64, Custom); setOperationAction(ISD::SUB, MVT::i64, Custom); @@ -217,7 +213,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::BR_JT: return LowerBR_JT(Op, DAG); case ISD::LOAD: return LowerLOAD(Op, DAG); case ISD::STORE: return LowerSTORE(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::VAARG: return LowerVAARG(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); case ISD::SMUL_LOHI: return LowerSMUL_LOHI(Op, DAG); @@ -258,33 +253,21 @@ void XCoreTargetLowering::ReplaceNodeResults(SDNode *N, // Misc Lower Operation implementation //===----------------------------------------------------------------------===// -SDValue XCoreTargetLowering:: -LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const -{ - SDLoc dl(Op); - SDValue Cond = DAG.getNode(ISD::SETCC, dl, MVT::i32, Op.getOperand(2), - Op.getOperand(3), Op.getOperand(4)); - return DAG.getNode(ISD::SELECT, dl, MVT::i32, Cond, Op.getOperand(0), - Op.getOperand(1)); -} - SDValue XCoreTargetLowering::getGlobalAddressWrapper(SDValue GA, const GlobalValue *GV, SelectionDAG &DAG) const { // FIXME there is no actual debug info here SDLoc dl(GA); - const GlobalValue *UnderlyingGV = GV; - // If GV is an alias then use the aliasee to determine the wrapper type - if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) - UnderlyingGV = GA->getAliasee(); - if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(UnderlyingGV)) { - if ((GVar->isConstant() && GV->hasLocalLinkage()) || - (GVar->hasSection() && - StringRef(GVar->getSection()).startswith(".cp."))) - return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA); - return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA); - } - return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA); + + if (GV->getType()->getElementType()->isFunctionTy()) + return DAG.getNode(XCoreISD::PCRelativeWrapper, dl, MVT::i32, GA); + + const auto *GVar = dyn_cast<GlobalVariable>(GV); + if ((GV->hasSection() && StringRef(GV->getSection()).startswith(".cp.")) || + (GVar && GVar->isConstant() && GV->hasLocalLinkage())) + return DAG.getNode(XCoreISD::CPRelativeWrapper, dl, MVT::i32, GA); + + return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, GA); } static bool IsSmallObject(const GlobalValue *GV, const XCoreTargetLowering &XTL) { @@ -508,7 +491,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { CLI.setDebugLoc(DL).setChain(Chain) .setCallee(CallingConv::C, IntPtrTy, DAG.getExternalSymbol("__misaligned_load", getPointerTy()), - &Args, 0); + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); SDValue Ops[] = { CallResult.first, CallResult.second }; @@ -568,7 +551,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG) const CLI.setDebugLoc(dl).setChain(Chain) .setCallee(CallingConv::C, Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__misaligned_store", getPointerTy()), - &Args, 0); + std::move(Args), 0); std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); return CallResult.second; diff --git 
a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h index d28715b..62b89c3 100644 --- a/lib/Target/XCore/XCoreISelLowering.h +++ b/lib/Target/XCore/XCoreISelLowering.h @@ -94,7 +94,7 @@ namespace llvm { { public: - explicit XCoreTargetLowering(XCoreTargetMachine &TM); + explicit XCoreTargetLowering(const TargetMachine &TM); using TargetLowering::isZExtFree; bool isZExtFree(SDValue Val, EVT VT2) const override; @@ -123,7 +123,7 @@ namespace llvm { bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; private: - const XCoreTargetMachine &TM; + const TargetMachine &TM; const XCoreSubtarget &Subtarget; // Lower Operand helpers @@ -157,7 +157,6 @@ namespace llvm { SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) const; SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; SDValue LowerUMUL_LOHI(SDValue Op, SelectionDAG &DAG) const; diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp index 984f0cd..36ea9a0 100644 --- a/lib/Target/XCore/XCoreInstrInfo.cpp +++ b/lib/Target/XCore/XCoreInstrInfo.cpp @@ -373,7 +373,8 @@ void XCoreInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) const { DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); + if (I != MBB.end() && !I->isDebugValue()) + DL = I->getDebugLoc(); MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); MachineMemOperand *MMO = @@ -395,7 +396,8 @@ void XCoreInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, const TargetRegisterInfo *TRI) const { DebugLoc DL; - if (I != MBB.end()) DL = I->getDebugLoc(); + if (I != MBB.end() && !I->isDebugValue()) + DL = I->getDebugLoc(); MachineFunction *MF = MBB.getParent(); const MachineFrameInfo &MFI = *MF->getFrameInfo(); MachineMemOperand *MMO = @@ -440,7 +442,8 @@ MachineBasicBlock::iterator XCoreInstrInfo::loadImmediate( MachineBasicBlock::iterator MI, unsigned Reg, uint64_t Value) const { DebugLoc dl; - if (MI != MBB.end()) dl = MI->getDebugLoc(); + if (MI != MBB.end() && !MI->isDebugValue()) + dl = MI->getDebugLoc(); if (isImmMskBitp(Value)) { int N = Log2_32(Value) + 1; return BuildMI(MBB, MI, dl, get(XCore::MKMSK_rus), Reg).addImm(N); diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp index 5a6bbe7..91b33fd 100644 --- a/lib/Target/XCore/XCoreSelectionDAGInfo.cpp +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.cpp @@ -16,9 +16,8 @@ using namespace llvm; #define DEBUG_TYPE "xcore-selectiondag-info" -XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const XCoreTargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} +XCoreSelectionDAGInfo::XCoreSelectionDAGInfo(const DataLayout &DL) + : TargetSelectionDAGInfo(&DL) {} XCoreSelectionDAGInfo::~XCoreSelectionDAGInfo() { } @@ -47,7 +46,7 @@ EmitTargetCodeForMemcpy(SelectionDAG &DAG, SDLoc dl, SDValue Chain, .setCallee(TLI.getLibcallCallingConv(RTLIB::MEMCPY), Type::getVoidTy(*DAG.getContext()), DAG.getExternalSymbol("__memcpy_4", TLI.getPointerTy()), - &Args, 0) + std::move(Args), 0) .setDiscardResult(); std::pair<SDValue,SDValue> CallResult = TLI.LowerCallTo(CLI); diff --git a/lib/Target/XCore/XCoreSelectionDAGInfo.h b/lib/Target/XCore/XCoreSelectionDAGInfo.h index ea6af98..0079de1 100644 --- 
a/lib/Target/XCore/XCoreSelectionDAGInfo.h +++ b/lib/Target/XCore/XCoreSelectionDAGInfo.h @@ -22,7 +22,7 @@ class XCoreTargetMachine; class XCoreSelectionDAGInfo : public TargetSelectionDAGInfo { public: - explicit XCoreSelectionDAGInfo(const XCoreTargetMachine &TM); + explicit XCoreSelectionDAGInfo(const DataLayout &DL); ~XCoreSelectionDAGInfo(); SDValue diff --git a/lib/Target/XCore/XCoreSubtarget.cpp b/lib/Target/XCore/XCoreSubtarget.cpp index 89ea03a..7227411 100644 --- a/lib/Target/XCore/XCoreSubtarget.cpp +++ b/lib/Target/XCore/XCoreSubtarget.cpp @@ -25,8 +25,8 @@ using namespace llvm; void XCoreSubtarget::anchor() { } -XCoreSubtarget::XCoreSubtarget(const std::string &TT, - const std::string &CPU, const std::string &FS) - : XCoreGenSubtargetInfo(TT, CPU, FS) -{ -} +XCoreSubtarget::XCoreSubtarget(const std::string &TT, const std::string &CPU, + const std::string &FS, const TargetMachine &TM) + : XCoreGenSubtargetInfo(TT, CPU, FS), + DL("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"), + InstrInfo(), FrameLowering(*this), TLInfo(TM), TSInfo(DL) {} diff --git a/lib/Target/XCore/XCoreSubtarget.h b/lib/Target/XCore/XCoreSubtarget.h index 5ac4dbc..1e9810b 100644 --- a/lib/Target/XCore/XCoreSubtarget.h +++ b/lib/Target/XCore/XCoreSubtarget.h @@ -14,6 +14,11 @@ #ifndef XCORESUBTARGET_H #define XCORESUBTARGET_H +#include "XCoreFrameLowering.h" +#include "XCoreISelLowering.h" +#include "XCoreInstrInfo.h" +#include "XCoreSelectionDAGInfo.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetSubtargetInfo.h" #include <string> @@ -26,17 +31,31 @@ class StringRef; class XCoreSubtarget : public XCoreGenSubtargetInfo { virtual void anchor(); + const DataLayout DL; // Calculates type size & alignment + XCoreInstrInfo InstrInfo; + XCoreFrameLowering FrameLowering; + XCoreTargetLowering TLInfo; + XCoreSelectionDAGInfo TSInfo; public: /// This constructor initializes the data members to match that /// of the specified triple. /// XCoreSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); + const std::string &FS, const TargetMachine &TM); /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
void ParseSubtargetFeatures(StringRef CPU, StringRef FS); + + const XCoreInstrInfo *getInstrInfo() const { return &InstrInfo; } + const XCoreFrameLowering *getFrameLowering() const { return &FrameLowering; } + const XCoreTargetLowering *getTargetLowering() const { return &TLInfo; } + const XCoreSelectionDAGInfo *getSelectionDAGInfo() const { return &TSInfo; } + const TargetRegisterInfo *getRegisterInfo() const { + return &InstrInfo.getRegisterInfo(); + } + const DataLayout *getDataLayout() const { return &DL; } }; } // End llvm namespace diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp index 0fb21c5..8d8bb38 100644 --- a/lib/Target/XCore/XCoreTargetMachine.cpp +++ b/lib/Target/XCore/XCoreTargetMachine.cpp @@ -25,13 +25,8 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), - Subtarget(TT, CPU, FS), - DL("e-m:e-p:32:32-i1:8:32-i8:8:32-i16:16:32-i64:32-f64:32-a:0:32-n32"), - InstrInfo(), - FrameLowering(Subtarget), - TLInfo(*this), - TSInfo(*this) { + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + Subtarget(TT, CPU, FS, *this) { initAsmInfo(); } diff --git a/lib/Target/XCore/XCoreTargetMachine.h b/lib/Target/XCore/XCoreTargetMachine.h index a57ca55..14c43bf 100644 --- a/lib/Target/XCore/XCoreTargetMachine.h +++ b/lib/Target/XCore/XCoreTargetMachine.h @@ -14,46 +14,38 @@ #ifndef XCORETARGETMACHINE_H #define XCORETARGETMACHINE_H -#include "XCoreFrameLowering.h" -#include "XCoreISelLowering.h" -#include "XCoreInstrInfo.h" -#include "XCoreSelectionDAGInfo.h" #include "XCoreSubtarget.h" -#include "llvm/IR/DataLayout.h" #include "llvm/Target/TargetMachine.h" namespace llvm { class XCoreTargetMachine : public LLVMTargetMachine { XCoreSubtarget Subtarget; - const DataLayout DL; // Calculates type size & alignment - XCoreInstrInfo InstrInfo; - XCoreFrameLowering FrameLowering; - XCoreTargetLowering TLInfo; - XCoreSelectionDAGInfo TSInfo; public: XCoreTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL); - const XCoreInstrInfo *getInstrInfo() const override { return &InstrInfo; } + const XCoreInstrInfo *getInstrInfo() const override { + return getSubtargetImpl()->getInstrInfo(); + } const XCoreFrameLowering *getFrameLowering() const override { - return &FrameLowering; + return getSubtargetImpl()->getFrameLowering(); } const XCoreSubtarget *getSubtargetImpl() const override { return &Subtarget; } const XCoreTargetLowering *getTargetLowering() const override { - return &TLInfo; + return getSubtargetImpl()->getTargetLowering(); } - const XCoreSelectionDAGInfo* getSelectionDAGInfo() const override { - return &TSInfo; + return getSubtargetImpl()->getSelectionDAGInfo(); } - const TargetRegisterInfo *getRegisterInfo() const override { - return &InstrInfo.getRegisterInfo(); + return getSubtargetImpl()->getRegisterInfo(); + } + const DataLayout *getDataLayout() const override { + return getSubtargetImpl()->getDataLayout(); } - const DataLayout *getDataLayout() const override { return &DL; } // Pass Pipeline Configuration TargetPassConfig *createPassConfig(PassManagerBase &PM) override; diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp index 377fa15..f9de54a 100644 --- a/lib/Transforms/IPO/ArgumentPromotion.cpp +++ 
b/lib/Transforms/IPO/ArgumentPromotion.cpp @@ -39,6 +39,8 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/LLVMContext.h" @@ -67,21 +69,24 @@ namespace { bool runOnSCC(CallGraphSCC &SCC) override; static char ID; // Pass identification, replacement for typeid explicit ArgPromotion(unsigned maxElements = 3) - : CallGraphSCCPass(ID), maxElements(maxElements) { + : CallGraphSCCPass(ID), DL(nullptr), maxElements(maxElements) { initializeArgPromotionPass(*PassRegistry::getPassRegistry()); } /// A vector used to hold the indices of a single GEP instruction typedef std::vector<uint64_t> IndicesVector; + const DataLayout *DL; private: CallGraphNode *PromoteArguments(CallGraphNode *CGN); bool isSafeToPromoteArgument(Argument *Arg, bool isByVal) const; CallGraphNode *DoPromotion(Function *F, SmallPtrSet<Argument*, 8> &ArgsToPromote, SmallPtrSet<Argument*, 8> &ByValArgsToTransform); + bool doInitialization(CallGraph &CG) override; /// The maximum number of elements to expand, or 0 for unlimited. unsigned maxElements; + DenseMap<const Function *, DISubprogram> FunctionDIs; }; } @@ -100,6 +105,9 @@ Pass *llvm::createArgumentPromotionPass(unsigned maxElements) { bool ArgPromotion::runOnSCC(CallGraphSCC &SCC) { bool Changed = false, LocalChange; + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; + do { // Iterate until we stop promoting from this SCC. LocalChange = false; // Attempt to promote arguments from all functions in this SCC. @@ -215,7 +223,8 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) { /// AllCallersPassInValidPointerForArgument - Return true if we can prove that /// all callees pass in a valid pointer for the specified function argument. -static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { +static bool AllCallersPassInValidPointerForArgument(Argument *Arg, + const DataLayout *DL) { Function *Callee = Arg->getParent(); unsigned ArgNo = Arg->getArgNo(); @@ -226,7 +235,7 @@ static bool AllCallersPassInValidPointerForArgument(Argument *Arg) { CallSite CS(U); assert(CS && "Should only have direct calls!"); - if (!CS.getArgument(ArgNo)->isDereferenceablePointer()) + if (!CS.getArgument(ArgNo)->isDereferenceablePointer(DL)) return false; } return true; @@ -334,7 +343,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, GEPIndicesSet ToPromote; // If the pointer is always valid, any load with first index 0 is valid. - if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg)) + if (isByValOrInAlloca || AllCallersPassInValidPointerForArgument(Arg, DL)) SafeToUnconditionallyLoad.insert(IndicesVector(1, 0)); // First, iterate the entry block and mark loads of (geps of) arguments as @@ -604,6 +613,10 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, Function *NF = Function::Create(NFTy, F->getLinkage(), F->getName()); NF->copyAttributesFrom(F); + // Patch the pointer to LLVM function in debug info descriptor. 
+ auto DI = FunctionDIs.find(F); + if (DI != FunctionDIs.end()) + DI->second.replaceFunction(NF); DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n" << "From: " << *F); @@ -741,6 +754,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, if (cast<CallInst>(Call)->isTailCall()) cast<CallInst>(New)->setTailCall(); } + New->setDebugLoc(Call->getDebugLoc()); Args.clear(); AttributesVec.clear(); @@ -902,3 +916,8 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F, return NF_CGN; } + +bool ArgPromotion::doInitialization(CallGraph &CG) { + FunctionDIs = makeSubprogramMap(CG.getModule()); + return CallGraphSCCPass::doInitialization(CG); +} diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index 284b896..ac3853d 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -127,8 +127,7 @@ namespace { // As the code generation for the module is finished (and DIBuilder is // finalized) we assume that subprogram descriptors won't be changed, and // they are stored in the map for a short duration anyway. - typedef DenseMap<Function*, DISubprogram> FunctionDIMap; - FunctionDIMap FunctionDIs; + DenseMap<const Function *, DISubprogram> FunctionDIs; protected: // DAH uses this to specify a different ID. @@ -150,7 +149,6 @@ namespace { unsigned RetValNum = 0); Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses); - void CollectFunctionDIs(Module &M); void SurveyFunction(const Function &F); void MarkValue(const RetOrArg &RA, Liveness L, const UseVector &MaybeLiveUses); @@ -190,35 +188,6 @@ INITIALIZE_PASS(DAH, "deadarghaX0r", ModulePass *llvm::createDeadArgEliminationPass() { return new DAE(); } ModulePass *llvm::createDeadArgHackingPass() { return new DAH(); } -/// CollectFunctionDIs - Map each function in the module to its debug info -/// descriptor. -void DAE::CollectFunctionDIs(Module &M) { - FunctionDIs.clear(); - - for (Module::named_metadata_iterator I = M.named_metadata_begin(), - E = M.named_metadata_end(); I != E; ++I) { - NamedMDNode &NMD = *I; - for (unsigned MDIndex = 0, MDNum = NMD.getNumOperands(); - MDIndex < MDNum; ++MDIndex) { - MDNode *Node = NMD.getOperand(MDIndex); - if (!DIDescriptor(Node).isCompileUnit()) - continue; - DICompileUnit CU(Node); - const DIArray &SPs = CU.getSubprograms(); - for (unsigned SPIndex = 0, SPNum = SPs.getNumElements(); - SPIndex < SPNum; ++SPIndex) { - DISubprogram SP(SPs.getElement(SPIndex)); - assert((!SP || SP.isSubprogram()) && - "A MDNode in subprograms of a CU should be null or a DISubprogram."); - if (!SP) - continue; - if (Function *F = SP.getFunction()) - FunctionDIs[F] = SP; - } - } - } -} - /// DeleteDeadVarargs - If this is a function that takes a ... list, and if /// llvm.vastart is never called, the varargs list is dead for the function. bool DAE::DeleteDeadVarargs(Function &Fn) { @@ -327,7 +296,7 @@ bool DAE::DeleteDeadVarargs(Function &Fn) { } // Patch the pointer to LLVM function in debug info descriptor. - FunctionDIMap::iterator DI = FunctionDIs.find(&Fn); + auto DI = FunctionDIs.find(&Fn); if (DI != FunctionDIs.end()) DI->second.replaceFunction(NF); @@ -1087,7 +1056,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) { } // Patch the pointer to LLVM function in debug info descriptor. 
- FunctionDIMap::iterator DI = FunctionDIs.find(F); + auto DI = FunctionDIs.find(F); if (DI != FunctionDIs.end()) DI->second.replaceFunction(NF); @@ -1101,7 +1070,7 @@ bool DAE::runOnModule(Module &M) { bool Changed = false; // Collect debug info descriptors for functions. - CollectFunctionDIs(M); + FunctionDIs = makeSubprogramMap(M); // First pass: Do a simple check to see if any functions can have their "..." // removed. We can do this if they never call va_start. This loop cannot be diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp index fed8839..8174df9 100644 --- a/lib/Transforms/IPO/FunctionAttrs.cpp +++ b/lib/Transforms/IPO/FunctionAttrs.cpp @@ -449,14 +449,29 @@ determinePointerReadAttrs(Argument *A, case Instruction::Call: case Instruction::Invoke: { + bool Captures = true; + + if (I->getType()->isVoidTy()) + Captures = false; + + auto AddUsersToWorklistIfCapturing = [&] { + if (Captures) + for (Use &UU : I->uses()) + if (Visited.insert(&UU)) + Worklist.push_back(&UU); + }; + CallSite CS(I); - if (CS.doesNotAccessMemory()) + if (CS.doesNotAccessMemory()) { + AddUsersToWorklistIfCapturing(); continue; + } Function *F = CS.getCalledFunction(); if (!F) { if (CS.onlyReadsMemory()) { IsRead = true; + AddUsersToWorklistIfCapturing(); continue; } return Attribute::None; @@ -471,6 +486,7 @@ determinePointerReadAttrs(Argument *A, "More params than args in non-varargs call."); return Attribute::None; } + Captures &= !CS.doesNotCapture(A - B); if (SCCNodes.count(AI)) continue; if (!CS.onlyReadsMemory() && !CS.onlyReadsMemory(A - B)) @@ -479,6 +495,7 @@ determinePointerReadAttrs(Argument *A, IsRead = true; } } + AddUsersToWorklistIfCapturing(); break; } diff --git a/lib/Transforms/IPO/GlobalDCE.cpp b/lib/Transforms/IPO/GlobalDCE.cpp index 9decddc..7e7a4c0 100644 --- a/lib/Transforms/IPO/GlobalDCE.cpp +++ b/lib/Transforms/IPO/GlobalDCE.cpp @@ -62,7 +62,7 @@ static bool isEmptyFunction(Function *F) { if (Entry.size() != 1 || !isa<ReturnInst>(Entry.front())) return false; ReturnInst &RI = cast<ReturnInst>(Entry.front()); - return RI.getReturnValue() == NULL; + return RI.getReturnValue() == nullptr; } char GlobalDCE::ID = 0; @@ -77,13 +77,19 @@ bool GlobalDCE::runOnModule(Module &M) { // Remove empty functions from the global ctors list. Changed |= optimizeGlobalCtorsList(M, isEmptyFunction); + typedef std::multimap<const Comdat *, GlobalValue *> ComdatGVPairsTy; + ComdatGVPairsTy ComdatGVPairs; + // Loop over the module, adding globals which are obviously necessary. for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); // Functions with external linkage are needed if they have a body - if (!I->isDiscardableIfUnused() && - !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) - GlobalIsNeeded(I); + if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) { + if (!I->isDiscardableIfUnused()) + GlobalIsNeeded(I); + else if (const Comdat *C = I->getComdat()) + ComdatGVPairs.insert(std::make_pair(C, I)); + } } for (Module::global_iterator I = M.global_begin(), E = M.global_end(); @@ -91,17 +97,38 @@ bool GlobalDCE::runOnModule(Module &M) { Changed |= RemoveUnusedGlobalValue(*I); // Externally visible & appending globals are needed, if they have an // initializer. 
- if (!I->isDiscardableIfUnused() && - !I->isDeclaration() && !I->hasAvailableExternallyLinkage()) - GlobalIsNeeded(I); + if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) { + if (!I->isDiscardableIfUnused()) + GlobalIsNeeded(I); + else if (const Comdat *C = I->getComdat()) + ComdatGVPairs.insert(std::make_pair(C, I)); + } } for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { Changed |= RemoveUnusedGlobalValue(*I); // Externally visible aliases are needed. - if (!I->isDiscardableIfUnused()) + if (!I->isDiscardableIfUnused()) { GlobalIsNeeded(I); + } else if (const Comdat *C = I->getComdat()) { + ComdatGVPairs.insert(std::make_pair(C, I)); + } + } + + for (ComdatGVPairsTy::iterator I = ComdatGVPairs.begin(), + E = ComdatGVPairs.end(); + I != E;) { + ComdatGVPairsTy::iterator UB = ComdatGVPairs.upper_bound(I->first); + bool CanDiscard = std::all_of(I, UB, [](ComdatGVPairsTy::value_type Pair) { + return Pair.second->isDiscardableIfUnused(); + }); + if (!CanDiscard) { + std::for_each(I, UB, [this](ComdatGVPairsTy::value_type Pair) { + GlobalIsNeeded(Pair.second); + }); + } + I = UB; } // Now that all globals which are needed are in the AliveGlobals set, we loop diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index ae80c43..c1d0d3b 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" @@ -1699,9 +1700,6 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) { /// possible. If we make a change, return true. bool GlobalOpt::ProcessGlobal(GlobalVariable *GV, Module::global_iterator &GVI) { - if (!GV->isDiscardableIfUnused()) - return false; - // Do more involved optimizations if the global is internal. GV->removeDeadConstantUsers(); @@ -1910,7 +1908,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { for (Module::iterator FI = M.begin(), E = M.end(); FI != E; ) { Function *F = FI++; // Functions without names cannot be referenced outside this module. - if (!F->hasName() && !F->isDeclaration()) + if (!F->hasName() && !F->isDeclaration() && !F->hasLocalLinkage()) F->setLinkage(GlobalValue::InternalLinkage); F->removeDeadConstantUsers(); if (F->isDefTriviallyDead()) { @@ -1944,11 +1942,18 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { bool GlobalOpt::OptimizeGlobalVars(Module &M) { bool Changed = false; + + SmallSet<const Comdat *, 8> NotDiscardableComdats; + for (const GlobalVariable &GV : M.globals()) + if (const Comdat *C = GV.getComdat()) + if (!GV.isDiscardableIfUnused()) + NotDiscardableComdats.insert(C); + for (Module::global_iterator GVI = M.global_begin(), E = M.global_end(); GVI != E; ) { GlobalVariable *GV = GVI++; // Global variables without names cannot be referenced outside this module. - if (!GV->hasName() && !GV->isDeclaration()) + if (!GV->hasName() && !GV->isDeclaration() && !GV->hasLocalLinkage()) GV->setLinkage(GlobalValue::InternalLinkage); // Simplify the initializer. 
if (GV->hasInitializer()) @@ -1958,7 +1963,12 @@ bool GlobalOpt::OptimizeGlobalVars(Module &M) { GV->setInitializer(New); } - Changed |= ProcessGlobal(GV, GVI); + if (GV->isDiscardableIfUnused()) { + if (const Comdat *C = GV->getComdat()) + if (NotDiscardableComdats.count(C)) + continue; + Changed |= ProcessGlobal(GV, GVI); + } } return Changed; } @@ -1980,10 +1990,13 @@ isSimpleEnoughValueToCommit(Constant *C, static bool isSimpleEnoughValueToCommitHelper(Constant *C, SmallPtrSet<Constant*, 8> &SimpleConstants, const DataLayout *DL) { - // Simple integer, undef, constant aggregate zero, global addresses, etc are - // all supported. - if (C->getNumOperands() == 0 || isa<BlockAddress>(C) || - isa<GlobalValue>(C)) + // Simple global addresses are supported, do not allow dllimport or + // thread-local globals. + if (auto *GV = dyn_cast<GlobalValue>(C)) + return !GV->hasDLLImportStorageClass() && !GV->isThreadLocal(); + + // Simple integer, undef, constant aggregate zero, etc are all supported. + if (C->getNumOperands() == 0 || isa<BlockAddress>(C)) return true; // Aggregate values are safe if all their elements are. @@ -2054,8 +2067,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C) { return false; if (GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) - // Do not allow weak/*_odr/linkonce/dllimport/dllexport linkage or - // external globals. + // Do not allow weak/*_odr/linkonce linkage or external globals. return GV->hasUniqueInitializer(); if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) { @@ -2846,14 +2858,19 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) { I != E;) { Module::alias_iterator J = I++; // Aliases without names cannot be referenced outside this module. - if (!J->hasName() && !J->isDeclaration()) + if (!J->hasName() && !J->isDeclaration() && !J->hasLocalLinkage()) J->setLinkage(GlobalValue::InternalLinkage); // If the aliasee may change at link time, nothing can be done - bail out. if (J->mayBeOverridden()) continue; Constant *Aliasee = J->getAliasee(); - GlobalValue *Target = cast<GlobalValue>(Aliasee->stripPointerCasts()); + GlobalValue *Target = dyn_cast<GlobalValue>(Aliasee->stripPointerCasts()); + // We can't trivially replace the alias with the aliasee if the aliasee is + // non-trivial in some way. + // TODO: Try to handle non-zero GEPs of local aliasees. + if (!Target) + continue; Target->removeDeadConstantUsers(); // Make all users of the alias use the aliasee instead. diff --git a/lib/Transforms/IPO/MergeFunctions.cpp b/lib/Transforms/IPO/MergeFunctions.cpp index c3a2b12..559ef0b 100644 --- a/lib/Transforms/IPO/MergeFunctions.cpp +++ b/lib/Transforms/IPO/MergeFunctions.cpp @@ -9,13 +9,24 @@ // // This pass looks for equivalent functions that are mergeable and folds them. // -// A hash is computed from the function, based on its type and number of -// basic blocks. +// An order relation is defined on the set of functions. It is implemented +// through a special function comparison procedure that returns +// 0 when functions are equal, +// -1 when the left function is less than the right function, and +// 1 for the opposite case. We need a total ordering, so we must maintain +// four properties on the function set: +// a <= a (reflexivity) +// if a <= b and b <= a then a = b (antisymmetry) +// if a <= b and b <= c then a <= c (transitivity). +// for all a and b: a <= b or b <= a (totality). // -// Once all hashes are computed, we perform an expensive equality comparison -// on each function pair. 
This takes n^2/2 comparisons per bucket, so it's -// important that the hash function be high quality. The equality comparison -// iterates through each instruction in each basic block. +// Comparison iterates through each instruction in each basic block. +// Functions are kept in a binary tree. For each new function F we perform +// a lookup in that tree. +// In practice it works the following way: +// -- We define a Function* container class with a custom "operator<" (FunctionPtr). +// -- "FunctionPtr" instances are stored in a std::set, so every +// std::set::insert operation completes in log(N) time. // // When a match is found the functions are folded. If both functions are // overridable, we move the functionality into a new internal function and @@ -31,9 +42,6 @@ // the object they belong to. However, as long as it's only used for a lookup // and call, this is irrelevant, and we'd like to fold such functions. // -// * switch from n^2 pair-wise comparisons to an n-way comparison for each -// bucket. -// // * be smarter about bitcasts. // // In order to fold functions, we will sometimes add either bitcast instructions // analysis since the two functions differ where one has a bitcast and the // other doesn't. We should learn to look through bitcasts. // +// * Compare complex types with pointer types inside. +// * Compare cross-reference cases. +// * Compare complex expressions. +// +// All three issues above can be described as the ability to prove that +// fA == fB == fC == fE == fF == fG in the example below: +// +// void fA() { +// fB(); +// } +// void fB() { +// fA(); +// } +// +// void fE() { +// fF(); +// } +// void fF() { +// fG(); +// } +// void fG() { +// fE(); +// } +// +// The simplest cross-reference case (fA <--> fB) was implemented in previous +// versions of MergeFunctions, though it appeared in only two function pairs +// in the test-suite (which counts >50k functions). +// Detecting complex cross-referencing (e.g.: A->B->C->D->A) could cover +// many more cases. +// //===----------------------------------------------------------------------===// #include "llvm/Transforms/IPO.h" @@ -60,6 +98,7 @@ #include "llvm/IR/Operator.h" #include "llvm/IR/ValueHandle.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -73,89 +112,12 @@ STATISTIC(NumThunksWritten, "Number of thunks generated"); STATISTIC(NumAliasesWritten, "Number of aliases generated"); STATISTIC(NumDoubleWeak, "Number of new functions created"); -/// Returns the type id for a type to be hashed. We turn pointer types into -/// integers here because the actual compare logic below considers pointers and -/// integers of the same size as equal. -static Type::TypeID getTypeIDForHash(Type *Ty) { - if (Ty->isPointerTy()) - return Type::IntegerTyID; - return Ty->getTypeID(); -} - -/// Creates a hash-code for the function which is the same for any two -/// functions that will compare equal, without looking at the instructions -/// inside the function. 
-static unsigned profileFunction(const Function *F) { - FunctionType *FTy = F->getFunctionType(); - - FoldingSetNodeID ID; - ID.AddInteger(F->size()); - ID.AddInteger(F->getCallingConv()); - ID.AddBoolean(F->hasGC()); - ID.AddBoolean(FTy->isVarArg()); - ID.AddInteger(getTypeIDForHash(FTy->getReturnType())); - for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i) - ID.AddInteger(getTypeIDForHash(FTy->getParamType(i))); - return ID.ComputeHash(); -} - -namespace { - -/// ComparableFunction - A struct that pairs together functions with a -/// DataLayout so that we can keep them together as elements in the DenseSet. -class ComparableFunction { -public: - static const ComparableFunction EmptyKey; - static const ComparableFunction TombstoneKey; - static DataLayout * const LookupOnly; - - ComparableFunction(Function *Func, const DataLayout *DL) - : Func(Func), Hash(profileFunction(Func)), DL(DL) {} - - Function *getFunc() const { return Func; } - unsigned getHash() const { return Hash; } - const DataLayout *getDataLayout() const { return DL; } - - // Drops AssertingVH reference to the function. Outside of debug mode, this - // does nothing. - void release() { - assert(Func && - "Attempted to release function twice, or release empty/tombstone!"); - Func = nullptr; - } - -private: - explicit ComparableFunction(unsigned Hash) - : Func(nullptr), Hash(Hash), DL(nullptr) {} - - AssertingVH<Function> Func; - unsigned Hash; - const DataLayout *DL; -}; - -const ComparableFunction ComparableFunction::EmptyKey = ComparableFunction(0); -const ComparableFunction ComparableFunction::TombstoneKey = - ComparableFunction(1); -DataLayout *const ComparableFunction::LookupOnly = (DataLayout*)(-1); - -} - -namespace llvm { - template <> - struct DenseMapInfo<ComparableFunction> { - static ComparableFunction getEmptyKey() { - return ComparableFunction::EmptyKey; - } - static ComparableFunction getTombstoneKey() { - return ComparableFunction::TombstoneKey; - } - static unsigned getHashValue(const ComparableFunction &CF) { - return CF.getHash(); - } - static bool isEqual(const ComparableFunction &LHS, - const ComparableFunction &RHS); - }; -} +static cl::opt<unsigned> NumFunctionsForSanityCheck( + "mergefunc-sanity", + cl::desc("How many functions in the module may be used for " + "the MergeFunctions pass sanity check. " + "'0' disables this check. Works only with '-debug'."), + cl::init(0), cl::Hidden); namespace { @@ -167,14 +129,14 @@ class FunctionComparator { public: FunctionComparator(const DataLayout *DL, const Function *F1, const Function *F2) - : F1(F1), F2(F2), DL(DL) {} + : FnL(F1), FnR(F2), DL(DL) {} /// Test whether the two functions have equivalent behaviour. - bool compare(); + int compare(); private: /// Test whether two basic blocks have equivalent behaviour. - bool compare(const BasicBlock *BB1, const BasicBlock *BB2); + int compare(const BasicBlock *BBL, const BasicBlock *BBR); /// Constants comparison. /// It's analogous to lexicographical comparison between hypothetical numbers @@ -300,10 +262,6 @@ private: /// see comments for sn_mapL and sn_mapR. int cmpValues(const Value *L, const Value *R); - bool enumerate(const Value *V1, const Value *V2) { - return cmpValues(V1, V2) == 0; - } - /// Compare two Instructions for equivalence, similar to /// Instruction::isSameOperationAs but with modifications to the type /// comparison. 
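The ordering machinery above can be seen in isolation. What follows is a minimal standalone sketch of the comparator-in-std::set pattern the new header comment describes; Item and threeWayCompare are hypothetical stand-ins for Function and FunctionComparator::compare(), not LLVM API, and the real pass compares IR rather than strings:

  #include <cassert>
  #include <set>
  #include <string>

  struct Item {
    std::string Body; // stand-in for a function's contents
  };

  // Stand-in for FunctionComparator::compare(): returns -1, 0, or 1 and must
  // satisfy reflexivity, antisymmetry, transitivity, and totality.
  static int threeWayCompare(const Item *L, const Item *R) {
    if (L->Body < R->Body) return -1;
    if (L->Body > R->Body) return 1;
    return 0;
  }

  // Mirrors FunctionPtr: a thin wrapper whose operator< delegates to the
  // three-way comparison, so std::set treats compare()==0 items as duplicates.
  struct ItemPtr {
    Item *I;
    bool operator<(const ItemPtr &RHS) const {
      return threeWayCompare(I, RHS.I) == -1;
    }
  };

  int main() {
    Item A{"f"}, B{"g"}, C{"f"}; // C compares equal to A
    std::set<ItemPtr> Tree;
    assert(Tree.insert({&A}).second); // unique: inserted in O(log N)
    assert(Tree.insert({&B}).second);
    // A failed insert returns the existing equivalent entry; this is exactly
    // how MergeFunctions discovers a merge candidate.
    auto Res = Tree.insert({&C});
    assert(!Res.second && Res.first->I == &A);
    return 0;
  }

The design point is that std::set only needs a strict weak ordering, so any consistent three-way comparison over functions turns duplicate detection into an O(log N) tree lookup instead of pairwise scans.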
@@ -325,15 +283,11 @@ private: /// 6.1.Load: volatile (as boolean flag) /// 6.2.Load: alignment (as integer numbers) /// 6.3.Load: synch-scope (as integer numbers) + /// 6.4.Load: range metadata (as integer numbers) /// At this stage it's better to read the code, since it's no more than 10-15 /// lines per particular instruction, and could change over time. int cmpOperation(const Instruction *L, const Instruction *R) const; - bool isEquivalentOperation(const Instruction *I1, - const Instruction *I2) const { - return cmpOperation(I1, I2) == 0; - } - /// Compare two GEPs for equivalent pointer arithmetic. /// Parts to be compared for each comparison stage, /// most significant stage first: @@ -348,14 +302,6 @@ private: return cmpGEP(cast<GEPOperator>(GEPL), cast<GEPOperator>(GEPR)); } - bool isEquivalentGEP(const GEPOperator *GEP1, const GEPOperator *GEP2) { - return cmpGEP(GEP1, GEP2) == 0; - } - bool isEquivalentGEP(const GetElementPtrInst *GEP1, - const GetElementPtrInst *GEP2) { - return isEquivalentGEP(cast<GEPOperator>(GEP1), cast<GEPOperator>(GEP2)); - } - /// cmpType - compares two types, /// defines a total ordering among the set of types. /// @@ -398,10 +344,6 @@ private: /// 6. For all other cases put llvm_unreachable. int cmpType(Type *TyL, Type *TyR) const; - bool isEquivalentType(Type *Ty1, Type *Ty2) const { - return cmpType(Ty1, Ty2) == 0; - } - int cmpNumbers(uint64_t L, uint64_t R) const; int cmpAPInt(const APInt &L, const APInt &R) const; @@ -410,7 +352,7 @@ private: int cmpAttrs(const AttributeSet L, const AttributeSet R) const; // The two functions undergoing comparison. - const Function *F1, *F2; + const Function *FnL, *FnR; const DataLayout *DL; @@ -450,6 +392,18 @@ private: DenseMap<const Value*, int> sn_mapL, sn_mapR; }; +class FunctionPtr { + AssertingVH<Function> F; + const DataLayout *DL; + +public: + FunctionPtr(Function *F, const DataLayout *DL) : F(F), DL(DL) {} + Function *getFunc() const { return F; } + void release() { F = 0; } + bool operator<(const FunctionPtr &RHS) const { + return (FunctionComparator(DL, F, RHS.getFunc()).compare()) == -1; + } +}; } int FunctionComparator::cmpNumbers(uint64_t L, uint64_t R) const { @@ -788,7 +742,11 @@ int FunctionComparator::cmpOperation(const Instruction *L, if (int Res = cmpNumbers(LI->getOrdering(), cast<LoadInst>(R)->getOrdering())) return Res; - return cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope()); + if (int Res = + cmpNumbers(LI->getSynchScope(), cast<LoadInst>(R)->getSynchScope())) + return Res; + return cmpNumbers((uint64_t)LI->getMetadata(LLVMContext::MD_range), + (uint64_t)cast<LoadInst>(R)->getMetadata(LLVMContext::MD_range)); } if (const StoreInst *SI = dyn_cast<StoreInst>(L)) { if (int Res = @@ -847,6 +805,9 @@ int FunctionComparator::cmpOperation(const Instruction *L, if (int Res = cmpNumbers(CXI->isVolatile(), cast<AtomicCmpXchgInst>(R)->isVolatile())) return Res; + if (int Res = cmpNumbers(CXI->isWeak(), + cast<AtomicCmpXchgInst>(R)->isWeak())) + return Res; if (int Res = cmpNumbers(CXI->getSuccessOrdering(), cast<AtomicCmpXchgInst>(R)->getSuccessOrdering())) return Res; @@ -914,13 +875,13 @@ int FunctionComparator::cmpGEP(const GEPOperator *GEPL, /// See comments in declaration for more details. int FunctionComparator::cmpValues(const Value *L, const Value *R) { // Catch self-reference case. 
- if (L == F1) { - if (R == F2) + if (L == FnL) { + if (R == FnR) return 0; return -1; } - if (R == F2) { - if (L == F1) + if (R == FnR) { + if (L == FnL) return 0; return 1; } @@ -954,90 +915,102 @@ int FunctionComparator::cmpValues(const Value *L, const Value *R) { return cmpNumbers(LeftSN.first->second, RightSN.first->second); } // Test whether two basic blocks have equivalent behaviour. -bool FunctionComparator::compare(const BasicBlock *BB1, const BasicBlock *BB2) { - BasicBlock::const_iterator F1I = BB1->begin(), F1E = BB1->end(); - BasicBlock::const_iterator F2I = BB2->begin(), F2E = BB2->end(); +int FunctionComparator::compare(const BasicBlock *BBL, const BasicBlock *BBR) { + BasicBlock::const_iterator InstL = BBL->begin(), InstLE = BBL->end(); + BasicBlock::const_iterator InstR = BBR->begin(), InstRE = BBR->end(); do { - if (!enumerate(F1I, F2I)) - return false; + if (int Res = cmpValues(InstL, InstR)) + return Res; - if (const GetElementPtrInst *GEP1 = dyn_cast<GetElementPtrInst>(F1I)) { - const GetElementPtrInst *GEP2 = dyn_cast<GetElementPtrInst>(F2I); - if (!GEP2) - return false; + const GetElementPtrInst *GEPL = dyn_cast<GetElementPtrInst>(InstL); + const GetElementPtrInst *GEPR = dyn_cast<GetElementPtrInst>(InstR); - if (!enumerate(GEP1->getPointerOperand(), GEP2->getPointerOperand())) - return false; + if (GEPL && !GEPR) + return 1; + if (GEPR && !GEPL) + return -1; - if (!isEquivalentGEP(GEP1, GEP2)) - return false; + if (GEPL && GEPR) { + if (int Res = + cmpValues(GEPL->getPointerOperand(), GEPR->getPointerOperand())) + return Res; + if (int Res = cmpGEP(GEPL, GEPR)) + return Res; } else { - if (!isEquivalentOperation(F1I, F2I)) - return false; - - assert(F1I->getNumOperands() == F2I->getNumOperands()); - for (unsigned i = 0, e = F1I->getNumOperands(); i != e; ++i) { - Value *OpF1 = F1I->getOperand(i); - Value *OpF2 = F2I->getOperand(i); - - if (!enumerate(OpF1, OpF2)) - return false; + if (int Res = cmpOperation(InstL, InstR)) + return Res; + assert(InstL->getNumOperands() == InstR->getNumOperands()); - if (OpF1->getValueID() != OpF2->getValueID() || - !isEquivalentType(OpF1->getType(), OpF2->getType())) - return false; + for (unsigned i = 0, e = InstL->getNumOperands(); i != e; ++i) { + Value *OpL = InstL->getOperand(i); + Value *OpR = InstR->getOperand(i); + if (int Res = cmpValues(OpL, OpR)) + return Res; + if (int Res = cmpNumbers(OpL->getValueID(), OpR->getValueID())) + return Res; + // TODO: Already checked in cmpOperation + if (int Res = cmpType(OpL->getType(), OpR->getType())) + return Res; } } - ++F1I, ++F2I; - } while (F1I != F1E && F2I != F2E); + ++InstL, ++InstR; + } while (InstL != InstLE && InstR != InstRE); - return F1I == F1E && F2I == F2E; + if (InstL != InstLE && InstR == InstRE) + return 1; + if (InstL == InstLE && InstR != InstRE) + return -1; + return 0; } // Test whether the two functions have equivalent behaviour. -bool FunctionComparator::compare() { - // We need to recheck everything, but check the things that weren't included - // in the hash first. 
+int FunctionComparator::compare() { sn_mapL.clear(); sn_mapR.clear(); - if (F1->getAttributes() != F2->getAttributes()) - return false; + if (int Res = cmpAttrs(FnL->getAttributes(), FnR->getAttributes())) + return Res; - if (F1->hasGC() != F2->hasGC()) - return false; + if (int Res = cmpNumbers(FnL->hasGC(), FnR->hasGC())) + return Res; - if (F1->hasGC() && F1->getGC() != F2->getGC()) - return false; + if (FnL->hasGC()) { + if (int Res = cmpNumbers((uint64_t)FnL->getGC(), (uint64_t)FnR->getGC())) + return Res; + } - if (F1->hasSection() != F2->hasSection()) - return false; + if (int Res = cmpNumbers(FnL->hasSection(), FnR->hasSection())) + return Res; - if (F1->hasSection() && F1->getSection() != F2->getSection()) - return false; + if (FnL->hasSection()) { + if (int Res = cmpStrings(FnL->getSection(), FnR->getSection())) + return Res; + } - if (F1->isVarArg() != F2->isVarArg()) - return false; + if (int Res = cmpNumbers(FnL->isVarArg(), FnR->isVarArg())) + return Res; // TODO: if it's internal and only used in direct calls, we could handle this // case too. - if (F1->getCallingConv() != F2->getCallingConv()) - return false; + if (int Res = cmpNumbers(FnL->getCallingConv(), FnR->getCallingConv())) + return Res; - if (!isEquivalentType(F1->getFunctionType(), F2->getFunctionType())) - return false; + if (int Res = cmpType(FnL->getFunctionType(), FnR->getFunctionType())) + return Res; - assert(F1->arg_size() == F2->arg_size() && + assert(FnL->arg_size() == FnR->arg_size() && "Identically typed functions have different numbers of args!"); // Visit the arguments so that they get enumerated in the order they're // passed in. - for (Function::const_arg_iterator f1i = F1->arg_begin(), - f2i = F2->arg_begin(), f1e = F1->arg_end(); f1i != f1e; ++f1i, ++f2i) { - if (!enumerate(f1i, f2i)) + for (Function::const_arg_iterator ArgLI = FnL->arg_begin(), + ArgRI = FnR->arg_begin(), + ArgLE = FnL->arg_end(); + ArgLI != ArgLE; ++ArgLI, ++ArgRI) { + if (cmpValues(ArgLI, ArgRI) != 0) llvm_unreachable("Arguments repeat!"); } @@ -1045,33 +1018,36 @@ bool FunctionComparator::compare() { // linked list is immaterial. Our walk starts at the entry block for both // functions, then takes each block from each terminator in order. As an // artifact, this also means that unreachable blocks are ignored. - SmallVector<const BasicBlock *, 8> F1BBs, F2BBs; + SmallVector<const BasicBlock *, 8> FnLBBs, FnRBBs; SmallSet<const BasicBlock *, 128> VisitedBBs; // in terms of F1. 
- F1BBs.push_back(&F1->getEntryBlock()); - F2BBs.push_back(&F2->getEntryBlock()); + FnLBBs.push_back(&FnL->getEntryBlock()); + FnRBBs.push_back(&FnR->getEntryBlock()); - VisitedBBs.insert(F1BBs[0]); - while (!F1BBs.empty()) { - const BasicBlock *F1BB = F1BBs.pop_back_val(); - const BasicBlock *F2BB = F2BBs.pop_back_val(); + VisitedBBs.insert(FnLBBs[0]); + while (!FnLBBs.empty()) { + const BasicBlock *BBL = FnLBBs.pop_back_val(); + const BasicBlock *BBR = FnRBBs.pop_back_val(); - if (!enumerate(F1BB, F2BB) || !compare(F1BB, F2BB)) - return false; + if (int Res = cmpValues(BBL, BBR)) + return Res; + + if (int Res = compare(BBL, BBR)) + return Res; - const TerminatorInst *F1TI = F1BB->getTerminator(); - const TerminatorInst *F2TI = F2BB->getTerminator(); + const TerminatorInst *TermL = BBL->getTerminator(); + const TerminatorInst *TermR = BBR->getTerminator(); - assert(F1TI->getNumSuccessors() == F2TI->getNumSuccessors()); - for (unsigned i = 0, e = F1TI->getNumSuccessors(); i != e; ++i) { - if (!VisitedBBs.insert(F1TI->getSuccessor(i))) + assert(TermL->getNumSuccessors() == TermR->getNumSuccessors()); + for (unsigned i = 0, e = TermL->getNumSuccessors(); i != e; ++i) { + if (!VisitedBBs.insert(TermL->getSuccessor(i))) continue; - F1BBs.push_back(F1TI->getSuccessor(i)); - F2BBs.push_back(F2TI->getSuccessor(i)); + FnLBBs.push_back(TermL->getSuccessor(i)); + FnRBBs.push_back(TermR->getSuccessor(i)); } } - return true; + return 0; } namespace { @@ -1092,21 +1068,25 @@ public: bool runOnModule(Module &M) override; private: - typedef DenseSet<ComparableFunction> FnSetType; + typedef std::set<FunctionPtr> FnTreeType; /// A work queue of functions that may have been modified and should be /// analyzed again. std::vector<WeakVH> Deferred; - /// Insert a ComparableFunction into the FnSet, or merge it away if it's + /// Checks the rules of the order relation introduced on the function set. + /// Returns true if the sanity check has passed, and false otherwise. + bool doSanityCheck(std::vector<WeakVH> &Worklist); + + /// Insert a Function into the FnTree, or merge it away if it's /// equal to one that's already present. - bool insert(ComparableFunction &NewF); + bool insert(Function *NewFunction); - /// Remove a Function from the FnSet and queue it up for a second sweep of + /// Remove a Function from the FnTree and queue it up for a second sweep of /// analysis. void remove(Function *F); - /// Find the functions that use this Value and remove them from FnSet and + /// Find the functions that use this Value and remove them from FnTree and /// queue the functions. void removeUsers(Value *V); @@ -1131,7 +1111,7 @@ private: /// The set of all distinct functions. Use the insert() and remove() methods /// to modify it. - FnSetType FnSet; + FnTreeType FnTree; /// DataLayout for more accurate GEP comparisons. May be NULL. 
const DataLayout *DL; @@ -1149,6 +1129,78 @@ ModulePass *llvm::createMergeFunctionsPass() { return new MergeFunctions(); } +bool MergeFunctions::doSanityCheck(std::vector<WeakVH> &Worklist) { + if (const unsigned Max = NumFunctionsForSanityCheck) { + unsigned TripleNumber = 0; + bool Valid = true; + + dbgs() << "MERGEFUNC-SANITY: Started for first " << Max << " functions.\n"; + + unsigned i = 0; + for (std::vector<WeakVH>::iterator I = Worklist.begin(), E = Worklist.end(); + I != E && i < Max; ++I, ++i) { + unsigned j = i; + for (std::vector<WeakVH>::iterator J = I; J != E && j < Max; ++J, ++j) { + Function *F1 = cast<Function>(*I); + Function *F2 = cast<Function>(*J); + int Res1 = FunctionComparator(DL, F1, F2).compare(); + int Res2 = FunctionComparator(DL, F2, F1).compare(); + + // If F1 <= F2, then F2 >= F1, otherwise report failure. + if (Res1 != -Res2) { + dbgs() << "MERGEFUNC-SANITY: Non-symmetric; triple: " << TripleNumber + << "\n"; + F1->dump(); + F2->dump(); + Valid = false; + } + + if (Res1 == 0) + continue; + + unsigned k = j; + for (std::vector<WeakVH>::iterator K = J; K != E && k < Max; + ++k, ++K, ++TripleNumber) { + if (K == J) + continue; + + Function *F3 = cast<Function>(*K); + int Res3 = FunctionComparator(DL, F1, F3).compare(); + int Res4 = FunctionComparator(DL, F2, F3).compare(); + + bool Transitive = true; + + if (Res1 != 0 && Res1 == Res4) { + // F1 > F2, F2 > F3 => F1 > F3 + Transitive = Res3 == Res1; + } else if (Res3 != 0 && Res3 == -Res4) { + // F1 > F3, F3 > F2 => F1 > F2 + Transitive = Res3 == Res1; + } else if (Res4 != 0 && -Res3 == Res4) { + // F2 > F3, F3 > F1 => F2 > F1 + Transitive = Res4 == -Res1; + } + + if (!Transitive) { + dbgs() << "MERGEFUNC-SANITY: Non-transitive; triple: " + << TripleNumber << "\n"; + dbgs() << "Res1, Res3, Res4: " << Res1 << ", " << Res3 << ", " + << Res4 << "\n"; + F1->dump(); + F2->dump(); + F3->dump(); + Valid = false; + } + } + } + } + + dbgs() << "MERGEFUNC-SANITY: " << (Valid ? "Passed." 
: "Failed.") << "\n"; + return Valid; + } + return true; +} + bool MergeFunctions::runOnModule(Module &M) { bool Changed = false; DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); @@ -1158,12 +1210,13 @@ bool MergeFunctions::runOnModule(Module &M) { if (!I->isDeclaration() && !I->hasAvailableExternallyLinkage()) Deferred.push_back(WeakVH(I)); } - FnSet.resize(Deferred.size()); do { std::vector<WeakVH> Worklist; Deferred.swap(Worklist); + DEBUG(doSanityCheck(Worklist)); + DEBUG(dbgs() << "size of module: " << M.size() << '\n'); DEBUG(dbgs() << "size of worklist: " << Worklist.size() << '\n'); @@ -1175,8 +1228,7 @@ bool MergeFunctions::runOnModule(Module &M) { Function *F = cast<Function>(*I); if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && !F->mayBeOverridden()) { - ComparableFunction CF = ComparableFunction(F, DL); - Changed |= insert(CF); + Changed |= insert(F); } } @@ -1190,38 +1242,17 @@ bool MergeFunctions::runOnModule(Module &M) { Function *F = cast<Function>(*I); if (!F->isDeclaration() && !F->hasAvailableExternallyLinkage() && F->mayBeOverridden()) { - ComparableFunction CF = ComparableFunction(F, DL); - Changed |= insert(CF); + Changed |= insert(F); } } - DEBUG(dbgs() << "size of FnSet: " << FnSet.size() << '\n'); + DEBUG(dbgs() << "size of FnTree: " << FnTree.size() << '\n'); } while (!Deferred.empty()); - FnSet.clear(); + FnTree.clear(); return Changed; } -bool DenseMapInfo<ComparableFunction>::isEqual(const ComparableFunction &LHS, - const ComparableFunction &RHS) { - if (LHS.getFunc() == RHS.getFunc() && - LHS.getHash() == RHS.getHash()) - return true; - if (!LHS.getFunc() || !RHS.getFunc()) - return false; - - // One of these is a special "underlying pointer comparison only" object. - if (LHS.getDataLayout() == ComparableFunction::LookupOnly || - RHS.getDataLayout() == ComparableFunction::LookupOnly) - return false; - - assert(LHS.getDataLayout() == RHS.getDataLayout() && - "Comparing functions for different targets"); - - return FunctionComparator(LHS.getDataLayout(), LHS.getFunc(), - RHS.getFunc()).compare(); -} - // Replace direct callers of Old with New. void MergeFunctions::replaceDirectCallers(Function *Old, Function *New) { Constant *BitcastNew = ConstantExpr::getBitCast(New, Old->getType()); @@ -1376,54 +1407,57 @@ void MergeFunctions::mergeTwoFunctions(Function *F, Function *G) { ++NumFunctionsMerged; } -// Insert a ComparableFunction into the FnSet, or merge it away if equal to one +// Insert a Function into the FnTree, or merge it away if equal to one // that was already inserted. -bool MergeFunctions::insert(ComparableFunction &NewF) { - std::pair<FnSetType::iterator, bool> Result = FnSet.insert(NewF); +bool MergeFunctions::insert(Function *NewFunction) { + std::pair<FnTreeType::iterator, bool> Result = + FnTree.insert(FunctionPtr(NewFunction, DL)); + if (Result.second) { - DEBUG(dbgs() << "Inserting as unique: " << NewF.getFunc()->getName() << '\n'); + DEBUG(dbgs() << "Inserting as unique: " << NewFunction->getName() << '\n'); return false; } - const ComparableFunction &OldF = *Result.first; + const FunctionPtr &OldF = *Result.first; // Don't merge tiny functions, since it can just end up making the function // larger. // FIXME: Should still merge them if they are unnamed_addr and produce an // alias. 
- if (NewF.getFunc()->size() == 1) { - if (NewF.getFunc()->front().size() <= 2) { - DEBUG(dbgs() << NewF.getFunc()->getName() - << " is to small to bother merging\n"); + if (NewFunction->size() == 1) { + if (NewFunction->front().size() <= 2) { + DEBUG(dbgs() << NewFunction->getName() + << " is too small to bother merging\n"); return false; } } // Never thunk a strong function to a weak function. - assert(!OldF.getFunc()->mayBeOverridden() || - NewF.getFunc()->mayBeOverridden()); + assert(!OldF.getFunc()->mayBeOverridden() || NewFunction->mayBeOverridden()); - DEBUG(dbgs() << " " << OldF.getFunc()->getName() << " == " - << NewF.getFunc()->getName() << '\n'); + DEBUG(dbgs() << " " << OldF.getFunc()->getName() + << " == " << NewFunction->getName() << '\n'); - Function *DeleteF = NewF.getFunc(); - NewF.release(); + Function *DeleteF = NewFunction; mergeTwoFunctions(OldF.getFunc(), DeleteF); return true; } -// Remove a function from FnSet. If it was already in FnSet, add it to Deferred -// so that we'll look at it in the next round. +// Remove a function from FnTree. If it was already in FnTree, add +// it to Deferred so that we'll look at it in the next round. void MergeFunctions::remove(Function *F) { // We need to make sure we remove F, not a function "equal" to F per the // function equality comparator. - // - // The special "lookup only" ComparableFunction bypasses the expensive - // function comparison in favour of a pointer comparison on the underlying - // Function*'s. - ComparableFunction CF = ComparableFunction(F, ComparableFunction::LookupOnly); - if (FnSet.erase(CF)) { - DEBUG(dbgs() << "Removed " << F->getName() << " from set and deferred it.\n"); + FnTreeType::iterator found = FnTree.find(FunctionPtr(F, DL)); + size_t Erased = 0; + if (found != FnTree.end() && found->getFunc() == F) { + Erased = 1; + FnTree.erase(found); + } + + if (Erased) { + DEBUG(dbgs() << "Removed " << F->getName() + << " from set and deferred it.\n"); Deferred.push_back(F); } } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 38e1b8e..46a3187 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -53,6 +53,10 @@ static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); +static cl::opt<bool> RunLoadCombine("combine-loads", cl::init(false), + cl::Hidden, + cl::desc("Run the load combining pass")); + PassManagerBuilder::PassManagerBuilder() { OptLevel = 2; SizeLevel = 0; @@ -65,6 +69,7 @@ PassManagerBuilder::PassManagerBuilder() { SLPVectorize = RunSLPVectorization; LoopVectorize = RunLoopVectorization; RerollLoops = RunLoopRerolling; + LoadCombine = RunLoadCombine; } PassManagerBuilder::~PassManagerBuilder() { @@ -151,9 +156,9 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { if (!DisableUnitAtATime) { addExtensionsToPM(EP_ModuleOptimizerEarly, MPM); + MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createGlobalOptimizerPass()); // Optimize out global vars - MPM.add(createIPSCCPPass()); // IP SCCP MPM.add(createDeadArgEliminationPass()); // Dead argument elimination MPM.add(createInstructionCombiningPass());// Clean up after IPCP & DAE @@ -236,6 +241,9 @@ void PassManagerBuilder::populateModulePassManager(PassManagerBase &MPM) { MPM.add(createLoopUnrollPass()); } + if (LoadCombine) + MPM.add(createLoadCombinePass()); + MPM.add(createAggressiveDCEPass()); // Delete dead instructions MPM.add(createCFGSimplificationPass()); // Merge 
& remove BBs MPM.add(createInstructionCombiningPass()); // Clean up after everything. @@ -352,6 +360,9 @@ void PassManagerBuilder::populateLTOPassManager(PassManagerBase &PM, // More scalar chains could be vectorized due to more alias information PM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains. + if (LoadCombine) + PM.add(createLoadCombinePass()); + // Cleanup and simplify the code after the scalar optimizations. PM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, PM); diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index e04b1be..ab4dc1c 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -37,8 +37,9 @@ enum SelectPatternFlavor { SPF_SMIN, SPF_UMIN, SPF_SMAX, - SPF_UMAX - // SPF_ABS - TODO. + SPF_UMAX, + SPF_ABS, + SPF_NABS }; /// getComplexity: Assign a complexity or rank value to LLVM Values... @@ -246,6 +247,7 @@ private: bool DoXform = true); Instruction *transformSExtICmp(ICmpInst *ICI, Instruction &CI); bool WillNotOverflowSignedAdd(Value *LHS, Value *RHS); + bool WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS); Value *EmitGEPOffset(User *GEP); Instruction *scalarizePHI(ExtractElementInst &EI, PHINode *PN); Value *EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask); diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp index c37a9cf..99f0f1f 100644 --- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp +++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp @@ -865,69 +865,170 @@ Value *FAddCombine::createAddendVal return createFMul(OpndVal, Coeff.getValue(Instr->getType())); } -// dyn_castFoldableMul - If this value is a multiply that can be folded into -// other computations (because it has a constant operand), return the -// non-constant operand of the multiply, and set CST to point to the multiplier. -// Otherwise, return null. -// -static inline Value *dyn_castFoldableMul(Value *V, Constant *&CST) { - if (!V->hasOneUse() || !V->getType()->isIntOrIntVectorTy()) - return nullptr; - - Instruction *I = dyn_cast<Instruction>(V); - if (!I) return nullptr; - - if (I->getOpcode() == Instruction::Mul) - if ((CST = dyn_cast<Constant>(I->getOperand(1)))) - return I->getOperand(0); - if (I->getOpcode() == Instruction::Shl) - if ((CST = dyn_cast<Constant>(I->getOperand(1)))) { - // The multiplier is really 1 << CST. - CST = ConstantExpr::getShl(ConstantInt::get(V->getType(), 1), CST); - return I->getOperand(0); - } - return nullptr; +// If one of the operands only has one non-zero bit, and if the other +// operand has a known-zero bit in a more significant place than it (not +// including the sign bit) the ripple may go up to and fill the zero, but +// won't change the sign. For example, (X & ~4) + 1. +static bool checkRippleForAdd(const APInt &Op0KnownZero, + const APInt &Op1KnownZero) { + APInt Op1MaybeOne = ~Op1KnownZero; + // Make sure that one of the operands has at most one bit set to 1. + if (Op1MaybeOne.countPopulation() != 1) + return false; + + // Find the most significant known 0 other than the sign bit. 
+ int BitWidth = Op0KnownZero.getBitWidth(); + APInt Op0KnownZeroTemp(Op0KnownZero); + Op0KnownZeroTemp.clearBit(BitWidth - 1); + int Op0ZeroPosition = BitWidth - Op0KnownZeroTemp.countLeadingZeros() - 1; + + int Op1OnePosition = BitWidth - Op1MaybeOne.countLeadingZeros() - 1; + assert(Op1OnePosition >= 0); + + // This also covers the case of no known zero, since in that case + // Op0ZeroPosition is -1. + return Op0ZeroPosition >= Op1OnePosition; } - /// WillNotOverflowSignedAdd - Return true if we can prove that: /// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) /// This basically requires proving that the add in the original type would not /// overflow to change the sign bit or have a carry out. +/// TODO: Handle this for Vectors. bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { // There are different heuristics we can use for this. Here are some simple // ones. - // Add has the property that adding any two 2's complement numbers can only - // have one carry bit which can change a sign. As such, if LHS and RHS each - // have at least two sign bits, we know that the addition of the two values - // will sign extend fine. + // If LHS and RHS each have at least two sign bits, the addition will look + // like + // + // XX..... + + // YY..... + // + // If the carry into the most significant position is 0, X and Y can't both + // be 1 and therefore the carry out of the addition is also 0. + // + // If the carry into the most significant position is 1, X and Y can't both + // be 0 and therefore the carry out of the addition is also 1. + // + // Since the carry into the most significant position is always equal to + // the carry out of the addition, there is no signed overflow. if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) return true; + if (IntegerType *IT = dyn_cast<IntegerType>(LHS->getType())) { + int BitWidth = IT->getBitWidth(); + APInt LHSKnownZero(BitWidth, 0); + APInt LHSKnownOne(BitWidth, 0); + computeKnownBits(LHS, LHSKnownZero, LHSKnownOne); - // If one of the operands only has one non-zero bit, and if the other operand - // has a known-zero bit in a more significant place than it (not including the - // sign bit) the ripple may go up to and fill the zero, but won't change the - // sign. For example, (X & ~4) + 1. + APInt RHSKnownZero(BitWidth, 0); + APInt RHSKnownOne(BitWidth, 0); + computeKnownBits(RHS, RHSKnownZero, RHSKnownOne); + + // Addition of two 2's complement numbers having opposite signs will never + // overflow. + if ((LHSKnownOne[BitWidth - 1] && RHSKnownZero[BitWidth - 1]) || + (LHSKnownZero[BitWidth - 1] && RHSKnownOne[BitWidth - 1])) + return true; + + // Check if carry bit of addition will not cause overflow. + if (checkRippleForAdd(LHSKnownZero, RHSKnownZero)) + return true; + if (checkRippleForAdd(RHSKnownZero, LHSKnownZero)) + return true; + } + return false; +} - // TODO: Implement. +/// WillNotOverflowUnsignedAdd - Return true if we can prove that: +/// (zext (add LHS, RHS)) === (add (zext LHS), (zext RHS)) +bool InstCombiner::WillNotOverflowUnsignedAdd(Value *LHS, Value *RHS) { + // There are different heuristics we can use for this. Here is a simple one. + // If the sign bit of LHS and that of RHS are both zero, no unsigned wrap. 
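+ // Worked 8-bit example (illustrative sketch, not part of the patch): if + // both sign bits are known zero, each operand is at most 0x7F, so the sum + // is at most 0x7F + 0x7F = 0xFE, which still fits in 8 bits -- no + // unsigned wrap. 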
+ bool LHSKnownNonNegative, LHSKnownNegative; + bool RHSKnownNonNegative, RHSKnownNegative; + ComputeSignBit(LHS, LHSKnownNonNegative, LHSKnownNegative, DL, 0); + ComputeSignBit(RHS, RHSKnownNonNegative, RHSKnownNegative, DL, 0); + if (LHSKnownNonNegative && RHSKnownNonNegative) + return true; return false; } -Instruction *InstCombiner::visitAdd(BinaryOperator &I) { - bool Changed = SimplifyAssociativeOrCommutative(I); +// Checks if any operand is negative and we can convert add to sub. +// This function checks for following negative patterns +// ADD(XOR(OR(Z, NOT(C)), C)), 1) == NEG(AND(Z, C)) +// ADD(XOR(AND(Z, C), C), 1) == NEG(OR(Z, ~C)) +// XOR(AND(Z, C), (C + 1)) == NEG(OR(Z, ~C)) if C is even +static Value *checkForNegativeOperand(BinaryOperator &I, + InstCombiner::BuilderTy *Builder) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - if (Value *V = SimplifyVectorOp(I)) - return ReplaceInstUsesWith(I, V); + // This function creates 2 instructions to replace ADD, we need at least one + // of LHS or RHS to have one use to ensure benefit in transform. + if (!LHS->hasOneUse() && !RHS->hasOneUse()) + return nullptr; - if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), - I.hasNoUnsignedWrap(), DL)) - return ReplaceInstUsesWith(I, V); + Value *X = nullptr, *Y = nullptr, *Z = nullptr; + const APInt *C1 = nullptr, *C2 = nullptr; + + // if ONE is on other side, swap + if (match(RHS, m_Add(m_Value(X), m_One()))) + std::swap(LHS, RHS); + + if (match(LHS, m_Add(m_Value(X), m_One()))) { + // if XOR on other side, swap + if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1)))) + std::swap(X, RHS); + + if (match(X, m_Xor(m_Value(Y), m_APInt(C1)))) { + // X = XOR(Y, C1), Y = OR(Z, C2), C2 = NOT(C1) ==> X == NOT(AND(Z, C1)) + // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, AND(Z, C1)) + if (match(Y, m_Or(m_Value(Z), m_APInt(C2))) && (*C2 == ~(*C1))) { + Value *NewAnd = Builder->CreateAnd(Z, *C1); + return Builder->CreateSub(RHS, NewAnd, "sub"); + } else if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && (*C1 == *C2)) { + // X = XOR(Y, C1), Y = AND(Z, C2), C2 == C1 ==> X == NOT(OR(Z, ~C1)) + // ADD(ADD(X, 1), RHS) == ADD(X, ADD(RHS, 1)) == SUB(RHS, OR(Z, ~C1)) + Value *NewOr = Builder->CreateOr(Z, ~(*C1)); + return Builder->CreateSub(RHS, NewOr, "sub"); + } + } + } + + // Restore LHS and RHS + LHS = I.getOperand(0); + RHS = I.getOperand(1); + + // if XOR is on other side, swap + if (match(RHS, m_Xor(m_Value(Y), m_APInt(C1)))) + std::swap(LHS, RHS); + + // C2 is ODD + // LHS = XOR(Y, C1), Y = AND(Z, C2), C1 == (C2 + 1) => LHS == NEG(OR(Z, ~C2)) + // ADD(LHS, RHS) == SUB(RHS, OR(Z, ~C2)) + if (match(LHS, m_Xor(m_Value(Y), m_APInt(C1)))) + if (C1->countTrailingZeros() == 0) + if (match(Y, m_And(m_Value(Z), m_APInt(C2))) && *C1 == (*C2 + 1)) { + Value *NewOr = Builder->CreateOr(Z, ~(*C2)); + return Builder->CreateSub(RHS, NewOr, "sub"); + } + return nullptr; +} + +Instruction *InstCombiner::visitAdd(BinaryOperator &I) { + bool Changed = SimplifyAssociativeOrCommutative(I); + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + + if (Value *V = SimplifyVectorOp(I)) + return ReplaceInstUsesWith(I, V); - // (A*B)+(A*C) -> A*(B+C) etc + if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), + I.hasNoUnsignedWrap(), DL)) + return ReplaceInstUsesWith(I, V); + + // (A*B)+(A*C) -> A*(B+C) etc if (Value *V = SimplifyUsingDistributiveLaws(I)) return ReplaceInstUsesWith(I, V); @@ -1025,23 +1126,8 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { if (Value *V = dyn_castNegVal(RHS)) 
return BinaryOperator::CreateSub(LHS, V); - - { - Constant *C2; - if (Value *X = dyn_castFoldableMul(LHS, C2)) { - if (X == RHS) // X*C + X --> X * (C+1) - return BinaryOperator::CreateMul(RHS, AddOne(C2)); - - // X*C1 + X*C2 --> X * (C1+C2) - Constant *C1; - if (X == dyn_castFoldableMul(RHS, C1)) - return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); - } - - // X + X*C --> X * (C+1) - if (dyn_castFoldableMul(RHS, C2) == LHS) - return BinaryOperator::CreateMul(LHS, AddOne(C2)); - } + if (Value *V = checkForNegativeOperand(I, Builder)) + return ReplaceInstUsesWith(I, V); // A+B --> A|B iff A and B have no bits set in common. if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { @@ -1059,29 +1145,6 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { } } - // W*X + Y*Z --> W * (X+Z) iff W == Y - { - Value *W, *X, *Y, *Z; - if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && - match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { - if (W != Y) { - if (W == Z) { - std::swap(Y, Z); - } else if (Y == X) { - std::swap(W, X); - } else if (X == Z) { - std::swap(Y, Z); - std::swap(W, X); - } - } - - if (W == Y) { - Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); - return BinaryOperator::CreateMul(W, NewAdd); - } - } - } - if (Constant *CRHS = dyn_cast<Constant>(RHS)) { Value *X; if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X @@ -1191,6 +1254,18 @@ Instruction *InstCombiner::visitAdd(BinaryOperator &I) { return BinaryOperator::CreateOr(A, B); } + // TODO(jingyue): Consider WillNotOverflowSignedAdd and + // WillNotOverflowUnsignedAdd to reduce the number of invocations of + // computeKnownBits. + if (!I.hasNoSignedWrap() && WillNotOverflowSignedAdd(LHS, RHS)) { + Changed = true; + I.setHasNoSignedWrap(true); + } + if (!I.hasNoUnsignedWrap() && WillNotOverflowUnsignedAdd(LHS, RHS)) { + Changed = true; + I.setHasNoUnsignedWrap(true); + } + return Changed ? &I : nullptr; } @@ -1478,9 +1553,9 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { return BinaryOperator::CreateAnd(Op0, Builder->CreateNot(Y, Y->getName() + ".not")); - // 0 - (X sdiv C) -> (X sdiv -C) - if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && - match(Op0, m_Zero())) + // 0 - (X sdiv C) -> (X sdiv -C) provided the negation doesn't overflow. + if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && match(Op0, m_Zero()) && + !C->isMinSignedValue()) return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C)); // 0 - (X << Y) -> (-X << Y) when X is freely negatable. 
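The new !C->isMinSignedValue() guard on the 0 - (X sdiv C) fold above deserves a concrete case: in two's complement, negating the minimum signed value wraps back to itself, so the rewritten divisor would silently drop the negation. A small illustrative program, using 8-bit values for brevity (an editor-supplied sketch, not part of the patch):

  #include <cstdint>
  #include <cstdio>

  int main() {
    int8_t C = INT8_MIN;           // -128
    int8_t NegC = (int8_t)(0 - C); // wraps back to -128 in two's complement
    std::printf("C=%d -C=%d\n", C, NegC); // prints C=-128 -C=-128
    // Rewriting 0 - (X sdiv C) as X sdiv (-C) would keep the divisor at
    // -128 and lose the negation; e.g. for X = -128:
    int8_t X = INT8_MIN;
    std::printf("%d vs %d\n", (int8_t)(0 - (X / C)), X / NegC); // -1 vs 1
    return 0;
  }

So the fold is only sound when -C is representable, which is exactly what the isMinSignedValue check rules out.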
@@ -1488,19 +1563,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { if (Value *XNeg = dyn_castNegVal(X)) return BinaryOperator::CreateShl(XNeg, Y); - // X - X*C --> X * (1-C) - if (match(Op1, m_Mul(m_Specific(Op0), m_Constant(CI)))) { - Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI); - return BinaryOperator::CreateMul(Op0, CP1); - } - - // X - X<<C --> X * (1-(1<<C)) - if (match(Op1, m_Shl(m_Specific(Op0), m_Constant(CI)))) { - Constant *One = ConstantInt::get(I.getType(), 1); - C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); - return BinaryOperator::CreateMul(Op0, C); - } - // X - A*-B -> X + A*B // X - -A*B -> X + A*B Value *A, *B; @@ -1517,16 +1579,6 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { } } - Constant *C1; - if (Value *X = dyn_castFoldableMul(Op0, C1)) { - if (X == Op1) // X*C - X --> X * (C-1) - return BinaryOperator::CreateMul(Op1, SubOne(C1)); - - Constant *C2; // X*C1 - X*C2 -> X * (C1-C2) - if (X == dyn_castFoldableMul(Op1, C2)) - return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); - } - // Optimize pointer differences into the same array into a size. Consider: // &A[10] - &A[0]: we should compile this to "10". if (DL) { @@ -1541,7 +1593,7 @@ Instruction *InstCombiner::visitSub(BinaryOperator &I) { match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) return ReplaceInstUsesWith(I, Res); - } + } return nullptr; } diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 4f5d65a..b23a606 100644 --- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1996,29 +1996,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) { C1 = dyn_cast<ConstantInt>(C); C2 = dyn_cast<ConstantInt>(D); if (C1 && C2) { // (A & C1)|(B & C2) - // If we have: ((V + N) & C1) | (V & C2) - // .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0 - // replace with V+N. - if (C1->getValue() == ~C2->getValue()) { - if ((C2->getValue() & (C2->getValue()+1)) == 0 && // C2 == 0+1+ - match(A, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == B && MaskedValueIsZero(V2, C2->getValue())) - return ReplaceInstUsesWith(I, A); - if (V2 == B && MaskedValueIsZero(V1, C2->getValue())) - return ReplaceInstUsesWith(I, A); - } - // Or commutes, try both ways. - if ((C1->getValue() & (C1->getValue()+1)) == 0 && - match(B, m_Add(m_Value(V1), m_Value(V2)))) { - // Add commutes, try both ways. - if (V1 == A && MaskedValueIsZero(V2, C1->getValue())) - return ReplaceInstUsesWith(I, B); - if (V2 == A && MaskedValueIsZero(V1, C1->getValue())) - return ReplaceInstUsesWith(I, B); - } - } - if ((C1->getValue() & C2->getValue()) == 0) { // ((V | N) & C1) | (V & C2) --> (V|N) & (C1|C2) // iff (C1&C2) == 0 and (N&~C1) == 0 diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp index d4b583b..658178d 100644 --- a/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -421,6 +421,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { return InsertValueInst::Create(Struct, II->getArgOperand(0), 0); } } + + // We can strength-reduce this signed add into a regular add if we + // can prove that it will never overflow. 
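+ // Illustrative before/after (editor's sketch, not part of the patch): + // when the add provably cannot overflow, + // %s = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + // becomes + // %s1 = add nsw i32 %a, %b + // and the overflow bit is replaced by the constant false. 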
+ if (II->getIntrinsicID() == Intrinsic::sadd_with_overflow) { + Value *LHS = II->getArgOperand(0), *RHS = II->getArgOperand(1); + if (WillNotOverflowSignedAdd(LHS, RHS)) { + Value *Add = Builder->CreateNSWAdd(LHS, RHS); + Add->takeName(&CI); + Constant *V[] = {UndefValue::get(Add->getType()), Builder->getFalse()}; + StructType *ST = cast<StructType>(II->getType()); + Constant *Struct = ConstantStruct::get(ST, V); + return InsertValueInst::Create(Struct, Add, 0); + } + } + break; case Intrinsic::usub_with_overflow: case Intrinsic::ssub_with_overflow: @@ -800,6 +815,11 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { case Intrinsic::ppc_altivec_vperm: // Turn vperm(V1,V2,mask) -> shuffle(V1,V2,mask) if mask is a constant. + // Note that ppc_altivec_vperm has a big-endian bias, so when creating + // a vector shuffle for little endian, we must undo the transformation + // performed on vec_perm in altivec.h. That is, we must complement + // the permutation mask with respect to 31 and reverse the order of + // V1 and V2. if (Constant *Mask = dyn_cast<Constant>(II->getArgOperand(2))) { assert(Mask->getType()->getVectorNumElements() == 16 && "Bad type for intrinsic!"); @@ -832,10 +852,14 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { unsigned Idx = cast<ConstantInt>(Mask->getAggregateElement(i))->getZExtValue(); Idx &= 31; // Match the hardware behavior. + if (DL && DL->isLittleEndian()) + Idx = 31 - Idx; if (!ExtractedElts[Idx]) { + Value *Op0ToUse = (DL && DL->isLittleEndian()) ? Op1 : Op0; + Value *Op1ToUse = (DL && DL->isLittleEndian()) ? Op0 : Op1; ExtractedElts[Idx] = - Builder->CreateExtractElement(Idx < 16 ? Op0 : Op1, + Builder->CreateExtractElement(Idx < 16 ? Op0ToUse : Op1ToUse, Builder->getInt32(Idx&15)); } @@ -913,6 +937,20 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::AMDGPU_rcp: { + if (const ConstantFP *C = dyn_cast<ConstantFP>(II->getArgOperand(0))) { + const APFloat &ArgVal = C->getValueAPF(); + APFloat Val(ArgVal.getSemantics(), 1.0); + APFloat::opStatus Status = Val.divide(ArgVal, + APFloat::rmNearestTiesToEven); + // Only do this if it was exact and therefore not dependent on the + // rounding mode. + if (Status == APFloat::opOK) + return ReplaceInstUsesWith(CI, ConstantFP::get(II->getContext(), Val)); + } + + break; + } case Intrinsic::stackrestore: { // If the save is right next to the restore, remove the restore. This can // happen when variable allocas are DCE'd. diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 356803a..ff083d7 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1434,7 +1434,12 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) { if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Src)) { // If casting the result of a getelementptr instruction with no offset, turn // this into a cast of the original pointer! - if (GEP->hasAllZeroIndices()) { + if (GEP->hasAllZeroIndices() && + // If CI is an addrspacecast and GEP changes the pointer type, merging + // GEP into CI would undo canonicalizing addrspacecast with different + // pointer types, causing infinite loops. 
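+ // Illustrative hazard (editor's sketch, not part of the patch): with + // %g = getelementptr { i32 }* %p, i32 0, i32 0 ; {i32}* -> i32* + // %a = addrspacecast i32* %g to i32 addrspace(1)* + // folding %g into %a would produce an addrspacecast between different + // pointee types, which visitAddrSpaceCast splits back into a bitcast + // plus an addrspacecast, recreating the original pattern. 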
+        (!isa<AddrSpaceCastInst>(CI) ||
+         GEP->getType() == GEP->getPointerOperand()->getType())) {
       // Changing the cast operand is usually not a good idea but it is safe
       // here because the pointer operand is being replaced with another
       // pointer operand so the opcode doesn't need to change.
@@ -1904,5 +1909,24 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
 }
 
 Instruction *InstCombiner::visitAddrSpaceCast(AddrSpaceCastInst &CI) {
+  // If the destination pointer element type differs from the source's, bitcast
+  // to the destination element type first, then addrspacecast into the new
+  // address space. This allows the cast to be exposed to other transforms.
+  Value *Src = CI.getOperand(0);
+  PointerType *SrcTy = cast<PointerType>(Src->getType()->getScalarType());
+  PointerType *DestTy = cast<PointerType>(CI.getType()->getScalarType());
+
+  Type *DestElemTy = DestTy->getElementType();
+  if (SrcTy->getElementType() != DestElemTy) {
+    Type *MidTy = PointerType::get(DestElemTy, SrcTy->getAddressSpace());
+    if (VectorType *VT = dyn_cast<VectorType>(CI.getType())) {
+      // Handle vectors of pointers.
+      MidTy = VectorType::get(MidTy, VT->getNumElements());
+    }
+
+    Value *NewBitCast = Builder->CreateBitCast(Src, MidTy);
+    return new AddrSpaceCastInst(NewBitCast, CI.getType());
+  }
+
   return commonPointerCastTransforms(CI);
 }
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 02e8bf1..5e71c5c 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -612,9 +612,10 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
   if (ICmpInst::isSigned(Cond))
     return nullptr;
 
-  // Look through bitcasts.
-  if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
-    RHS = BCI->getOperand(0);
+  // Look through bitcasts and addrspacecasts. We do not, however, want to
+  // remove 0 GEPs.
+  if (!isa<GetElementPtrInst>(RHS))
+    RHS = RHS->stripPointerCasts();
 
   Value *PtrBase = GEPLHS->getOperand(0);
   if (DL && PtrBase == RHS && GEPLHS->isInBounds()) {
@@ -655,9 +656,24 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
         (GEPRHS->hasAllConstantIndices() || GEPRHS->hasOneUse()) &&
         PtrBase->stripPointerCasts() ==
             GEPRHS->getOperand(0)->stripPointerCasts()) {
+      Value *LOffset = EmitGEPOffset(GEPLHS);
+      Value *ROffset = EmitGEPOffset(GEPRHS);
+
+      // If we looked through an addrspacecast between different sized address
+      // spaces, the LHS and RHS pointers are different sized
+      // integers. Truncate to the smaller one.
+      Type *LHSIndexTy = LOffset->getType();
+      Type *RHSIndexTy = ROffset->getType();
+      if (LHSIndexTy != RHSIndexTy) {
+        if (LHSIndexTy->getPrimitiveSizeInBits() <
+            RHSIndexTy->getPrimitiveSizeInBits()) {
+          ROffset = Builder->CreateTrunc(ROffset, LHSIndexTy);
+        } else
+          LOffset = Builder->CreateTrunc(LOffset, RHSIndexTy);
+      }
+
       Value *Cmp = Builder->CreateICmp(ICmpInst::getSignedPredicate(Cond),
-                                       EmitGEPOffset(GEPLHS),
-                                       EmitGEPOffset(GEPRHS));
+                                       LOffset, ROffset);
       return ReplaceInstUsesWith(I, Cmp);
     }
 
@@ -667,26 +683,12 @@ Instruction *InstCombiner::FoldGEPICmp(GEPOperator *GEPLHS, Value *RHS,
   }
 
   // If one of the GEPs has all zero indices, recurse.
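Illustrating the offset truncation added earlier in this hunk, assuming a 64-bit addrspace(0) and a 32-bit addrspace(1) (the sizes are not from the patch): after stripPointerCasts looks through an addrspacecast, EmitGEPOffset can return offsets of two different integer widths, so the wider one is narrowed before the compare.

    %loff = shl i64 %i, 2              ; offset of GEPLHS, 64-bit index type
    %roff = shl i32 %j, 2              ; offset of GEPRHS, 32-bit index type
    %loff.tr = trunc i64 %loff to i32  ; truncate to the smaller width
    %cmp = icmp slt i32 %loff.tr, %roff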
- bool AllZeros = true; - for (unsigned i = 1, e = GEPLHS->getNumOperands(); i != e; ++i) - if (!isa<Constant>(GEPLHS->getOperand(i)) || - !cast<Constant>(GEPLHS->getOperand(i))->isNullValue()) { - AllZeros = false; - break; - } - if (AllZeros) + if (GEPLHS->hasAllZeroIndices()) return FoldGEPICmp(GEPRHS, GEPLHS->getOperand(0), ICmpInst::getSwappedPredicate(Cond), I); // If the other GEP has all zero indices, recurse. - AllZeros = true; - for (unsigned i = 1, e = GEPRHS->getNumOperands(); i != e; ++i) - if (!isa<Constant>(GEPRHS->getOperand(i)) || - !cast<Constant>(GEPRHS->getOperand(i))->isNullValue()) { - AllZeros = false; - break; - } - if (AllZeros) + if (GEPRHS->hasAllZeroIndices()) return FoldGEPICmp(GEPLHS, GEPRHS->getOperand(0), Cond, I); bool GEPsInBounds = GEPLHS->isInBounds() && GEPRHS->isInBounds(); @@ -2026,9 +2028,13 @@ static Instruction *ProcessUAddIdiom(Instruction &I, Value *OrigAddV, /// replacement required. static Instruction *ProcessUMulZExtIdiom(ICmpInst &I, Value *MulVal, Value *OtherVal, InstCombiner &IC) { + // Don't bother doing this transformation for pointers, don't do it for + // vectors. + if (!isa<IntegerType>(MulVal->getType())) + return nullptr; + assert(I.getOperand(0) == MulVal || I.getOperand(1) == MulVal); assert(I.getOperand(0) == OtherVal || I.getOperand(1) == OtherVal); - assert(isa<IntegerType>(MulVal->getType())); Instruction *MulInstr = cast<Instruction>(MulVal); assert(MulInstr->getOpcode() == Instruction::Mul); @@ -2523,7 +2529,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // bit is set. If the comparison is against zero, then this is a check // to see if *that* bit is set. APInt Op0KnownZeroInverted = ~Op0KnownZero; - if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + if (~Op1KnownZero == 0) { // If the LHS is an AND with the same constant, look through it. Value *LHS = nullptr; ConstantInt *LHSC = nullptr; @@ -2533,11 +2539,19 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) == 0" into "x != 3". + // or turn "((1 << x)&7) == 0" into "x > 2". Value *X = nullptr; if (match(LHS, m_Shl(m_One(), m_Value(X)))) { - unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); - return new ICmpInst(ICmpInst::ICMP_NE, X, - ConstantInt::get(X->getType(), CmpVal)); + APInt ValToCheck = Op0KnownZeroInverted; + if (ValToCheck.isPowerOf2()) { + unsigned CmpVal = ValToCheck.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_NE, X, + ConstantInt::get(X->getType(), CmpVal)); + } else if ((++ValToCheck).isPowerOf2()) { + unsigned CmpVal = ValToCheck.countTrailingZeros() - 1; + return new ICmpInst(ICmpInst::ICMP_UGT, X, + ConstantInt::get(X->getType(), CmpVal)); + } } // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, @@ -2560,7 +2574,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // bit is set. If the comparison is against zero, then this is a check // to see if *that* bit is set. APInt Op0KnownZeroInverted = ~Op0KnownZero; - if (~Op1KnownZero == 0 && Op0KnownZeroInverted.isPowerOf2()) { + if (~Op1KnownZero == 0) { // If the LHS is an AND with the same constant, look through it. Value *LHS = nullptr; ConstantInt *LHSC = nullptr; @@ -2570,11 +2584,19 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // If the LHS is 1 << x, and we know the result is a power of 2 like 8, // then turn "((1 << x)&8) != 0" into "x == 3". + // or turn "((1 << x)&7) != 0" into "x < 3". 
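A quick check of the new non-power-of-two mask case, mirroring the comment above (the mask 7 covers bits 0 through 2):

    %shl = shl i32 1, %x
    %and = and i32 %shl, 7
    %ne  = icmp ne i32 %and, 0
    ; becomes, since 7 + 1 is a power of two with countTrailingZeros(8) == 3:
    %ne  = icmp ult i32 %x, 3    ; x = 0, 1, 2 keep the shifted bit in the mask

The dual "== 0" form earlier in this change becomes an icmp ugt against 2 by the same argument.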
Value *X = nullptr; if (match(LHS, m_Shl(m_One(), m_Value(X)))) { - unsigned CmpVal = Op0KnownZeroInverted.countTrailingZeros(); - return new ICmpInst(ICmpInst::ICMP_EQ, X, - ConstantInt::get(X->getType(), CmpVal)); + APInt ValToCheck = Op0KnownZeroInverted; + if (ValToCheck.isPowerOf2()) { + unsigned CmpVal = ValToCheck.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_EQ, X, + ConstantInt::get(X->getType(), CmpVal)); + } else if ((++ValToCheck).isPowerOf2()) { + unsigned CmpVal = ValToCheck.countTrailingZeros(); + return new ICmpInst(ICmpInst::ICMP_ULT, X, + ConstantInt::get(X->getType(), CmpVal)); + } } // If the LHS is 8 >>u x, and we know the result is a power of 2 like 1, diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp index 66d0938..c10e92a 100644 --- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp +++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp @@ -50,99 +50,102 @@ static bool pointsToConstantGlobal(Value *V) { /// can optimize this. static bool isOnlyCopiedFromConstantGlobal(Value *V, MemTransferInst *&TheCopy, - SmallVectorImpl<Instruction *> &ToDelete, - bool IsOffset = false) { + SmallVectorImpl<Instruction *> &ToDelete) { // We track lifetime intrinsics as we encounter them. If we decide to go // ahead and replace the value with the global, this lets the caller quickly // eliminate the markers. - for (Use &U : V->uses()) { - Instruction *I = cast<Instruction>(U.getUser()); - - if (LoadInst *LI = dyn_cast<LoadInst>(I)) { - // Ignore non-volatile loads, they are always ok. - if (!LI->isSimple()) return false; - continue; - } - - if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) { - // If uses of the bitcast are ok, we are ok. - if (!isOnlyCopiedFromConstantGlobal(I, TheCopy, ToDelete, IsOffset)) - return false; - continue; - } - if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { - // If the GEP has all zero indices, it doesn't offset the pointer. If it - // doesn't, it does. - if (!isOnlyCopiedFromConstantGlobal( - GEP, TheCopy, ToDelete, IsOffset || !GEP->hasAllZeroIndices())) - return false; - continue; - } + SmallVector<std::pair<Value *, bool>, 35> ValuesToInspect; + ValuesToInspect.push_back(std::make_pair(V, false)); + while (!ValuesToInspect.empty()) { + auto ValuePair = ValuesToInspect.pop_back_val(); + const bool IsOffset = ValuePair.second; + for (auto &U : ValuePair.first->uses()) { + Instruction *I = cast<Instruction>(U.getUser()); + + if (LoadInst *LI = dyn_cast<LoadInst>(I)) { + // Ignore non-volatile loads, they are always ok. + if (!LI->isSimple()) return false; + continue; + } - if (CallSite CS = I) { - // If this is the function being called then we treat it like a load and - // ignore it. - if (CS.isCallee(&U)) + if (isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I)) { + // If uses of the bitcast are ok, we are ok. + ValuesToInspect.push_back(std::make_pair(I, IsOffset)); continue; + } + if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { + // If the GEP has all zero indices, it doesn't offset the pointer. If it + // doesn't, it does. + ValuesToInspect.push_back( + std::make_pair(I, IsOffset || !GEP->hasAllZeroIndices())); + continue; + } - // Inalloca arguments are clobbered by the call. - unsigned ArgNo = CS.getArgumentNo(&U); - if (CS.isInAllocaArgument(ArgNo)) - return false; + if (CallSite CS = I) { + // If this is the function being called then we treat it like a load and + // ignore it. 
+        if (CS.isCallee(&U))
+          continue;
 
-      // Inalloca arguments are clobbered by the call.
-      unsigned ArgNo = CS.getArgumentNo(&U);
-      if (CS.isInAllocaArgument(ArgNo))
-        return false;
+        // Inalloca arguments are clobbered by the call.
+        unsigned ArgNo = CS.getArgumentNo(&U);
+        if (CS.isInAllocaArgument(ArgNo))
+          return false;
 
-      // If this is a readonly/readnone call site, then we know it is just a
-      // load (but one that potentially returns the value itself), so we can
-      // ignore it if we know that the value isn't captured.
-      if (CS.onlyReadsMemory() &&
-          (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
-        continue;
+        // If this is a readonly/readnone call site, then we know it is just a
+        // load (but one that potentially returns the value itself), so we can
+        // ignore it if we know that the value isn't captured.
+        if (CS.onlyReadsMemory() &&
+            (CS.getInstruction()->use_empty() || CS.doesNotCapture(ArgNo)))
+          continue;
+
+        // If this is being passed as a byval argument, the caller is making a
+        // copy, so it is only a read of the alloca.
+        if (CS.isByValArgument(ArgNo))
+          continue;
+      }
 
-      // If this is being passed as a byval argument, the caller is making a
-      // copy, so it is only a read of the alloca.
-      if (CS.isByValArgument(ArgNo))
-        continue;
-    }
 
-    // Lifetime intrinsics can be handled by the caller.
-    if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
-          II->getIntrinsicID() == Intrinsic::lifetime_end) {
-        assert(II->use_empty() && "Lifetime markers have no result to use!");
-        ToDelete.push_back(II);
-        continue;
+      // Lifetime intrinsics can be handled by the caller.
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+        if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
+            II->getIntrinsicID() == Intrinsic::lifetime_end) {
+          assert(II->use_empty() && "Lifetime markers have no result to use!");
+          ToDelete.push_back(II);
+          continue;
+        }
       }
-    }
 
-    // If this isn't our memcpy/memmove, reject it as something we can't
-    // handle.
-    MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
-    if (!MI)
-      return false;
+      // If this isn't our memcpy/memmove, reject it as something we can't
+      // handle.
+      MemTransferInst *MI = dyn_cast<MemTransferInst>(I);
+      if (!MI)
+        return false;
 
-    // If the transfer is using the alloca as a source of the transfer, then
-    // ignore it since it is a load (unless the transfer is volatile).
-    if (U.getOperandNo() == 1) {
-      if (MI->isVolatile()) return false;
-      continue;
-    }
+      // If the transfer is using the alloca as a source of the transfer, then
+      // ignore it since it is a load (unless the transfer is volatile).
+      if (U.getOperandNo() == 1) {
+        if (MI->isVolatile()) return false;
+        continue;
+      }
 
-    // If we already have seen a copy, reject the second one.
-    if (TheCopy) return false;
+      // If we already have seen a copy, reject the second one.
+      if (TheCopy) return false;
 
-    // If the pointer has been offset from the start of the alloca, we can't
-    // safely handle this.
-    if (IsOffset) return false;
+      // If the pointer has been offset from the start of the alloca, we can't
+      // safely handle this.
+      if (IsOffset) return false;
 
-    // If the memintrinsic isn't using the alloca as the dest, reject it.
-    if (U.getOperandNo() != 0) return false;
+      // If the memintrinsic isn't using the alloca as the dest, reject it.
+      if (U.getOperandNo() != 0) return false;
 
-    // If the source of the memcpy/move is not a constant global, reject it.
-    if (!pointsToConstantGlobal(MI->getSource()))
-      return false;
+      // If the source of the memcpy/move is not a constant global, reject it.
+ if (!pointsToConstantGlobal(MI->getSource())) + return false; - // Otherwise, the transform is safe. Remember the copy instruction. - TheCopy = MI; + // Otherwise, the transform is safe. Remember the copy instruction. + TheCopy = MI; + } } return true; } diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 9996ebc..6c6e7d8 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -203,8 +203,11 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *X; Constant *C1; if (match(Op0, m_OneUse(m_Add(m_Value(X), m_Constant(C1))))) { - Value *Add = Builder->CreateMul(X, Op1); - return BinaryOperator::CreateAdd(Add, Builder->CreateMul(C1, Op1)); + Value *Mul = Builder->CreateMul(C1, Op1); + // Only go forward with the transform if C1*CI simplifies to a tidier + // constant. + if (!match(Mul, m_Mul(m_Value(), m_Value()))) + return BinaryOperator::CreateAdd(Builder->CreateMul(X, Op1), Mul); } } } @@ -990,6 +993,10 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { } if (Constant *RHS = dyn_cast<Constant>(Op1)) { + // X/INT_MIN -> X == INT_MIN + if (RHS->isMinSignedValue()) + return new ZExtInst(Builder->CreateICmpEQ(Op0, Op1), I.getType()); + // -X/C --> X/-C provided the negation doesn't overflow. if (SubOperator *Sub = dyn_cast<SubOperator>(Op0)) if (match(Sub->getOperand(0), m_Zero()) && Sub->hasNoSignedWrap()) diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 9a41e4b..06c9e29 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -31,13 +31,18 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { ICmpInst *ICI = dyn_cast<ICmpInst>(SI->getCondition()); if (!ICI) return SPF_UNKNOWN; - LHS = ICI->getOperand(0); - RHS = ICI->getOperand(1); + ICmpInst::Predicate Pred = ICI->getPredicate(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); + Value *TrueVal = SI->getTrueValue(); + Value *FalseVal = SI->getFalseValue(); + + LHS = CmpLHS; + RHS = CmpRHS; // (icmp X, Y) ? X : Y - if (SI->getTrueValue() == ICI->getOperand(0) && - SI->getFalseValue() == ICI->getOperand(1)) { - switch (ICI->getPredicate()) { + if (TrueVal == CmpLHS && FalseVal == CmpRHS) { + switch (Pred) { default: return SPF_UNKNOWN; // Equality. case ICmpInst::ICMP_UGT: case ICmpInst::ICMP_UGE: return SPF_UMAX; @@ -51,18 +56,35 @@ MatchSelectPattern(Value *V, Value *&LHS, Value *&RHS) { } // (icmp X, Y) ? Y : X - if (SI->getTrueValue() == ICI->getOperand(1) && - SI->getFalseValue() == ICI->getOperand(0)) { - switch (ICI->getPredicate()) { - default: return SPF_UNKNOWN; // Equality. - case ICmpInst::ICMP_UGT: - case ICmpInst::ICMP_UGE: return SPF_UMIN; - case ICmpInst::ICMP_SGT: - case ICmpInst::ICMP_SGE: return SPF_SMIN; - case ICmpInst::ICMP_ULT: - case ICmpInst::ICMP_ULE: return SPF_UMAX; - case ICmpInst::ICMP_SLT: - case ICmpInst::ICMP_SLE: return SPF_SMAX; + if (TrueVal == CmpRHS && FalseVal == CmpLHS) { + switch (Pred) { + default: return SPF_UNKNOWN; // Equality. 
+ case ICmpInst::ICMP_UGT: + case ICmpInst::ICMP_UGE: return SPF_UMIN; + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_SGE: return SPF_SMIN; + case ICmpInst::ICMP_ULT: + case ICmpInst::ICMP_ULE: return SPF_UMAX; + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_SLE: return SPF_SMAX; + } + } + + if (ConstantInt *C1 = dyn_cast<ConstantInt>(CmpRHS)) { + if ((CmpLHS == TrueVal && match(FalseVal, m_Neg(m_Specific(CmpLHS)))) || + (CmpLHS == FalseVal && match(TrueVal, m_Neg(m_Specific(CmpLHS))))) { + + // ABS(X) ==> (X >s 0) ? X : -X and (X >s -1) ? X : -X + // NABS(X) ==> (X >s 0) ? -X : X and (X >s -1) ? -X : X + if (Pred == ICmpInst::ICMP_SGT && (C1->isZero() || C1->isMinusOne())) { + return (CmpLHS == TrueVal) ? SPF_ABS : SPF_NABS; + } + + // ABS(X) ==> (X <s 0) ? -X : X and (X <s 1) ? -X : X + // NABS(X) ==> (X <s 0) ? X : -X and (X <s 1) ? X : -X + if (Pred == ICmpInst::ICMP_SLT && (C1->isZero() || C1->isOne())) { + return (CmpLHS == FalseVal) ? SPF_ABS : SPF_NABS; + } } } @@ -365,7 +387,15 @@ static Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp, /// 1. The icmp predicate is inverted /// 2. The select operands are reversed /// 3. The magnitude of C2 and C1 are flipped -static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, +/// +/// This also tries to turn +/// --- Single bit tests: +/// if ((x & C) == 0) x |= C to x |= C +/// if ((x & C) != 0) x ^= C to x &= ~C +/// if ((x & C) == 0) x ^= C to x |= C +/// if ((x & C) != 0) x &= ~C to x &= ~C +/// if ((x & C) == 0) x &= ~C to nothing +static Value *foldSelectICmpAndOr(SelectInst &SI, Value *TrueVal, Value *FalseVal, InstCombiner::BuilderTy *Builder) { const ICmpInst *IC = dyn_cast<ICmpInst>(SI.getCondition()); @@ -384,6 +414,25 @@ static Value *foldSelectICmpAndOr(const SelectInst &SI, Value *TrueVal, return nullptr; const APInt *C2; + if (match(TrueVal, m_Specific(X))) { + // if ((X & C) != 0) X ^= C becomes X &= ~C + if (match(FalseVal, m_Xor(m_Specific(X), m_APInt(C2))) && C1 == C2) + return Builder->CreateAnd(X, ~(*C1)); + // if ((X & C) != 0) X &= ~C becomes X &= ~C + if (match(FalseVal, m_And(m_Specific(X), m_APInt(C2))) && *C1 == ~(*C2)) + return FalseVal; + } else if (match(FalseVal, m_Specific(X))) { + // if ((X & C) == 0) X ^= C becomes X |= C + if (match(TrueVal, m_Xor(m_Specific(X), m_APInt(C2))) && C1 == C2) + return Builder->CreateOr(X, *C1); + // if ((X & C) == 0) X &= ~C becomes nothing + if (match(TrueVal, m_And(m_Specific(X), m_APInt(C2))) && *C1 == ~(*C2)) + return X; + // if ((X & C) == 0) X |= C becomes X |= C + if (match(TrueVal, m_Or(m_Specific(X), m_APInt(C2))) && C1 == C2) + return TrueVal; + } + bool OrOnTrueVal = false; bool OrOnFalseVal = match(FalseVal, m_Or(m_Specific(TrueVal), m_Power2(C2))); if (!OrOnFalseVal) @@ -677,6 +726,22 @@ Instruction *InstCombiner::FoldSPFofSPF(Instruction *Inner, } } } + + // ABS(ABS(X)) -> ABS(X) + // NABS(NABS(X)) -> NABS(X) + if (SPF1 == SPF2 && (SPF1 == SPF_ABS || SPF1 == SPF_NABS)) { + return ReplaceInstUsesWith(Outer, Inner); + } + + // ABS(NABS(X)) -> ABS(X) + // NABS(ABS(X)) -> NABS(X) + if ((SPF1 == SPF_ABS && SPF2 == SPF_NABS) || + (SPF1 == SPF_NABS && SPF2 == SPF_ABS)) { + SelectInst *SI = cast<SelectInst>(Inner); + Value *NewSI = Builder->CreateSelect( + SI->getCondition(), SI->getFalseValue(), SI->getTrueValue()); + return ReplaceInstUsesWith(Outer, NewSI); + } return nullptr; } @@ -981,7 +1046,6 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { // TODO. 
// ABS(-X) -> ABS(X) - // ABS(ABS(X)) -> ABS(X) } // See if we can fold the select into a phi node if the condition is a select. diff --git a/lib/Transforms/InstCombine/InstCombineShifts.cpp b/lib/Transforms/InstCombine/InstCombineShifts.cpp index cc6665c..2495747 100644 --- a/lib/Transforms/InstCombine/InstCombineShifts.cpp +++ b/lib/Transforms/InstCombine/InstCombineShifts.cpp @@ -789,11 +789,6 @@ Instruction *InstCombiner::visitAShr(BinaryOperator &I) { // have a sign-extend idiom. Value *X; if (match(Op0, m_Shl(m_Value(X), m_Specific(Op1)))) { - // If the left shift is just shifting out partial signbits, delete the - // extension. - if (cast<OverflowingBinaryOperator>(Op0)->hasNoSignedWrap()) - return ReplaceInstUsesWith(I, X); - // If the input is an extension from the shifted amount value, e.g. // %x = zext i8 %A to i32 // %y = shl i32 %x, 24 diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 8c5e202..cb16584 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -144,7 +144,7 @@ Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { // If the operand is the PHI induction variable: if (PHIInVal == PHIUser) { // Scalarize the binary operation. Its first operand is the - // scalar PHI and the second operand is extracted from the other + // scalar PHI, and the second operand is extracted from the other // vector operand. BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; @@ -361,7 +361,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. - // Okay, we can handle this if the vector we are insertinting into is + // We can handle this if the vector we are inserting into is // transitively ok. if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { // If so, update the mask to reflect the inserted undef. @@ -376,7 +376,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, // This must be extracting from either LHS or RHS. if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { - // Okay, we can handle this if the vector we are insertinting into is + // We can handle this if the vector we are inserting into is // transitively ok. if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { // If so, update the mask to reflect the inserted value. @@ -403,7 +403,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, /// We are building a shuffle to create V, which is a sequence of insertelement, /// extractelement pairs. If PermittedRHS is set, then we must either use it or -/// not rely on the second vector source. Return an std::pair containing the +/// not rely on the second vector source. Return a std::pair containing the /// left and right vectors of the proposed shuffle (or 0), and set the Mask /// parameter as required. 
 ///
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 4c36887..08e2446 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -42,6 +42,7 @@
 #include "llvm/Analysis/ConstantFolding.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
@@ -395,6 +396,127 @@ static bool RightDistributesOverLeft(Instruction::BinaryOps LOp,
   return false;
 }
 
+/// This function returns the identity value for the given opcode, which can be
+/// used to factor patterns like (X * 2) + X ==> (X * 2) + (X * 1) ==> X * (2 + 1).
+static Value *getIdentityValue(Instruction::BinaryOps OpCode, Value *V) {
+  if (isa<Constant>(V))
+    return nullptr;
+
+  if (OpCode == Instruction::Mul)
+    return ConstantInt::get(V->getType(), 1);
+
+  // TODO: We can handle other cases e.g. Instruction::And, Instruction::Or etc.
+
+  return nullptr;
+}
+
+/// This function factors binary ops which can be combined using distributive
+/// laws. It also factors SHL into MUL, e.g. SHL(X, 2) ==> MUL(X, 4).
+static Instruction::BinaryOps
+getBinOpsForFactorization(BinaryOperator *Op, Value *&LHS, Value *&RHS) {
+  if (!Op)
+    return Instruction::BinaryOpsEnd;
+
+  if (Op->getOpcode() == Instruction::Shl) {
+    if (Constant *CST = dyn_cast<Constant>(Op->getOperand(1))) {
+      // The multiplier is really 1 << CST.
+      RHS = ConstantExpr::getShl(ConstantInt::get(Op->getType(), 1), CST);
+      LHS = Op->getOperand(0);
+      return Instruction::Mul;
+    }
+  }
+
+  // TODO: We can add other conversions e.g. shr => div etc.
+
+  LHS = Op->getOperand(0);
+  RHS = Op->getOperand(1);
+  return Op->getOpcode();
+}
+
+/// This tries to simplify binary operations by factorizing out common terms
+/// (e.g. "(A*B)+(A*C)" -> "A*(B+C)").
+static Value *tryFactorization(InstCombiner::BuilderTy *Builder,
+                               const DataLayout *DL, BinaryOperator &I,
+                               Instruction::BinaryOps InnerOpcode, Value *A,
+                               Value *B, Value *C, Value *D) {
+
+  // If any of A, B, C, D are null, we cannot factor I; return early.
+  // Checking A and C should be enough.
+  if (!A || !C || !B || !D)
+    return nullptr;
+
+  Value *SimplifiedInst = nullptr;
+  Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
+  Instruction::BinaryOps TopLevelOpcode = I.getOpcode();
+
+  // Does "X op' Y" always equal "Y op' X"?
+  bool InnerCommutative = Instruction::isCommutative(InnerOpcode);
+
+  // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"?
+  if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode))
+    // Does the instruction have the form "(A op' B) op (A op' D)" or, in the
+    // commutative case, "(A op' B) op (C op' A)"?
+    if (A == C || (InnerCommutative && A == D)) {
+      if (A != C)
+        std::swap(C, D);
+      // Consider forming "A op' (B op D)".
+      // If "B op D" simplifies then it can be formed with no cost.
+      Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL);
+      // If "B op D" doesn't simplify then only go on if both of the existing
+      // operations "A op' B" and "C op' D" will be zapped as no longer used.
+      if (!V && LHS->hasOneUse() && RHS->hasOneUse())
+        V = Builder->CreateBinOp(TopLevelOpcode, B, D, RHS->getName());
+      if (V) {
+        SimplifiedInst = Builder->CreateBinOp(InnerOpcode, A, V);
+      }
+    }
+
+  // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"?
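For the left-distributive branch just above (the mirrored right-distributive case follows), a small worked example of what tryFactorization emits, with op = add, op' = mul, and illustrative names:

    %t1 = mul i32 %a, %b
    %t2 = mul i32 %a, %d
    %r  = add i32 %t1, %t2
    ; if %t1 and %t2 have no other uses, the factored form is built instead:
    %bd = add i32 %b, %d
    %r  = mul i32 %a, %bd

The flag handling at the end of the function then keeps no-signed-wrap on the result only when the original instruction and both inner operations all carried it.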
+ if (!SimplifiedInst && RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) + // Does the instruction have the form "(A op' B) op (C op' B)" or, in the + // commutative case, "(A op' B) op (B op' D)"? + if (B == D || (InnerCommutative && B == C)) { + if (B != D) + std::swap(C, D); + // Consider forming "(A op C) op' B". + // If "A op C" simplifies then it can be formed with no cost. + Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL); + + // If "A op C" doesn't simplify then only go on if both of the existing + // operations "A op' B" and "C op' D" will be zapped as no longer used. + if (!V && LHS->hasOneUse() && RHS->hasOneUse()) + V = Builder->CreateBinOp(TopLevelOpcode, A, C, LHS->getName()); + if (V) { + SimplifiedInst = Builder->CreateBinOp(InnerOpcode, V, B); + } + } + + if (SimplifiedInst) { + ++NumFactor; + SimplifiedInst->takeName(&I); + + // Check if we can add NSW flag to SimplifiedInst. If so, set NSW flag. + // TODO: Check for NUW. + if (BinaryOperator *BO = dyn_cast<BinaryOperator>(SimplifiedInst)) { + if (isa<OverflowingBinaryOperator>(SimplifiedInst)) { + bool HasNSW = false; + if (isa<OverflowingBinaryOperator>(&I)) + HasNSW = I.hasNoSignedWrap(); + + if (BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS)) + if (isa<OverflowingBinaryOperator>(Op0)) + HasNSW &= Op0->hasNoSignedWrap(); + + if (BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS)) + if (isa<OverflowingBinaryOperator>(Op1)) + HasNSW &= Op1->hasNoSignedWrap(); + BO->setHasNoSignedWrap(HasNSW); + } + } + } + return SimplifiedInst; +} + /// SimplifyUsingDistributiveLaws - This tries to simplify binary operations /// which some other binary operation distributes over either by factorizing /// out common terms (eg "(A*B)+(A*C)" -> "A*(B+C)") or expanding out if this @@ -404,65 +526,33 @@ Value *InstCombiner::SimplifyUsingDistributiveLaws(BinaryOperator &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); BinaryOperator *Op0 = dyn_cast<BinaryOperator>(LHS); BinaryOperator *Op1 = dyn_cast<BinaryOperator>(RHS); - Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); // op // Factorization. - if (Op0 && Op1 && Op0->getOpcode() == Op1->getOpcode()) { - // The instruction has the form "(A op' B) op (C op' D)". Try to factorize - // a common term. - Value *A = Op0->getOperand(0), *B = Op0->getOperand(1); - Value *C = Op1->getOperand(0), *D = Op1->getOperand(1); - Instruction::BinaryOps InnerOpcode = Op0->getOpcode(); // op' + Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr; + Instruction::BinaryOps LHSOpcode = getBinOpsForFactorization(Op0, A, B); + Instruction::BinaryOps RHSOpcode = getBinOpsForFactorization(Op1, C, D); + + // The instruction has the form "(A op' B) op (C op' D)". Try to factorize + // a common term. + if (LHSOpcode == RHSOpcode) { + if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, C, D)) + return V; + } - // Does "X op' Y" always equal "Y op' X"? - bool InnerCommutative = Instruction::isCommutative(InnerOpcode); - - // Does "X op' (Y op Z)" always equal "(X op' Y) op (X op' Z)"? - if (LeftDistributesOverRight(InnerOpcode, TopLevelOpcode)) - // Does the instruction have the form "(A op' B) op (A op' D)" or, in the - // commutative case, "(A op' B) op (C op' A)"? - if (A == C || (InnerCommutative && A == D)) { - if (A != C) - std::swap(C, D); - // Consider forming "A op' (B op D)". - // If "B op D" simplifies then it can be formed with no cost. 
- Value *V = SimplifyBinOp(TopLevelOpcode, B, D, DL); - // If "B op D" doesn't simplify then only go on if both of the existing - // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && Op0->hasOneUse() && Op1->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, B, D, Op1->getName()); - if (V) { - ++NumFactor; - V = Builder->CreateBinOp(InnerOpcode, A, V); - V->takeName(&I); - return V; - } - } + // The instruction has the form "(A op' B) op (C)". Try to factorize common + // term. + if (Value *V = tryFactorization(Builder, DL, I, LHSOpcode, A, B, RHS, + getIdentityValue(LHSOpcode, RHS))) + return V; - // Does "(X op Y) op' Z" always equal "(X op' Z) op (Y op' Z)"? - if (RightDistributesOverLeft(TopLevelOpcode, InnerOpcode)) - // Does the instruction have the form "(A op' B) op (C op' B)" or, in the - // commutative case, "(A op' B) op (B op' D)"? - if (B == D || (InnerCommutative && B == C)) { - if (B != D) - std::swap(C, D); - // Consider forming "(A op C) op' B". - // If "A op C" simplifies then it can be formed with no cost. - Value *V = SimplifyBinOp(TopLevelOpcode, A, C, DL); - // If "A op C" doesn't simplify then only go on if both of the existing - // operations "A op' B" and "C op' D" will be zapped as no longer used. - if (!V && Op0->hasOneUse() && Op1->hasOneUse()) - V = Builder->CreateBinOp(TopLevelOpcode, A, C, Op0->getName()); - if (V) { - ++NumFactor; - V = Builder->CreateBinOp(InnerOpcode, V, B); - V->takeName(&I); - return V; - } - } - } + // The instruction has the form "(B) op (C op' D)". Try to factorize common + // term. + if (Value *V = tryFactorization(Builder, DL, I, RHSOpcode, LHS, + getIdentityValue(RHSOpcode, LHS), C, D)) + return V; // Expansion. + Instruction::BinaryOps TopLevelOpcode = I.getOpcode(); if (Op0 && RightDistributesOverLeft(Op0->getOpcode(), TopLevelOpcode)) { // The instruction has the form "(A op' B) op C". See if expanding it out // to "(A op C) op' (B op C)" results in simplifications. @@ -1030,6 +1120,12 @@ Value *InstCombiner::Descale(Value *Val, APInt Scale, bool &NoSignedWrap) { return nullptr; } + // If Op is zero then Val = Op * Scale. + if (match(Op, m_Zero())) { + NoSignedWrap = true; + return Op; + } + // We know that we can successfully descale, so from here on we can safely // modify the IR. Op holds the descaled version of the deepest term in the // expression. NoSignedWrap is 'true' if multiplying Op by Scale is known @@ -1106,6 +1202,11 @@ static Value *CreateBinOpAsGiven(BinaryOperator &Inst, Value *LHS, Value *RHS, Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { if (!Inst.getType()->isVectorTy()) return nullptr; + // It may not be safe to reorder shuffles and things like div, urem, etc. + // because we may trap when executing those ops on unknown vector elements. + // See PR20059. 
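A sketch of the PR20059 hazard the new bail-out guards against (values illustrative): moving a division ahead of a shuffle makes it execute on lanes the original program never divides by.

    %m = shufflevector <2 x i32> %d, <2 x i32> undef, <2 x i32> zeroinitializer
    %q = udiv <2 x i32> %x, %m         ; only lane 0 of %d is ever a divisor
    ; reordering to "udiv %x, %d" first would also divide by lane 1 of %d,
    ; which may be zero, so the speculated form can trap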
+ if (!isSafeToSpeculativelyExecute(&Inst, DL)) return nullptr; + unsigned VWidth = cast<VectorType>(Inst.getType())->getNumElements(); Value *LHS = Inst.getOperand(0), *RHS = Inst.getOperand(1); assert(cast<VectorType>(LHS->getType())->getNumElements() == VWidth); @@ -1138,7 +1239,9 @@ Value *InstCombiner::SimplifyVectorOp(BinaryOperator &Inst) { if (isa<ShuffleVectorInst>(RHS)) Shuffle = cast<ShuffleVectorInst>(RHS); if (isa<Constant>(LHS)) C1 = cast<Constant>(LHS); if (isa<Constant>(RHS)) C1 = cast<Constant>(RHS); - if (Shuffle && C1 && isa<UndefValue>(Shuffle->getOperand(1)) && + if (Shuffle && C1 && + (isa<ConstantVector>(C1) || isa<ConstantDataVector>(C1)) && + isa<UndefValue>(Shuffle->getOperand(1)) && Shuffle->getType() == Shuffle->getOperand(0)->getType()) { SmallVector<int, 16> ShMask = Shuffle->getShuffleMask(); // Find constant C2 that has property: @@ -1220,6 +1323,91 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) { if (MadeChange) return &GEP; } + // Check to see if the inputs to the PHI node are getelementptr instructions. + if (PHINode *PN = dyn_cast<PHINode>(PtrOp)) { + GetElementPtrInst *Op1 = dyn_cast<GetElementPtrInst>(PN->getOperand(0)); + if (!Op1) + return nullptr; + + signed DI = -1; + + for (auto I = PN->op_begin()+1, E = PN->op_end(); I !=E; ++I) { + GetElementPtrInst *Op2 = dyn_cast<GetElementPtrInst>(*I); + if (!Op2 || Op1->getNumOperands() != Op2->getNumOperands()) + return nullptr; + + // Keep track of the type as we walk the GEP. + Type *CurTy = Op1->getOperand(0)->getType()->getScalarType(); + + for (unsigned J = 0, F = Op1->getNumOperands(); J != F; ++J) { + if (Op1->getOperand(J)->getType() != Op2->getOperand(J)->getType()) + return nullptr; + + if (Op1->getOperand(J) != Op2->getOperand(J)) { + if (DI == -1) { + // We have not seen any differences yet in the GEPs feeding the + // PHI yet, so we record this one if it is allowed to be a + // variable. + + // The first two arguments can vary for any GEP, the rest have to be + // static for struct slots + if (J > 1 && CurTy->isStructTy()) + return nullptr; + + DI = J; + } else { + // The GEP is different by more than one input. While this could be + // extended to support GEPs that vary by more than one variable it + // doesn't make sense since it greatly increases the complexity and + // would result in an R+R+R addressing mode which no backend + // directly supports and would need to be broken into several + // simpler instructions anyway. + return nullptr; + } + } + + // Sink down a layer of the type for the next iteration. + if (J > 0) { + if (CompositeType *CT = dyn_cast<CompositeType>(CurTy)) { + CurTy = CT->getTypeAtIndex(Op1->getOperand(J)); + } else { + CurTy = nullptr; + } + } + } + } + + GetElementPtrInst *NewGEP = cast<GetElementPtrInst>(Op1->clone()); + + if (DI == -1) { + // All the GEPs feeding the PHI are identical. Clone one down into our + // BB so that it can be merged with the current GEP. + GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(), + NewGEP); + } else { + // All the GEPs feeding the PHI differ at a single offset. Clone a GEP + // into the current block so it can be merged, and create a new PHI to + // set that index. 
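The single-varying-index case described above, as a hedged sketch with illustrative types: two GEPs feeding a PHI that differ in exactly one operand become one GEP whose varying index is itself a PHI.

    ; bb1:  %g1 = getelementptr inbounds [4 x i32]* %base, i64 0, i64 %i
    ; bb2:  %g2 = getelementptr inbounds [4 x i32]* %base, i64 0, i64 %j
    ; merge block:
    %idx = phi i64 [ %i, %bb1 ], [ %j, %bb2 ]
    %g   = getelementptr inbounds [4 x i32]* %base, i64 0, i64 %idx

Operands past the first two may only vary when they do not select struct fields, which is why the J > 1 struct check above refuses the merge.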
+ Instruction *InsertPt = Builder->GetInsertPoint(); + Builder->SetInsertPoint(PN); + PHINode *NewPN = Builder->CreatePHI(Op1->getOperand(DI)->getType(), + PN->getNumOperands()); + Builder->SetInsertPoint(InsertPt); + + for (auto &I : PN->operands()) + NewPN->addIncoming(cast<GEPOperator>(I)->getOperand(DI), + PN->getIncomingBlock(I)); + + NewGEP->setOperand(DI, NewPN); + GEP.getParent()->getInstList().insert(GEP.getParent()->getFirstNonPHI(), + NewGEP); + NewGEP->setOperand(DI, NewPN); + } + + GEP.setOperand(0, NewGEP); + PtrOp = NewGEP; + } + // Combine Indices - If the source pointer to this getelementptr instruction // is a getelementptr instruction, combine the indices of the two // getelementptr instructions into a single instruction. @@ -2014,7 +2202,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { // Simplify the list of clauses, eg by removing repeated catch clauses // (these are often created by inlining). bool MakeNewInstruction = false; // If true, recreate using the following: - SmallVector<Value *, 16> NewClauses; // - Clauses for the new instruction; + SmallVector<Constant *, 16> NewClauses; // - Clauses for the new instruction; bool CleanupFlag = LI.isCleanup(); // - The new instruction is a cleanup. SmallPtrSet<Value *, 16> AlreadyCaught; // Typeinfos known caught already. @@ -2022,8 +2210,8 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { bool isLastClause = i + 1 == e; if (LI.isCatch(i)) { // A catch clause. - Value *CatchClause = LI.getClause(i); - Constant *TypeInfo = cast<Constant>(CatchClause->stripPointerCasts()); + Constant *CatchClause = LI.getClause(i); + Constant *TypeInfo = CatchClause->stripPointerCasts(); // If we already saw this clause, there is no point in having a second // copy of it. @@ -2052,7 +2240,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { // equal (for example if one represents a C++ class, and the other some // class derived from it). assert(LI.isFilter(i) && "Unsupported landingpad clause!"); - Value *FilterClause = LI.getClause(i); + Constant *FilterClause = LI.getClause(i); ArrayType *FilterType = cast<ArrayType>(FilterClause->getType()); unsigned NumTypeInfos = FilterType->getNumElements(); @@ -2096,8 +2284,8 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { // catch-alls. If so, the filter can be discarded. bool SawCatchAll = false; for (unsigned j = 0; j != NumTypeInfos; ++j) { - Value *Elt = Filter->getOperand(j); - Constant *TypeInfo = cast<Constant>(Elt->stripPointerCasts()); + Constant *Elt = Filter->getOperand(j); + Constant *TypeInfo = Elt->stripPointerCasts(); if (isCatchAll(Personality, TypeInfo)) { // This element is a catch-all. Bail out, noting this fact. SawCatchAll = true; @@ -2202,7 +2390,7 @@ Instruction *InstCombiner::visitLandingPadInst(LandingPadInst &LI) { continue; // If Filter is a subset of LFilter, i.e. every element of Filter is also // an element of LFilter, then discard LFilter. - SmallVectorImpl<Value *>::iterator J = NewClauses.begin() + j; + SmallVectorImpl<Constant *>::iterator J = NewClauses.begin() + j; // If Filter is empty then it is a subset of LFilter. if (!FElts) { // Discard LFilter. 
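The filter-subset rule in the hunk above can be sanity-checked with a tiny example (typeinfo names illustrative):

    filter [1 x i8**] [i8** @typeid.A]                  ; Filter
    filter [2 x i8**] [i8** @typeid.A, i8** @typeid.B]  ; LFilter

An exception whose type is missing from Filter's list triggers Filter before LFilter is consulted, while an exception of type @typeid.A is in both lists and triggers neither, so LFilter can never change the outcome and is dropped.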
diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 95fca75..5e5ddc1 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -16,6 +16,7 @@ #include "llvm/Transforms/Instrumentation.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallString.h" @@ -39,15 +40,14 @@ #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Endian.h" -#include "llvm/Support/system_error.h" #include "llvm/Transforms/Utils/ASanStackFrameLayout.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Transforms/Utils/SpecialCaseList.h" #include <algorithm> #include <string> +#include <system_error> using namespace llvm; @@ -70,7 +70,7 @@ static const uintptr_t kRetiredStackFrameMagic = 0x45E0360E; static const char *const kAsanModuleCtorName = "asan.module_ctor"; static const char *const kAsanModuleDtorName = "asan.module_dtor"; -static const int kAsanCtorAndCtorPriority = 1; +static const int kAsanCtorAndDtorPriority = 1; static const char *const kAsanReportErrorTemplate = "__asan_report_"; static const char *const kAsanReportLoadN = "__asan_report_load_n"; static const char *const kAsanReportStoreN = "__asan_report_store_n"; @@ -79,7 +79,7 @@ static const char *const kAsanUnregisterGlobalsName = "__asan_unregister_globals"; static const char *const kAsanPoisonGlobalsName = "__asan_before_dynamic_init"; static const char *const kAsanUnpoisonGlobalsName = "__asan_after_dynamic_init"; -static const char *const kAsanInitName = "__asan_init_v3"; +static const char *const kAsanInitName = "__asan_init_v4"; static const char *const kAsanCovModuleInitName = "__sanitizer_cov_module_init"; static const char *const kAsanCovName = "__sanitizer_cov"; static const char *const kAsanPtrCmp = "__sanitizer_ptr_cmp"; @@ -128,9 +128,8 @@ static cl::opt<int> ClMaxInsnsToInstrumentPerBB("asan-max-ins-per-bb", // This flag may need to be replaced with -f[no]asan-stack. static cl::opt<bool> ClStack("asan-stack", cl::desc("Handle stack memory"), cl::Hidden, cl::init(true)); -// This flag may need to be replaced with -f[no]asan-use-after-return. static cl::opt<bool> ClUseAfterReturn("asan-use-after-return", - cl::desc("Check return-after-free"), cl::Hidden, cl::init(false)); + cl::desc("Check return-after-free"), cl::Hidden, cl::init(true)); // This flag may need to be replaced with -f[no]asan-globals. 
static cl::opt<bool> ClGlobals("asan-globals", cl::desc("Handle global objects"), cl::Hidden, cl::init(true)); @@ -142,16 +141,13 @@ static cl::opt<int> ClCoverageBlockThreshold("asan-coverage-block-threshold", "are more than this number of blocks."), cl::Hidden, cl::init(1500)); static cl::opt<bool> ClInitializers("asan-initialization-order", - cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(false)); + cl::desc("Handle C++ initializer order"), cl::Hidden, cl::init(true)); static cl::opt<bool> ClInvalidPointerPairs("asan-detect-invalid-pointer-pair", cl::desc("Instrument <, <=, >, >=, - with pointer operands"), cl::Hidden, cl::init(false)); static cl::opt<unsigned> ClRealignStack("asan-realign-stack", cl::desc("Realign stack to the value of this flag (power of two)"), cl::Hidden, cl::init(32)); -static cl::opt<std::string> ClBlacklistFile("asan-blacklist", - cl::desc("File containing the list of objects to ignore " - "during instrumentation"), cl::Hidden); static cl::opt<int> ClInstrumentationWithCallsThreshold( "asan-instrumentation-with-call-threshold", cl::desc("If the function being instrumented contains more than " @@ -216,29 +212,87 @@ STATISTIC(NumOptimizedAccessesToGlobalVar, "Number of optimized accesses to global vars"); namespace { -/// A set of dynamically initialized globals extracted from metadata. -class SetOfDynamicallyInitializedGlobals { +/// Frontend-provided metadata for global variables. +class GlobalsMetadata { public: - void Init(Module& M) { - // Clang generates metadata identifying all dynamically initialized globals. - NamedMDNode *DynamicGlobals = - M.getNamedMetadata("llvm.asan.dynamically_initialized_globals"); - if (!DynamicGlobals) + GlobalsMetadata() : inited_(false) {} + void init(Module& M) { + assert(!inited_); + inited_ = true; + NamedMDNode *Globals = M.getNamedMetadata("llvm.asan.globals"); + if (!Globals) return; - for (int i = 0, n = DynamicGlobals->getNumOperands(); i < n; ++i) { - MDNode *MDN = DynamicGlobals->getOperand(i); - assert(MDN->getNumOperands() == 1); - Value *VG = MDN->getOperand(0); - // The optimizer may optimize away a global entirely, in which case we - // cannot instrument access to it. - if (!VG) + for (auto MDN : Globals->operands()) { + // Format of the metadata node for the global: + // { + // global, + // source_location, + // i1 is_dynamically_initialized, + // i1 is_blacklisted + // } + assert(MDN->getNumOperands() == 4); + Value *V = MDN->getOperand(0); + // The optimizer may optimize away a global entirely. + if (!V) continue; - DynInitGlobals.insert(cast<GlobalVariable>(VG)); + GlobalVariable *GV = cast<GlobalVariable>(V); + if (Value *Loc = MDN->getOperand(1)) { + GlobalVariable *GVLoc = cast<GlobalVariable>(Loc); + // We may already know the source location for GV, if it was merged + // with another global. + if (SourceLocation.insert(std::make_pair(GV, GVLoc)).second) + addSourceLocationGlobal(GVLoc); + } + ConstantInt *IsDynInit = cast<ConstantInt>(MDN->getOperand(2)); + if (IsDynInit->isOne()) + DynInitGlobals.insert(GV); + ConstantInt *IsBlacklisted = cast<ConstantInt>(MDN->getOperand(3)); + if (IsBlacklisted->isOne()) + BlacklistedGlobals.insert(GV); } } - bool Contains(GlobalVariable *G) { return DynInitGlobals.count(G) != 0; } + + GlobalVariable *getSourceLocation(GlobalVariable *G) const { + auto Pos = SourceLocation.find(G); + return (Pos != SourceLocation.end()) ? Pos->second : nullptr; + } + + /// Check if the global is dynamically initialized. 
+ bool isDynInit(GlobalVariable *G) const { + return DynInitGlobals.count(G); + } + + /// Check if the global was blacklisted. + bool isBlacklisted(GlobalVariable *G) const { + return BlacklistedGlobals.count(G); + } + + /// Check if the global was generated to describe source location of another + /// global (we don't want to instrument them). + bool isSourceLocationGlobal(GlobalVariable *G) const { + return LocationGlobals.count(G); + } + private: - SmallSet<GlobalValue*, 32> DynInitGlobals; + bool inited_; + DenseMap<GlobalVariable*, GlobalVariable*> SourceLocation; + DenseSet<GlobalVariable*> DynInitGlobals; + DenseSet<GlobalVariable*> BlacklistedGlobals; + DenseSet<GlobalVariable*> LocationGlobals; + + void addSourceLocationGlobal(GlobalVariable *SourceLocGV) { + // Source location global is a struct with layout: + // { + // filename, + // i32 line_number, + // i32 column_number, + // } + LocationGlobals.insert(SourceLocGV); + ConstantStruct *Contents = + cast<ConstantStruct>(SourceLocGV->getInitializer()); + GlobalVariable *FilenameGV = cast<GlobalVariable>(Contents->getOperand(0)); + LocationGlobals.insert(FilenameGV); + } }; /// This struct defines the shadow mapping using the rule: @@ -306,16 +360,7 @@ static size_t RedzoneSizeForScale(int MappingScale) { /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer : public FunctionPass { - AddressSanitizer(bool CheckInitOrder = true, - bool CheckUseAfterReturn = false, - bool CheckLifetime = false, - StringRef BlacklistFile = StringRef()) - : FunctionPass(ID), - CheckInitOrder(CheckInitOrder || ClInitializers), - CheckUseAfterReturn(CheckUseAfterReturn || ClUseAfterReturn), - CheckLifetime(CheckLifetime || ClCheckLifetime), - BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile - : BlacklistFile) {} + AddressSanitizer() : FunctionPass(ID) {} const char *getPassName() const override { return "AddressSanitizerFunctionPass"; } @@ -344,11 +389,6 @@ struct AddressSanitizer : public FunctionPass { bool InjectCoverage(Function &F, const ArrayRef<BasicBlock*> AllBlocks); void InjectCoverageAtBlock(Function &F, BasicBlock &BB); - bool CheckInitOrder; - bool CheckUseAfterReturn; - bool CheckLifetime; - SmallString<64> BlacklistFile; - LLVMContext *C; const DataLayout *DL; int LongSize; @@ -359,7 +399,6 @@ struct AddressSanitizer : public FunctionPass { Function *AsanHandleNoReturnFunc; Function *AsanCovFunction; Function *AsanPtrCmpFunction, *AsanPtrSubFunction; - std::unique_ptr<SpecialCaseList> BL; // This array is indexed by AccessIsWrite and log2(AccessSize). Function *AsanErrorCallback[2][kNumberOfAccessSizes]; Function *AsanMemoryAccessCallback[2][kNumberOfAccessSizes]; @@ -368,19 +407,14 @@ struct AddressSanitizer : public FunctionPass { *AsanMemoryAccessCallbackSized[2]; Function *AsanMemmove, *AsanMemcpy, *AsanMemset; InlineAsm *EmptyAsm; - SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; + GlobalsMetadata GlobalsMD; friend struct FunctionStackPoisoner; }; class AddressSanitizerModule : public ModulePass { public: - AddressSanitizerModule(bool CheckInitOrder = true, - StringRef BlacklistFile = StringRef()) - : ModulePass(ID), - CheckInitOrder(CheckInitOrder || ClInitializers), - BlacklistFile(BlacklistFile.empty() ? 
ClBlacklistFile - : BlacklistFile) {} + AddressSanitizerModule() : ModulePass(ID) {} bool runOnModule(Module &M) override; static char ID; // Pass identification, replacement for typeid const char *getPassName() const override { @@ -390,17 +424,15 @@ class AddressSanitizerModule : public ModulePass { private: void initializeCallbacks(Module &M); + bool InstrumentGlobals(IRBuilder<> &IRB, Module &M); bool ShouldInstrumentGlobal(GlobalVariable *G); + void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName); void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName); size_t MinRedzoneSizeForGlobal() const { return RedzoneSizeForScale(Mapping.Scale); } - bool CheckInitOrder; - SmallString<64> BlacklistFile; - - std::unique_ptr<SpecialCaseList> BL; - SetOfDynamicallyInitializedGlobals DynamicallyInitializedGlobals; + GlobalsMetadata GlobalsMD; Type *IntptrTy; LLVMContext *C; const DataLayout *DL; @@ -497,7 +529,7 @@ struct FunctionStackPoisoner : public InstVisitor<FunctionStackPoisoner> { /// \brief Collect lifetime intrinsic calls to check for use-after-scope /// errors. void visitIntrinsicInst(IntrinsicInst &II) { - if (!ASan.CheckLifetime) return; + if (!ClCheckLifetime) return; Intrinsic::ID ID = II.getIntrinsicID(); if (ID != Intrinsic::lifetime_start && ID != Intrinsic::lifetime_end) @@ -552,20 +584,16 @@ char AddressSanitizer::ID = 0; INITIALIZE_PASS(AddressSanitizer, "asan", "AddressSanitizer: detects use-after-free and out-of-bounds bugs.", false, false) -FunctionPass *llvm::createAddressSanitizerFunctionPass( - bool CheckInitOrder, bool CheckUseAfterReturn, bool CheckLifetime, - StringRef BlacklistFile) { - return new AddressSanitizer(CheckInitOrder, CheckUseAfterReturn, - CheckLifetime, BlacklistFile); +FunctionPass *llvm::createAddressSanitizerFunctionPass() { + return new AddressSanitizer(); } char AddressSanitizerModule::ID = 0; INITIALIZE_PASS(AddressSanitizerModule, "asan-module", "AddressSanitizer: detects use-after-free and out-of-bounds bugs." "ModulePass", false, false) -ModulePass *llvm::createAddressSanitizerModulePass( - bool CheckInitOrder, StringRef BlacklistFile) { - return new AddressSanitizerModule(CheckInitOrder, BlacklistFile); +ModulePass *llvm::createAddressSanitizerModulePass() { + return new AddressSanitizerModule(); } static size_t TypeSizeToSizeIndex(uint32_t TypeSize) { @@ -682,7 +710,7 @@ bool AddressSanitizer::GlobalIsLinkerInitialized(GlobalVariable *G) { // If a global variable does not have dynamic initialization we don't // have to instrument it. However, if a global does not have initializer // at all, we assume it has dynamic initializer (in other TU). - return G->hasInitializer() && !DynamicallyInitializedGlobals.Contains(G); + return G->hasInitializer() && !GlobalsMD.isDynInit(G); } void @@ -706,7 +734,7 @@ void AddressSanitizer::instrumentMop(Instruction *I, bool UseCalls) { if (GlobalVariable *G = dyn_cast<GlobalVariable>(Addr)) { // If initialization order checking is disabled, a simple access to a // dynamically initialized global is always valid. - if (!CheckInitOrder || GlobalIsLinkerInitialized(G)) { + if (!ClInitializers || GlobalIsLinkerInitialized(G)) { NumOptimizedAccessesToGlobalVar++; return; } @@ -851,48 +879,36 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, Crash->setDebugLoc(OrigIns->getDebugLoc()); } -void AddressSanitizerModule::createInitializerPoisonCalls( - Module &M, GlobalValue *ModuleName) { - // We do all of our poisoning and unpoisoning within a global constructor. 
- // These are called _GLOBAL__(sub_)?I_.*. - // TODO: Consider looking through the functions in - // M.getGlobalVariable("llvm.global_ctors") instead of using this stringly - // typed approach. - Function *GlobalInit = nullptr; - for (auto &F : M.getFunctionList()) { - StringRef FName = F.getName(); - - const char kGlobalPrefix[] = "_GLOBAL__"; - if (!FName.startswith(kGlobalPrefix)) - continue; - FName = FName.substr(strlen(kGlobalPrefix)); - - const char kOptionalSub[] = "sub_"; - if (FName.startswith(kOptionalSub)) - FName = FName.substr(strlen(kOptionalSub)); - - if (FName.startswith("I_")) { - GlobalInit = &F; - break; - } - } - // If that function is not present, this TU contains no globals, or they have - // all been optimized away - if (!GlobalInit) - return; - +void AddressSanitizerModule::poisonOneInitializer(Function &GlobalInit, + GlobalValue *ModuleName) { // Set up the arguments to our poison/unpoison functions. - IRBuilder<> IRB(GlobalInit->begin()->getFirstInsertionPt()); + IRBuilder<> IRB(GlobalInit.begin()->getFirstInsertionPt()); // Add a call to poison all external globals before the given function starts. Value *ModuleNameAddr = ConstantExpr::getPointerCast(ModuleName, IntptrTy); IRB.CreateCall(AsanPoisonGlobals, ModuleNameAddr); // Add calls to unpoison all globals before each return instruction. - for (Function::iterator I = GlobalInit->begin(), E = GlobalInit->end(); - I != E; ++I) { - if (ReturnInst *RI = dyn_cast<ReturnInst>(I->getTerminator())) { + for (auto &BB : GlobalInit.getBasicBlockList()) + if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator())) CallInst::Create(AsanUnpoisonGlobals, "", RI); +} + +void AddressSanitizerModule::createInitializerPoisonCalls( + Module &M, GlobalValue *ModuleName) { + GlobalVariable *GV = M.getGlobalVariable("llvm.global_ctors"); + + ConstantArray *CA = cast<ConstantArray>(GV->getInitializer()); + for (Use &OP : CA->operands()) { + if (isa<ConstantAggregateZero>(OP)) + continue; + ConstantStruct *CS = cast<ConstantStruct>(OP); + + // Must have a function or null ptr. + // (CS->getOperand(0) is the init priority.) + if (Function* F = dyn_cast<Function>(CS->getOperand(1))) { + if (F->getName() != kAsanModuleCtorName) + poisonOneInitializer(*F, ModuleName); } } } @@ -901,16 +917,20 @@ bool AddressSanitizerModule::ShouldInstrumentGlobal(GlobalVariable *G) { Type *Ty = cast<PointerType>(G->getType())->getElementType(); DEBUG(dbgs() << "GLOBAL: " << *G << "\n"); - if (BL->isIn(*G)) return false; + if (GlobalsMD.isBlacklisted(G)) return false; + if (GlobalsMD.isSourceLocationGlobal(G)) return false; if (!Ty->isSized()) return false; if (!G->hasInitializer()) return false; if (GlobalWasGeneratedByAsan(G)) return false; // Our own global. // Touch only those globals that will not be defined in other modules. - // Don't handle ODR type linkages since other modules may be built w/o asan. + // Don't handle ODR linkage types and COMDATs since other modules may be built + // without ASan. if (G->getLinkage() != GlobalVariable::ExternalLinkage && G->getLinkage() != GlobalVariable::PrivateLinkage && G->getLinkage() != GlobalVariable::InternalLinkage) return false; + if (G->hasComdat()) + return false; // Two problems with thread-locals: // - The address of the main thread's copy can't be computed at link-time. // - Need to poison all copies, not just the main thread's one. 
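GlobalsMetadata above replaces the old blacklist-file plumbing with frontend-emitted metadata; a hedged sketch of the expected shape, following the four-operand node format described in init() (global names and types illustrative, 3.5-era metadata syntax):

    !llvm.asan.globals = !{!0}
    !0 = metadata !{i32* @g, %loc* @g.source_loc, i1 true, i1 false}
    ; @g is dynamically initialized (third operand) and not blacklisted
    ; (fourth); @g.source_loc holds { filename, line, column } and is itself
    ; excluded from instrumentation by isSourceLocationGlobal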
@@ -1001,39 +1021,16 @@ void AddressSanitizerModule::initializeCallbacks(Module &M) { // This function replaces all global variables with new variables that have // trailing redzones. It also creates a function that poisons // redzones and inserts this function into llvm.global_ctors. -bool AddressSanitizerModule::runOnModule(Module &M) { - if (!ClGlobals) return false; - - DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); - if (!DLP) - return false; - DL = &DLP->getDataLayout(); - - BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); - if (BL->isIn(M)) return false; - C = &(M.getContext()); - int LongSize = DL->getPointerSizeInBits(); - IntptrTy = Type::getIntNTy(*C, LongSize); - Mapping = getShadowMapping(M, LongSize); - initializeCallbacks(M); - DynamicallyInitializedGlobals.Init(M); +bool AddressSanitizerModule::InstrumentGlobals(IRBuilder<> &IRB, Module &M) { + GlobalsMD.init(M); SmallVector<GlobalVariable *, 16> GlobalsToChange; - for (Module::GlobalListType::iterator G = M.global_begin(), - E = M.global_end(); G != E; ++G) { - if (ShouldInstrumentGlobal(G)) - GlobalsToChange.push_back(G); + for (auto &G : M.globals()) { + if (ShouldInstrumentGlobal(&G)) + GlobalsToChange.push_back(&G); } - Function *CtorFunc = M.getFunction(kAsanModuleCtorName); - assert(CtorFunc); - IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); - - Function *CovFunc = M.getFunction(kAsanCovName); - int nCov = CovFunc ? CovFunc->getNumUses() : 0; - IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov)); - size_t n = GlobalsToChange.size(); if (n == 0) return false; @@ -1044,10 +1041,11 @@ bool AddressSanitizerModule::runOnModule(Module &M) { // const char *name; // const char *module_name; // size_t has_dynamic_init; + // void *source_location; // We initialize an array of such structures and pass it to a run-time call. - StructType *GlobalStructTy = StructType::get(IntptrTy, IntptrTy, - IntptrTy, IntptrTy, - IntptrTy, IntptrTy, NULL); + StructType *GlobalStructTy = + StructType::get(IntptrTy, IntptrTy, IntptrTy, IntptrTy, IntptrTy, + IntptrTy, IntptrTy, NULL); SmallVector<Constant *, 16> Initializers(n); bool HasDynamicallyInitializedGlobals = false; @@ -1075,11 +1073,6 @@ bool AddressSanitizerModule::runOnModule(Module &M) { RightRedzoneSize += MinRZ - (SizeInBytes % MinRZ); assert(((RightRedzoneSize + SizeInBytes) % MinRZ) == 0); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); - // Determine whether this global should be poisoned in initialization. - bool GlobalHasDynamicInitializer = - DynamicallyInitializedGlobals.Contains(G); - // Don't check initialization order if this global is blacklisted. - GlobalHasDynamicInitializer &= !BL->isIn(*G, "init"); StructType *NewTy = StructType::get(Ty, RightRedZoneTy, NULL); Constant *NewInitializer = ConstantStruct::get( @@ -1108,18 +1101,21 @@ bool AddressSanitizerModule::runOnModule(Module &M) { NewGlobal->takeName(G); G->eraseFromParent(); + bool GlobalHasDynamicInitializer = GlobalsMD.isDynInit(G); + GlobalVariable *SourceLoc = GlobalsMD.getSourceLocation(G); + Initializers[i] = ConstantStruct::get( - GlobalStructTy, - ConstantExpr::getPointerCast(NewGlobal, IntptrTy), + GlobalStructTy, ConstantExpr::getPointerCast(NewGlobal, IntptrTy), ConstantInt::get(IntptrTy, SizeInBytes), ConstantInt::get(IntptrTy, SizeInBytes + RightRedzoneSize), ConstantExpr::getPointerCast(Name, IntptrTy), ConstantExpr::getPointerCast(ModuleName, IntptrTy), ConstantInt::get(IntptrTy, GlobalHasDynamicInitializer), + SourceLoc ? 
ConstantExpr::getPointerCast(SourceLoc, IntptrTy) + : ConstantInt::get(IntptrTy, 0), NULL); - // Populate the first and last globals declared in this TU. - if (CheckInitOrder && GlobalHasDynamicInitializer) + if (ClInitializers && GlobalHasDynamicInitializer) HasDynamicallyInitializedGlobals = true; DEBUG(dbgs() << "NEW GLOBAL: " << *NewGlobal << "\n"); @@ -1131,7 +1127,7 @@ bool AddressSanitizerModule::runOnModule(Module &M) { ConstantArray::get(ArrayOfGlobalStructTy, Initializers), ""); // Create calls for poisoning before initializers run and unpoisoning after. - if (CheckInitOrder && HasDynamicallyInitializedGlobals) + if (HasDynamicallyInitializedGlobals) createInitializerPoisonCalls(M, ModuleName); IRB.CreateCall2(AsanRegisterGlobals, IRB.CreatePointerCast(AllGlobals, IntptrTy), @@ -1147,12 +1143,42 @@ bool AddressSanitizerModule::runOnModule(Module &M) { IRB_Dtor.CreateCall2(AsanUnregisterGlobals, IRB.CreatePointerCast(AllGlobals, IntptrTy), ConstantInt::get(IntptrTy, n)); - appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndCtorPriority); + appendToGlobalDtors(M, AsanDtorFunction, kAsanCtorAndDtorPriority); DEBUG(dbgs() << M); return true; } +bool AddressSanitizerModule::runOnModule(Module &M) { + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + if (!DLP) + return false; + DL = &DLP->getDataLayout(); + C = &(M.getContext()); + int LongSize = DL->getPointerSizeInBits(); + IntptrTy = Type::getIntNTy(*C, LongSize); + Mapping = getShadowMapping(M, LongSize); + initializeCallbacks(M); + + bool Changed = false; + + Function *CtorFunc = M.getFunction(kAsanModuleCtorName); + assert(CtorFunc); + IRBuilder<> IRB(CtorFunc->getEntryBlock().getTerminator()); + + if (ClCoverage > 0) { + Function *CovFunc = M.getFunction(kAsanCovName); + int nCov = CovFunc ? CovFunc->getNumUses() : 0; + IRB.CreateCall(AsanCovModuleInit, ConstantInt::get(IntptrTy, nCov)); + Changed = true; + } + + if (ClGlobals) + Changed |= InstrumentGlobals(IRB, M); + + return Changed; +} + void AddressSanitizer::initializeCallbacks(Module &M) { IRBuilder<> IRB(*C); // Create __asan_report* callbacks. @@ -1216,8 +1242,7 @@ bool AddressSanitizer::doInitialization(Module &M) { report_fatal_error("data layout missing"); DL = &DLP->getDataLayout(); - BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); - DynamicallyInitializedGlobals.Init(M); + GlobalsMD.init(M); C = &(M.getContext()); LongSize = DL->getPointerSizeInBits(); @@ -1236,7 +1261,7 @@ bool AddressSanitizer::doInitialization(Module &M) { Mapping = getShadowMapping(M, LongSize); - appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndCtorPriority); + appendToGlobalCtors(M, AsanCtorFunction, kAsanCtorAndDtorPriority); return true; } @@ -1267,7 +1292,9 @@ void AddressSanitizer::InjectCoverageAtBlock(Function &F, BasicBlock &BB) { break; } + DebugLoc EntryLoc = IP->getDebugLoc().getFnDebugLoc(*C); IRBuilder<> IRB(IP); + IRB.SetCurrentDebugLocation(EntryLoc); Type *Int8Ty = IRB.getInt8Ty(); GlobalVariable *Guard = new GlobalVariable( *F.getParent(), Int8Ty, false, GlobalValue::PrivateLinkage, @@ -1279,10 +1306,10 @@ void AddressSanitizer::InjectCoverageAtBlock(Function &F, BasicBlock &BB) { Instruction *Ins = SplitBlockAndInsertIfThen( Cmp, IP, false, MDBuilder(*C).createBranchWeights(1, 100000)); IRB.SetInsertPoint(Ins); + IRB.SetCurrentDebugLocation(EntryLoc); // We pass &F to __sanitizer_cov. We could avoid this and rely on // GET_CALLER_PC, but having the PC of the first instruction is just nice. 
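// For orientation, the injected fast path at each covered block looks
// roughly like this pseudo-IR (an illustrative sketch with a hypothetical
// guard name, not the pass's verbatim output):
//   %g   = load i8* @cov_guard            ; private i8 guard, one per block
//   %cmp = icmp eq i8 %g, 0               ; branch weights 1:100000
//   br i1 %cmp, label %cov, label %rest
// cov:
//   call void @__sanitizer_cov()
//   store atomic i8 1, i8* @cov_guard monotonic, align 1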
- Instruction *Call = IRB.CreateCall(AsanCovFunction); - Call->setDebugLoc(IP->getDebugLoc()); + IRB.CreateCall(AsanCovFunction); StoreInst *Store = IRB.CreateStore(ConstantInt::get(Int8Ty, 1), Guard); Store->setAtomic(Monotonic); Store->setAlignment(1); @@ -1316,14 +1343,13 @@ bool AddressSanitizer::InjectCoverage(Function &F, (unsigned)ClCoverageBlockThreshold < AllBlocks.size()) { InjectCoverageAtBlock(F, F.getEntryBlock()); } else { - for (size_t i = 0, n = AllBlocks.size(); i < n; i++) - InjectCoverageAtBlock(F, *AllBlocks[i]); + for (auto BB : AllBlocks) + InjectCoverageAtBlock(F, *BB); } return true; } bool AddressSanitizer::runOnFunction(Function &F) { - if (BL->isIn(F)) return false; if (&F == AsanCtorFunction) return false; if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage) return false; DEBUG(dbgs() << "ASAN instrumenting:\n" << F << "\n"); @@ -1350,29 +1376,28 @@ bool AddressSanitizer::runOnFunction(Function &F) { unsigned Alignment; // Fill the set of memory operations to instrument. - for (Function::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) { - AllBlocks.push_back(FI); + for (auto &BB : F) { + AllBlocks.push_back(&BB); TempsToInstrument.clear(); int NumInsnsPerBB = 0; - for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); - BI != BE; ++BI) { - if (LooksLikeCodeInBug11395(BI)) return false; - if (Value *Addr = isInterestingMemoryAccess(BI, &IsWrite, &Alignment)) { + for (auto &Inst : BB) { + if (LooksLikeCodeInBug11395(&Inst)) return false; + if (Value *Addr = + isInterestingMemoryAccess(&Inst, &IsWrite, &Alignment)) { if (ClOpt && ClOptSameTemp) { if (!TempsToInstrument.insert(Addr)) continue; // We've seen this temp in the current BB. } } else if (ClInvalidPointerPairs && - isInterestingPointerComparisonOrSubtraction(BI)) { - PointerComparisonsOrSubtracts.push_back(BI); + isInterestingPointerComparisonOrSubtraction(&Inst)) { + PointerComparisonsOrSubtracts.push_back(&Inst); continue; - } else if (isa<MemIntrinsic>(BI)) { + } else if (isa<MemIntrinsic>(Inst)) { // ok, take it. } else { - if (isa<AllocaInst>(BI)) + if (isa<AllocaInst>(Inst)) NumAllocas++; - CallSite CS(BI); + CallSite CS(&Inst); if (CS) { // A call inside BB. TempsToInstrument.clear(); @@ -1381,7 +1406,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { } continue; } - ToInstrument.push_back(BI); + ToInstrument.push_back(&Inst); NumInsnsPerBB++; if (NumInsnsPerBB >= ClMaxInsnsToInstrumentPerBB) break; @@ -1406,8 +1431,7 @@ bool AddressSanitizer::runOnFunction(Function &F) { // Instrument. int NumInstrumented = 0; - for (size_t i = 0, n = ToInstrument.size(); i != n; i++) { - Instruction *Inst = ToInstrument[i]; + for (auto Inst : ToInstrument) { if (ClDebugMin < 0 || ClDebugMax < 0 || (NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) { if (isInterestingMemoryAccess(Inst, &IsWrite, &Alignment)) @@ -1423,14 +1447,13 @@ bool AddressSanitizer::runOnFunction(Function &F) { // We must unpoison the stack before every NoReturn call (throw, _exit, etc). // See e.g. 
http://code.google.com/p/address-sanitizer/issues/detail?id=37 - for (size_t i = 0, n = NoReturnCalls.size(); i != n; i++) { - Instruction *CI = NoReturnCalls[i]; + for (auto CI : NoReturnCalls) { IRBuilder<> IRB(CI); IRB.CreateCall(AsanHandleNoReturnFunc); } - for (size_t i = 0, n = PointerComparisonsOrSubtracts.size(); i != n; i++) { - instrumentPointerComparisonOrSubtraction(PointerComparisonsOrSubtracts[i]); + for (auto Inst : PointerComparisonsOrSubtracts) { + instrumentPointerComparisonOrSubtraction(Inst); NumInstrumented++; } @@ -1543,12 +1566,10 @@ void FunctionStackPoisoner::SetShadowToStackAfterReturnInlined( } static DebugLoc getFunctionEntryDebugLocation(Function &F) { - BasicBlock::iterator I = F.getEntryBlock().begin(), - E = F.getEntryBlock().end(); - for (; I != E; ++I) - if (!isa<AllocaInst>(I)) - break; - return I->getDebugLoc(); + for (const auto &Inst : F.getEntryBlock()) + if (!isa<AllocaInst>(Inst)) + return Inst.getDebugLoc(); + return DebugLoc(); } void FunctionStackPoisoner::poisonStack() { @@ -1562,8 +1583,7 @@ void FunctionStackPoisoner::poisonStack() { SmallVector<ASanStackVariableDescription, 16> SVD; SVD.reserve(AllocaVec.size()); - for (size_t i = 0, n = AllocaVec.size(); i < n; i++) { - AllocaInst *AI = AllocaVec[i]; + for (AllocaInst *AI : AllocaVec) { ASanStackVariableDescription D = { AI->getName().data(), getAllocaSizeInBytes(AI), AI->getAlignment(), AI, 0}; @@ -1577,7 +1597,7 @@ void FunctionStackPoisoner::poisonStack() { DEBUG(dbgs() << L.DescriptionString << " --- " << L.FrameSize << "\n"); uint64_t LocalStackSize = L.FrameSize; bool DoStackMalloc = - ASan.CheckUseAfterReturn && LocalStackSize <= kMaxStackMallocSize; + ClUseAfterReturn && LocalStackSize <= kMaxStackMallocSize; Type *ByteArrayTy = ArrayType::get(IRB.getInt8Ty(), LocalStackSize); AllocaInst *MyAlloca = @@ -1618,8 +1638,7 @@ void FunctionStackPoisoner::poisonStack() { // Insert poison calls for lifetime intrinsics for alloca. bool HavePoisonedAllocas = false; - for (size_t i = 0, n = AllocaPoisonCallVec.size(); i < n; i++) { - const AllocaPoisonCall &APC = AllocaPoisonCallVec[i]; + for (const auto &APC : AllocaPoisonCallVec) { assert(APC.InsBefore); assert(APC.AI); IRBuilder<> IRB(APC.InsBefore); @@ -1628,11 +1647,10 @@ void FunctionStackPoisoner::poisonStack() { } // Replace Alloca instructions with base+offset. - for (size_t i = 0, n = SVD.size(); i < n; i++) { - AllocaInst *AI = SVD[i].AI; + for (const auto &Desc : SVD) { + AllocaInst *AI = Desc.AI; Value *NewAllocaPtr = IRB.CreateIntToPtr( - IRB.CreateAdd(LocalStackBase, - ConstantInt::get(IntptrTy, SVD[i].Offset)), + IRB.CreateAdd(LocalStackBase, ConstantInt::get(IntptrTy, Desc.Offset)), AI->getType()); replaceDbgDeclareForAlloca(AI, NewAllocaPtr, DIB); AI->replaceAllUsesWith(NewAllocaPtr); @@ -1665,8 +1683,7 @@ void FunctionStackPoisoner::poisonStack() { poisonRedZones(L.ShadowBytes, IRB, ShadowBase, true); // (Un)poison the stack before all ret instructions. - for (size_t i = 0, n = RetVec.size(); i < n; i++) { - Instruction *Ret = RetVec[i]; + for (auto Ret : RetVec) { IRBuilder<> IRBRet(Ret); // Mark the current frame as retired. IRBRet.CreateStore(ConstantInt::get(IntptrTy, kRetiredStackFrameMagic), @@ -1720,8 +1737,8 @@ void FunctionStackPoisoner::poisonStack() { } // We are done. Remove the old unused alloca instructions. 
- for (size_t i = 0, n = AllocaVec.size(); i < n; i++) - AllocaVec[i]->eraseFromParent(); + for (auto AI : AllocaVec) + AI->eraseFromParent(); } void FunctionStackPoisoner::poisonAlloca(Value *V, uint64_t Size, diff --git a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 7f468f7..799e14b 100644 --- a/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -59,9 +59,9 @@ #include "llvm/IR/Value.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/SpecialCaseList.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/Utils/SpecialCaseList.h" #include <iterator> using namespace llvm; @@ -120,6 +120,51 @@ static cl::opt<bool> ClDebugNonzeroLabels( namespace { +StringRef GetGlobalTypeString(const GlobalValue &G) { + // Types of GlobalVariables are always pointer types. + Type *GType = G.getType()->getElementType(); + // For now we support blacklisting struct types only. + if (StructType *SGType = dyn_cast<StructType>(GType)) { + if (!SGType->isLiteral()) + return SGType->getName(); + } + return "<unknown type>"; +} + +class DFSanABIList { + std::unique_ptr<SpecialCaseList> SCL; + + public: + DFSanABIList(SpecialCaseList *SCL) : SCL(SCL) {} + + /// Returns whether either this function or its source file are listed in the + /// given category. + bool isIn(const Function &F, const StringRef Category) const { + return isIn(*F.getParent(), Category) || + SCL->inSection("fun", F.getName(), Category); + } + + /// Returns whether this global alias is listed in the given category. + /// + /// If GA aliases a function, the alias's name is matched as a function name + /// would be. Similarly, aliases of globals are matched like globals. + bool isIn(const GlobalAlias &GA, const StringRef Category) const { + if (isIn(*GA.getParent(), Category)) + return true; + + if (isa<FunctionType>(GA.getType()->getElementType())) + return SCL->inSection("fun", GA.getName(), Category); + + return SCL->inSection("global", GA.getName(), Category) || + SCL->inSection("type", GetGlobalTypeString(GA), Category); + } + + /// Returns whether this module is listed in the given category. 
+ bool isIn(const Module &M, const StringRef Category) const { + return SCL->inSection("src", M.getModuleIdentifier(), Category); + } +}; + class DataFlowSanitizer : public ModulePass { friend struct DFSanFunction; friend class DFSanVisitor; @@ -190,7 +235,7 @@ class DataFlowSanitizer : public ModulePass { Constant *DFSanSetLabelFn; Constant *DFSanNonzeroLabelFn; MDNode *ColdCallWeights; - std::unique_ptr<SpecialCaseList> ABIList; + DFSanABIList ABIList; DenseMap<Value *, Function *> UnwrappedFnMap; AttributeSet ReadOnlyNoneAttrs; @@ -395,11 +440,11 @@ bool DataFlowSanitizer::doInitialization(Module &M) { } bool DataFlowSanitizer::isInstrumented(const Function *F) { - return !ABIList->isIn(*F, "uninstrumented"); + return !ABIList.isIn(*F, "uninstrumented"); } bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) { - return !ABIList->isIn(*GA, "uninstrumented"); + return !ABIList.isIn(*GA, "uninstrumented"); } DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() { @@ -407,11 +452,11 @@ DataFlowSanitizer::InstrumentedABI DataFlowSanitizer::getInstrumentedABI() { } DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) { - if (ABIList->isIn(*F, "functional")) + if (ABIList.isIn(*F, "functional")) return WK_Functional; - if (ABIList->isIn(*F, "discard")) + if (ABIList.isIn(*F, "discard")) return WK_Discard; - if (ABIList->isIn(*F, "custom")) + if (ABIList.isIn(*F, "custom")) return WK_Custom; return WK_Warning; @@ -500,7 +545,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { if (!DL) return false; - if (ABIList->isIn(M, "skip")) + if (ABIList.isIn(M, "skip")) return false; if (!GetArgTLSPtr) { @@ -557,7 +602,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { ++i; // Don't stop on weak. We assume people aren't playing games with the // instrumentedness of overridden weak aliases. - if (Function *F = dyn_cast<Function>(GA->getAliasee())) { + if (auto F = dyn_cast<Function>(GA->getBaseObject())) { bool GAInst = isInstrumented(GA), FInst = isInstrumented(F); if (GAInst && FInst) { addGlobalNamePrefix(GA); @@ -567,7 +612,7 @@ bool DataFlowSanitizer::runOnModule(Module &M) { // below will take care of instrumenting it. 
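// (For context: buildWrapperFunction conceptually produces
//    RetTy NewF(Args... A) { return F(A...); }
//  with the alias's linkage; this is a sketch of its effect, not its literal
//  output. The explicit bitcast in the replacement below is needed because
//  the alias's pointer type need not match NewF's type.)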
Function *NewF = buildWrapperFunction(F, "", GA->getLinkage(), F->getFunctionType()); - GA->replaceAllUsesWith(NewF); + GA->replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA->getType())); NewF->takeName(GA); GA->eraseFromParent(); FnsToInstrument.push_back(NewF); diff --git a/lib/Transforms/Instrumentation/DebugIR.cpp b/lib/Transforms/Instrumentation/DebugIR.cpp index 18bda1a..f2f1738 100644 --- a/lib/Transforms/Instrumentation/DebugIR.cpp +++ b/lib/Transforms/Instrumentation/DebugIR.cpp @@ -354,7 +354,10 @@ private: std::string getTypeName(Type *T) { std::string TypeName; raw_string_ostream TypeStream(TypeName); - T->print(TypeStream); + if (T) + T->print(TypeStream); + else + TypeStream << "Printing <null> Type"; TypeStream.flush(); return TypeName; } diff --git a/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/lib/Transforms/Instrumentation/GCOVProfiling.cpp index 8330a9b..cfeb62e 100644 --- a/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -211,6 +211,7 @@ namespace { class GCOVLines : public GCOVRecord { public: void addLine(uint32_t Line) { + assert(Line != 0 && "Line zero is not a valid real line number."); Lines.push_back(Line); } @@ -453,10 +454,17 @@ static bool functionHasLines(Function *F) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { + // Debug intrinsic locations correspond to the location of the + // declaration, not necessarily any statements or expressions. + if (isa<DbgInfoIntrinsic>(I)) continue; + const DebugLoc &Loc = I->getDebugLoc(); if (Loc.isUnknown()) continue; - if (Loc.getLine() != 0) - return true; + + // Artificial lines such as calls to the global constructors. + if (Loc.getLine() == 0) continue; + + return true; } } return false; @@ -515,8 +523,16 @@ void GCOVProfiler::emitProfileNotes() { uint32_t Line = 0; for (BasicBlock::iterator I = BB->begin(), IE = BB->end(); I != IE; ++I) { + // Debug intrinsic locations correspond to the location of the + // declaration, not necessarily any statements or expressions. + if (isa<DbgInfoIntrinsic>(I)) continue; + const DebugLoc &Loc = I->getDebugLoc(); if (Loc.isUnknown()) continue; + + // Artificial lines such as calls to the global constructors. + if (Loc.getLine() == 0) continue; + if (Line == Loc.getLine()) continue; Line = Loc.getLine(); if (SP != getDISubprogram(Loc.getScope(*Ctx))) continue; diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp index b8e632e..496ab48 100644 --- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -10,8 +10,6 @@ /// This file is a part of MemorySanitizer, a detector of uninitialized /// reads. /// -/// Status: early prototype. -/// /// The algorithm of the tool is similar to Memcheck /// (http://goo.gl/QKbem). 
We associate a few shadow bits with every /// byte of the application memory, poison the shadow of the malloc-ed @@ -117,7 +115,6 @@ #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Transforms/Utils/SpecialCaseList.h" using namespace llvm; @@ -178,10 +175,6 @@ static cl::opt<bool> ClDumpStrictInstructions("msan-dump-strict-instructions", cl::desc("print out instructions with default strict semantics"), cl::Hidden, cl::init(false)); -static cl::opt<std::string> ClBlacklistFile("msan-blacklist", - cl::desc("File containing the list of functions where MemorySanitizer " - "should not report bugs"), cl::Hidden); - static cl::opt<int> ClInstrumentationWithCallThreshold( "msan-instrumentation-with-call-threshold", cl::desc( @@ -211,13 +204,11 @@ namespace { /// uninitialized reads. class MemorySanitizer : public FunctionPass { public: - MemorySanitizer(int TrackOrigins = 0, - StringRef BlacklistFile = StringRef()) + MemorySanitizer(int TrackOrigins = 0) : FunctionPass(ID), TrackOrigins(std::max(TrackOrigins, (int)ClTrackOrigins)), DL(nullptr), WarningFn(nullptr), - BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile : BlacklistFile), WrapIndirectCalls(!ClWrapIndirectCalls.empty()) {} const char *getPassName() const override { return "MemorySanitizer"; } bool runOnFunction(Function &F) override; @@ -282,10 +273,6 @@ class MemorySanitizer : public FunctionPass { MDNode *ColdCallWeights; /// \brief Branch weights for origin store. MDNode *OriginStoreWeights; - /// \brief Path to blacklist file. - SmallString<64> BlacklistFile; - /// \brief The blacklist. - std::unique_ptr<SpecialCaseList> BL; /// \brief An empty volatile inline asm that prevents callback merge. InlineAsm *EmptyAsm; @@ -305,9 +292,8 @@ INITIALIZE_PASS(MemorySanitizer, "msan", "MemorySanitizer: detects uninitialized reads.", false, false) -FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins, - StringRef BlacklistFile) { - return new MemorySanitizer(TrackOrigins, BlacklistFile); +FunctionPass *llvm::createMemorySanitizerPass(int TrackOrigins) { + return new MemorySanitizer(TrackOrigins); } /// \brief Create a non-const global initialized with the given string. @@ -431,7 +417,6 @@ bool MemorySanitizer::doInitialization(Module &M) { report_fatal_error("data layout missing"); DL = &DLP->getDataLayout(); - BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); C = &(M.getContext()); unsigned PtrSize = DL->getPointerSizeInBits(/* AddressSpace */0); switch (PtrSize) { @@ -526,7 +511,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // The following flags disable parts of MSan instrumentation based on // blacklist contents and command-line options. 
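// (Note on the rename below: PropagateShadow, formerly LoadShadow, now
// gates all shadow propagation rather than just shadow loads; when it is
// false, getShadow() returns a clean shadow and setShadow() records one,
// as the later hunks in this file show.)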
bool InsertChecks; - bool LoadShadow; + bool PropagateShadow; bool PoisonStack; bool PoisonUndef; bool CheckReturnValue; @@ -544,11 +529,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { MemorySanitizerVisitor(Function &F, MemorySanitizer &MS) : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)) { - bool SanitizeFunction = !MS.BL->isIn(F) && F.getAttributes().hasAttribute( - AttributeSet::FunctionIndex, - Attribute::SanitizeMemory); + bool SanitizeFunction = F.getAttributes().hasAttribute( + AttributeSet::FunctionIndex, Attribute::SanitizeMemory); InsertChecks = SanitizeFunction; - LoadShadow = SanitizeFunction; + PropagateShadow = SanitizeFunction; PoisonStack = SanitizeFunction && ClPoisonStack; PoisonUndef = SanitizeFunction && ClPoisonUndef; // FIXME: Consider using SpecialCaseList to specify a list of functions that @@ -585,7 +569,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex))); IRB.CreateCall3(Fn, ConvertedShadow2, IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), - updateOrigin(Origin, IRB)); + Origin); } else { Value *Cmp = IRB.CreateICmpNE( ConvertedShadow, getCleanShadow(ConvertedShadow), "_mscmp"); @@ -599,26 +583,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } void materializeStores(bool InstrumentWithCalls) { - for (size_t i = 0, n = StoreList.size(); i < n; i++) { - StoreInst &I = *dyn_cast<StoreInst>(StoreList[i]); + for (auto Inst : StoreList) { + StoreInst &SI = *dyn_cast<StoreInst>(Inst); - IRBuilder<> IRB(&I); - Value *Val = I.getValueOperand(); - Value *Addr = I.getPointerOperand(); - Value *Shadow = I.isAtomic() ? getCleanShadow(Val) : getShadow(Val); + IRBuilder<> IRB(&SI); + Value *Val = SI.getValueOperand(); + Value *Addr = SI.getPointerOperand(); + Value *Shadow = SI.isAtomic() ? 
getCleanShadow(Val) : getShadow(Val); Value *ShadowPtr = getShadowPtr(Addr, Shadow->getType(), IRB); StoreInst *NewSI = - IRB.CreateAlignedStore(Shadow, ShadowPtr, I.getAlignment()); + IRB.CreateAlignedStore(Shadow, ShadowPtr, SI.getAlignment()); DEBUG(dbgs() << " STORE: " << *NewSI << "\n"); (void)NewSI; - if (ClCheckAccessAddress) insertShadowCheck(Addr, &I); + if (ClCheckAccessAddress) insertShadowCheck(Addr, &SI); - if (I.isAtomic()) I.setOrdering(addReleaseOrdering(I.getOrdering())); + if (SI.isAtomic()) SI.setOrdering(addReleaseOrdering(SI.getOrdering())); if (MS.TrackOrigins) { - unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment()); + unsigned Alignment = std::max(kMinOriginAlignment, SI.getAlignment()); storeOrigin(IRB, Addr, Shadow, getOrigin(Val), Alignment, InstrumentWithCalls); } @@ -662,18 +646,17 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } void materializeChecks(bool InstrumentWithCalls) { - for (size_t i = 0, n = InstrumentationList.size(); i < n; i++) { - Instruction *OrigIns = InstrumentationList[i].OrigIns; - Value *Shadow = InstrumentationList[i].Shadow; - Value *Origin = InstrumentationList[i].Origin; + for (const auto &ShadowData : InstrumentationList) { + Instruction *OrigIns = ShadowData.OrigIns; + Value *Shadow = ShadowData.Shadow; + Value *Origin = ShadowData.Origin; materializeOneCheck(OrigIns, Shadow, Origin, InstrumentWithCalls); } DEBUG(dbgs() << "DONE:\n" << F); } void materializeIndirectCalls() { - for (size_t i = 0, n = IndirectCallList.size(); i < n; i++) { - CallSite CS = IndirectCallList[i]; + for (auto &CS : IndirectCallList) { Instruction *I = CS.getInstruction(); BasicBlock *B = I->getParent(); IRBuilder<> IRB(I); @@ -732,15 +715,13 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // Finalize PHI nodes. - for (size_t i = 0, n = ShadowPHINodes.size(); i < n; i++) { - PHINode *PN = ShadowPHINodes[i]; + for (PHINode *PN : ShadowPHINodes) { PHINode *PNS = cast<PHINode>(getShadow(PN)); PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr; size_t NumValues = PN->getNumIncomingValues(); for (size_t v = 0; v < NumValues; v++) { PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v)); - if (PNO) - PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v)); + if (PNO) PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v)); } } @@ -874,7 +855,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// \brief Set SV to be the shadow value for V. void setShadow(Value *V, Value *SV) { assert(!ShadowMap.count(V) && "Values may only have one shadow"); - ShadowMap[V] = SV; + ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V); } /// \brief Set Origin to be the origin value for V. @@ -926,6 +907,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { /// This function either returns the value set earlier with setShadow, /// or extracts if from ParamTLS (for function arguments). Value *getShadow(Value *V) { + if (!PropagateShadow) return getCleanShadow(V); if (Instruction *I = dyn_cast<Instruction>(V)) { // For instructions the shadow is already stored in the map. 
Value *Shadow = ShadowMap[V]; @@ -950,22 +932,21 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Function *F = A->getParent(); IRBuilder<> EntryIRB(F->getEntryBlock().getFirstNonPHI()); unsigned ArgOffset = 0; - for (Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end(); - AI != AE; ++AI) { - if (!AI->getType()->isSized()) { + for (auto &FArg : F->args()) { + if (!FArg.getType()->isSized()) { DEBUG(dbgs() << "Arg is not sized\n"); continue; } - unsigned Size = AI->hasByValAttr() - ? MS.DL->getTypeAllocSize(AI->getType()->getPointerElementType()) - : MS.DL->getTypeAllocSize(AI->getType()); - if (A == AI) { - Value *Base = getShadowPtrForArgument(AI, EntryIRB, ArgOffset); - if (AI->hasByValAttr()) { + unsigned Size = FArg.hasByValAttr() + ? MS.DL->getTypeAllocSize(FArg.getType()->getPointerElementType()) + : MS.DL->getTypeAllocSize(FArg.getType()); + if (A == &FArg) { + Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset); + if (FArg.hasByValAttr()) { // ByVal pointer itself has clean shadow. We copy the actual // argument shadow to the underlying memory. // Figure out maximal valid memcpy alignment. - unsigned ArgAlign = AI->getParamAlignment(); + unsigned ArgAlign = FArg.getParamAlignment(); if (ArgAlign == 0) { Type *EltType = A->getType()->getPointerElementType(); ArgAlign = MS.DL->getABITypeAlignment(EltType); @@ -980,10 +961,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { } else { *ShadowPtr = EntryIRB.CreateAlignedLoad(Base, kShadowTLSAlignment); } - DEBUG(dbgs() << " ARG: " << *AI << " ==> " << + DEBUG(dbgs() << " ARG: " << FArg << " ==> " << **ShadowPtr << "\n"); if (MS.TrackOrigins) { - Value* OriginPtr = getOriginPtrForArgument(AI, EntryIRB, ArgOffset); + Value *OriginPtr = + getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset); setOrigin(A, EntryIRB.CreateLoad(OriginPtr)); } } @@ -1093,7 +1075,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { IRBuilder<> IRB(I.getNextNode()); Type *ShadowTy = getShadowTy(&I); Value *Addr = I.getPointerOperand(); - if (LoadShadow) { + if (PropagateShadow) { Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); setShadow(&I, IRB.CreateAlignedLoad(ShadowPtr, I.getAlignment(), "_msld")); @@ -1108,7 +1090,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { I.setOrdering(addAcquireOrdering(I.getOrdering())); if (MS.TrackOrigins) { - if (LoadShadow) { + if (PropagateShadow) { unsigned Alignment = std::max(kMinOriginAlignment, I.getAlignment()); setOrigin(&I, IRB.CreateAlignedLoad(getOriginPtr(Addr, IRB), Alignment)); @@ -1320,10 +1302,14 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { if (!Origin) { Origin = OpOrigin; } else { - Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB); - Value *Cond = IRB.CreateICmpNE(FlatShadow, - MSV->getCleanShadow(FlatShadow)); - Origin = IRB.CreateSelect(Cond, OpOrigin, Origin); + Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin); + // No point in adding something that might result in 0 origin value. 
+ if (!ConstOrigin || !ConstOrigin->isNullValue()) { + Value *FlatShadow = MSV->convertToShadowTyNoVec(OpShadow, IRB); + Value *Cond = + IRB.CreateICmpNE(FlatShadow, MSV->getCleanShadow(FlatShadow)); + Origin = IRB.CreateSelect(Cond, OpOrigin, Origin); + } } } return *this; } @@ -1411,13 +1397,61 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { SC.Done(&I); } + // \brief Handle multiplication by constant. + // + // Handle a special case of multiplication by a constant that may have one or + // more zeros in the lower bits. This makes the corresponding number of lower + // bits of the result zero as well. We model it by shifting the other operand + // shadow left by the required number of bits. Effectively, we transform + // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B). + // We use multiplication by 2**N instead of shift to cover the case of + // multiplication by 0, which may occur in some elements of a vector operand. + // (A worked example follows below.) + void handleMulByConstant(BinaryOperator &I, Constant *ConstArg, + Value *OtherArg) { + Constant *ShadowMul; + Type *Ty = ConstArg->getType(); + if (Ty->isVectorTy()) { + unsigned NumElements = Ty->getVectorNumElements(); + Type *EltTy = Ty->getSequentialElementType(); + SmallVector<Constant *, 16> Elements; + for (unsigned Idx = 0; Idx < NumElements; ++Idx) { + ConstantInt *Elt = + dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx)); + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + Elements.push_back(ConstantInt::get(EltTy, V2)); + } + ShadowMul = ConstantVector::get(Elements); + } else { + ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg); + APInt V = Elt->getValue(); + APInt V2 = APInt(V.getBitWidth(), 1) << V.countTrailingZeros(); + ShadowMul = ConstantInt::get(Elt->getType(), V2); + } + + IRBuilder<> IRB(&I); + setShadow(&I, + IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst")); + setOrigin(&I, getOrigin(OtherArg)); + } + + void visitMul(BinaryOperator &I) { + Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0)); + Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1)); + if (constOp0 && !constOp1) + handleMulByConstant(I, constOp0, I.getOperand(1)); + else if (constOp1 && !constOp0) + handleMulByConstant(I, constOp1, I.getOperand(0)); + else + handleShadowOr(I); + } + void visitFAdd(BinaryOperator &I) { handleShadowOr(I); } void visitFSub(BinaryOperator &I) { handleShadowOr(I); } void visitFMul(BinaryOperator &I) { handleShadowOr(I); } void visitAdd(BinaryOperator &I) { handleShadowOr(I); } void visitSub(BinaryOperator &I) { handleShadowOr(I); } void visitXor(BinaryOperator &I) { handleShadowOr(I); } - void visitMul(BinaryOperator &I) { handleShadowOr(I); } void handleDiv(Instruction &I) { IRBuilder<> IRB(&I); @@ -1723,7 +1757,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { Value *Addr = I.getArgOperand(0); Type *ShadowTy = getShadowTy(&I); - if (LoadShadow) { + if (PropagateShadow) { Value *ShadowPtr = getShadowPtr(Addr, ShadowTy, IRB); // We don't know the pointer alignment (could be unaligned SSE load!). // Have to assume the worst case. 
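To make the mul-by-constant shadow rule in handleMulByConstant above concrete, here is a small worked example (illustrative IR, not part of the patch):

  %y  = mul i32 %x, 24    ; 24 = 3 * 2**3, so B = 3 trailing zero bits
  ; shadow instruction emitted alongside it:
  %sy = mul i32 %sx, 8    ; "msprop_mul_cst": Sy = Sx * 2**B, i.e. Sx << 3

However poisoned %x may be, the low three bits of %y are zero regardless, so those shadow bits are cleared, while the remaining shadow bits shift left together with the data bits.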
@@ -1736,7 +1770,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { insertShadowCheck(Addr, &I); if (MS.TrackOrigins) { - if (LoadShadow) + if (PropagateShadow) setOrigin(&I, IRB.CreateLoad(getOriginPtr(Addr, IRB))); else setOrigin(&I, getCleanOrigin()); @@ -1946,6 +1980,120 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { setOriginForNaryOp(I); } + // \brief Get an X86_MMX-sized vector type. + Type *getMMXVectorTy(unsigned EltSizeInBits) { + const unsigned X86_MMXSizeInBits = 64; + return VectorType::get(IntegerType::get(*MS.C, EltSizeInBits), + X86_MMXSizeInBits / EltSizeInBits); + } + + // \brief Returns a signed counterpart for an (un)signed-saturate-and-pack + // intrinsic. + Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) { + switch (id) { + case llvm::Intrinsic::x86_sse2_packsswb_128: + case llvm::Intrinsic::x86_sse2_packuswb_128: + return llvm::Intrinsic::x86_sse2_packsswb_128; + + case llvm::Intrinsic::x86_sse2_packssdw_128: + case llvm::Intrinsic::x86_sse41_packusdw: + return llvm::Intrinsic::x86_sse2_packssdw_128; + + case llvm::Intrinsic::x86_avx2_packsswb: + case llvm::Intrinsic::x86_avx2_packuswb: + return llvm::Intrinsic::x86_avx2_packsswb; + + case llvm::Intrinsic::x86_avx2_packssdw: + case llvm::Intrinsic::x86_avx2_packusdw: + return llvm::Intrinsic::x86_avx2_packssdw; + + case llvm::Intrinsic::x86_mmx_packsswb: + case llvm::Intrinsic::x86_mmx_packuswb: + return llvm::Intrinsic::x86_mmx_packsswb; + + case llvm::Intrinsic::x86_mmx_packssdw: + return llvm::Intrinsic::x86_mmx_packssdw; + default: + llvm_unreachable("unexpected intrinsic id"); + } + } + + // \brief Instrument vector pack intrinsic. + // + // This function instruments intrinsics like x86_mmx_packsswb, which pack + // elements of 2 input vectors into half as many bits with saturation. + // Shadow is propagated with the signed variant of the same intrinsic applied + // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer). + // EltSizeInBits is used only for x86mmx arguments. + void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) { + assert(I.getNumArgOperands() == 2); + bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); + IRBuilder<> IRB(&I); + Value *S1 = getShadow(&I, 0); + Value *S2 = getShadow(&I, 1); + assert(isX86_MMX || S1->getType()->isVectorTy()); + + // SExt and ICmpNE below must apply to individual elements of input vectors. + // In case of x86mmx arguments, cast them to appropriate vector types and + // back. + Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType(); + if (isX86_MMX) { + S1 = IRB.CreateBitCast(S1, T); + S2 = IRB.CreateBitCast(S2, T); + } + Value *S1_ext = IRB.CreateSExt( + IRB.CreateICmpNE(S1, llvm::Constant::getNullValue(T)), T); + Value *S2_ext = IRB.CreateSExt( + IRB.CreateICmpNE(S2, llvm::Constant::getNullValue(T)), T); + if (isX86_MMX) { + Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C); + S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy); + S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy); + } + + Function *ShadowFn = Intrinsic::getDeclaration( + F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID())); + + Value *S = IRB.CreateCall2(ShadowFn, S1_ext, S2_ext, "_msprop_vector_pack"); + if (isX86_MMX) S = IRB.CreateBitCast(S, getShadowTy(&I)); + setShadow(&I, S); + setOriginForNaryOp(I); + } + + // \brief Instrument sum-of-absolute-differences intrinsic. 
+ void handleVectorSadIntrinsic(IntrinsicInst &I) { + const unsigned SignificantBitsPerResultElement = 16; + bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); + Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType(); + unsigned ZeroBitsPerResultElement = + ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement; + + IRBuilder<> IRB(&I); + Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); + S = IRB.CreateBitCast(S, ResTy); + S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)), + ResTy); + S = IRB.CreateLShr(S, ZeroBitsPerResultElement); + S = IRB.CreateBitCast(S, getShadowTy(&I)); + setShadow(&I, S); + setOriginForNaryOp(I); + } + + // \brief Instrument multiply-add intrinsic. + void handleVectorPmaddIntrinsic(IntrinsicInst &I, + unsigned EltSizeInBits = 0) { + bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy(); + Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType(); + IRBuilder<> IRB(&I); + Value *S = IRB.CreateOr(getShadow(&I, 0), getShadow(&I, 1)); + S = IRB.CreateBitCast(S, ResTy); + S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)), + ResTy); + S = IRB.CreateBitCast(S, getShadowTy(&I)); + setShadow(&I, S); + setOriginForNaryOp(I); + } + void visitIntrinsicInst(IntrinsicInst &I) { switch (I.getIntrinsicID()) { case llvm::Intrinsic::bswap: @@ -2062,6 +2210,47 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { // case llvm::Intrinsic::x86_sse2_psll_dq_bs: // case llvm::Intrinsic::x86_sse2_psrl_dq_bs: + case llvm::Intrinsic::x86_sse2_packsswb_128: + case llvm::Intrinsic::x86_sse2_packssdw_128: + case llvm::Intrinsic::x86_sse2_packuswb_128: + case llvm::Intrinsic::x86_sse41_packusdw: + case llvm::Intrinsic::x86_avx2_packsswb: + case llvm::Intrinsic::x86_avx2_packssdw: + case llvm::Intrinsic::x86_avx2_packuswb: + case llvm::Intrinsic::x86_avx2_packusdw: + handleVectorPackIntrinsic(I); + break; + + case llvm::Intrinsic::x86_mmx_packsswb: + case llvm::Intrinsic::x86_mmx_packuswb: + handleVectorPackIntrinsic(I, 16); + break; + + case llvm::Intrinsic::x86_mmx_packssdw: + handleVectorPackIntrinsic(I, 32); + break; + + case llvm::Intrinsic::x86_mmx_psad_bw: + case llvm::Intrinsic::x86_sse2_psad_bw: + case llvm::Intrinsic::x86_avx2_psad_bw: + handleVectorSadIntrinsic(I); + break; + + case llvm::Intrinsic::x86_sse2_pmadd_wd: + case llvm::Intrinsic::x86_avx2_pmadd_wd: + case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw_128: + case llvm::Intrinsic::x86_avx2_pmadd_ub_sw: + handleVectorPmaddIntrinsic(I); + break; + + case llvm::Intrinsic::x86_ssse3_pmadd_ub_sw: + handleVectorPmaddIntrinsic(I, 8); + break; + + case llvm::Intrinsic::x86_mmx_pmadd_wd: + handleVectorPmaddIntrinsic(I, 16); + break; + default: if (!handleUnknownIntrinsic(I)) visitInstruction(I); @@ -2083,12 +2272,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { return; } - // Allow only tail calls with the same types, otherwise - // we may have a false positive: shadow for a non-void RetVal - // will get propagated to a void RetVal. 
- if (Call->isTailCall() && Call->getType() != Call->getParent()->getType()) - Call->setTailCall(false); - assert(!isa<IntrinsicInst>(&I) && "intrinsics are handled elsewhere"); // We are going to insert code that relies on the fact that the callee @@ -2211,6 +2394,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> { void visitPHINode(PHINode &I) { IRBuilder<> IRB(&I); + if (!PropagateShadow) { + setShadow(&I, getCleanShadow(&I)); + return; + } + ShadowPHINodes.push_back(&I); setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(), "_msphi_s")); diff --git a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp index 8fe9bca..89386a6 100644 --- a/lib/Transforms/Instrumentation/ThreadSanitizer.cpp +++ b/lib/Transforms/Instrumentation/ThreadSanitizer.cpp @@ -40,14 +40,11 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/ModuleUtils.h" -#include "llvm/Transforms/Utils/SpecialCaseList.h" using namespace llvm; #define DEBUG_TYPE "tsan" -static cl::opt<std::string> ClBlacklistFile("tsan-blacklist", - cl::desc("Blacklist file"), cl::Hidden); static cl::opt<bool> ClInstrumentMemoryAccesses( "tsan-instrument-memory-accesses", cl::init(true), cl::desc("Instrument memory accesses"), cl::Hidden); @@ -76,11 +73,7 @@ namespace { /// ThreadSanitizer: instrument the code in module to find races. struct ThreadSanitizer : public FunctionPass { - ThreadSanitizer(StringRef BlacklistFile = StringRef()) - : FunctionPass(ID), - DL(nullptr), - BlacklistFile(BlacklistFile.empty() ? ClBlacklistFile - : BlacklistFile) { } + ThreadSanitizer() : FunctionPass(ID), DL(nullptr) {} const char *getPassName() const override; bool runOnFunction(Function &F) override; bool doInitialization(Module &M) override; @@ -98,8 +91,6 @@ struct ThreadSanitizer : public FunctionPass { const DataLayout *DL; Type *IntptrTy; - SmallString<64> BlacklistFile; - std::unique_ptr<SpecialCaseList> BL; IntegerType *OrdTy; // Callbacks to run-time library are computed in doInitialization. Function *TsanFuncEntry; @@ -129,8 +120,8 @@ const char *ThreadSanitizer::getPassName() const { return "ThreadSanitizer"; } -FunctionPass *llvm::createThreadSanitizerPass(StringRef BlacklistFile) { - return new ThreadSanitizer(BlacklistFile); +FunctionPass *llvm::createThreadSanitizerPass() { + return new ThreadSanitizer(); } static Function *checkInterfaceFunction(Constant *FuncOrBitcast) { @@ -228,7 +219,6 @@ bool ThreadSanitizer::doInitialization(Module &M) { if (!DLP) report_fatal_error("data layout missing"); DL = &DLP->getDataLayout(); - BL.reset(SpecialCaseList::createOrDie(BlacklistFile)); // Always insert a call to __tsan_init into the module's CTORs. IRBuilder<> IRB(M.getContext()); @@ -322,7 +312,6 @@ static bool isAtomic(Instruction *I) { bool ThreadSanitizer::runOnFunction(Function &F) { if (!DL) return false; - if (BL->isIn(F)) return false; initializeCallbacks(*F.getParent()); SmallVector<Instruction*, 8> RetVec; SmallVector<Instruction*, 8> AllLoadsAndStores; @@ -331,22 +320,20 @@ bool ThreadSanitizer::runOnFunction(Function &F) { SmallVector<Instruction*, 8> MemIntrinCalls; bool Res = false; bool HasCalls = false; + bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread); // Traverse all instructions, collect loads/stores/returns, check for calls. 
- for (Function::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) { - BasicBlock &BB = *FI; - for (BasicBlock::iterator BI = BB.begin(), BE = BB.end(); - BI != BE; ++BI) { - if (isAtomic(BI)) - AtomicAccesses.push_back(BI); - else if (isa<LoadInst>(BI) || isa<StoreInst>(BI)) - LocalLoadsAndStores.push_back(BI); - else if (isa<ReturnInst>(BI)) - RetVec.push_back(BI); - else if (isa<CallInst>(BI) || isa<InvokeInst>(BI)) { - if (isa<MemIntrinsic>(BI)) - MemIntrinCalls.push_back(BI); + for (auto &BB : F) { + for (auto &Inst : BB) { + if (isAtomic(&Inst)) + AtomicAccesses.push_back(&Inst); + else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) + LocalLoadsAndStores.push_back(&Inst); + else if (isa<ReturnInst>(Inst)) + RetVec.push_back(&Inst); + else if (isa<CallInst>(Inst) || isa<InvokeInst>(Inst)) { + if (isa<MemIntrinsic>(Inst)) + MemIntrinCalls.push_back(&Inst); HasCalls = true; chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores); } @@ -358,21 +345,22 @@ bool ThreadSanitizer::runOnFunction(Function &F) { // FIXME: many of these accesses do not need to be checked for races // (e.g. variables that do not escape, etc). - // Instrument memory accesses. - if (ClInstrumentMemoryAccesses && F.hasFnAttribute(Attribute::SanitizeThread)) - for (size_t i = 0, n = AllLoadsAndStores.size(); i < n; ++i) { - Res |= instrumentLoadOrStore(AllLoadsAndStores[i]); + // Instrument memory accesses only if we want to report bugs in the function. + if (ClInstrumentMemoryAccesses && SanitizeFunction) + for (auto Inst : AllLoadsAndStores) { + Res |= instrumentLoadOrStore(Inst); } - // Instrument atomic memory accesses. + // Instrument atomic memory accesses in any case (they can be used to + // implement synchronization). if (ClInstrumentAtomics) - for (size_t i = 0, n = AtomicAccesses.size(); i < n; ++i) { - Res |= instrumentAtomic(AtomicAccesses[i]); + for (auto Inst : AtomicAccesses) { + Res |= instrumentAtomic(Inst); } - if (ClInstrumentMemIntrinsics) - for (size_t i = 0, n = MemIntrinCalls.size(); i < n; ++i) { - Res |= instrumentMemIntrinsic(MemIntrinCalls[i]); + if (ClInstrumentMemIntrinsics && SanitizeFunction) + for (auto Inst : MemIntrinCalls) { + Res |= instrumentMemIntrinsic(Inst); } // Instrument function entry/exit points if there were instrumented accesses. @@ -382,8 +370,8 @@ bool ThreadSanitizer::runOnFunction(Function &F) { Intrinsic::getDeclaration(F.getParent(), Intrinsic::returnaddress), IRB.getInt32(0)); IRB.CreateCall(TsanFuncEntry, ReturnAddress); - for (size_t i = 0, n = RetVec.size(); i < n; ++i) { - IRBuilder<> IRBRet(RetVec[i]); + for (auto RetInst : RetVec) { + IRBuilder<> IRBRet(RetInst); IRBRet.CreateCall(TsanFuncExit); } Res = true; @@ -543,8 +531,14 @@ bool ThreadSanitizer::instrumentAtomic(Instruction *I) { IRB.CreateIntCast(CASI->getNewValOperand(), Ty, false), createOrdering(&IRB, CASI->getSuccessOrdering()), createOrdering(&IRB, CASI->getFailureOrdering())}; - CallInst *C = CallInst::Create(TsanAtomicCAS[Idx], ArrayRef<Value*>(Args)); - ReplaceInstWithInst(I, C); + CallInst *C = IRB.CreateCall(TsanAtomicCAS[Idx], Args); + Value *Success = IRB.CreateICmpEQ(C, CASI->getCompareOperand()); + + Value *Res = IRB.CreateInsertValue(UndefValue::get(CASI->getType()), C, 0); + Res = IRB.CreateInsertValue(Res, Success, 1); + + I->replaceAllUsesWith(Res); + I->eraseFromParent(); } else if (FenceInst *FI = dyn_cast<FenceInst>(I)) { Value *Args[] = {createOrdering(&IRB, FI->getOrdering())}; Function *F = FI->getSynchScope() == SingleThread ? 
diff --git a/lib/Transforms/Scalar/Android.mk b/lib/Transforms/Scalar/Android.mk index 079cc86..5e22de6 100644 --- a/lib/Transforms/Scalar/Android.mk +++ b/lib/Transforms/Scalar/Android.mk @@ -8,11 +8,11 @@ transforms_scalar_SRC_FILES := \ DCE.cpp \ DeadStoreElimination.cpp \ EarlyCSE.cpp \ - GlobalMerge.cpp \ GVN.cpp \ IndVarSimplify.cpp \ JumpThreading.cpp \ LICM.cpp \ + LoadCombine.cpp \ LoopDeletion.cpp \ LoopIdiomRecognize.cpp \ LoopInstSimplify.cpp \ diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt index 3ad1488..2dcfa23 100644 --- a/lib/Transforms/Scalar/CMakeLists.txt +++ b/lib/Transforms/Scalar/CMakeLists.txt @@ -8,10 +8,10 @@ add_llvm_library(LLVMScalarOpts EarlyCSE.cpp FlattenCFGPass.cpp GVN.cpp - GlobalMerge.cpp IndVarSimplify.cpp JumpThreading.cpp LICM.cpp + LoadCombine.cpp LoopDeletion.cpp LoopIdiomRecognize.cpp LoopInstSimplify.cpp diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 6d07ddd..106eba0 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -1464,6 +1464,13 @@ void GVN::AnalyzeLoadAvailability(LoadInst *LI, LoadDepVect &Deps, continue; } + // Loading from calloc (which zero initializes memory) -> zero + if (isCallocLikeFn(DepInst, TLI)) { + ValuesPerBlock.push_back(AvailableValueInBlock::get( + DepBB, Constant::getNullValue(LI->getType()))); + continue; + } + if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) { // Reject loads and stores that are to the same address but are of // different types if we have to. @@ -1791,6 +1798,10 @@ static void patchReplacementInstruction(Instruction *I, Value *Repl) { case LLVMContext::MD_fpmath: ReplInst->setMetadata(Kind, MDNode::getMostGenericFPMath(IMD, ReplMD)); break; + case LLVMContext::MD_invariant_load: + // Only set the !invariant.load if it is present in both instructions. + ReplInst->setMetadata(Kind, IMD); + break; } } } @@ -1988,6 +1999,15 @@ bool GVN::processLoad(LoadInst *L) { } } + // If this load follows a calloc (which zero initializes memory), + // then the loaded value is zero. + if (isCallocLikeFn(DepInst, TLI)) { + L->replaceAllUsesWith(Constant::getNullValue(L->getType())); + markInstructionForDeletion(L); + ++NumGVNLoad; + return true; + } + return false; } diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 230a381..6e50d33 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -158,6 +158,15 @@ bool JumpThreading::runOnFunction(Function &F) { TLI = &getAnalysis<TargetLibraryInfo>(); LVI = &getAnalysis<LazyValueInfo>(); + // Remove unreachable blocks from the function, as they may result in an + // infinite loop. We do threading if we found something profitable. Jump + // threading a branch can create other opportunities. If these opportunities + // form a cycle, i.e. if any jump threading is undoing previous threading in + // the path, then we will loop forever. We take care of this issue by not + // jump threading for back edges. This works for normal cases but not for + // unreachable blocks, as they may have a cycle with no back edge. 
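// (Concrete failure mode: two unreachable blocks that branch to each other
// can each present a threadable edge; threading one recreates the
// opportunity in the other, and because neither edge is a loop back edge,
// the no-back-edge rule never breaks the cycle. Deleting unreachable
// blocks up front sidesteps this.)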
+ removeUnreachableBlocks(F); + FindLoopHeaders(F); bool Changed, EverChanged = false; diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index 0a8d16f..abcceb2 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -192,6 +192,14 @@ namespace { SmallVectorImpl<BasicBlock*> &ExitBlocks, SmallVectorImpl<Instruction*> &InsertPts, PredIteratorCache &PIC); + + /// \brief Create a copy of the instruction in the exit block and patch up + /// SSA. + /// PN is a user of I in ExitBlock that can be used to get the number and + /// list of predecessors fast. + Instruction *CloneInstructionInExitBlock(Instruction &I, + BasicBlock &ExitBlock, + PHINode &PN); }; } @@ -531,6 +539,35 @@ bool LICM::isNotUsedInLoop(Instruction &I) { return true; } +Instruction *LICM::CloneInstructionInExitBlock(Instruction &I, + BasicBlock &ExitBlock, + PHINode &PN) { + Instruction *New = I.clone(); + ExitBlock.getInstList().insert(ExitBlock.getFirstInsertionPt(), New); + if (!I.getName().empty()) New->setName(I.getName() + ".le"); + + // Build LCSSA PHI nodes for any in-loop operands. Note that this is + // particularly cheap because we can rip off the PHI node that we're + // replacing for the number and blocks of the predecessors. + // OPT: If this shows up in a profile, we can instead finish sinking all + // invariant instructions, and then walk their operands to re-establish + // LCSSA. That will eliminate creating PHI nodes just to nuke them when + // sinking bottom-up. + for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; + ++OI) + if (Instruction *OInst = dyn_cast<Instruction>(*OI)) + if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) + if (!OLoop->contains(&PN)) { + PHINode *OpPN = + PHINode::Create(OInst->getType(), PN.getNumIncomingValues(), + OInst->getName() + ".lcssa", ExitBlock.begin()); + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) + OpPN->addIncoming(OInst, PN.getIncomingBlock(i)); + *OI = OpPN; + } + return New; +} + /// sink - When an instruction is found to only be used outside of the loop, /// this function moves it to the exit blocks and patches up SSA form as needed. /// This method is guaranteed to remove the original instruction from its @@ -550,6 +587,9 @@ void LICM::sink(Instruction &I) { SmallPtrSet<BasicBlock *, 32> ExitBlockSet(ExitBlocks.begin(), ExitBlocks.end()); #endif + // Clones of this instruction. Don't create more than one per exit block! + SmallDenseMap<BasicBlock *, Instruction *, 32> SunkCopies; + // If this instruction is only used outside of the loop, then all users are // PHI nodes in exit blocks due to LCSSA form. Just RAUW them with clones of // the instruction. @@ -561,30 +601,13 @@ void LICM::sink(Instruction &I) { assert(ExitBlockSet.count(ExitBlock) && "The LCSSA PHI is not in an exit block!"); - Instruction *New = I.clone(); - ExitBlock->getInstList().insert(ExitBlock->getFirstInsertionPt(), New); - if (!I.getName().empty()) - New->setName(I.getName() + ".le"); - - // Build LCSSA PHI nodes for any in-loop operands. Note that this is - // particularly cheap because we can rip off the PHI node that we're - // replacing for the number and blocks of the predecessors. - // OPT: If this shows up in a profile, we can instead finish sinking all - // invariant instructions, and then walk their operands to re-establish - // LCSSA. That will eliminate creating PHI nodes just to nuke them when - // sinking bottom-up. 
- for (User::op_iterator OI = New->op_begin(), OE = New->op_end(); OI != OE; - ++OI) - if (Instruction *OInst = dyn_cast<Instruction>(*OI)) - if (Loop *OLoop = LI->getLoopFor(OInst->getParent())) - if (!OLoop->contains(PN)) { - PHINode *OpPN = PHINode::Create( - OInst->getType(), PN->getNumIncomingValues(), - OInst->getName() + ".lcssa", ExitBlock->begin()); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - OpPN->addIncoming(OInst, PN->getIncomingBlock(i)); - *OI = OpPN; - } + Instruction *New; + auto It = SunkCopies.find(ExitBlock); + if (It != SunkCopies.end()) + New = It->second; + else + New = SunkCopies[ExitBlock] = + CloneInstructionInExitBlock(I, *ExitBlock, *PN); PN->replaceAllUsesWith(New); PN->eraseFromParent(); @@ -616,7 +639,7 @@ void LICM::hoist(Instruction &I) { /// bool LICM::isSafeToExecuteUnconditionally(Instruction &Inst) { // If it is not a trapping instruction, it is always safe to hoist. - if (isSafeToSpeculativelyExecute(&Inst)) + if (isSafeToSpeculativelyExecute(&Inst, DL)) return true; return isGuaranteedToExecute(Inst); diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp new file mode 100644 index 0000000..846aa70 --- /dev/null +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -0,0 +1,268 @@ +//===- LoadCombine.cpp - Combine Adjacent Loads ---------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This transformation combines adjacent loads. +/// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar.h" + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/TargetFolder.h" +#include "llvm/Pass.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +#define DEBUG_TYPE "load-combine" + +STATISTIC(NumLoadsAnalyzed, "Number of loads analyzed for combining"); +STATISTIC(NumLoadsCombined, "Number of loads combined"); + +namespace { +struct PointerOffsetPair { + Value *Pointer; + uint64_t Offset; +}; + +struct LoadPOPPair { + LoadPOPPair(LoadInst *L, PointerOffsetPair P, unsigned O) + : Load(L), POP(P), InsertOrder(O) {} + LoadPOPPair() {} + LoadInst *Load; + PointerOffsetPair POP; + /// \brief The new load needs to be created before the first load in IR order. 
+ unsigned InsertOrder; +}; + +class LoadCombine : public BasicBlockPass { + LLVMContext *C; + const DataLayout *DL; + +public: + LoadCombine() + : BasicBlockPass(ID), + C(nullptr), DL(nullptr) { + initializeSROAPass(*PassRegistry::getPassRegistry()); + } + bool doInitialization(Function &) override; + bool runOnBasicBlock(BasicBlock &BB) override; + void getAnalysisUsage(AnalysisUsage &AU) const override; + + const char *getPassName() const override { return "LoadCombine"; } + static char ID; + + typedef IRBuilder<true, TargetFolder> BuilderTy; + +private: + BuilderTy *Builder; + + PointerOffsetPair getPointerOffsetPair(LoadInst &); + bool combineLoads(DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &); + bool aggregateLoads(SmallVectorImpl<LoadPOPPair> &); + bool combineLoads(SmallVectorImpl<LoadPOPPair> &); +}; +} + +bool LoadCombine::doInitialization(Function &F) { + DEBUG(dbgs() << "LoadCombine function: " << F.getName() << "\n"); + C = &F.getContext(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + if (!DLP) { + DEBUG(dbgs() << " Skipping LoadCombine -- no target data!\n"); + return false; + } + DL = &DLP->getDataLayout(); + return true; +} + +PointerOffsetPair LoadCombine::getPointerOffsetPair(LoadInst &LI) { + PointerOffsetPair POP; + POP.Pointer = LI.getPointerOperand(); + POP.Offset = 0; + while (isa<BitCastInst>(POP.Pointer) || isa<GetElementPtrInst>(POP.Pointer)) { + if (auto *GEP = dyn_cast<GetElementPtrInst>(POP.Pointer)) { + unsigned BitWidth = DL->getPointerTypeSizeInBits(GEP->getType()); + APInt Offset(BitWidth, 0); + if (GEP->accumulateConstantOffset(*DL, Offset)) + POP.Offset += Offset.getZExtValue(); + else + // Can't handle GEPs with variable indices. + return POP; + POP.Pointer = GEP->getPointerOperand(); + } else if (auto *BC = dyn_cast<BitCastInst>(POP.Pointer)) + POP.Pointer = BC->getOperand(0); + } + return POP; +} + +bool LoadCombine::combineLoads( + DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> &LoadMap) { + bool Combined = false; + for (auto &Loads : LoadMap) { + if (Loads.second.size() < 2) + continue; + std::sort(Loads.second.begin(), Loads.second.end(), + [](const LoadPOPPair &A, const LoadPOPPair &B) { + return A.POP.Offset < B.POP.Offset; + }); + if (aggregateLoads(Loads.second)) + Combined = true; + } + return Combined; +} + +/// \brief Try to aggregate loads from a sorted list of loads to be combined. +/// +/// It is guaranteed that no writes occur between any of the loads. All loads +/// have the same base pointer. There are at least two loads. +bool LoadCombine::aggregateLoads(SmallVectorImpl<LoadPOPPair> &Loads) { + assert(Loads.size() >= 2 && "Insufficient loads!"); + LoadInst *BaseLoad = nullptr; + SmallVector<LoadPOPPair, 8> AggregateLoads; + bool Combined = false; + uint64_t PrevOffset = -1ull; + uint64_t PrevSize = 0; + for (auto &L : Loads) { + if (PrevOffset == -1ull) { + BaseLoad = L.Load; + PrevOffset = L.POP.Offset; + PrevSize = DL->getTypeStoreSize(L.Load->getType()); + AggregateLoads.push_back(L); + continue; + } + if (L.Load->getAlignment() > BaseLoad->getAlignment()) + continue; + if (L.POP.Offset > PrevOffset + PrevSize) { + // No other load will be combinable + if (combineLoads(AggregateLoads)) + Combined = true; + AggregateLoads.clear(); + PrevOffset = -1; + continue; + } + if (L.POP.Offset != PrevOffset + PrevSize) + // This load is offset less than the size of the last load. + // FIXME: We may want to handle this case. 
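// (Example: after an i32 load at offset 0, PrevOffset + PrevSize is 4, so
// an i16 load at offset 2 lands inside the previous load's bytes and is
// skipped rather than combined.)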
+ continue; + PrevOffset = L.POP.Offset; + PrevSize = DL->getTypeStoreSize(L.Load->getType()); + AggregateLoads.push_back(L); + } + if (combineLoads(AggregateLoads)) + Combined = true; + return Combined; +} + +/// \brief Given a list of combinable load. Combine the maximum number of them. +bool LoadCombine::combineLoads(SmallVectorImpl<LoadPOPPair> &Loads) { + // Remove loads from the end while the size is not a power of 2. + unsigned TotalSize = 0; + for (const auto &L : Loads) + TotalSize += L.Load->getType()->getPrimitiveSizeInBits(); + while (TotalSize != 0 && !isPowerOf2_32(TotalSize)) + TotalSize -= Loads.pop_back_val().Load->getType()->getPrimitiveSizeInBits(); + if (Loads.size() < 2) + return false; + + DEBUG({ + dbgs() << "***** Combining Loads ******\n"; + for (const auto &L : Loads) { + dbgs() << L.POP.Offset << ": " << *L.Load << "\n"; + } + }); + + // Find first load. This is where we put the new load. + LoadPOPPair FirstLP; + FirstLP.InsertOrder = -1u; + for (const auto &L : Loads) + if (L.InsertOrder < FirstLP.InsertOrder) + FirstLP = L; + + unsigned AddressSpace = + FirstLP.POP.Pointer->getType()->getPointerAddressSpace(); + + Builder->SetInsertPoint(FirstLP.Load); + Value *Ptr = Builder->CreateConstGEP1_64( + Builder->CreatePointerCast(Loads[0].POP.Pointer, + Builder->getInt8PtrTy(AddressSpace)), + Loads[0].POP.Offset); + LoadInst *NewLoad = new LoadInst( + Builder->CreatePointerCast( + Ptr, PointerType::get(IntegerType::get(Ptr->getContext(), TotalSize), + Ptr->getType()->getPointerAddressSpace())), + Twine(Loads[0].Load->getName()) + ".combined", false, + Loads[0].Load->getAlignment(), FirstLP.Load); + + for (const auto &L : Loads) { + Builder->SetInsertPoint(L.Load); + Value *V = Builder->CreateExtractInteger( + *DL, NewLoad, cast<IntegerType>(L.Load->getType()), + L.POP.Offset - Loads[0].POP.Offset, "combine.extract"); + L.Load->replaceAllUsesWith(V); + } + + NumLoadsCombined = NumLoadsCombined + Loads.size(); + return true; +} + +bool LoadCombine::runOnBasicBlock(BasicBlock &BB) { + if (skipOptnoneFunction(BB) || !DL) + return false; + + IRBuilder<true, TargetFolder> + TheBuilder(BB.getContext(), TargetFolder(DL)); + Builder = &TheBuilder; + + DenseMap<const Value *, SmallVector<LoadPOPPair, 8>> LoadMap; + + bool Combined = false; + unsigned Index = 0; + for (auto &I : BB) { + if (I.mayWriteToMemory() || I.mayThrow()) { + if (combineLoads(LoadMap)) + Combined = true; + LoadMap.clear(); + continue; + } + LoadInst *LI = dyn_cast<LoadInst>(&I); + if (!LI) + continue; + ++NumLoadsAnalyzed; + if (!LI->isSimple() || !LI->getType()->isIntegerTy()) + continue; + auto POP = getPointerOffsetPair(*LI); + if (!POP.Pointer) + continue; + LoadMap[POP.Pointer].push_back(LoadPOPPair(LI, POP, Index++)); + } + if (combineLoads(LoadMap)) + Combined = true; + return Combined; +} + +void LoadCombine::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); +} + +char LoadCombine::ID = 0; + +BasicBlockPass *llvm::createLoadCombinePass() { + return new LoadCombine(); +} + +INITIALIZE_PASS(LoadCombine, "load-combine", "Combine Adjacent Loads", false, + false) diff --git a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp index 26a83df..a12f5a7 100644 --- a/lib/Transforms/Scalar/LoopIdiomRecognize.cpp +++ b/lib/Transforms/Scalar/LoopIdiomRecognize.cpp @@ -112,7 +112,7 @@ namespace { /// the variable involved in the comparion is returned. 
This function will /// be called to see if the precondition and postcondition of the loop /// are in desirable form. - Value *matchCondition (BranchInst *Br, BasicBlock *NonZeroTarget) const; + Value *matchCondition(BranchInst *Br, BasicBlock *NonZeroTarget) const; /// Return true iff the idiom is detected in the loop. and 1) \p CntInst /// is set to the instruction counting the population bit. 2) \p CntPhi @@ -122,7 +122,7 @@ namespace { (Instruction *&CntInst, PHINode *&CntPhi, Value *&Var) const; /// Insert ctpop intrinsic function and some obviously dead instructions. - void transform (Instruction *CntInst, PHINode *CntPhi, Value *Var); + void transform(Instruction *CntInst, PHINode *CntPhi, Value *Var); /// Create llvm.ctpop.* intrinsic function. CallInst *createPopcntIntrinsic(IRBuilderTy &IRB, Value *Val, DebugLoc DL); diff --git a/lib/Transforms/Scalar/LoopRerollPass.cpp b/lib/Transforms/Scalar/LoopRerollPass.cpp index 8b5e036..b6fbb16 100644 --- a/lib/Transforms/Scalar/LoopRerollPass.cpp +++ b/lib/Transforms/Scalar/LoopRerollPass.cpp @@ -924,8 +924,10 @@ bool LoopReroll::reroll(Instruction *IV, Loop *L, BasicBlock *Header, // them, and this matching fails. As an exception, we allow the alias // set tracker to handle regular (simple) load/store dependencies. if (FutureSideEffects && - ((!isSimpleLoadStore(J1) && !isSafeToSpeculativelyExecute(J1)) || - (!isSimpleLoadStore(J2) && !isSafeToSpeculativelyExecute(J2)))) { + ((!isSimpleLoadStore(J1) && + !isSafeToSpeculativelyExecute(J1, DL)) || + (!isSimpleLoadStore(J2) && + !isSafeToSpeculativelyExecute(J2, DL)))) { DEBUG(dbgs() << "LRR: iteration root match failed at " << *J1 << " vs. " << *J2 << " (side effects prevent reordering)\n"); diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index fc28fd2..00c0f88 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -18,8 +18,10 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -36,7 +38,8 @@ UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden, static cl::opt<unsigned> UnrollCount("unroll-count", cl::init(0), cl::Hidden, - cl::desc("Use this unroll count for all loops, for testing purposes")); + cl::desc("Use this unroll count for all loops including those with " + "unroll_count pragma values, for testing purposes")); static cl::opt<bool> UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden, @@ -47,6 +50,11 @@ static cl::opt<bool> UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden, cl::desc("Unroll loops with run-time trip counts")); +static cl::opt<unsigned> +PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden, + cl::desc("Unrolled size limit for loops with an unroll(enable) or " + "unroll_count pragma.")); + namespace { class LoopUnroll : public LoopPass { public: @@ -109,6 +117,66 @@ namespace { // For now, recreate dom info, if loop is unrolled. AU.addPreserved<DominatorTreeWrapperPass>(); } + + // Fill in the UnrollingPreferences parameter with values from the + // TargetTransformationInfo. 
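// Note that every field below is first seeded from the pass's
// command-line-driven state; the TargetTransformInfo::getUnrollingPreferences
// call then lets the target override any of them (for example, a target
// might cap UP.MaxCount or clear UP.Runtime for loops it cannot profitably
// unroll at run time).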
+  void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
+                               TargetTransformInfo::UnrollingPreferences &UP) {
+    UP.Threshold = CurrentThreshold;
+    UP.OptSizeThreshold = OptSizeUnrollThreshold;
+    UP.PartialThreshold = CurrentThreshold;
+    UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
+    UP.Count = CurrentCount;
+    UP.MaxCount = UINT_MAX;
+    UP.Partial = CurrentAllowPartial;
+    UP.Runtime = CurrentRuntime;
+    TTI.getUnrollingPreferences(L, UP);
+  }
+
+  // Select and return an unroll count based on parameters from
+  // the user, unroll preferences, unroll pragmas, or a heuristic.
+  // SetExplicitly is set to true if the unroll count is set by
+  // the user or a pragma rather than selected heuristically.
+  unsigned
+  selectUnrollCount(const Loop *L, unsigned TripCount, bool HasEnablePragma,
+                    unsigned PragmaCount,
+                    const TargetTransformInfo::UnrollingPreferences &UP,
+                    bool &SetExplicitly);
+
+  // Select threshold values used to limit unrolling based on a
+  // total unrolled size.  Parameters Threshold and PartialThreshold
+  // are set to the maximum unrolled size for fully and partially
+  // unrolled loops respectively.
+  void selectThresholds(const Loop *L, bool HasPragma,
+                        const TargetTransformInfo::UnrollingPreferences &UP,
+                        unsigned &Threshold, unsigned &PartialThreshold) {
+    // Determine the current unrolling threshold.  While this is
+    // normally set from UnrollThreshold, it is overridden to a
+    // smaller value if the current function is marked as
+    // optimize-for-size, and the unroll threshold was not user
+    // specified.
+    Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
+    PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
+    if (!UserThreshold &&
+        L->getHeader()->getParent()->getAttributes().
+            hasAttribute(AttributeSet::FunctionIndex,
+                         Attribute::OptimizeForSize)) {
+      Threshold = UP.OptSizeThreshold;
+      PartialThreshold = UP.PartialOptSizeThreshold;
+    }
+    if (HasPragma) {
+      // If the loop has an unrolling pragma, we want to be more
+      // aggressive with unrolling limits.  Set thresholds to at
+      // least the PragmaUnrollThreshold value, which is larger than
+      // the default limits.
+      if (Threshold != NoThreshold)
+        Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold);
+      if (PartialThreshold != NoThreshold)
+        PartialThreshold =
+            std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold);
+    }
+  }
 };
 }

@@ -151,6 +219,103 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
   return LoopSize;
 }

+// Returns the value associated with the given metadata node name (for
+// example, "llvm.loop.unroll.count").  If no such named metadata node
+// exists, then nullptr is returned.
+static const ConstantInt *GetUnrollMetadataValue(const Loop *L,
+                                                 StringRef Name) {
+  MDNode *LoopID = L->getLoopID();
+  if (!LoopID) return nullptr;
+
+  // First operand should refer to the loop id itself.
+  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
+
+  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+    const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+    if (!MD) continue;
+
+    const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+    if (!S) continue;
+
+    if (Name.equals(S->getString())) {
+      assert(MD->getNumOperands() == 2 &&
+             "Unroll hint metadata should have two operands.");
+      return cast<ConstantInt>(MD->getOperand(1));
+    }
+  }
+  return nullptr;
+}
+
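// For reference, a sketch of the loop metadata that GetUnrollMetadataValue
// walks (textual IR in this release's syntax; the value names are
// illustrative):
//
//   br i1 %exitcond, label %exit, label %header, !llvm.loop !0
//   !0 = metadata !{metadata !0, metadata !1}
//   !1 = metadata !{metadata !"llvm.loop.unroll.count", i32 4}
//
// Here GetUnrollMetadataValue(L, "llvm.loop.unroll.count") returns the i32 4;
// operand 0 of !0 is the self-reference asserted above.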
+// Returns true if the loop has an unroll(enable) pragma.
+static bool HasUnrollEnablePragma(const Loop *L) {
+  const ConstantInt *EnableValue =
+      GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
+  return (EnableValue && EnableValue->getZExtValue());
+}
+
+// Returns true if the loop has an unroll(disable) pragma.
+static bool HasUnrollDisablePragma(const Loop *L) {
+  const ConstantInt *EnableValue =
+      GetUnrollMetadataValue(L, "llvm.loop.unroll.enable");
+  return (EnableValue && !EnableValue->getZExtValue());
+}
+
+// If the loop has an unroll_count pragma, return the (necessarily
+// positive) value from the pragma.  Otherwise return 0.
+static unsigned UnrollCountPragmaValue(const Loop *L) {
+  const ConstantInt *CountValue =
+      GetUnrollMetadataValue(L, "llvm.loop.unroll.count");
+  if (CountValue) {
+    unsigned Count = CountValue->getZExtValue();
+    assert(Count >= 1 && "Unroll count must be positive.");
+    return Count;
+  }
+  return 0;
+}
+
+unsigned LoopUnroll::selectUnrollCount(
+    const Loop *L, unsigned TripCount, bool HasEnablePragma,
+    unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
+    bool &SetExplicitly) {
+  SetExplicitly = true;
+
+  // A user-specified count (either as a command-line option or a
+  // constructor parameter) has the highest precedence.
+  unsigned Count = UserCount ? CurrentCount : 0;
+
+  // If there is no user-specified count, unroll pragmas have the next
+  // highest precedence.
+  if (Count == 0) {
+    if (PragmaCount) {
+      Count = PragmaCount;
+    } else if (HasEnablePragma) {
+      // An unroll(enable) pragma without an unroll_count pragma
+      // indicates that the loop should be unrolled fully.
+      Count = TripCount;
+    }
+  }
+
+  if (Count == 0)
+    Count = UP.Count;
+
+  if (Count == 0) {
+    SetExplicitly = false;
+    if (TripCount == 0)
+      // Runtime trip count.
+      Count = UnrollRuntimeCount;
+    else
+      // Conservative heuristic: if we know the trip count, see if we can
+      // completely unroll (subject to the threshold, checked below); otherwise
+      // try to find the greatest modulo of the trip count which is still under
+      // the threshold value.
+      Count = TripCount;
+  }
+  if (TripCount && Count > TripCount)
+    return TripCount;
+  return Count;
+}
+
 bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   if (skipOptnoneFunction(L))
     return false;
@@ -162,33 +327,16 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
   BasicBlock *Header = L->getHeader();
   DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
         << "] Loop %" << Header->getName() << "\n");
-  (void)Header;
-  TargetTransformInfo::UnrollingPreferences UP;
-  UP.Threshold = CurrentThreshold;
-  UP.OptSizeThreshold = OptSizeUnrollThreshold;
-  UP.PartialThreshold = CurrentThreshold;
-  UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
-  UP.Count = CurrentCount;
-  UP.MaxCount = UINT_MAX;
-  UP.Partial = CurrentAllowPartial;
-  UP.Runtime = CurrentRuntime;
-  TTI.getUnrollingPreferences(L, UP);
-
-  // Determine the current unrolling threshold.  While this is normally set
-  // from UnrollThreshold, it is overridden to a smaller value if the current
-  // function is marked as optimize-for-size, and the unroll threshold was
-  // not user specified.
-  unsigned Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
-  unsigned PartialThreshold =
-      UserThreshold ? CurrentThreshold : UP.PartialThreshold;
-  if (!UserThreshold &&
-      Header->getParent()->getAttributes().
- hasAttribute(AttributeSet::FunctionIndex, - Attribute::OptimizeForSize)) { - Threshold = UP.OptSizeThreshold; - PartialThreshold = UP.PartialOptSizeThreshold; + if (HasUnrollDisablePragma(L)) { + return false; } + bool HasEnablePragma = HasUnrollEnablePragma(L); + unsigned PragmaCount = UnrollCountPragmaValue(L); + bool HasPragma = HasEnablePragma || PragmaCount > 0; + + TargetTransformInfo::UnrollingPreferences UP; + getUnrollingPreferences(L, TTI, UP); // Find trip count and trip multiple if count is not available unsigned TripCount = 0; @@ -202,79 +350,117 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { TripMultiple = SE->getSmallConstantTripMultiple(L, LatchBlock); } - bool Runtime = UserRuntime ? CurrentRuntime : UP.Runtime; - - // Use a default unroll-count if the user doesn't specify a value - // and the trip count is a run-time value. The default is different - // for run-time or compile-time trip count loops. - unsigned Count = UserCount ? CurrentCount : UP.Count; - if (Runtime && Count == 0 && TripCount == 0) - Count = UnrollRuntimeCount; + // Select an initial unroll count. This may be reduced later based + // on size thresholds. + bool CountSetExplicitly; + unsigned Count = selectUnrollCount(L, TripCount, HasEnablePragma, PragmaCount, + UP, CountSetExplicitly); + + unsigned NumInlineCandidates; + bool notDuplicatable; + unsigned LoopSize = + ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI); + DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); + uint64_t UnrolledSize = (uint64_t)LoopSize * Count; + if (notDuplicatable) { + DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" + << " instructions.\n"); + return false; + } + if (NumInlineCandidates != 0) { + DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); + return false; + } - if (Count == 0) { - // Conservative heuristic: if we know the trip count, see if we can - // completely unroll (subject to the threshold, checked below); otherwise - // try to find greatest modulo of the trip count which is still under - // threshold value. - if (TripCount == 0) - return false; - Count = TripCount; + unsigned Threshold, PartialThreshold; + selectThresholds(L, HasPragma, UP, Threshold, PartialThreshold); + + // Given Count, TripCount and thresholds determine the type of + // unrolling which is to be performed. + enum { Full = 0, Partial = 1, Runtime = 2 }; + int Unrolling; + if (TripCount && Count == TripCount) { + if (Threshold != NoThreshold && UnrolledSize > Threshold) { + DEBUG(dbgs() << " Too large to fully unroll with count: " << Count + << " because size: " << UnrolledSize << ">" << Threshold + << "\n"); + Unrolling = Partial; + } else { + Unrolling = Full; + } + } else if (TripCount && Count < TripCount) { + Unrolling = Partial; + } else { + Unrolling = Runtime; } - // Enforce the threshold. - if (Threshold != NoThreshold && PartialThreshold != NoThreshold) { - unsigned NumInlineCandidates; - bool notDuplicatable; - unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, - notDuplicatable, TTI); - DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); - if (notDuplicatable) { - DEBUG(dbgs() << " Not unrolling loop which contains non-duplicatable" - << " instructions.\n"); + // Reduce count based on the type of unrolling and the threshold values. + unsigned OriginalCount = Count; + bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime; + if (Unrolling == Partial) { + bool AllowPartial = UserAllowPartial ? 
CurrentAllowPartial : UP.Partial; + if (!AllowPartial && !CountSetExplicitly) { + DEBUG(dbgs() << " will not try to unroll partially because " + << "-unroll-allow-partial not given\n"); return false; } - if (NumInlineCandidates != 0) { - DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); + if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) { + // Reduce unroll count to be modulo of TripCount for partial unrolling. + Count = PartialThreshold / LoopSize; + while (Count != 0 && TripCount % Count != 0) + Count--; + } + } else if (Unrolling == Runtime) { + if (!AllowRuntime && !CountSetExplicitly) { + DEBUG(dbgs() << " will not try to unroll loop with runtime trip count " + << "-unroll-runtime not given\n"); return false; } - uint64_t Size = (uint64_t)LoopSize*Count; - if (TripCount != 1 && - (Size > Threshold || (Count != TripCount && Size > PartialThreshold))) { - if (Size > Threshold) - DEBUG(dbgs() << " Too large to fully unroll with count: " << Count - << " because size: " << Size << ">" << Threshold << "\n"); - - bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; - if (!AllowPartial && !(Runtime && TripCount == 0)) { - DEBUG(dbgs() << " will not try to unroll partially because " - << "-unroll-allow-partial not given\n"); - return false; - } - if (TripCount) { - // Reduce unroll count to be modulo of TripCount for partial unrolling - Count = PartialThreshold / LoopSize; - while (Count != 0 && TripCount%Count != 0) - Count--; - } - else if (Runtime) { - // Reduce unroll count to be a lower power-of-two value - while (Count != 0 && Size > PartialThreshold) { - Count >>= 1; - Size = LoopSize*Count; - } - } - if (Count > UP.MaxCount) - Count = UP.MaxCount; - if (Count < 2) { - DEBUG(dbgs() << " could not unroll partially\n"); - return false; + // Reduce unroll count to be the largest power-of-two factor of + // the original count which satisfies the threshold limit. + while (Count != 0 && UnrolledSize > PartialThreshold) { + Count >>= 1; + UnrolledSize = LoopSize * Count; + } + if (Count > UP.MaxCount) + Count = UP.MaxCount; + DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n"); + } + + if (HasPragma) { + // Emit optimization remarks if we are unable to unroll the loop + // as directed by a pragma. + DebugLoc LoopLoc = L->getStartLoc(); + Function *F = Header->getParent(); + LLVMContext &Ctx = F->getContext(); + if (HasEnablePragma && PragmaCount == 0) { + if (TripCount && Count != TripCount) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to fully unroll loop as directed by unroll(enable) pragma " + "because unrolled size is too large."); + } else if (!TripCount) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to fully unroll loop as directed by unroll(enable) pragma " + "because loop has a runtime trip count."); } - DEBUG(dbgs() << " partially unrolling with count: " << Count << "\n"); + } else if (PragmaCount > 0 && Count != OriginalCount) { + emitOptimizationRemarkMissed( + Ctx, DEBUG_TYPE, *F, LoopLoc, + "Unable to unroll loop the number of times directed by " + "unroll_count pragma because unrolled size is too large."); } } + if (Unrolling != Full && Count < 2) { + // Partial unrolling by 1 is a nop. For full unrolling, a factor + // of 1 makes sense because loop control can be eliminated. + return false; + } + // Unroll the loop. 
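// To make the selection above concrete (all numbers illustrative): with
// LoopSize = 40, PartialThreshold = 150, and TripCount = 9, a full unroll
// would cost 9 * 40 = 360 > 150, so Unrolling becomes Partial; Count is
// first reduced to 150 / 40 = 3, and since 9 % 3 == 0 it stays 3. With a
// runtime trip count, Count is instead halved until LoopSize * Count fits
// under PartialThreshold. The resulting Count drives UnrollLoop below.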
- if (!UnrollLoop(L, Count, TripCount, Runtime, TripMultiple, LI, this, &LPM)) + if (!UnrollLoop(L, Count, TripCount, AllowRuntime, TripMultiple, LI, this, &LPM)) return false; return true; diff --git a/lib/Transforms/Scalar/LowerAtomic.cpp b/lib/Transforms/Scalar/LowerAtomic.cpp index 4251ac4..3314e1e 100644 --- a/lib/Transforms/Scalar/LowerAtomic.cpp +++ b/lib/Transforms/Scalar/LowerAtomic.cpp @@ -32,7 +32,10 @@ static bool LowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI) { Value *Res = Builder.CreateSelect(Equal, Val, Orig); Builder.CreateStore(Res, Ptr); - CXI->replaceAllUsesWith(Orig); + Res = Builder.CreateInsertValue(UndefValue::get(CXI->getType()), Orig, 0); + Res = Builder.CreateInsertValue(Res, Equal, 1); + + CXI->replaceAllUsesWith(Res); CXI->eraseFromParent(); return true; } diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp index 986d6a4..ea2cf7c 100644 --- a/lib/Transforms/Scalar/Reassociate.cpp +++ b/lib/Transforms/Scalar/Reassociate.cpp @@ -1368,11 +1368,10 @@ Value *Reassociate::OptimizeXor(Instruction *I, Value *Reassociate::OptimizeAdd(Instruction *I, SmallVectorImpl<ValueEntry> &Ops) { // Scan the operand lists looking for X and -X pairs. If we find any, we - // can simplify the expression. X+-X == 0. While we're at it, scan for any + // can simplify expressions like X+-X == 0 and X+~X ==-1. While we're at it, + // scan for any // duplicates. We want to canonicalize Y+Y+Y+Z -> 3*Y+Z. - // - // TODO: We could handle "X + ~X" -> "-1" if we wanted, since "-X = ~X+1". - // + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { Value *TheOp = Ops[i].Op; // Check to see if we've seen this operand before. If so, we factor all @@ -1412,19 +1411,28 @@ Value *Reassociate::OptimizeAdd(Instruction *I, continue; } - // Check for X and -X in the operand list. - if (!BinaryOperator::isNeg(TheOp)) + // Check for X and -X or X and ~X in the operand list. + if (!BinaryOperator::isNeg(TheOp) && !BinaryOperator::isNot(TheOp)) continue; - Value *X = BinaryOperator::getNegArgument(TheOp); + Value *X = nullptr; + if (BinaryOperator::isNeg(TheOp)) + X = BinaryOperator::getNegArgument(TheOp); + else if (BinaryOperator::isNot(TheOp)) + X = BinaryOperator::getNotArgument(TheOp); + unsigned FoundX = FindInOperandList(Ops, i, X); if (FoundX == i) continue; // Remove X and -X from the operand list. - if (Ops.size() == 2) + if (Ops.size() == 2 && BinaryOperator::isNeg(TheOp)) return Constant::getNullValue(X->getType()); + // Remove X and ~X from the operand list. + if (Ops.size() == 2 && BinaryOperator::isNot(TheOp)) + return Constant::getAllOnesValue(X->getType()); + Ops.erase(Ops.begin()+i); if (i < FoundX) --FoundX; @@ -1434,6 +1442,13 @@ Value *Reassociate::OptimizeAdd(Instruction *I, ++NumAnnihil; --i; // Revisit element. e -= 2; // Removed two elements. + + // if X and ~X we append -1 to the operand list. 
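// (The new X and ~X handling rests on the two's-complement identity
// ~X == -X - 1, hence X + ~X == -1 for every X; e.g. in i8,
// 5 + ~5 = 5 + 0xFA = 0xFF = -1.)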
+ if (BinaryOperator::isNot(TheOp)) { + Value *V = Constant::getAllOnesValue(X->getType()); + Ops.insert(Ops.end(), ValueEntry(getRank(V), V)); + e += 1; + } } // Scan the operand list, checking to see if there are any common factors diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index feeb231..90c3520 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -494,7 +494,9 @@ private: void visitResumeInst (TerminatorInst &I) { /*returns void*/ } void visitUnreachableInst(TerminatorInst &I) { /*returns void*/ } void visitFenceInst (FenceInst &I) { /*returns void*/ } - void visitAtomicCmpXchgInst (AtomicCmpXchgInst &I) { markOverdefined(&I); } + void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { + markAnythingOverdefined(&I); + } void visitAtomicRMWInst (AtomicRMWInst &I) { markOverdefined(&I); } void visitAllocaInst (Instruction &I) { markOverdefined(&I); } void visitVAArgInst (Instruction &I) { markAnythingOverdefined(&I); } diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp index 04bf4f8..8c7f253 100644 --- a/lib/Transforms/Scalar/SROA.cpp +++ b/lib/Transforms/Scalar/SROA.cpp @@ -1032,11 +1032,6 @@ static Type *findCommonType(AllocaSlices::const_iterator B, UserTy = SI->getValueOperand()->getType(); } - if (!UserTy || (Ty && Ty != UserTy)) - TyIsCommon = false; // Give up on anything but an iN type. - else - Ty = UserTy; - if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) { // If the type is larger than the partition, skip it. We only encounter // this for split integer operations where we want to use the type of the @@ -1051,6 +1046,13 @@ static Type *findCommonType(AllocaSlices::const_iterator B, if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth()) ITy = UserITy; } + + // To avoid depending on the order of slices, Ty and TyIsCommon must not + // depend on types skipped above. + if (!UserTy || (Ty && Ty != UserTy)) + TyIsCommon = false; // Give up on anything but an iN type. + else + Ty = UserTy; } return TyIsCommon ? Ty : ITy; @@ -1128,7 +1130,7 @@ static bool isSafePHIToSpeculate(PHINode &PN, // If this pointer is always safe to load, or if we can prove that there // is already a load in the block, then we can move the load to the pred // block. - if (InVal->isDereferenceablePointer() || + if (InVal->isDereferenceablePointer(DL) || isSafeToLoadUnconditionally(InVal, TI, MaxAlign, DL)) continue; @@ -1196,8 +1198,8 @@ static bool isSafeSelectToSpeculate(SelectInst &SI, const DataLayout *DL = nullptr) { Value *TValue = SI.getTrueValue(); Value *FValue = SI.getFalseValue(); - bool TDerefable = TValue->isDereferenceablePointer(); - bool FDerefable = FValue->isDereferenceablePointer(); + bool TDerefable = TValue->isDereferenceablePointer(DL); + bool FDerefable = FValue->isDereferenceablePointer(DL); for (User *U : SI.users()) { LoadInst *LI = dyn_cast<LoadInst>(U); diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp index 8e557aa..73c97ff 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -450,13 +450,14 @@ void SampleModuleProfile::dump() { /// /// \returns true if the file was loaded successfully, false otherwise. 
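/// The body below follows the ErrorOr idiom this change adopts; the general
/// shape (a sketch, with producer() standing in for a callee such as
/// MemoryBuffer::getFile) is:
///
///   ErrorOr<T> ValOrErr = producer(...);
///   if (std::error_code EC = ValOrErr.getError()) {
///     // consume EC.message() and fail
///   }
///   T Val = std::move(ValOrErr.get());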
bool SampleModuleProfile::loadText() { - std::unique_ptr<MemoryBuffer> Buffer; - error_code EC = MemoryBuffer::getFile(Filename, Buffer); - if (EC) { + ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr = + MemoryBuffer::getFile(Filename); + if (std::error_code EC = BufferOrErr.getError()) { std::string Msg(EC.message()); M.getContext().diagnose(DiagnosticInfoSampleProfile(Filename.data(), Msg)); return false; } + std::unique_ptr<MemoryBuffer> Buffer = std::move(BufferOrErr.get()); line_iterator LineIt(*Buffer, '#'); // Read the profile of each function. Since each function may be diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp index f8f828c..edf012d 100644 --- a/lib/Transforms/Scalar/Scalar.cpp +++ b/lib/Transforms/Scalar/Scalar.cpp @@ -65,6 +65,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) { initializeSinkingPass(Registry); initializeTailCallElimPass(Registry); initializeSeparateConstOffsetFromGEPPass(Registry); + initializeLoadCombinePass(Registry); } void LLVMInitializeScalarOpts(LLVMPassRegistryRef R) { diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index 58192fc..e2a24a7 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1142,8 +1142,8 @@ public: /// We can do this to a select if its only uses are loads and if the operand to /// the select can be loaded unconditionally. static bool isSafeSelectToSpeculate(SelectInst *SI, const DataLayout *DL) { - bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(); - bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(); + bool TDerefable = SI->getTrueValue()->isDereferenceablePointer(DL); + bool FDerefable = SI->getFalseValue()->isDereferenceablePointer(DL); for (User *U : SI->users()) { LoadInst *LI = dyn_cast<LoadInst>(U); @@ -1226,7 +1226,7 @@ static bool isSafePHIToSpeculate(PHINode *PN, const DataLayout *DL) { // If this pointer is always safe to load, or if we can prove that there is // already a load in the block, then we can move the load to the pred block. - if (InVal->isDereferenceablePointer() || + if (InVal->isDereferenceablePointer(DL) || isSafeToLoadUnconditionally(InVal, Pred->getTerminator(), MaxAlign, DL)) continue; diff --git a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp index b8529e1..62f2026 100644 --- a/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp +++ b/lib/Transforms/Scalar/SeparateConstOffsetFromGEP.cpp @@ -121,41 +121,75 @@ class ConstantOffsetExtractor { /// numeric value of the extracted constant offset (0 if failed), and a /// new index representing the remainder (equal to the original index minus /// the constant offset). - /// \p Idx The given GEP index - /// \p NewIdx The new index to replace - /// \p DL The datalayout of the module - /// \p IP Calculating the new index requires new instructions. IP indicates - /// where to insert them (typically right before the GEP). + /// \p Idx The given GEP index + /// \p NewIdx The new index to replace (output) + /// \p DL The datalayout of the module + /// \p GEP The given GEP static int64_t Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL, - Instruction *IP); + GetElementPtrInst *GEP); /// Looks for a constant offset without extracting it. The meaning of the /// arguments and the return value are the same as Extract. 
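/// For example, given a GEP index computing (a + (b + 5)), Extract returns 5
/// and sets NewIdx to a new expression computing (a + b), while Find merely
/// reports the 5 and leaves the IR unchanged.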
- static int64_t Find(Value *Idx, const DataLayout *DL); + static int64_t Find(Value *Idx, const DataLayout *DL, GetElementPtrInst *GEP); private: ConstantOffsetExtractor(const DataLayout *Layout, Instruction *InsertionPt) : DL(Layout), IP(InsertionPt) {} - /// Searches the expression that computes V for a constant offset. If the - /// searching is successful, update UserChain as a path from V to the constant - /// offset. - int64_t find(Value *V); - /// A helper function to look into both operands of a binary operator U. - /// \p IsSub Whether U is a sub operator. If so, we need to negate the - /// constant offset at some point. - int64_t findInEitherOperand(User *U, bool IsSub); - /// After finding the constant offset and how it is reached from the GEP - /// index, we build a new index which is a clone of the old one except the - /// constant offset is removed. For example, given (a + (b + 5)) and knowning - /// the constant offset is 5, this function returns (a + b). + /// Searches the expression that computes V for a non-zero constant C s.t. + /// V can be reassociated into the form V' + C. If the searching is + /// successful, returns C and update UserChain as a def-use chain from C to V; + /// otherwise, UserChain is empty. /// - /// We cannot simply change the constant to zero because the expression that - /// computes the index or its intermediate result may be used by others. - Value *rebuildWithoutConstantOffset(); - // A helper function for rebuildWithoutConstantOffset that rebuilds the direct - // user (U) of the constant offset (C). - Value *rebuildLeafWithoutConstantOffset(User *U, Value *C); - /// Returns a clone of U except the first occurrence of From with To. - Value *cloneAndReplace(User *U, Value *From, Value *To); + /// \p V The given expression + /// \p SignExtended Whether V will be sign-extended in the computation of the + /// GEP index + /// \p ZeroExtended Whether V will be zero-extended in the computation of the + /// GEP index + /// \p NonNegative Whether V is guaranteed to be non-negative. For example, + /// an index of an inbounds GEP is guaranteed to be + /// non-negative. Levaraging this, we can better split + /// inbounds GEPs. + APInt find(Value *V, bool SignExtended, bool ZeroExtended, bool NonNegative); + /// A helper function to look into both operands of a binary operator. + APInt findInEitherOperand(BinaryOperator *BO, bool SignExtended, + bool ZeroExtended); + /// After finding the constant offset C from the GEP index I, we build a new + /// index I' s.t. I' + C = I. This function builds and returns the new + /// index I' according to UserChain produced by function "find". + /// + /// The building conceptually takes two steps: + /// 1) iteratively distribute s/zext towards the leaves of the expression tree + /// that computes I + /// 2) reassociate the expression tree to the form I' + C. + /// + /// For example, to extract the 5 from sext(a + (b + 5)), we first distribute + /// sext to a, b and 5 so that we have + /// sext(a) + (sext(b) + 5). + /// Then, we reassociate it to + /// (sext(a) + sext(b)) + 5. + /// Given this form, we know I' is sext(a) + sext(b). + Value *rebuildWithoutConstOffset(); + /// After the first step of rebuilding the GEP index without the constant + /// offset, distribute s/zext to the operands of all operators in UserChain. + /// e.g., zext(sext(a + (b + 5)) (assuming no overflow) => + /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5))). 
+ /// + /// The function also updates UserChain to point to new subexpressions after + /// distributing s/zext. e.g., the old UserChain of the above example is + /// 5 -> b + 5 -> a + (b + 5) -> sext(...) -> zext(sext(...)), + /// and the new UserChain is + /// zext(sext(5)) -> zext(sext(b)) + zext(sext(5)) -> + /// zext(sext(a)) + (zext(sext(b)) + zext(sext(5)) + /// + /// \p ChainIndex The index to UserChain. ChainIndex is initially + /// UserChain.size() - 1, and is decremented during + /// the recursion. + Value *distributeExtsAndCloneChain(unsigned ChainIndex); + /// Reassociates the GEP index to the form I' + C and returns I'. + Value *removeConstOffset(unsigned ChainIndex); + /// A helper function to apply ExtInsts, a list of s/zext, to value V. + /// e.g., if ExtInsts = [sext i32 to i64, zext i16 to i32], this function + /// returns "sext i32 (zext i16 V to i32) to i64". + Value *applyExts(Value *V); /// Returns true if LHS and RHS have no bits in common, i.e., LHS | RHS == 0. bool NoCommonBits(Value *LHS, Value *RHS) const; @@ -163,20 +197,26 @@ class ConstantOffsetExtractor { /// \p KnownOne Mask of all bits that are known to be one. /// \p KnownZero Mask of all bits that are known to be zero. void ComputeKnownBits(Value *V, APInt &KnownOne, APInt &KnownZero) const; - /// Finds the first use of Used in U. Returns -1 if not found. - static unsigned FindFirstUse(User *U, Value *Used); - /// Returns whether OPC (sext or zext) can be distributed to the operands of - /// BO. e.g., sext can be distributed to the operands of an "add nsw" because - /// sext (add nsw a, b) == add nsw (sext a), (sext b). - static bool Distributable(unsigned OPC, BinaryOperator *BO); + /// A helper function that returns whether we can trace into the operands + /// of binary operator BO for a constant offset. + /// + /// \p SignExtended Whether BO is surrounded by sext + /// \p ZeroExtended Whether BO is surrounded by zext + /// \p NonNegative Whether BO is known to be non-negative, e.g., an in-bound + /// array index. + bool CanTraceInto(bool SignExtended, bool ZeroExtended, BinaryOperator *BO, + bool NonNegative); /// The path from the constant offset to the old GEP index. e.g., if the GEP /// index is "a * b + (c + 5)". After running function find, UserChain[0] will /// be the constant 5, UserChain[1] will be the subexpression "c + 5", and /// UserChain[2] will be the entire expression "a * b + (c + 5)". /// - /// This path helps rebuildWithoutConstantOffset rebuild the new GEP index. + /// This path helps to rebuild the new GEP index. SmallVector<User *, 8> UserChain; + /// A data structure used in rebuildWithoutConstOffset. Contains all + /// sext/zext instructions along UserChain. + SmallVector<CastInst *, 16> ExtInsts; /// The data layout of the module. Used in ComputeKnownBits. const DataLayout *DL; Instruction *IP; /// Insertion position of cloned instructions. @@ -196,6 +236,15 @@ class SeparateConstOffsetFromGEP : public FunctionPass { AU.addRequired<DataLayoutPass>(); AU.addRequired<TargetTransformInfo>(); } + + bool doInitialization(Module &M) override { + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + if (DLP == nullptr) + report_fatal_error("data layout missing"); + DL = &DLP->getDataLayout(); + return false; + } + bool runOnFunction(Function &F) override; private: @@ -206,8 +255,42 @@ class SeparateConstOffsetFromGEP : public FunctionPass { /// function only inspects the GEP without changing it. 
The output /// NeedsExtraction indicates whether we can extract a non-zero constant /// offset from any index. - int64_t accumulateByteOffset(GetElementPtrInst *GEP, const DataLayout *DL, - bool &NeedsExtraction); + int64_t accumulateByteOffset(GetElementPtrInst *GEP, bool &NeedsExtraction); + /// Canonicalize array indices to pointer-size integers. This helps to + /// simplify the logic of splitting a GEP. For example, if a + b is a + /// pointer-size integer, we have + /// gep base, a + b = gep (gep base, a), b + /// However, this equality may not hold if the size of a + b is smaller than + /// the pointer size, because LLVM conceptually sign-extends GEP indices to + /// pointer size before computing the address + /// (http://llvm.org/docs/LangRef.html#id181). + /// + /// This canonicalization is very likely already done in clang and + /// instcombine. Therefore, the program will probably remain the same. + /// + /// Returns true if the module changes. + /// + /// Verified in @i32_add in split-gep.ll + bool canonicalizeArrayIndicesToPointerSize(GetElementPtrInst *GEP); + /// For each array index that is in the form of zext(a), convert it to sext(a) + /// if we can prove zext(a) <= max signed value of typeof(a). We prefer + /// sext(a) to zext(a), because in the special case where x + y >= 0 and + /// (x >= 0 or y >= 0), function CanTraceInto can split sext(x + y), + /// while no such case exists for zext(x + y). + /// + /// Note that + /// zext(x + y) = zext(x) + zext(y) + /// is wrong, e.g., + /// zext i32(UINT_MAX + 1) to i64 != + /// (zext i32 UINT_MAX to i64) + (zext i32 1 to i64) + /// + /// Returns true if the module changes. + /// + /// Verified in @inbounds_zext_add in split-gep.ll and @sum_of_array3 in + /// split-gep-and-gvn.ll + bool convertInBoundsZExtToSExt(GetElementPtrInst *GEP); + + const DataLayout *DL; }; } // anonymous namespace @@ -227,181 +310,272 @@ FunctionPass *llvm::createSeparateConstOffsetFromGEPPass() { return new SeparateConstOffsetFromGEP(); } -bool ConstantOffsetExtractor::Distributable(unsigned OPC, BinaryOperator *BO) { - assert(OPC == Instruction::SExt || OPC == Instruction::ZExt); +bool ConstantOffsetExtractor::CanTraceInto(bool SignExtended, + bool ZeroExtended, + BinaryOperator *BO, + bool NonNegative) { + // We only consider ADD, SUB and OR, because a non-zero constant found in + // expressions composed of these operations can be easily hoisted as a + // constant offset by reassociation. + if (BO->getOpcode() != Instruction::Add && + BO->getOpcode() != Instruction::Sub && + BO->getOpcode() != Instruction::Or) { + return false; + } + + Value *LHS = BO->getOperand(0), *RHS = BO->getOperand(1); + // Do not trace into "or" unless it is equivalent to "add". If LHS and RHS + // don't have common bits, (LHS | RHS) is equivalent to (LHS + RHS). + if (BO->getOpcode() == Instruction::Or && !NoCommonBits(LHS, RHS)) + return false; + + // In addition, tracing into BO requires that its surrounding s/zext (if + // any) is distributable to both operands. + // + // Suppose BO = A op B. + // SignExtended | ZeroExtended | Distributable? 
+ // --------------+--------------+---------------------------------- + // 0 | 0 | true because no s/zext exists + // 0 | 1 | zext(BO) == zext(A) op zext(B) + // 1 | 0 | sext(BO) == sext(A) op sext(B) + // 1 | 1 | zext(sext(BO)) == + // | | zext(sext(A)) op zext(sext(B)) + if (BO->getOpcode() == Instruction::Add && !ZeroExtended && NonNegative) { + // If a + b >= 0 and (a >= 0 or b >= 0), then + // sext(a + b) = sext(a) + sext(b) + // even if the addition is not marked nsw. + // + // Leveraging this invarient, we can trace into an sext'ed inbound GEP + // index if the constant offset is non-negative. + // + // Verified in @sext_add in split-gep.ll. + if (ConstantInt *ConstLHS = dyn_cast<ConstantInt>(LHS)) { + if (!ConstLHS->isNegative()) + return true; + } + if (ConstantInt *ConstRHS = dyn_cast<ConstantInt>(RHS)) { + if (!ConstRHS->isNegative()) + return true; + } + } // sext (add/sub nsw A, B) == add/sub nsw (sext A), (sext B) // zext (add/sub nuw A, B) == add/sub nuw (zext A), (zext B) if (BO->getOpcode() == Instruction::Add || BO->getOpcode() == Instruction::Sub) { - return (OPC == Instruction::SExt && BO->hasNoSignedWrap()) || - (OPC == Instruction::ZExt && BO->hasNoUnsignedWrap()); + if (SignExtended && !BO->hasNoSignedWrap()) + return false; + if (ZeroExtended && !BO->hasNoUnsignedWrap()) + return false; } - // sext/zext (and/or/xor A, B) == and/or/xor (sext/zext A), (sext/zext B) - // -instcombine also leverages this invariant to do the reverse - // transformation to reduce integer casts. - return BO->getOpcode() == Instruction::And || - BO->getOpcode() == Instruction::Or || - BO->getOpcode() == Instruction::Xor; + return true; } -int64_t ConstantOffsetExtractor::findInEitherOperand(User *U, bool IsSub) { - assert(U->getNumOperands() == 2); - int64_t ConstantOffset = find(U->getOperand(0)); +APInt ConstantOffsetExtractor::findInEitherOperand(BinaryOperator *BO, + bool SignExtended, + bool ZeroExtended) { + // BO being non-negative does not shed light on whether its operands are + // non-negative. Clear the NonNegative flag here. + APInt ConstantOffset = find(BO->getOperand(0), SignExtended, ZeroExtended, + /* NonNegative */ false); // If we found a constant offset in the left operand, stop and return that. // This shortcut might cause us to miss opportunities of combining the // constant offsets in both operands, e.g., (a + 4) + (b + 5) => (a + b) + 9. // However, such cases are probably already handled by -instcombine, // given this pass runs after the standard optimizations. if (ConstantOffset != 0) return ConstantOffset; - ConstantOffset = find(U->getOperand(1)); + ConstantOffset = find(BO->getOperand(1), SignExtended, ZeroExtended, + /* NonNegative */ false); // If U is a sub operator, negate the constant offset found in the right // operand. - return IsSub ? -ConstantOffset : ConstantOffset; + if (BO->getOpcode() == Instruction::Sub) + ConstantOffset = -ConstantOffset; + return ConstantOffset; } -int64_t ConstantOffsetExtractor::find(Value *V) { - // TODO(jingyue): We can even trace into integer/pointer casts, such as +APInt ConstantOffsetExtractor::find(Value *V, bool SignExtended, + bool ZeroExtended, bool NonNegative) { + // TODO(jingyue): We could trace into integer/pointer casts, such as // inttoptr, ptrtoint, bitcast, and addrspacecast. We choose to handle only // integers because it gives good enough results for our benchmarks. 
- assert(V->getType()->isIntegerTy()); + unsigned BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); + // We cannot do much with Values that are not a User, such as an Argument. User *U = dyn_cast<User>(V); - // We cannot do much with Values that are not a User, such as BasicBlock and - // MDNode. - if (U == nullptr) return 0; + if (U == nullptr) return APInt(BitWidth, 0); - int64_t ConstantOffset = 0; - if (ConstantInt *CI = dyn_cast<ConstantInt>(U)) { + APInt ConstantOffset(BitWidth, 0); + if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) { // Hooray, we found it! - ConstantOffset = CI->getSExtValue(); - } else if (Operator *O = dyn_cast<Operator>(U)) { - // The GEP index may be more complicated than a simple addition of a - // varaible and a constant. Therefore, we trace into subexpressions for more - // hoisting opportunities. - switch (O->getOpcode()) { - case Instruction::Add: { - ConstantOffset = findInEitherOperand(U, false); - break; - } - case Instruction::Sub: { - ConstantOffset = findInEitherOperand(U, true); - break; - } - case Instruction::Or: { - // If LHS and RHS don't have common bits, (LHS | RHS) is equivalent to - // (LHS + RHS). - if (NoCommonBits(U->getOperand(0), U->getOperand(1))) - ConstantOffset = findInEitherOperand(U, false); - break; - } - case Instruction::SExt: - case Instruction::ZExt: { - // We trace into sext/zext if the operator can be distributed to its - // operand. e.g., we can transform into "sext (add nsw a, 5)" and - // extract constant 5, because - // sext (add nsw a, 5) == add nsw (sext a), 5 - if (BinaryOperator *BO = dyn_cast<BinaryOperator>(U->getOperand(0))) { - if (Distributable(O->getOpcode(), BO)) - ConstantOffset = find(U->getOperand(0)); - } - break; - } + ConstantOffset = CI->getValue(); + } else if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V)) { + // Trace into subexpressions for more hoisting opportunities. + if (CanTraceInto(SignExtended, ZeroExtended, BO, NonNegative)) { + ConstantOffset = findInEitherOperand(BO, SignExtended, ZeroExtended); } + } else if (isa<SExtInst>(V)) { + ConstantOffset = find(U->getOperand(0), /* SignExtended */ true, + ZeroExtended, NonNegative).sext(BitWidth); + } else if (isa<ZExtInst>(V)) { + // As an optimization, we can clear the SignExtended flag because + // sext(zext(a)) = zext(a). Verified in @sext_zext in split-gep.ll. + // + // Clear the NonNegative flag, because zext(a) >= 0 does not imply a >= 0. + ConstantOffset = + find(U->getOperand(0), /* SignExtended */ false, + /* ZeroExtended */ true, /* NonNegative */ false).zext(BitWidth); } - // If we found a non-zero constant offset, adds it to the path for future - // transformation (rebuildWithoutConstantOffset). Zero is a valid constant - // offset, but doesn't help this optimization. + + // If we found a non-zero constant offset, add it to the path for + // rebuildWithoutConstOffset. Zero is a valid constant offset, but doesn't + // help this optimization. if (ConstantOffset != 0) UserChain.push_back(U); return ConstantOffset; } -unsigned ConstantOffsetExtractor::FindFirstUse(User *U, Value *Used) { - for (unsigned I = 0, E = U->getNumOperands(); I < E; ++I) { - if (U->getOperand(I) == Used) - return I; +Value *ConstantOffsetExtractor::applyExts(Value *V) { + Value *Current = V; + // ExtInsts is built in the use-def order. Therefore, we apply them to V + // in the reversed order. 
+  for (auto I = ExtInsts.rbegin(), E = ExtInsts.rend(); I != E; ++I) {
+    if (Constant *C = dyn_cast<Constant>(Current)) {
+      // If Current is a constant, apply s/zext using ConstantExpr::getCast.
+      // ConstantExpr::getCast emits a ConstantInt if C is a ConstantInt.
+      Current = ConstantExpr::getCast((*I)->getOpcode(), C, (*I)->getType());
+    } else {
+      Instruction *Ext = (*I)->clone();
+      Ext->setOperand(0, Current);
+      Ext->insertBefore(IP);
+      Current = Ext;
+    }
   }
-  return -1;
+  return Current;
 }

-Value *ConstantOffsetExtractor::cloneAndReplace(User *U, Value *From,
-                                                Value *To) {
-  // Finds in U the first use of From. It is safe to ignore future occurrences
-  // of From, because findInEitherOperand similarly stops searching the right
-  // operand when the first operand has a non-zero constant offset.
-  unsigned OpNo = FindFirstUse(U, From);
-  assert(OpNo != (unsigned)-1 && "UserChain wasn't built correctly");
-
-  // ConstantOffsetExtractor::find only follows Operators (i.e., Instructions
-  // and ConstantExprs). Therefore, U is either an Instruction or a
-  // ConstantExpr.
-  if (Instruction *I = dyn_cast<Instruction>(U)) {
-    Instruction *Clone = I->clone();
-    Clone->setOperand(OpNo, To);
-    Clone->insertBefore(IP);
-    return Clone;
+Value *ConstantOffsetExtractor::rebuildWithoutConstOffset() {
+  distributeExtsAndCloneChain(UserChain.size() - 1);
+  // Remove all nullptrs (which used to be s/zext) from UserChain.
+  unsigned NewSize = 0;
+  for (auto I = UserChain.begin(), E = UserChain.end(); I != E; ++I) {
+    if (*I != nullptr) {
+      UserChain[NewSize] = *I;
+      NewSize++;
+    }
   }
-  // cast<Constant>(To) is safe because a ConstantExpr only uses Constants.
-  return cast<ConstantExpr>(U)
-      ->getWithOperandReplaced(OpNo, cast<Constant>(To));
+  UserChain.resize(NewSize);
+  return removeConstOffset(UserChain.size() - 1);
 }

-Value *ConstantOffsetExtractor::rebuildLeafWithoutConstantOffset(User *U,
-                                                                 Value *C) {
-  assert(U->getNumOperands() <= 2 &&
-         "We didn't trace into any operator with more than 2 operands");
-  // If U has only one operand which is the constant offset, removing the
-  // constant offset leaves U as a null value.
-  if (U->getNumOperands() == 1)
-    return Constant::getNullValue(U->getType());
-
-  // U->getNumOperands() == 2
-  unsigned OpNo = FindFirstUse(U, C); // U->getOperand(OpNo) == C
-  assert(OpNo < 2 && "UserChain wasn't built correctly");
-  Value *TheOther = U->getOperand(1 - OpNo); // The other operand of U
-  // If U = C - X, removing C makes U = -X; otherwise U will simply be X.
-  if (!isa<SubOperator>(U) || OpNo == 1)
-    return TheOther;
-  if (isa<ConstantExpr>(U))
-    return ConstantExpr::getNeg(cast<Constant>(TheOther));
-  return BinaryOperator::CreateNeg(TheOther, "", IP);
+Value *
+ConstantOffsetExtractor::distributeExtsAndCloneChain(unsigned ChainIndex) {
+  User *U = UserChain[ChainIndex];
+  if (ChainIndex == 0) {
+    assert(isa<ConstantInt>(U));
+    // If U is a ConstantInt, applyExts will return a ConstantInt as well.
+    return UserChain[ChainIndex] = cast<ConstantInt>(applyExts(U));
+  }
+
+  if (CastInst *Cast = dyn_cast<CastInst>(U)) {
+    assert((isa<SExtInst>(Cast) || isa<ZExtInst>(Cast)) &&
+           "We only traced into two types of CastInst: sext and zext");
+    ExtInsts.push_back(Cast);
+    UserChain[ChainIndex] = nullptr;
+    return distributeExtsAndCloneChain(ChainIndex - 1);
+  }
+
+  // Function find only traces into BinaryOperator and CastInst.
+  BinaryOperator *BO = cast<BinaryOperator>(U);
+  // OpNo = which operand of BO is UserChain[ChainIndex - 1]
+  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
+  Value *TheOther = applyExts(BO->getOperand(1 - OpNo));
+  Value *NextInChain = distributeExtsAndCloneChain(ChainIndex - 1);
+
+  BinaryOperator *NewBO = nullptr;
+  if (OpNo == 0) {
+    NewBO = BinaryOperator::Create(BO->getOpcode(), NextInChain, TheOther,
+                                   BO->getName(), IP);
+  } else {
+    NewBO = BinaryOperator::Create(BO->getOpcode(), TheOther, NextInChain,
+                                   BO->getName(), IP);
+  }
+  return UserChain[ChainIndex] = NewBO;
 }

-Value *ConstantOffsetExtractor::rebuildWithoutConstantOffset() {
-  assert(UserChain.size() > 0 && "you at least found a constant, right?");
-  // Start with the constant and go up through UserChain, each time building a
-  // clone of the subexpression but with the constant removed.
-  // e.g., to build a clone of (a + (b + (c + 5)) but with the 5 removed, we
-  // first c, then (b + c), and finally (a + (b + c)).
-  //
-  // Fast path: if the GEP index is a constant, simply returns 0.
-  if (UserChain.size() == 1)
-    return ConstantInt::get(UserChain[0]->getType(), 0);
-
-  Value *Remainder =
-      rebuildLeafWithoutConstantOffset(UserChain[1], UserChain[0]);
-  for (size_t I = 2; I < UserChain.size(); ++I)
-    Remainder = cloneAndReplace(UserChain[I], UserChain[I - 1], Remainder);
-  return Remainder;
+Value *ConstantOffsetExtractor::removeConstOffset(unsigned ChainIndex) {
+  if (ChainIndex == 0) {
+    assert(isa<ConstantInt>(UserChain[ChainIndex]));
+    return ConstantInt::getNullValue(UserChain[ChainIndex]->getType());
+  }
+
+  BinaryOperator *BO = cast<BinaryOperator>(UserChain[ChainIndex]);
+  unsigned OpNo = (BO->getOperand(0) == UserChain[ChainIndex - 1] ? 0 : 1);
+  assert(BO->getOperand(OpNo) == UserChain[ChainIndex - 1]);
+  Value *NextInChain = removeConstOffset(ChainIndex - 1);
+  Value *TheOther = BO->getOperand(1 - OpNo);
+
+  // If NextInChain is 0 and not the LHS of a sub, we can simplify the
+  // sub-expression to be just TheOther.
+  if (ConstantInt *CI = dyn_cast<ConstantInt>(NextInChain)) {
+    if (CI->isZero() && !(BO->getOpcode() == Instruction::Sub && OpNo == 0))
+      return TheOther;
+  }
+
+  if (BO->getOpcode() == Instruction::Or) {
+    // Rebuild "or" as "add", because "or" may be invalid for the new
+    // expression.
+    //
+    // For instance, given
+    //   a | (b + 5) where a and b + 5 have no common bits,
+    // we can extract 5 as the constant offset.
+    //
+    // However, reusing the "or" in the new index would give us
+    //   (a | b) + 5
+    // which does not equal a | (b + 5).
+    //
+    // Replacing the "or" with "add" is fine, because
+    //   a | (b + 5) = a + (b + 5) = (a + b) + 5
+    return BinaryOperator::CreateAdd(BO->getOperand(0), BO->getOperand(1),
+                                     BO->getName(), IP);
+  }
+
+  // We can reuse BO in this case, because the new expression shares the
+  // same instruction type and BO is used at most once.
+  assert(BO->getNumUses() <= 1 &&
+         "distributeExtsAndCloneChain clones each BinaryOperator in "
+         "UserChain, so no one should be used more than once");
+  BO->setOperand(OpNo, NextInChain);
+  BO->setHasNoSignedWrap(false);
+  BO->setHasNoUnsignedWrap(false);
+  // Make sure it appears after all instructions we've inserted so far.
+ BO->moveBefore(IP); + return BO; } int64_t ConstantOffsetExtractor::Extract(Value *Idx, Value *&NewIdx, const DataLayout *DL, - Instruction *IP) { - ConstantOffsetExtractor Extractor(DL, IP); + GetElementPtrInst *GEP) { + ConstantOffsetExtractor Extractor(DL, GEP); // Find a non-zero constant offset first. - int64_t ConstantOffset = Extractor.find(Idx); - if (ConstantOffset == 0) - return 0; - // Then rebuild a new index with the constant removed. - NewIdx = Extractor.rebuildWithoutConstantOffset(); - return ConstantOffset; + APInt ConstantOffset = + Extractor.find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, + GEP->isInBounds()); + if (ConstantOffset != 0) { + // Separates the constant offset from the GEP index. + NewIdx = Extractor.rebuildWithoutConstOffset(); + } + return ConstantOffset.getSExtValue(); } -int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL) { - return ConstantOffsetExtractor(DL, nullptr).find(Idx); +int64_t ConstantOffsetExtractor::Find(Value *Idx, const DataLayout *DL, + GetElementPtrInst *GEP) { + // If Idx is an index of an inbound GEP, Idx is guaranteed to be non-negative. + return ConstantOffsetExtractor(DL, GEP) + .find(Idx, /* SignExtended */ false, /* ZeroExtended */ false, + GEP->isInBounds()) + .getSExtValue(); } void ConstantOffsetExtractor::ComputeKnownBits(Value *V, APInt &KnownOne, @@ -421,8 +595,64 @@ bool ConstantOffsetExtractor::NoCommonBits(Value *LHS, Value *RHS) const { return (LHSKnownZero | RHSKnownZero).isAllOnesValue(); } -int64_t SeparateConstOffsetFromGEP::accumulateByteOffset( - GetElementPtrInst *GEP, const DataLayout *DL, bool &NeedsExtraction) { +bool SeparateConstOffsetFromGEP::canonicalizeArrayIndicesToPointerSize( + GetElementPtrInst *GEP) { + bool Changed = false; + Type *IntPtrTy = DL->getIntPtrType(GEP->getType()); + gep_type_iterator GTI = gep_type_begin(*GEP); + for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); + I != E; ++I, ++GTI) { + // Skip struct member indices which must be i32. + if (isa<SequentialType>(*GTI)) { + if ((*I)->getType() != IntPtrTy) { + *I = CastInst::CreateIntegerCast(*I, IntPtrTy, true, "idxprom", GEP); + Changed = true; + } + } + } + return Changed; +} + +bool +SeparateConstOffsetFromGEP::convertInBoundsZExtToSExt(GetElementPtrInst *GEP) { + if (!GEP->isInBounds()) + return false; + + // TODO: consider alloca + GlobalVariable *UnderlyingObject = + dyn_cast<GlobalVariable>(GEP->getPointerOperand()); + if (UnderlyingObject == nullptr) + return false; + + uint64_t ObjectSize = + DL->getTypeAllocSize(UnderlyingObject->getType()->getElementType()); + gep_type_iterator GTI = gep_type_begin(*GEP); + bool Changed = false; + for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E; + ++I, ++GTI) { + if (isa<SequentialType>(*GTI)) { + if (ZExtInst *Extended = dyn_cast<ZExtInst>(*I)) { + unsigned SrcBitWidth = + cast<IntegerType>(Extended->getSrcTy())->getBitWidth(); + // For GEP operand zext(a), if a <= max signed value of typeof(a), then + // the sign bit of a is zero and sext(a) = zext(a). Because the GEP is + // in bounds, we know a <= ObjectSize, so the condition can be reduced + // to ObjectSize <= max signed value of typeof(a). 
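// A concrete instance (sizes illustrative): for an i8 index zext'ed into an
// inbounds GEP of a 100-byte global, SrcBitWidth is 8 and the signed maximum
// is 127; since 100 <= 127 the sign bit of the index must be zero, and the
// zext below can be rewritten as a sext.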
+ if (ObjectSize <= + APInt::getSignedMaxValue(SrcBitWidth).getZExtValue()) { + *I = new SExtInst(Extended->getOperand(0), Extended->getType(), + Extended->getName(), GEP); + Changed = true; + } + } + } + } + return Changed; +} + +int64_t +SeparateConstOffsetFromGEP::accumulateByteOffset(GetElementPtrInst *GEP, + bool &NeedsExtraction) { NeedsExtraction = false; int64_t AccumulativeByteOffset = 0; gep_type_iterator GTI = gep_type_begin(*GEP); @@ -430,7 +660,7 @@ int64_t SeparateConstOffsetFromGEP::accumulateByteOffset( if (isa<SequentialType>(*GTI)) { // Tries to extract a constant offset from this GEP index. int64_t ConstantOffset = - ConstantOffsetExtractor::Find(GEP->getOperand(I), DL); + ConstantOffsetExtractor::Find(GEP->getOperand(I), DL, GEP); if (ConstantOffset != 0) { NeedsExtraction = true; // A GEP may have multiple indices. We accumulate the extracted @@ -455,31 +685,11 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { return false; bool Changed = false; + Changed |= canonicalizeArrayIndicesToPointerSize(GEP); + Changed |= convertInBoundsZExtToSExt(GEP); - // Shortcuts integer casts. Eliminating these explicit casts can make - // subsequent optimizations more obvious: ConstantOffsetExtractor needn't - // trace into these casts. - if (GEP->isInBounds()) { - // Doing this to inbounds GEPs is safe because their indices are guaranteed - // to be non-negative and in bounds. - gep_type_iterator GTI = gep_type_begin(*GEP); - for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { - if (isa<SequentialType>(*GTI)) { - if (Operator *O = dyn_cast<Operator>(GEP->getOperand(I))) { - if (O->getOpcode() == Instruction::SExt || - O->getOpcode() == Instruction::ZExt) { - GEP->setOperand(I, O->getOperand(0)); - Changed = true; - } - } - } - } - } - - const DataLayout *DL = &getAnalysis<DataLayoutPass>().getDataLayout(); bool NeedsExtraction; - int64_t AccumulativeByteOffset = - accumulateByteOffset(GEP, DL, NeedsExtraction); + int64_t AccumulativeByteOffset = accumulateByteOffset(GEP, NeedsExtraction); if (!NeedsExtraction) return Changed; @@ -506,30 +716,29 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) { assert(NewIdx != nullptr && "ConstantOffset != 0 implies NewIdx is set"); GEP->setOperand(I, NewIdx); - // Clear the inbounds attribute because the new index may be off-bound. - // e.g., - // - // b = add i64 a, 5 - // addr = gep inbounds float* p, i64 b - // - // is transformed to: - // - // addr2 = gep float* p, i64 a - // addr = gep float* addr2, i64 5 - // - // If a is -4, although the old index b is in bounds, the new index a is - // off-bound. http://llvm.org/docs/LangRef.html#id181 says "if the - // inbounds keyword is not present, the offsets are added to the base - // address with silently-wrapping two's complement arithmetic". - // Therefore, the final code will be a semantically equivalent. - // - // TODO(jingyue): do some range analysis to keep as many inbounds as - // possible. GEPs with inbounds are more friendly to alias analysis. - GEP->setIsInBounds(false); - Changed = true; } } } + // Clear the inbounds attribute because the new index may be off-bound. + // e.g., + // + // b = add i64 a, 5 + // addr = gep inbounds float* p, i64 b + // + // is transformed to: + // + // addr2 = gep float* p, i64 a + // addr = gep float* addr2, i64 5 + // + // If a is -4, although the old index b is in bounds, the new index a is + // off-bound. 
http://llvm.org/docs/LangRef.html#id181 says "if the
+ // inbounds keyword is not present, the offsets are added to the base
+ // address with silently-wrapping two's complement arithmetic".
+ // Therefore, the final code will be semantically equivalent.
+ //
+ // TODO(jingyue): do some range analysis to keep as many inbounds as
+ // possible. GEPs with inbounds are more friendly to alias analysis.
+ GEP->setIsInBounds(false);
// Offsets the base with the accumulative byte offset.
//
@@ -562,9 +771,9 @@ bool SeparateConstOffsetFromGEP::splitGEP(GetElementPtrInst *GEP) {
Instruction *NewGEP = GEP->clone();
NewGEP->insertBefore(GEP);
- Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
uint64_t ElementTypeSizeOfGEP =
DL->getTypeAllocSize(GEP->getType()->getElementType());
+ Type *IntPtrTy = DL->getIntPtrType(GEP->getType());
if (AccumulativeByteOffset % ElementTypeSizeOfGEP == 0) {
// Very likely. As long as %gep is naturally aligned, the byte offset we
// extracted should be a multiple of sizeof(*%gep).
diff --git a/lib/Transforms/Scalar/Sink.cpp b/lib/Transforms/Scalar/Sink.cpp
index 482c33a..7348c45 100644
--- a/lib/Transforms/Scalar/Sink.cpp
+++ b/lib/Transforms/Scalar/Sink.cpp
@@ -18,6 +18,7 @@
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
+#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
@@ -34,6 +35,7 @@ namespace {
DominatorTree *DT;
LoopInfo *LI;
AliasAnalysis *AA;
+ const DataLayout *DL;
public:
static char ID; // Pass identification
@@ -98,6 +100,8 @@ bool Sinking::runOnFunction(Function &F) {
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
LI = &getAnalysis<LoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
+ DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
+ DL = DLP ? &DLP->getDataLayout() : nullptr;
bool MadeChange, EverMadeChange = false;
@@ -193,7 +197,7 @@ bool Sinking::IsAcceptableTarget(Instruction *Inst,
if (SuccToSinkTo->getUniquePredecessor() != Inst->getParent()) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
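The claim above that dropping `inbounds` preserves semantics can be checked directly: without `inbounds`, GEP offsets are defined to wrap, so splitting an index is plain modular arithmetic. A small self-contained demonstration of the a = -4 example from the comment, with an element size of 4 assumed for illustration:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Address arithmetic without `inbounds` is two's-complement wrapping,
      // so splitting gep p, (a + 5) into gep (gep p, a), 5 is value-equal
      // even when the intermediate index a is out of bounds (here a == -4).
      uint64_t Base = 0x1000;
      int64_t A = -4;
      uint64_t Direct = Base + uint64_t(A + 5) * 4;               // gep p, (a+5)
      uint64_t Split = (Base + uint64_t(A) * 4) + uint64_t(5) * 4; // two geps
      assert(Direct == Split); // both are 0x1004; the middle value wrapped
      return 0;
    }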
- if (!isSafeToSpeculativelyExecute(Inst)) + if (!isSafeToSpeculativelyExecute(Inst, DL)) return false; // We don't want to sink across a critical edge if we don't dominate the diff --git a/lib/Transforms/Utils/Android.mk b/lib/Transforms/Utils/Android.mk index cbd8dd0..2390027 100644 --- a/lib/Transforms/Utils/Android.mk +++ b/lib/Transforms/Utils/Android.mk @@ -33,7 +33,6 @@ transforms_utils_SRC_FILES := \ SimplifyIndVar.cpp \ SimplifyInstructions.cpp \ SimplifyLibCalls.cpp \ - SpecialCaseList.cpp \ UnifyFunctionExitNodes.cpp \ Utils.cpp \ ValueMapper.cpp diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index e10ca90..fcf548f 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -33,7 +33,6 @@ add_llvm_library(LLVMTransformUtils SimplifyIndVar.cpp SimplifyInstructions.cpp SimplifyLibCalls.cpp - SpecialCaseList.cpp UnifyFunctionExitNodes.cpp Utils.cpp ValueMapper.cpp diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index eb67db1..3f75b3e 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -107,7 +107,7 @@ Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); I != E; ++I) { GlobalAlias *GA = cast<GlobalAlias>(VMap[I]); - if (const GlobalObject *C = I->getAliasee()) + if (const Constant *C = I->getAliasee()) GA->setAliasee(cast<GlobalObject>(MapValue(C, VMap))); } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index e01d0c3..f0a9f2b 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -189,6 +189,7 @@ static void HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), Split, Invoke.getOuterResumeDest(), InvokeArgs, CI->getName(), BB); + II->setDebugLoc(CI->getDebugLoc()); II->setCallingConv(CI->getCallingConv()); II->setAttributes(CI->getAttributes()); @@ -466,7 +467,13 @@ static void fixupLineNumbers(Function *Fn, Function::iterator FI, for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; ++BI) { DebugLoc DL = BI->getDebugLoc(); - if (!DL.isUnknown()) { + if (DL.isUnknown()) { + // If the inlined instruction has no line number, make it look as if it + // originates from the call location. This is important for + // ((__always_inline__, __nodebug__)) functions which must use caller + // location for all instructions in their function body. + BI->setDebugLoc(TheCallDL); + } else { BI->setDebugLoc(updateInlinedAtInfo(DL, TheCallDL, BI->getContext())); if (DbgValueInst *DVI = dyn_cast<DbgValueInst>(BI)) { LLVMContext &Ctx = BI->getContext(); diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp index f7787da..ef42291 100644 --- a/lib/Transforms/Utils/LoopSimplify.cpp +++ b/lib/Transforms/Utils/LoopSimplify.cpp @@ -50,6 +50,7 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/Instructions.h" @@ -473,7 +474,8 @@ static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader, /// explicit if they accepted the analysis directly and then updated it. 
static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, - ScalarEvolution *SE, Pass *PP) { + ScalarEvolution *SE, Pass *PP, + const DataLayout *DL) { bool Changed = false; ReprocessLoop: @@ -672,7 +674,7 @@ ReprocessLoop: // The block has now been cleared of all instructions except for // a comparison and a conditional branch. SimplifyCFG may be able // to fold it now. - if (!FoldBranchToCommonDest(BI)) continue; + if (!FoldBranchToCommonDest(BI, DL)) continue; // Success. The block is now dead, so remove it from the loop, // update the dominator tree and delete it. @@ -709,7 +711,8 @@ ReprocessLoop: } bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, - AliasAnalysis *AA, ScalarEvolution *SE) { + AliasAnalysis *AA, ScalarEvolution *SE, + const DataLayout *DL) { bool Changed = false; // Worklist maintains our depth-first queue of loops in this nest to process. @@ -726,7 +729,8 @@ bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, Pass *PP, } while (!Worklist.empty()) - Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, SE, PP); + Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, AA, DT, LI, + SE, PP, DL); return Changed; } @@ -744,6 +748,7 @@ namespace { DominatorTree *DT; LoopInfo *LI; ScalarEvolution *SE; + const DataLayout *DL; bool runOnFunction(Function &F) override; @@ -787,10 +792,12 @@ bool LoopSimplify::runOnFunction(Function &F) { LI = &getAnalysis<LoopInfo>(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); SE = getAnalysisIfAvailable<ScalarEvolution>(); + DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>(); + DL = DLP ? &DLP->getDataLayout() : nullptr; // Simplify each loop nest in the function. 
for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) - Changed |= simplifyLoop(*I, DT, LI, this, AA, SE); + Changed |= simplifyLoop(*I, DT, LI, this, AA, SE, DL); return Changed; } diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index d953e30..c86b82c 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/LLVMContext.h" @@ -242,21 +243,25 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, Twine("completely unrolled loop with ") + Twine(TripCount) + " iterations"); } else { + auto EmitDiag = [&](const Twine &T) { + emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, + "unrolled loop by a factor of " + Twine(Count) + + T); + }; + DEBUG(dbgs() << "UNROLLING loop %" << Header->getName() << " by " << Count); - Twine DiagMsg("unrolled loop by a factor of " + Twine(Count)); if (TripMultiple == 0 || BreakoutTrip != TripMultiple) { DEBUG(dbgs() << " with a breakout at trip " << BreakoutTrip); - DiagMsg.concat(" with a breakout at trip " + Twine(BreakoutTrip)); + EmitDiag(" with a breakout at trip " + Twine(BreakoutTrip)); } else if (TripMultiple != 1) { DEBUG(dbgs() << " with " << TripMultiple << " trips per branch"); - DiagMsg.concat(" with " + Twine(TripMultiple) + " trips per branch"); + EmitDiag(" with " + Twine(TripMultiple) + " trips per branch"); } else if (RuntimeTripCount) { DEBUG(dbgs() << " with run-time trip count"); - DiagMsg.concat(" with run-time trip count"); + EmitDiag(" with run-time trip count"); } DEBUG(dbgs() << "!\n"); - emitOptimizationRemark(Ctx, DEBUG_TYPE, *F, LoopLoc, DiagMsg); } bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); @@ -485,8 +490,19 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { + DataLayoutPass *DLP = PP->getAnalysisIfAvailable<DataLayoutPass>(); + const DataLayout *DL = DLP ? &DLP->getDataLayout() : nullptr; ScalarEvolution *SE = PP->getAnalysisIfAvailable<ScalarEvolution>(); - simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE); + simplifyLoop(OuterL, DT, LI, PP, /*AliasAnalysis*/ nullptr, SE, DL); + + // LCSSA must be performed on the outermost affected loop. The unrolled + // loop's last loop latch is guaranteed to be in the outermost loop after + // deleteLoopFromQueue updates LoopInfo. 
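The EmitDiag lambda introduced in the LoopUnroll hunk above replaces a pattern that silently dropped text: Twine::concat returns a new Twine rather than mutating the receiver, so the old `DiagMsg.concat(...)` calls discarded their results (and storing a Twine at all is fragile, since it may reference temporaries). A stand-in type makes the pitfall concrete; Msg below is a sketch, not the real llvm::Twine:

    #include <cassert>
    #include <string>

    // Immutable message type standing in for llvm::Twine: concat returns a
    // new value instead of mutating the receiver.
    struct Msg {
      std::string S;
      Msg concat(const std::string &T) const { return Msg{S + T}; }
    };

    int main() {
      Msg M{"unrolled loop by a factor of 4"};
      M.concat(" with run-time trip count"); // result silently dropped: the bug
      assert(M.S == "unrolled loop by a factor of 4");
      // The fix: don't accumulate into a stored message at all; build and
      // emit the full text at each branch, modeled here in one expression.
      Msg Full = M.concat(" with run-time trip count");
      assert(Full.S == "unrolled loop by a factor of 4 with run-time trip count");
      return 0;
    }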
+ Loop *LatchLoop = LI->getLoopFor(Latches.back());
+ if (!OuterL->contains(LatchLoop))
+ while (OuterL->getParentLoop() != LatchLoop)
+ OuterL = OuterL->getParentLoop();
+ formLCSSARecursively(*OuterL, *DT, SE);
}
}
diff --git a/lib/Transforms/Utils/LoopUnrollRuntime.cpp b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
index 5bef091..a96c46a 100644
--- a/lib/Transforms/Utils/LoopUnrollRuntime.cpp
+++ b/lib/Transforms/Utils/LoopUnrollRuntime.cpp
@@ -280,17 +280,17 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
SCEVExpander Expander(*SE, "loop-unroll");
Value *TripCount = Expander.expandCodeFor(TripCountSC, TripCountSC->getType(),
PreHeaderBR);
- Type *CountTy = TripCount->getType();
- BinaryOperator *ModVal =
- BinaryOperator::CreateURem(TripCount,
- ConstantInt::get(CountTy, Count),
- "xtraiter");
- ModVal->insertBefore(PreHeaderBR);
-
- // Check if for no extra iterations, then jump to unrolled loop
- Value *BranchVal = new ICmpInst(PreHeaderBR,
- ICmpInst::ICMP_NE, ModVal,
- ConstantInt::get(CountTy, 0), "lcmp");
+
+ IRBuilder<> B(PreHeaderBR);
+ Value *ModVal = B.CreateAnd(TripCount, Count - 1, "xtraiter");
+
+ // Check whether there are extra iterations; if not, jump to the unrolled
+ // loop. We also have to check that the trip count computation didn't
+ // overflow when adding one to the backedge-taken count.
+ Value *LCmp = B.CreateIsNotNull(ModVal, "lcmp.mod");
+ Value *OverflowCheck = B.CreateIsNull(TripCount, "lcmp.overflow");
+ Value *BranchVal = B.CreateOr(OverflowCheck, LCmp, "lcmp.or");
+
// Branch to either the extra iterations or the unrolled loop
// We will fix up the true branch label when adding loop body copies
BranchInst::Create(PEnd, PEnd, BranchVal, PreHeaderBR);
@@ -344,6 +344,7 @@ bool llvm::UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
}
// The comparison w/ the extra iteration value and branch
+ Type *CountTy = TripCount->getType();
Value *BranchVal = new ICmpInst(*NewBB, ICmpInst::ICMP_EQ, ModVal,
ConstantInt::get(CountTy, leftOverIters), "un.tmp");
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 9ef694c..eac693b 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -14,11 +14,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/CFG.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
@@ -58,16 +60,18 @@ namespace {
Low(low), High(high), BB(bb) { }
};
- typedef std::vector<CaseRange> CaseVector;
+ typedef std::vector<CaseRange> CaseVector;
typedef std::vector<CaseRange>::iterator CaseItr;
private:
void processSwitchInst(SwitchInst *SI);
- BasicBlock* switchConvert(CaseItr Begin, CaseItr End, Value* Val,
- BasicBlock* OrigBlock, BasicBlock* Default);
- BasicBlock* newLeafBlock(CaseRange& Leaf, Value* Val,
- BasicBlock* OrigBlock, BasicBlock* Default);
- unsigned Clusterify(CaseVector& Cases, SwitchInst *SI);
+ BasicBlock *switchConvert(CaseItr Begin, CaseItr End,
+ ConstantInt *LowerBound, ConstantInt *UpperBound,
+ Value *Val, BasicBlock *OrigBlock,
+ BasicBlock *Default);
+ BasicBlock *newLeafBlock(CaseRange &Leaf, Value *Val, BasicBlock *OrigBlock,
+ BasicBlock *Default);
+ unsigned Clusterify(CaseVector &Cases,
SwitchInst *SI); }; /// The comparison function for sorting the switch case values in the vector. @@ -129,15 +133,26 @@ static raw_ostream& operator<<(raw_ostream &O, // switchConvert - Convert the switch statement into a binary lookup of // the case values. The function recursively builds this tree. -// -BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, - Value* Val, BasicBlock* OrigBlock, - BasicBlock* Default) -{ +// LowerBound and UpperBound are used to keep track of the bounds for Val +// that have already been checked by a block emitted by one of the previous +// calls to switchConvert in the call stack. +BasicBlock *LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, + ConstantInt *LowerBound, + ConstantInt *UpperBound, Value *Val, + BasicBlock *OrigBlock, + BasicBlock *Default) { unsigned Size = End - Begin; - if (Size == 1) + if (Size == 1) { + // Check if the Case Range is perfectly squeezed in between + // already checked Upper and Lower bounds. If it is then we can avoid + // emitting the code that checks if the value actually falls in the range + // because the bounds already tell us so. + if (Begin->Low == LowerBound && Begin->High == UpperBound) { + return Begin->BB; + } return newLeafBlock(*Begin, Val, OrigBlock, Default); + } unsigned Mid = Size / 2; std::vector<CaseRange> LHS(Begin, Begin + Mid); @@ -145,15 +160,50 @@ BasicBlock* LowerSwitch::switchConvert(CaseItr Begin, CaseItr End, std::vector<CaseRange> RHS(Begin + Mid, End); DEBUG(dbgs() << "RHS: " << RHS << "\n"); - CaseRange& Pivot = *(Begin + Mid); - DEBUG(dbgs() << "Pivot ==> " - << cast<ConstantInt>(Pivot.Low)->getValue() << " -" - << cast<ConstantInt>(Pivot.High)->getValue() << "\n"); + CaseRange &Pivot = *(Begin + Mid); + DEBUG(dbgs() << "Pivot ==> " + << cast<ConstantInt>(Pivot.Low)->getValue() + << " -" << cast<ConstantInt>(Pivot.High)->getValue() << "\n"); + + // NewLowerBound here should never be the integer minimal value. + // This is because it is computed from a case range that is never + // the smallest, so there is always a case range that has at least + // a smaller value. + ConstantInt *NewLowerBound = cast<ConstantInt>(Pivot.Low); + ConstantInt *NewUpperBound; + + // If we don't have a Default block then it means that we can never + // have a value outside of a case range, so set the UpperBound to the highest + // value in the LHS part of the case ranges. + if (Default != nullptr) { + // Because NewLowerBound is never the smallest representable integer + // it is safe here to subtract one. 
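The LowerSwitch changes above thread LowerBound/UpperBound through the recursion so that a leaf whose case range is exactly pinned by the path's earlier comparisons can branch unconditionally. An interpreter-style sketch of the same binary lowering over sorted, disjoint case ranges; it returns the chosen target instead of emitting blocks, and it elides the unreachable-default refinement of NewUpperBound:

    #include <cstdint>
    #include <vector>

    struct CaseRange { int64_t Lo, Hi; int Target; };

    int lowerSwitch(const std::vector<CaseRange> &Cases, std::size_t Begin,
                    std::size_t End, const int64_t *LowerBound,
                    const int64_t *UpperBound, int64_t Val, int DefaultTarget) {
      if (End - Begin == 1) {
        const CaseRange &C = Cases[Begin];
        // The path's comparisons already pin Val inside [Lo, Hi]: no check.
        if (LowerBound && UpperBound && *LowerBound == C.Lo &&
            *UpperBound == C.Hi)
          return C.Target;
        return (Val >= C.Lo && Val <= C.Hi) ? C.Target : DefaultTarget;
      }
      std::size_t Mid = Begin + (End - Begin) / 2;
      int64_t Pivot = Cases[Mid].Lo; // first value of the right half
      int64_t NewUpper = Pivot - 1;  // safe: the pivot is never the minimum
      if (Val < Pivot)               // the "value is < pivot" node
        return lowerSwitch(Cases, Begin, Mid, LowerBound, &NewUpper, Val,
                           DefaultTarget);
      return lowerSwitch(Cases, Mid, End, &Pivot, UpperBound, Val,
                         DefaultTarget);
    }

For cases {0} and {1} with known initial bounds [0, 1], the value 1 reaches its leaf with both bounds equal to the case range, so the final comparison disappears.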
+ NewUpperBound = ConstantInt::get(NewLowerBound->getContext(),
+ NewLowerBound->getValue() - 1);
+ } else {
+ CaseItr LastLHS = LHS.begin() + LHS.size() - 1;
+ NewUpperBound = cast<ConstantInt>(LastLHS->High);
+ }
- BasicBlock* LBranch = switchConvert(LHS.begin(), LHS.end(), Val,
- OrigBlock, Default);
- BasicBlock* RBranch = switchConvert(RHS.begin(), RHS.end(), Val,
- OrigBlock, Default);
+ DEBUG(dbgs() << "LHS Bounds ==> ";
+ if (LowerBound) {
+ dbgs() << cast<ConstantInt>(LowerBound)->getSExtValue();
+ } else {
+ dbgs() << "NONE";
+ }
+ dbgs() << " - " << NewUpperBound->getSExtValue() << "\n";
+ dbgs() << "RHS Bounds ==> ";
+ dbgs() << NewLowerBound->getSExtValue() << " - ";
+ if (UpperBound) {
+ dbgs() << cast<ConstantInt>(UpperBound)->getSExtValue() << "\n";
+ } else {
+ dbgs() << "NONE\n";
+ });
+
+ BasicBlock *LBranch = switchConvert(LHS.begin(), LHS.end(), LowerBound,
+ NewUpperBound, Val, OrigBlock, Default);
+ BasicBlock *RBranch = switchConvert(RHS.begin(), RHS.end(), NewLowerBound,
+ UpperBound, Val, OrigBlock, Default);
// Create a new node that checks if the value is < pivot. Go to the
// left branch if it is and right branch if not.
@@ -291,13 +341,19 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
return;
}
+ const bool DefaultIsUnreachable =
+ Default->size() == 1 && isa<UnreachableInst>(Default->getTerminator());
// Create a new, empty default block so that the new hierarchy of
// if-then statements go to this and the PHI nodes are happy.
- BasicBlock* NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
- F->getBasicBlockList().insert(Default, NewDefault);
-
- BranchInst::Create(Default, NewDefault);
-
+ // If the default block is set as unreachable, we avoid creating one
+ // because it will never be a valid target.
+ BasicBlock *NewDefault = nullptr;
+ if (!DefaultIsUnreachable) {
+ NewDefault = BasicBlock::Create(SI->getContext(), "NewDefault");
+ F->getBasicBlockList().insert(Default, NewDefault);
+
+ BranchInst::Create(Default, NewDefault);
+ }
// If there is an entry in any PHI nodes for the default edge, make sure
// to update them as well.
for (BasicBlock::iterator I = Default->begin(); isa<PHINode>(I); ++I) {
@@ -316,12 +372,31 @@ void LowerSwitch::processSwitchInst(SwitchInst *SI) {
DEBUG(dbgs() << "Cases: " << Cases << "\n");
(void)numCmps;
- BasicBlock* SwitchBlock = switchConvert(Cases.begin(), Cases.end(), Val,
- OrigBlock, NewDefault);
+ ConstantInt *UpperBound = nullptr;
+ ConstantInt *LowerBound = nullptr;
+
+ // Optimize the condition where Default is an unreachable block. In this case
+ // we can make the bounds tightly fitted around the case value ranges,
+ // because we know that the value passed to the switch should always be
+ // exactly one of the case values.
+ if (DefaultIsUnreachable) {
+ CaseItr LastCase = Cases.begin() + Cases.size() - 1;
+ UpperBound = cast<ConstantInt>(LastCase->High);
+ LowerBound = cast<ConstantInt>(Cases.begin()->Low);
+ }
+ BasicBlock *SwitchBlock =
+ switchConvert(Cases.begin(), Cases.end(), LowerBound, UpperBound, Val,
+ OrigBlock, NewDefault);
// Branch to our shiny new if-then stuff...
BranchInst::Create(SwitchBlock, OrigBlock);
// We are now done with the switch instruction, delete it.
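The DefaultIsUnreachable handling above is what feeds that pruning: only when the default is unreachable may the initial bounds be drawn from the case list itself. A small sketch of that decision, reusing the CaseRange shape from the previous sketch (hypothetical types, illustrative only):

    #include <cstdint>
    #include <vector>

    struct CaseRange { int64_t Lo, Hi; };
    struct Bounds { const int64_t *Lo; const int64_t *Hi; };

    // With a reachable default the value may miss every case, so lowering
    // starts with unknown bounds. With an unreachable default the value must
    // hit some case, so the sorted case list supplies tight initial bounds.
    Bounds initialBounds(const std::vector<CaseRange> &SortedCases,
                         bool DefaultIsUnreachable) {
      Bounds B = {nullptr, nullptr};
      if (DefaultIsUnreachable && !SortedCases.empty()) {
        B.Lo = &SortedCases.front().Lo; // smallest case value
        B.Hi = &SortedCases.back().Hi;  // largest case value
      }
      return B;
    }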
CurBlock->getInstList().erase(SI); + + pred_iterator PI = pred_begin(Default), E = pred_end(Default); + // If the Default block has no more predecessors just remove it + if (PI == E) { + DeleteDeadBlock(Default); + } } diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp index 150dbdd..960b198 100644 --- a/lib/Transforms/Utils/SimplifyCFG.cpp +++ b/lib/Transforms/Utils/SimplifyCFG.cpp @@ -201,8 +201,8 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred, /// ComputeSpeculationCost - Compute an abstract "cost" of speculating the /// given instruction, which is assumed to be safe to speculate. 1 means /// cheap, 2 means less cheap, and UINT_MAX means prohibitively expensive. -static unsigned ComputeSpeculationCost(const User *I) { - assert(isSafeToSpeculativelyExecute(I) && +static unsigned ComputeSpeculationCost(const User *I, const DataLayout *DL) { + assert(isSafeToSpeculativelyExecute(I, DL) && "Instruction is not safe to speculatively execute!"); switch (Operator::getOpcode(I)) { default: @@ -227,6 +227,9 @@ static unsigned ComputeSpeculationCost(const User *I) { case Instruction::Trunc: case Instruction::ZExt: case Instruction::SExt: + case Instruction::BitCast: + case Instruction::ExtractElement: + case Instruction::InsertElement: return 1; // These are all cheap. case Instruction::Call: @@ -254,7 +257,8 @@ static unsigned ComputeSpeculationCost(const User *I) { /// CostRemaining, false is returned and CostRemaining is undefined. static bool DominatesMergePoint(Value *V, BasicBlock *BB, SmallPtrSet<Instruction*, 4> *AggressiveInsts, - unsigned &CostRemaining) { + unsigned &CostRemaining, + const DataLayout *DL) { Instruction *I = dyn_cast<Instruction>(V); if (!I) { // Non-instructions all dominate instructions, but not all constantexprs @@ -287,10 +291,10 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, it looks like the instruction IS in the "condition". Check to // see if it's a cheap instruction to unconditionally compute, and if it // only uses stuff defined outside of the condition. If so, hoist it out. - if (!isSafeToSpeculativelyExecute(I)) + if (!isSafeToSpeculativelyExecute(I, DL)) return false; - unsigned Cost = ComputeSpeculationCost(I); + unsigned Cost = ComputeSpeculationCost(I, DL); if (Cost > CostRemaining) return false; @@ -300,7 +304,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB, // Okay, we can only really hoist these out if their operands do // not take us over the cost threshold. for (User::op_iterator i = I->op_begin(), e = I->op_end(); i != e; ++i) - if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining)) + if (!DominatesMergePoint(*i, BB, AggressiveInsts, CostRemaining, DL)) return false; // Okay, it's safe to do this! Remember this instruction. AggressiveInsts->insert(I); @@ -994,7 +998,7 @@ static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2, /// HoistThenElseCodeToIf - Given a conditional branch that goes to BB1 and /// BB2, hoist any common code in the two blocks up into the branch block. The /// caller of this function guarantees that BI's block dominates BB1 and BB2. -static bool HoistThenElseCodeToIf(BranchInst *BI) { +static bool HoistThenElseCodeToIf(BranchInst *BI, const DataLayout *DL) { // This does very trivial matching, with limited scanning, to find identical // instructions in the two blocks. In particular, we don't want to get into // O(M*N) situations here where M and N are the sizes of BB1 and BB2. 
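The SimplifyCFG hunk above widens the set of instructions ComputeSpeculationCost treats as unit-cost to include bitcast, extractelement and insertelement. A compressed sketch of the cost model's shape, with stand-in opcodes and only two tiers (the real function also has an intermediate "less cheap" tier and more opcodes):

    #include <climits>

    enum class Op { Add, GetElementPtr, Trunc, ZExt, SExt, BitCast,
                    ExtractElement, InsertElement, Call, Load };

    unsigned computeSpeculationCost(Op O) {
      switch (O) {
      case Op::Add: case Op::GetElementPtr: case Op::Trunc: case Op::ZExt:
      case Op::SExt: case Op::BitCast: case Op::ExtractElement:
      case Op::InsertElement:
        return 1;        // cheap to execute unconditionally
      default:
        return UINT_MAX; // prohibitively expensive to speculate
      }
    }

Callers compare the accumulated cost of a candidate block against a folding threshold; anything that trips UINT_MAX immediately disqualifies the speculation.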
As @@ -1068,9 +1072,9 @@ HoistTerminator: if (BB1V == BB2V) continue; - if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V)) + if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V, DL)) return Changed; - if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V)) + if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V, DL)) return Changed; } } @@ -1387,7 +1391,8 @@ static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB, /// \endcode /// /// \returns true if the conditional block is removed. -static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { +static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB, + const DataLayout *DL) { // Be conservative for now. FP select instruction can often be expensive. Value *BrCond = BI->getCondition(); if (isa<FCmpInst>(BrCond)) @@ -1430,13 +1435,13 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { return false; // Don't hoist the instruction if it's unsafe or expensive. - if (!isSafeToSpeculativelyExecute(I) && + if (!isSafeToSpeculativelyExecute(I, DL) && !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(I, BB, ThenBB, EndBB)))) return false; if (!SpeculatedStoreValue && - ComputeSpeculationCost(I) > PHINodeFoldingThreshold) + ComputeSpeculationCost(I, DL) > PHINodeFoldingThreshold) return false; // Store the store speculation candidate. @@ -1487,11 +1492,11 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB) { if (!OrigCE && !ThenCE) continue; // Known safe and cheap. - if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) || - (OrigCE && !isSafeToSpeculativelyExecute(OrigCE))) + if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE, DL)) || + (OrigCE && !isSafeToSpeculativelyExecute(OrigCE, DL))) return false; - unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE) : 0; - unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE) : 0; + unsigned OrigCost = OrigCE ? ComputeSpeculationCost(OrigCE, DL) : 0; + unsigned ThenCost = ThenCE ? ComputeSpeculationCost(ThenCE, DL) : 0; if (OrigCost + ThenCost > 2 * PHINodeFoldingThreshold) return false; @@ -1738,9 +1743,9 @@ static bool FoldTwoEntryPHINode(PHINode *PN, const DataLayout *DL) { } if (!DominatesMergePoint(PN->getIncomingValue(0), BB, &AggressiveInsts, - MaxCostVal0) || + MaxCostVal0, DL) || !DominatesMergePoint(PN->getIncomingValue(1), BB, &AggressiveInsts, - MaxCostVal1)) + MaxCostVal1, DL)) return false; } @@ -1958,7 +1963,7 @@ static bool checkCSEInPredecessor(Instruction *Inst, BasicBlock *PB) { /// FoldBranchToCommonDest - If this basic block is simple enough, and if a /// predecessor branches to us and one of our successors, fold the block into /// the predecessor and use logical operations to pick the right destination. -bool llvm::FoldBranchToCommonDest(BranchInst *BI) { +bool llvm::FoldBranchToCommonDest(BranchInst *BI, const DataLayout *DL) { BasicBlock *BB = BI->getParent(); Instruction *Cond = nullptr; @@ -2010,7 +2015,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { Instruction *BonusInst = nullptr; if (&*FrontIt != Cond && FrontIt->hasOneUse() && FrontIt->user_back() == Cond && - isSafeToSpeculativelyExecute(FrontIt)) { + isSafeToSpeculativelyExecute(FrontIt, DL)) { BonusInst = &*FrontIt; ++FrontIt; @@ -2025,7 +2030,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI) { // Make sure the instruction after the condition is the cond branch. BasicBlock::iterator CondIt = Cond; ++CondIt; - // Ingore dbg intrinsics. 
+ // Ignore dbg intrinsics. while (isa<DbgInfoIntrinsic>(CondIt)) ++CondIt; if (&*CondIt != BI) @@ -2340,7 +2345,7 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { } // If this is a conditional branch in an empty block, and if any - // predecessors is a conditional branch to one of our destinations, + // predecessors are a conditional branch to one of our destinations, // fold the conditions into logical ops and one cond br. BasicBlock::iterator BBI = BB->begin(); // Ignore dbg intrinsics. @@ -2375,16 +2380,33 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) { // Do not perform this transformation if it would require // insertion of a large number of select instructions. For targets // without predication/cmovs, this is a big pessimization. - BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); + // Also do not perform this transformation if any phi node in the common + // destination block can trap when reached by BB or PBB (PR17073). In that + // case, it would be unsafe to hoist the operation into a select instruction. + + BasicBlock *CommonDest = PBI->getSuccessor(PBIOp); unsigned NumPhis = 0; for (BasicBlock::iterator II = CommonDest->begin(); - isa<PHINode>(II); ++II, ++NumPhis) + isa<PHINode>(II); ++II, ++NumPhis) { if (NumPhis > 2) // Disable this xform. return false; + PHINode *PN = cast<PHINode>(II); + Value *BIV = PN->getIncomingValueForBlock(BB); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV)) + if (CE->canTrap()) + return false; + + unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent()); + Value *PBIV = PN->getIncomingValue(PBBIdx); + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV)) + if (CE->canTrap()) + return false; + } + // Finally, if everything is ok, fold the branches to logical ops. - BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); + BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1); DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent() << "AND: " << *BI->getParent()); @@ -3308,6 +3330,11 @@ static bool ForwardSwitchConditionToPHI(SwitchInst *SI) { /// ValidLookupTableConstant - Return true if the backend will be able to handle /// initializing an array of constants like C. static bool ValidLookupTableConstant(Constant *C) { + if (C->isThreadDependent()) + return false; + if (C->isDLLImportDependent()) + return false; + if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) return CE->isGEPWithNoNotionalOverIndexing(); @@ -3521,7 +3548,8 @@ SwitchLookupTable::SwitchLookupTable(Module &M, // Fill in any holes in the table with the default result. if (Values.size() < TableSize) { - assert(DefaultValue && "Need a default value to fill the lookup table holes."); + assert(DefaultValue && + "Need a default value to fill the lookup table holes."); assert(DefaultValue->getType() == ValueType); for (uint64_t I = 0; I < TableSize; ++I) { if (!TableContents[I]) @@ -3990,7 +4018,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder){ // branches to us and our successor, fold the comparison into the // predecessor and use logical operations to update the incoming value // for PHI nodes in common successor. 
- if (FoldBranchToCommonDest(BI)) + if (FoldBranchToCommonDest(BI, DL)) return SimplifyCFG(BB, TTI, DL) | true; return false; } @@ -4034,7 +4062,7 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // If this basic block is ONLY a compare and a branch, and if a predecessor // branches to us and one of our successors, fold the comparison into the // predecessor and use logical operations to pick the right destination. - if (FoldBranchToCommonDest(BI)) + if (FoldBranchToCommonDest(BI, DL)) return SimplifyCFG(BB, TTI, DL) | true; // We have a conditional branch to two blocks that are only reachable @@ -4043,24 +4071,24 @@ bool SimplifyCFGOpt::SimplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder) { // can hoist it up to the branching block. if (BI->getSuccessor(0)->getSinglePredecessor()) { if (BI->getSuccessor(1)->getSinglePredecessor()) { - if (HoistThenElseCodeToIf(BI)) + if (HoistThenElseCodeToIf(BI, DL)) return SimplifyCFG(BB, TTI, DL) | true; } else { // If Successor #1 has multiple preds, we may be able to conditionally - // execute Successor #0 if it branches to successor #1. + // execute Successor #0 if it branches to Successor #1. TerminatorInst *Succ0TI = BI->getSuccessor(0)->getTerminator(); if (Succ0TI->getNumSuccessors() == 1 && Succ0TI->getSuccessor(0) == BI->getSuccessor(1)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0))) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(0), DL)) return SimplifyCFG(BB, TTI, DL) | true; } } else if (BI->getSuccessor(1)->getSinglePredecessor()) { // If Successor #0 has multiple preds, we may be able to conditionally - // execute Successor #1 if it branches to successor #0. + // execute Successor #1 if it branches to Successor #0. TerminatorInst *Succ1TI = BI->getSuccessor(1)->getTerminator(); if (Succ1TI->getNumSuccessors() == 1 && Succ1TI->getSuccessor(0) == BI->getSuccessor(0)) - if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1))) + if (SpeculativelyExecuteBB(BI, BI->getSuccessor(1), DL)) return SimplifyCFG(BB, TTI, DL) | true; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 34d8a10..cb8a41d 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -209,6 +209,29 @@ namespace { class LoopVectorizationLegality; class LoopVectorizationCostModel; +/// Optimization analysis message produced during vectorization. Messages inform +/// the user why vectorization did not occur. +class Report { + std::string Message; + raw_string_ostream Out; + Instruction *Instr; + +public: + Report(Instruction *I = nullptr) : Out(Message), Instr(I) { + Out << "loop not vectorized: "; + } + + template <typename A> Report &operator<<(const A &Value) { + Out << Value; + return *this; + } + + Instruction *getInstr() { return Instr; } + + std::string &str() { return Out.str(); } + operator Twine() { return Out.str(); } +}; + /// InnerLoopVectorizer vectorizes loops which contain only one basic /// block to a specified vectorization factor (VF). 
/// This class performs the widening of scalars into vectors, or multiple @@ -515,10 +538,12 @@ public: unsigned NumPredStores; LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, const DataLayout *DL, - DominatorTree *DT, TargetLibraryInfo *TLI) + DominatorTree *DT, TargetLibraryInfo *TLI, + Function *F) : NumLoads(0), NumStores(0), NumPredStores(0), TheLoop(L), SE(SE), DL(DL), - DT(DT), TLI(TLI), Induction(nullptr), WidestIndTy(nullptr), - HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) {} + DT(DT), TLI(TLI), TheFunction(F), Induction(nullptr), + WidestIndTy(nullptr), HasFunNoNaNAttr(false), MaxSafeDepDistBytes(-1U) { + } /// This enum represents the kinds of reductions that we support. enum ReductionKind { @@ -747,6 +772,16 @@ private: /// invariant. void collectStridedAcccess(Value *LoadOrStoreInst); + /// Report an analysis message to assist the user in diagnosing loops that are + /// not vectorized. + void emitAnalysis(Report &Message) { + DebugLoc DL = TheLoop->getStartLoc(); + if (Instruction *I = Message.getInstr()) + DL = I->getDebugLoc(); + emitOptimizationRemarkAnalysis(TheFunction->getContext(), DEBUG_TYPE, + *TheFunction, DL, Message.str()); + } + /// The loop that we evaluate. Loop *TheLoop; /// Scev analysis. @@ -757,6 +792,8 @@ private: DominatorTree *DT; /// Target Library Info. TargetLibraryInfo *TLI; + /// Parent function + Function *TheFunction; // --- vectorization state --- // @@ -906,7 +943,7 @@ public: } /// Return the loop vectorizer metadata prefix. - static StringRef Prefix() { return "llvm.vectorizer."; } + static StringRef Prefix() { return "llvm.loop.vectorize."; } MDNode *createHint(LLVMContext &Context, StringRef Name, unsigned V) const { SmallVector<Value*, 2> Vals; @@ -942,6 +979,29 @@ public: LoopID = NewLoopID; } + std::string emitRemark() const { + Report R; + R << "vectorization "; + switch (Force) { + case LoopVectorizeHints::FK_Disabled: + R << "is explicitly disabled"; + break; + case LoopVectorizeHints::FK_Enabled: + R << "is explicitly enabled"; + if (Width != 0 && Unroll != 0) + R << " with width " << Width << " and interleave count " << Unroll; + else if (Width != 0) + R << " with width " << Width; + else if (Unroll != 0) + R << " with interleave count " << Unroll; + break; + case LoopVectorizeHints::FK_Undefined: + R << "was not specified"; + break; + } + return R.str(); + } + unsigned getWidth() const { return Width; } unsigned getUnroll() const { return Unroll; } enum ForceKind getForce() const { return Force; } @@ -1125,18 +1185,37 @@ struct LoopVectorize : public FunctionPass { : "?")) << " width=" << Hints.getWidth() << " unroll=" << Hints.getUnroll() << "\n"); + // Function containing loop + Function *F = L->getHeader()->getParent(); + + // Looking at the diagnostic output is the only way to determine if a loop + // was vectorized (other than looking at the IR or machine code), so it + // is important to generate an optimization remark for each loop. Most of + // these messages are generated by emitOptimizationRemarkAnalysis. Remarks + // generated by emitOptimizationRemark and emitOptimizationRemarkMissed are + // less verbose reporting vectorized loops and unvectorized loops that may + // benefit from vectorization, respectively. 
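The Report helper defined above is essentially a prefix-seeding string stream. A minimal stand-in using the standard library shows the shape, with std::ostringstream in place of raw_string_ostream (a sketch, not the LLVM class):

    #include <sstream>
    #include <string>

    // Seeds the common prefix in the constructor, then streams the specific
    // reason, so every analysis remark is phrased uniformly.
    class Report {
      std::ostringstream Out;

    public:
      Report() { Out << "loop not vectorized: "; }

      template <typename A> Report &operator<<(const A &Value) {
        Out << Value;
        return *this;
      }

      std::string str() const { return Out.str(); }
    };

    // Usage mirroring the emitAnalysis call sites in this patch:
    //   (Report() << "loop contains a switch statement").str()
    // yields "loop not vectorized: loop contains a switch statement".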
+ if (Hints.getForce() == LoopVectorizeHints::FK_Disabled) { DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); + emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, + L->getStartLoc(), Hints.emitRemark()); return false; } if (!AlwaysVectorize && Hints.getForce() != LoopVectorizeHints::FK_Enabled) { DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); + emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F, + L->getStartLoc(), Hints.emitRemark()); return false; } if (Hints.getWidth() == 1 && Hints.getUnroll() == 1) { DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); + emitOptimizationRemarkAnalysis( + F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + "loop not vectorized: vector width and interleave count are " + "explicitly set to 1"); return false; } @@ -1151,14 +1230,19 @@ struct LoopVectorize : public FunctionPass { DEBUG(dbgs() << " But vectorizing was explicitly forced.\n"); else { DEBUG(dbgs() << "\n"); + emitOptimizationRemarkAnalysis( + F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + "vectorization is not beneficial and is not explicitly forced"); return false; } } // Check if it is legal to vectorize the loop. - LoopVectorizationLegality LVL(L, SE, DL, DT, TLI); + LoopVectorizationLegality LVL(L, SE, DL, DT, TLI, F); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); + emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F, + L->getStartLoc(), Hints.emitRemark()); return false; } @@ -1167,7 +1251,6 @@ struct LoopVectorize : public FunctionPass { // Check the function attributes to find out if this function should be // optimized for size. - Function *F = L->getHeader()->getParent(); bool OptForSize = Hints.getForce() != LoopVectorizeHints::FK_Enabled && F->hasFnAttribute(Attribute::OptimizeForSize); @@ -1190,6 +1273,11 @@ struct LoopVectorize : public FunctionPass { if (F->hasFnAttribute(Attribute::NoImplicitFloat)) { DEBUG(dbgs() << "LV: Can't vectorize when the NoImplicitFloat" "attribute is used.\n"); + emitOptimizationRemarkAnalysis( + F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + "loop not vectorized due to NoImplicitFloat attribute"); + emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F, + L->getStartLoc(), Hints.emitRemark()); return false; } @@ -1208,9 +1296,14 @@ struct LoopVectorize : public FunctionPass { DEBUG(dbgs() << "LV: Unroll Factor is " << UF << '\n'); if (VF.Width == 1) { - DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial.\n"); - if (UF == 1) + DEBUG(dbgs() << "LV: Vectorization is possible but not beneficial\n"); + + if (UF == 1) { + emitOptimizationRemarkAnalysis( + F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), + "not beneficial to vectorize and user disabled interleaving"); return false; + } DEBUG(dbgs() << "LV: Trying to at least unroll the loops.\n"); // Report the unrolling decision. @@ -1220,6 +1313,7 @@ struct LoopVectorize : public FunctionPass { " (vectorization not beneficial)")); // We decided not to vectorize, but we may want to unroll. + InnerLoopUnroller Unroller(L, SE, LI, DT, DL, TLI, UF); Unroller.vectorize(&LVL); } else { @@ -1909,20 +2003,23 @@ void InnerLoopVectorizer::createEmptyLoop() { the vectorized instructions while the old loop will continue to run the scalar remainder. - [ ] <-- vector loop bypass (may consist of multiple blocks). - / | - / v - | [ ] <-- vector pre header. - | | - | v - | [ ] \ - | [ ]_| <-- vector loop. - | | - \ v - >[ ] <--- middle-block. 
- / | - / v - | [ ] <--- new preheader. + [ ] <-- Back-edge taken count overflow check. + / | + / v + | [ ] <-- vector loop bypass (may consist of multiple blocks). + | / | + | / v + || [ ] <-- vector pre header. + || | + || v + || [ ] \ + || [ ]_| <-- vector loop. + || | + | \ v + | >[ ] <--- middle-block. + | / | + | / v + -|- >[ ] <--- new preheader. | | | v | [ ] \ @@ -1936,6 +2033,7 @@ void InnerLoopVectorizer::createEmptyLoop() { BasicBlock *OldBasicBlock = OrigLoop->getHeader(); BasicBlock *BypassBlock = OrigLoop->getLoopPreheader(); BasicBlock *ExitBlock = OrigLoop->getExitBlock(); + assert(BypassBlock && "Invalid loop structure"); assert(ExitBlock && "Must have an exit block"); // Some loops have a single integer induction variable, while other loops @@ -1958,18 +2056,30 @@ void InnerLoopVectorizer::createEmptyLoop() { IdxTy->getPrimitiveSizeInBits()) ExitCount = SE->getTruncateOrNoop(ExitCount, IdxTy); - ExitCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy); + const SCEV *BackedgeTakeCount = SE->getNoopOrZeroExtend(ExitCount, IdxTy); // Get the total trip count from the count by adding 1. - ExitCount = SE->getAddExpr(ExitCount, - SE->getConstant(ExitCount->getType(), 1)); + ExitCount = SE->getAddExpr(BackedgeTakeCount, + SE->getConstant(BackedgeTakeCount->getType(), 1)); // Expand the trip count and place the new instructions in the preheader. // Notice that the pre-header does not change, only the loop body. SCEVExpander Exp(*SE, "induction"); - // Count holds the overall loop count (N). - Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), - BypassBlock->getTerminator()); + // We need to test whether the backedge-taken count is uint##_max. Adding one + // to it will cause overflow and an incorrect loop trip count in the vector + // body. In case of overflow we want to directly jump to the scalar remainder + // loop. + Value *BackedgeCount = + Exp.expandCodeFor(BackedgeTakeCount, BackedgeTakeCount->getType(), + BypassBlock->getTerminator()); + if (BackedgeCount->getType()->isPointerTy()) + BackedgeCount = CastInst::CreatePointerCast(BackedgeCount, IdxTy, + "backedge.ptrcnt.to.int", + BypassBlock->getTerminator()); + Instruction *CheckBCOverflow = + CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, BackedgeCount, + Constant::getAllOnesValue(BackedgeCount->getType()), + "backedge.overflow", BypassBlock->getTerminator()); // The loop index does not have to start at Zero. Find the original start // value from the induction PHI node. If we don't have an induction variable @@ -1980,7 +2090,18 @@ void InnerLoopVectorizer::createEmptyLoop() { IdxTy): ConstantInt::get(IdxTy, 0); - assert(BypassBlock && "Invalid loop structure"); + // We need an instruction to anchor the overflow check on. StartIdx needs to + // be defined before the overflow check branch. Because the scalar preheader + // is going to merge the start index and so the overflow branch block needs to + // contain a definition of the start index. + Instruction *OverflowCheckAnchor = BinaryOperator::CreateAdd( + StartIdx, ConstantInt::get(IdxTy, 0), "overflow.check.anchor", + BypassBlock->getTerminator()); + + // Count holds the overall loop count (N). + Value *Count = Exp.expandCodeFor(ExitCount, ExitCount->getType(), + BypassBlock->getTerminator()); + LoopBypassBlocks.push_back(BypassBlock); // Split the single block loop into the two loop structure described above. @@ -2049,29 +2170,45 @@ void InnerLoopVectorizer::createEmptyLoop() { // Now, compare the new count to zero. 
If it is zero, skip the vector loop and
// jump to the scalar loop.
- Value *Cmp = BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx,
- "cmp.zero");
+ Value *Cmp =
+ BypassBuilder.CreateICmpEQ(IdxEndRoundDown, StartIdx, "cmp.zero");
BasicBlock *LastBypassBlock = BypassBlock;
+ // Generate code to check that the loop's trip count, which we computed by
+ // adding one to the backedge-taken count, will not overflow.
+ {
+ auto PastOverflowCheck =
+ std::next(BasicBlock::iterator(OverflowCheckAnchor));
+ BasicBlock *CheckBlock =
+ LastBypassBlock->splitBasicBlock(PastOverflowCheck, "overflow.checked");
+ if (ParentLoop)
+ ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
+ LoopBypassBlocks.push_back(CheckBlock);
+ Instruction *OldTerm = LastBypassBlock->getTerminator();
+ BranchInst::Create(ScalarPH, CheckBlock, CheckBCOverflow, OldTerm);
+ OldTerm->eraseFromParent();
+ LastBypassBlock = CheckBlock;
+ }
+
// Generate the code to check that the strides we assumed to be one are really
// one. We want the new basic block to start at the first instruction in a
// sequence of instructions that form a check.
Instruction *StrideCheck;
Instruction *FirstCheckInst;
std::tie(FirstCheckInst, StrideCheck) =
- addStrideCheck(BypassBlock->getTerminator());
+ addStrideCheck(LastBypassBlock->getTerminator());
if (StrideCheck) {
// Create a new block containing the stride check.
BasicBlock *CheckBlock =
- BypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
+ LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck");
if (ParentLoop)
ParentLoop->addBasicBlockToLoop(CheckBlock, LI->getBase());
LoopBypassBlocks.push_back(CheckBlock);
// Replace the branch into the memory check block with a conditional branch
// for the "few elements case".
- Instruction *OldTerm = BypassBlock->getTerminator();
+ Instruction *OldTerm = LastBypassBlock->getTerminator();
BranchInst::Create(MiddleBlock, CheckBlock, Cmp, OldTerm);
OldTerm->eraseFromParent();
@@ -2134,6 +2271,19 @@ void InnerLoopVectorizer::createEmptyLoop() {
PHINode::Create(OrigPhi->getType(), 2, "trunc.resume.val",
MiddleBlock->getTerminator()) : nullptr;
+ // Create phi nodes to merge from the backedge-taken check block.
+ PHINode *BCResumeVal = PHINode::Create(ResumeValTy, 3, "bc.resume.val",
+ ScalarPH->getTerminator());
+ BCResumeVal->addIncoming(ResumeVal, MiddleBlock);
+
+ PHINode *BCTruncResumeVal = nullptr;
+ if (OrigPhi == OldInduction) {
+ BCTruncResumeVal =
+ PHINode::Create(OrigPhi->getType(), 2, "bc.trunc.resume.val",
+ ScalarPH->getTerminator());
+ BCTruncResumeVal->addIncoming(TruncResumeVal, MiddleBlock);
+ }
+
Value *EndValue = nullptr;
switch (II.IK) {
case LoopVectorizationLegality::IK_NoInduction:
@@ -2150,10 +2300,12 @@ void InnerLoopVectorizer::createEmptyLoop() {
BypassBuilder.CreateTrunc(IdxEndRoundDown, OrigPhi->getType());
// The new PHI merges the original incoming value, in case of a bypass,
// or the value at the end of the vectorized loop.
- for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
+ for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I)
TruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[I]);
TruncResumeVal->addIncoming(EndValue, VecBody);
+ BCTruncResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]);
+
// We know what the end value is.
EndValue = IdxEndRoundDown;
// We also know which PHI node holds it.
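The overflow check materialized above guards a genuine corner case: the vector preheader needs the trip count BTC + 1, and for a loop whose backedge-taken count is the maximal integer that addition wraps to zero. A tiny sketch of the predicate, which the patch builds as a compare against Constant::getAllOnesValue, modeled here at 64 bits:

    #include <cassert>
    #include <cstdint>

    // True when adding one to the backedge-taken count would wrap, in which
    // case control must go straight to the scalar remainder loop.
    bool mustTakeScalarLoop(uint64_t BackedgeTakenCount) {
      return BackedgeTakenCount == UINT64_MAX; // "backedge.overflow"
    }

    int main() {
      assert(!mustTakeScalarLoop(7));         // trip count 8, vectorizable
      assert(mustTakeScalarLoop(UINT64_MAX)); // trip count would wrap to 0
      return 0;
    }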
@@ -2199,7 +2351,7 @@ void InnerLoopVectorizer::createEmptyLoop() { // The new PHI merges the original incoming value, in case of a bypass, // or the value at the end of the vectorized loop. - for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) { + for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) { if (OrigPhi == OldInduction) ResumeVal->addIncoming(StartIdx, LoopBypassBlocks[I]); else @@ -2209,11 +2361,16 @@ void InnerLoopVectorizer::createEmptyLoop() { // Fix the scalar body counter (PHI node). unsigned BlockIdx = OrigPhi->getBasicBlockIndex(ScalarPH); - // The old inductions phi node in the scalar body needs the truncated value. - if (OrigPhi == OldInduction) - OrigPhi->setIncomingValue(BlockIdx, TruncResumeVal); - else - OrigPhi->setIncomingValue(BlockIdx, ResumeVal); + + // The old induction's phi node in the scalar body needs the truncated + // value. + if (OrigPhi == OldInduction) { + BCResumeVal->addIncoming(StartIdx, LoopBypassBlocks[0]); + OrigPhi->setIncomingValue(BlockIdx, BCTruncResumeVal); + } else { + BCResumeVal->addIncoming(II.StartValue, LoopBypassBlocks[0]); + OrigPhi->setIncomingValue(BlockIdx, BCResumeVal); + } } // If we are generating a new induction variable then we also need to @@ -2224,7 +2381,7 @@ void InnerLoopVectorizer::createEmptyLoop() { assert(!ResumeIndex && "Unexpected resume value found"); ResumeIndex = PHINode::Create(IdxTy, 2, "new.indc.resume.val", MiddleBlock->getTerminator()); - for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) ResumeIndex->addIncoming(StartIdx, LoopBypassBlocks[I]); ResumeIndex->addIncoming(IdxEndRoundDown, VecBody); } @@ -2494,7 +2651,7 @@ void InnerLoopVectorizer::vectorizeLoop() { // To do so, we need to generate the 'identity' vector and override // one of the elements with the incoming scalar reduction. We need // to do it in the vector-loop preheader. - Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator()); + Builder.SetInsertPoint(LoopBypassBlocks[1]->getTerminator()); // This is the vector-clone of the value that leaves the loop. VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr); @@ -2568,7 +2725,7 @@ void InnerLoopVectorizer::vectorizeLoop() { VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr); PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi"); Value *StartVal = (part == 0) ? VectorStart : Identity; - for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I) + for (unsigned I = 1, E = LoopBypassBlocks.size(); I != E; ++I) NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]); NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody.back()); @@ -2626,6 +2783,13 @@ void InnerLoopVectorizer::vectorizeLoop() { Builder.getInt32(0)); } + // Create a phi node that merges control-flow from the backedge-taken check + // block and the middle block. + PHINode *BCBlockPhi = PHINode::Create(RdxPhi->getType(), 2, "bc.merge.rdx", + LoopScalarPreHeader->getTerminator()); + BCBlockPhi->addIncoming(RdxDesc.StartValue, LoopBypassBlocks[0]); + BCBlockPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock); + // Now, we need to fix the users of the reduction variable // inside and outside of the scalar remainder loop. // We know that the loop is in LCSSA form. We need to update the @@ -2655,7 +2819,7 @@ void InnerLoopVectorizer::vectorizeLoop() { assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index"); // Pick the other block. int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 
0 : 1); - (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx); + (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi); (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr); }// end of for each redux variable. @@ -3062,9 +3226,14 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { scalarizeInstruction(it); break; default: + bool HasScalarOpd = hasVectorInstrinsicScalarOpd(ID, 1); for (unsigned Part = 0; Part < UF; ++Part) { SmallVector<Value *, 4> Args; for (unsigned i = 0, ie = CI->getNumArgOperands(); i != ie; ++i) { + if (HasScalarOpd && i == 1) { + Args.push_back(CI->getArgOperand(i)); + continue; + } VectorParts &Arg = getVectorValue(CI->getArgOperand(i)); Args.push_back(Arg[Part]); } @@ -3112,8 +3281,8 @@ void InnerLoopVectorizer::updateAnalysis() { } } - DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks.front()); - DT->addNewBlock(LoopScalarPreHeader, LoopMiddleBlock); + DT->addNewBlock(LoopMiddleBlock, LoopBypassBlocks[1]); + DT->addNewBlock(LoopScalarPreHeader, LoopBypassBlocks[0]); DT->changeImmediateDominator(LoopScalarBody, LoopScalarPreHeader); DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock); @@ -3138,8 +3307,10 @@ static bool canIfConvertPHINodes(BasicBlock *BB) { } bool LoopVectorizationLegality::canVectorizeWithIfConvert() { - if (!EnableIfConversion) + if (!EnableIfConversion) { + emitAnalysis(Report() << "if-conversion is disabled"); return false; + } assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); @@ -3169,16 +3340,24 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { BasicBlock *BB = *BI; // We don't support switch statements inside loops. - if (!isa<BranchInst>(BB->getTerminator())) + if (!isa<BranchInst>(BB->getTerminator())) { + emitAnalysis(Report(BB->getTerminator()) + << "loop contains a switch statement"); return false; + } // We must be able to predicate all blocks that need to be predicated. if (blockNeedsPredication(BB)) { - if (!blockCanBePredicated(BB, SafePointes)) + if (!blockCanBePredicated(BB, SafePointes)) { + emitAnalysis(Report(BB->getTerminator()) + << "control flow cannot be substituted for a select"); return false; - } else if (BB != Header && !canIfConvertPHINodes(BB)) + } + } else if (BB != Header && !canIfConvertPHINodes(BB)) { + emitAnalysis(Report(BB->getTerminator()) + << "control flow cannot be substituted for a select"); return false; - + } } // We can if-convert this loop. @@ -3188,20 +3367,31 @@ bool LoopVectorizationLegality::canVectorizeWithIfConvert() { bool LoopVectorizationLegality::canVectorize() { // We must have a loop in canonical form. Loops with indirectbr in them cannot // be canonicalized. - if (!TheLoop->getLoopPreheader()) + if (!TheLoop->getLoopPreheader()) { + emitAnalysis( + Report() << "loop control flow is not understood by vectorizer"); return false; + } // We can only vectorize innermost loops. - if (TheLoop->getSubLoopsVector().size()) + if (TheLoop->getSubLoopsVector().size()) { + emitAnalysis(Report() << "loop is not the innermost loop"); return false; + } // We must have a single backedge. - if (TheLoop->getNumBackEdges() != 1) + if (TheLoop->getNumBackEdges() != 1) { + emitAnalysis( + Report() << "loop control flow is not understood by vectorizer"); return false; + } // We must have a single exiting block. 
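The HasScalarOpd branch added to vectorizeBlockInLoop above keeps operand 1 of intrinsics like powi scalar while widening every other operand; the companion legality check later in this diff requires that operand to be loop-invariant, so every vector lane sees the same value. A sketch of the argument-mapping rule, with values modeled as strings for illustration only:

    #include <string>
    #include <vector>

    // Widen a call's arguments: the designated scalar operand (index 1 for
    // intrinsics such as powi, cttz, ctlz) passes through unchanged, while
    // all other operands are replaced by their vector clones.
    std::vector<std::string> widenCallArgs(const std::vector<std::string> &Args,
                                           bool HasScalarOpd) {
      std::vector<std::string> Widened;
      for (std::size_t I = 0; I < Args.size(); ++I) {
        if (HasScalarOpd && I == 1) {
          Widened.push_back(Args[I]); // e.g. the powi exponent: keep scalar
          continue;
        }
        Widened.push_back("vector(" + Args[I] + ")"); // widened operand
      }
      return Widened;
    }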
- if (!TheLoop->getExitingBlock()) + if (!TheLoop->getExitingBlock()) { + emitAnalysis( + Report() << "loop control flow is not understood by vectorizer"); return false; + } // We need to have a loop header. DEBUG(dbgs() << "LV: Found a loop: " << @@ -3217,6 +3407,7 @@ bool LoopVectorizationLegality::canVectorize() { // ScalarEvolution needs to be able to find the exit count. const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); if (ExitCount == SE->getCouldNotCompute()) { + emitAnalysis(Report() << "could not determine number of loop iterations"); DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); return false; } @@ -3310,6 +3501,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && !PhiTy->isPointerTy()) { + emitAnalysis(Report(it) + << "loop control flow is not understood by vectorizer"); DEBUG(dbgs() << "LV: Found an non-int non-pointer PHI.\n"); return false; } @@ -3320,13 +3513,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { if (*bb != Header) { // Check that this instruction has no outside users or is an // identified reduction value with an outside user. - if(!hasOutsideLoopUser(TheLoop, it, AllowedExit)) + if (!hasOutsideLoopUser(TheLoop, it, AllowedExit)) continue; + emitAnalysis(Report(it) << "value that could not be identified as " + "reduction is used outside the loop"); return false; } // We only allow if-converted PHIs with more than two incoming values. if (Phi->getNumIncomingValues() != 2) { + emitAnalysis(Report(it) + << "control flow not understood by vectorizer"); DEBUG(dbgs() << "LV: Found an invalid PHI.\n"); return false; } @@ -3357,8 +3554,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // Until we explicitly handle the case of an induction variable with // an outside loop user we have to give up vectorizing this loop. - if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) + if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) { + emitAnalysis(Report(it) << "use of induction value outside of the " + "loop is not handled by vectorizer"); return false; + } continue; } @@ -3401,6 +3601,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { continue; } + emitAnalysis(Report(it) << "unvectorizable operation"); DEBUG(dbgs() << "LV: Found an unidentified PHI."<< *Phi <<"\n"); return false; }// end of PHI handling @@ -3409,14 +3610,29 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // calls and we do handle certain intrinsic and libm functions. CallInst *CI = dyn_cast<CallInst>(it); if (CI && !getIntrinsicIDForCall(CI, TLI) && !isa<DbgInfoIntrinsic>(CI)) { + emitAnalysis(Report(it) << "call instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found a call site.\n"); return false; } + // Intrinsics such as powi,cttz and ctlz are legal to vectorize if the + // second argument is the same (i.e. loop invariant) + if (CI && + hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) { + if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) { + emitAnalysis(Report(it) + << "intrinsic instruction cannot be vectorized"); + DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); + return false; + } + } + // Check that the instruction return type is vectorizable. // Also, we can't vectorize extractelement instructions. 
    // Check that the instruction return type is vectorizable.
    // Also, we can't vectorize extractelement instructions.
    if ((!VectorType::isValidElementType(it->getType()) &&
         !it->getType()->isVoidTy()) || isa<ExtractElementInst>(it)) {
+      emitAnalysis(Report(it)
+                   << "instruction return type cannot be vectorized");
      DEBUG(dbgs() << "LV: Found unvectorizable type.\n");
      return false;
    }
@@ -3424,8 +3640,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
    // Check that the stored type is vectorizable.
    if (StoreInst *ST = dyn_cast<StoreInst>(it)) {
      Type *T = ST->getValueOperand()->getType();
-      if (!VectorType::isValidElementType(T))
+      if (!VectorType::isValidElementType(T)) {
+        emitAnalysis(Report(ST) << "store instruction cannot be vectorized");
        return false;
+      }
      if (EnableMemAccessVersioning)
        collectStridedAcccess(ST);
    }
@@ -3436,8 +3654,10 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
 
    // Reduction instructions are allowed to have exit users.
    // All other instructions must not have external users.
-    if (hasOutsideLoopUser(TheLoop, it, AllowedExit))
+    if (hasOutsideLoopUser(TheLoop, it, AllowedExit)) {
+      emitAnalysis(Report(it) << "value cannot be used outside the loop");
      return false;
+    }
 
  } // next instr.
 
@@ -3445,8 +3665,11 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
 
  if (!Induction) {
    DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
-    if (Inductions.empty())
+    if (Inductions.empty()) {
+      emitAnalysis(Report()
+                   << "loop induction variable could not be identified");
      return false;
+    }
  }
 
  return true;
@@ -4353,8 +4576,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
      continue;
 
    LoadInst *Ld = dyn_cast<LoadInst>(it);
-    if (!Ld) return false;
-    if (!Ld->isSimple() && !IsAnnotatedParallel) {
+    if (!Ld || (!Ld->isSimple() && !IsAnnotatedParallel)) {
+      emitAnalysis(Report(Ld)
+                   << "read with atomic ordering or volatile read");
      DEBUG(dbgs() << "LV: Found a non-simple load.\n");
      return false;
    }
@@ -4367,8 +4591,13 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
    // Save 'store' instructions. Abort if other instructions write to memory.
    if (it->mayWriteToMemory()) {
      StoreInst *St = dyn_cast<StoreInst>(it);
-      if (!St) return false;
+      if (!St) {
+        emitAnalysis(Report(it) << "instruction cannot be vectorized");
+        return false;
+      }
      if (!St->isSimple() && !IsAnnotatedParallel) {
+        emitAnalysis(Report(St)
+                     << "write with atomic ordering or volatile write");
        DEBUG(dbgs() << "LV: Found a non-simple store.\n");
        return false;
      }
@@ -4405,6 +4634,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
 
    Value* Ptr = ST->getPointerOperand();
    if (isUniform(Ptr)) {
+      emitAnalysis(
+          Report(ST)
+          << "write to a loop invariant address could not be vectorized");
      DEBUG(dbgs() << "LV: We don't allow storing to uniform addresses\n");
      return false;
    }
@@ -4483,6 +4715,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
  }
 
  if (NeedRTCheck && !CanDoRT) {
+    emitAnalysis(Report() << "cannot identify array bounds");
    DEBUG(dbgs() << "LV: We can't vectorize because we can't find " <<
          "the array bounds.\n");
    PtrRtCheck.reset();
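
For context on the next hunk: when aliasing cannot be disproved statically, the vectorizer guards the vector loop with runtime bounds comparisons, one per may-alias pointer pair, and RuntimeMemoryCheckThreshold caps how many it will tolerate. Conceptually (a sketch of the idea, not the code the pass emits):

    void axpy(float *a, const float *b, int n) {
      if (a + n <= b || b + n <= a) {   // ranges provably disjoint
        for (int i = 0; i < n; ++i)     // vectorized body runs here
          a[i] += b[i];
      } else {
        for (int i = 0; i < n; ++i)     // scalar fallback
          a[i] += b[i];
      }
    }
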
@@ -4513,6 +4746,14 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
 
  // Check that we did not collect too many pointers or found an unsizeable
  // pointer.
  if (!CanDoRT || NumComparisons > RuntimeMemoryCheckThreshold) {
+    if (!CanDoRT && NumComparisons > 0)
+      emitAnalysis(Report()
+                   << "cannot check memory dependencies at runtime");
+    else
+      emitAnalysis(Report()
+                   << NumComparisons << " exceeds limit of "
+                   << RuntimeMemoryCheckThreshold
+                   << " dependent memory operations checked at runtime");
    DEBUG(dbgs() << "LV: Can't vectorize with memory checks\n");
    PtrRtCheck.reset();
    return false;
@@ -4522,6 +4763,9 @@ bool LoopVectorizationLegality::canVectorizeMemory() {
    }
  }
 
+  if (!CanVecMem)
+    emitAnalysis(Report() << "unsafe dependent memory operations in loop");
+
  DEBUG(dbgs() << "LV: We" << (NeedRTCheck ? "" : " don't") <<
        " need a runtime memory check.\n");
 
@@ -5774,4 +6018,3 @@ Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx,
  Constant *C = ConstantInt::get(ITy, StartIdx, Negate);
  return Builder.CreateAdd(Val, C, "induction");
 }
-
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e13ba95..53a43d9 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -149,6 +149,48 @@ static bool isSplat(ArrayRef<Value *> VL) {
  return true;
 }
 
+///\returns Opcode that can be clubbed with \p Op to create an alternate
+/// sequence which can later be merged as a ShuffleVector instruction.
+static unsigned getAltOpcode(unsigned Op) {
+  switch (Op) {
+  case Instruction::FAdd:
+    return Instruction::FSub;
+  case Instruction::FSub:
+    return Instruction::FAdd;
+  case Instruction::Add:
+    return Instruction::Sub;
+  case Instruction::Sub:
+    return Instruction::Add;
+  default:
+    return 0;
+  }
+}
+
+///\returns bool representing if Opcode \p Op can be part
+/// of an alternate sequence which can later be merged as
+/// a ShuffleVector instruction.
+static bool canCombineAsAltInst(unsigned Op) {
+  if (Op == Instruction::FAdd || Op == Instruction::FSub ||
+      Op == Instruction::Sub || Op == Instruction::Add)
+    return true;
+  return false;
+}
+
+/// \returns ShuffleVector instruction if instructions in \p VL have
+/// alternate fadd,fsub / fsub,fadd / add,sub / sub,add sequence.
+/// (i.e. e.g. opcodes of fadd,fsub,fadd,fsub...)
+static unsigned isAltInst(ArrayRef<Value *> VL) {
+  Instruction *I0 = dyn_cast<Instruction>(VL[0]);
+  unsigned Opcode = I0->getOpcode();
+  unsigned AltOpcode = getAltOpcode(Opcode);
+  for (int i = 1, e = VL.size(); i < e; i++) {
+    Instruction *I = dyn_cast<Instruction>(VL[i]);
+    if (!I || I->getOpcode() != ((i & 1) ? AltOpcode : Opcode))
+      return 0;
+  }
+  return Instruction::ShuffleVector;
+}
+
 /// \returns The opcode if all of the Instructions in \p VL have the same
 /// opcode, or zero.
 static unsigned getSameOpcode(ArrayRef<Value *> VL) {
@@ -158,8 +200,11 @@ static unsigned getSameOpcode(ArrayRef<Value *> VL) {
  unsigned Opcode = I0->getOpcode();
  for (int i = 1, e = VL.size(); i < e; i++) {
    Instruction *I = dyn_cast<Instruction>(VL[i]);
-    if (!I || Opcode != I->getOpcode())
+    if (!I || Opcode != I->getOpcode()) {
+      if (canCombineAsAltInst(Opcode) && i == 1)
+        return isAltInst(VL);
      return 0;
+    }
  }
  return Opcode;
 }
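
In source terms, the alternate-opcode matching added above targets straight-line code such as the following (an illustrative case, not from this commit's tests); getSameOpcode now reports Instruction::ShuffleVector for the bundle instead of giving up:

    void addsub(float *r, const float *a, const float *b) {
      r[0] = a[0] + b[0];   // even lanes use Opcode (fadd)
      r[1] = a[1] - b[1];   // odd lanes use AltOpcode (fsub)
      r[2] = a[2] + b[2];
      r[3] = a[3] - b[3];
    }

Later stages then emit one vector fadd, one vector fsub, and a shuffle that takes even lanes from the first result and odd lanes from the second.
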
@@ -377,6 +422,7 @@ public:
 
  /// \brief Perform LICM and CSE on the newly generated gather sequences.
  void optimizeGatherSequence();
+
 private:
  struct TreeEntry;
 
@@ -594,6 +640,7 @@ void BoUpSLP::buildTree(ArrayRef<Value *> Roots,
 
 void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
  bool SameTy = getSameType(VL); (void)SameTy;
+  bool isAltShuffle = false;
  assert(SameTy && "Invalid types!");
 
  if (Depth == RecursionMaxDepth) {
@@ -615,10 +662,19 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
    newTreeEntry(VL, false);
    return;
  }
+  unsigned Opcode = getSameOpcode(VL);
+
+  // Check that this shuffle vector refers to the alternate
+  // sequence of opcodes.
+  if (Opcode == Instruction::ShuffleVector) {
+    Instruction *I0 = dyn_cast<Instruction>(VL[0]);
+    unsigned Op = I0->getOpcode();
+    if (Op != Instruction::ShuffleVector)
+      isAltShuffle = true;
+  }
 
  // If all of the operands are identical or constant we have a simple solution.
-  if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) ||
-      !getSameOpcode(VL)) {
+  if (allConstant(VL) || isSplat(VL) || !getSameBlock(VL) || !Opcode) {
    DEBUG(dbgs() << "SLP: Gathering due to C,S,B,O. \n");
    newTreeEntry(VL, false);
    return;
@@ -754,8 +810,6 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
 
  DEBUG(dbgs() << "SLP: We are able to schedule this bundle.\n");
 
-  unsigned Opcode = getSameOpcode(VL);
-
  // Check if it is safe to sink the loads or the stores.
  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
    Instruction *Last = getLastInstruction(VL);
@@ -914,8 +968,20 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
      if (isa<BinaryOperator>(VL0) && VL0->isCommutative()) {
        ValueList Left, Right;
        reorderInputsAccordingToOpcode(VL, Left, Right);
-        buildTree_rec(Left, Depth + 1);
-        buildTree_rec(Right, Depth + 1);
+        BasicBlock *LeftBB = getSameBlock(Left);
+        BasicBlock *RightBB = getSameBlock(Right);
+        // If we have common uses on separate paths in the tree make sure we
+        // process the one with greater common depth first.
+        // We can use block numbering to determine the subtree traversal as
+        // earlier user has to come in between the common use and the later user.
+        if (LeftBB && RightBB && LeftBB == RightBB &&
+            getLastIndex(Right) > getLastIndex(Left)) {
+          buildTree_rec(Right, Depth + 1);
+          buildTree_rec(Left, Depth + 1);
+        } else {
+          buildTree_rec(Left, Depth + 1);
+          buildTree_rec(Right, Depth + 1);
+        }
        return;
      }
 
@@ -929,6 +995,51 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
      }
      return;
    }
+    case Instruction::GetElementPtr: {
+      // We don't combine GEPs with complicated (nested) indexing.
+      for (unsigned j = 0; j < VL.size(); ++j) {
+        if (cast<Instruction>(VL[j])->getNumOperands() != 2) {
+          DEBUG(dbgs() << "SLP: not-vectorizable GEP (nested indexes).\n");
+          newTreeEntry(VL, false);
+          return;
+        }
+      }
+
+      // We can't combine several GEPs into one vector if they operate on
+      // different types.
+      Type *Ty0 = cast<Instruction>(VL0)->getOperand(0)->getType();
+      for (unsigned j = 0; j < VL.size(); ++j) {
+        Type *CurTy = cast<Instruction>(VL[j])->getOperand(0)->getType();
+        if (Ty0 != CurTy) {
+          DEBUG(dbgs() << "SLP: not-vectorizable GEP (different types).\n");
+          newTreeEntry(VL, false);
+          return;
+        }
+      }
+
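
The GEP bundles this new case accepts (together with the constant-index check that follows) are plain base-plus-constant address computations, for example (names made up):

    void geps(float *a, float **out) {
      out[0] = a + 4;   // each address is a two-operand GEP over float*
      out[1] = a + 5;
      out[2] = a + 6;
      out[3] = a + 7;
    }
    // Nested indexing such as &s[i].field, or a variable offset a + k,
    // still falls back to gathering.
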
+      // We don't combine GEPs with non-constant indexes.
+      for (unsigned j = 0; j < VL.size(); ++j) {
+        auto Op = cast<Instruction>(VL[j])->getOperand(1);
+        if (!isa<ConstantInt>(Op)) {
+          DEBUG(
+              dbgs() << "SLP: not-vectorizable GEP (non-constant indexes).\n");
+          newTreeEntry(VL, false);
+          return;
+        }
+      }
+
+      newTreeEntry(VL, true);
+      DEBUG(dbgs() << "SLP: added a vector of GEPs.\n");
+      for (unsigned i = 0, e = 2; i < e; ++i) {
+        ValueList Operands;
+        // Prepare the operand vector.
+        for (unsigned j = 0; j < VL.size(); ++j)
+          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+        buildTree_rec(Operands, Depth + 1);
+      }
+      return;
+    }
    case Instruction::Store: {
      // Check if the stores are consecutive or if we need to swizzle them.
      for (unsigned i = 0, e = VL.size() - 1; i < e; ++i)
@@ -961,9 +1072,10 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
        DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
        return;
      }
-
      Function *Int = CI->getCalledFunction();
-
+      Value *A1I = nullptr;
+      if (hasVectorInstrinsicScalarOpd(ID, 1))
+        A1I = CI->getArgOperand(1);
      for (unsigned i = 1, e = VL.size(); i != e; ++i) {
        CallInst *CI2 = dyn_cast<CallInst>(VL[i]);
        if (!CI2 || CI2->getCalledFunction() != Int ||
@@ -973,6 +1085,18 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
                       << "\n");
          return;
        }
+        // ctlz,cttz and powi are special intrinsics whose second argument
+        // should be the same in order for them to be vectorized.
+        if (hasVectorInstrinsicScalarOpd(ID, 1)) {
+          Value *A1J = CI2->getArgOperand(1);
+          if (A1I != A1J) {
+            newTreeEntry(VL, false);
+            DEBUG(dbgs() << "SLP: mismatched arguments in call:" << *CI
+                         << " argument "<< A1I<<"!=" << A1J
+                         << "\n");
+            return;
+          }
+        }
      }
 
      newTreeEntry(VL, true);
@@ -987,6 +1111,26 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
      }
      return;
    }
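
Restating the A1I/A1J comparison above in source form: every element of a powi bundle must pass one and the same scalar second argument (illustrative code, not from the patch):

    void powi4(double *r, const double *x) {
      r[0] = __builtin_powi(x[0], 3);
      r[1] = __builtin_powi(x[1], 3);   // identical scalar exponent: accepted
      r[2] = __builtin_powi(x[2], 3);
      r[3] = __builtin_powi(x[3], 3);   // an exponent of 4 here would be rejected
    }
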
+    case Instruction::ShuffleVector: {
+      // If this is not an alternate sequence of opcode like add-sub
+      // then do not vectorize this instruction.
+      if (!isAltShuffle) {
+        newTreeEntry(VL, false);
+        DEBUG(dbgs() << "SLP: ShuffleVector are not vectorized.\n");
+        return;
+      }
+      newTreeEntry(VL, true);
+      DEBUG(dbgs() << "SLP: added a ShuffleVector op.\n");
+      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
+        ValueList Operands;
+        // Prepare the operand vector.
+        for (unsigned j = 0; j < VL.size(); ++j)
+          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
+
+        buildTree_rec(Operands, Depth + 1);
+      }
+      return;
+    }
    default:
      newTreeEntry(VL, false);
      DEBUG(dbgs() << "SLP: Gathering unknown instruction.\n");
@@ -1010,11 +1154,9 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
    }
    return getGatherCost(E->Scalars);
  }
-
-  assert(getSameOpcode(VL) && getSameType(VL) && getSameBlock(VL) &&
-         "Invalid VL");
+  unsigned Opcode = getSameOpcode(VL);
+  assert(Opcode && getSameType(VL) && getSameBlock(VL) && "Invalid VL");
  Instruction *VL0 = cast<Instruction>(VL[0]);
-  unsigned Opcode = VL0->getOpcode();
  switch (Opcode) {
    case Instruction::PHI: {
      return 0;
@@ -1121,6 +1263,20 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
    }
    return VecCost - ScalarCost;
  }
+  case Instruction::GetElementPtr: {
+    TargetTransformInfo::OperandValueKind Op1VK =
+        TargetTransformInfo::OK_AnyValue;
+    TargetTransformInfo::OperandValueKind Op2VK =
+        TargetTransformInfo::OK_UniformConstantValue;
+
+    int ScalarCost =
+        VecTy->getNumElements() *
+        TTI->getArithmeticInstrCost(Instruction::Add, ScalarTy, Op1VK, Op2VK);
+    int VecCost =
+        TTI->getArithmeticInstrCost(Instruction::Add, VecTy, Op1VK, Op2VK);
+
+    return VecCost - ScalarCost;
+  }
  case Instruction::Load: {
    // Cost of wide load - cost of scalar loads.
    int ScalarLdCost = VecTy->getNumElements() *
@@ -1158,6 +1314,32 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
 
    return VecCallCost - ScalarCallCost;
  }
+  case Instruction::ShuffleVector: {
+    TargetTransformInfo::OperandValueKind Op1VK =
+        TargetTransformInfo::OK_AnyValue;
+    TargetTransformInfo::OperandValueKind Op2VK =
+        TargetTransformInfo::OK_AnyValue;
+    int ScalarCost = 0;
+    int VecCost = 0;
+    for (unsigned i = 0; i < VL.size(); ++i) {
+      Instruction *I = cast<Instruction>(VL[i]);
+      if (!I)
+        break;
+      ScalarCost +=
+          TTI->getArithmeticInstrCost(I->getOpcode(), ScalarTy, Op1VK, Op2VK);
+    }
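
A back-of-the-envelope version of the comparison this case performs, with unit costs that are assumptions rather than TTI's real numbers:

    // Negative result means the alternating add/sub bundle is profitable.
    int altShuffleGain(int Lanes, int ScalarOpCost, int VecOpCost,
                       int ShuffleCost) {
      int ScalarCost = Lanes * ScalarOpCost;      // one scalar op per lane
      int VecCost = 2 * VecOpCost + ShuffleCost;  // vector add + vector sub
                                                  // + one SK_Alternate shuffle
      return VecCost - ScalarCost;
    }
    // e.g. altShuffleGain(4, 1, 1, 1) == -1 for a four-lane add/sub mix.
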
+    // VecCost is equal to sum of the cost of creating 2 vectors
+    // and the cost of creating shuffle.
+    Instruction *I0 = cast<Instruction>(VL[0]);
+    VecCost =
+        TTI->getArithmeticInstrCost(I0->getOpcode(), VecTy, Op1VK, Op2VK);
+    Instruction *I1 = cast<Instruction>(VL[1]);
+    VecCost +=
+        TTI->getArithmeticInstrCost(I1->getOpcode(), VecTy, Op1VK, Op2VK);
+    VecCost +=
+        TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, VecTy, 0);
+    return VecCost - ScalarCost;
+  }
  default:
    llvm_unreachable("Unknown instruction");
  }
@@ -1438,9 +1620,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
    setInsertPointAfterBundle(E->Scalars);
    return Gather(E->Scalars, VecTy);
  }
-
-  unsigned Opcode = VL0->getOpcode();
-  assert(Opcode == getSameOpcode(E->Scalars) && "Invalid opcode");
+  unsigned Opcode = getSameOpcode(E->Scalars);
 
  switch (Opcode) {
    case Instruction::PHI: {
@@ -1649,12 +1829,52 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      E->VectorizedValue = S;
      return propagateMetadata(S, E->Scalars);
    }
+    case Instruction::GetElementPtr: {
+      setInsertPointAfterBundle(E->Scalars);
+
+      ValueList Op0VL;
+      for (int i = 0, e = E->Scalars.size(); i < e; ++i)
+        Op0VL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(0));
+
+      Value *Op0 = vectorizeTree(Op0VL);
+
+      std::vector<Value *> OpVecs;
+      for (int j = 1, e = cast<GetElementPtrInst>(VL0)->getNumOperands(); j < e;
+           ++j) {
+        ValueList OpVL;
+        for (int i = 0, e = E->Scalars.size(); i < e; ++i)
+          OpVL.push_back(cast<GetElementPtrInst>(E->Scalars[i])->getOperand(j));
+
+        Value *OpVec = vectorizeTree(OpVL);
+        OpVecs.push_back(OpVec);
+      }
+
+      Value *V = Builder.CreateGEP(Op0, OpVecs);
+      E->VectorizedValue = V;
+
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        return propagateMetadata(I, E->Scalars);
+
+      return V;
+    }
    case Instruction::Call: {
      CallInst *CI = cast<CallInst>(VL0);
      setInsertPointAfterBundle(E->Scalars);
+      Function *FI;
+      Intrinsic::ID IID = Intrinsic::not_intrinsic;
+      if (CI && (FI = CI->getCalledFunction())) {
+        IID = (Intrinsic::ID) FI->getIntrinsicID();
+      }
      std::vector<Value *> OpVecs;
      for (int j = 0, e = CI->getNumArgOperands(); j < e; ++j) {
        ValueList OpVL;
+        // ctlz,cttz and powi are special intrinsics whose second argument is
+        // a scalar. This argument should not be vectorized.
+        if (hasVectorInstrinsicScalarOpd(IID, 1) && j == 1) {
+          CallInst *CEI = cast<CallInst>(E->Scalars[0]);
+          OpVecs.push_back(CEI->getArgOperand(j));
+          continue;
+        }
        for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
          CallInst *CEI = cast<CallInst>(E->Scalars[i]);
          OpVL.push_back(CEI->getArgOperand(j));
@@ -1673,6 +1893,49 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
      E->VectorizedValue = V;
      return V;
    }
+    case Instruction::ShuffleVector: {
+      ValueList LHSVL, RHSVL;
+      for (int i = 0, e = E->Scalars.size(); i < e; ++i) {
+        LHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(0));
+        RHSVL.push_back(cast<Instruction>(E->Scalars[i])->getOperand(1));
+      }
+      setInsertPointAfterBundle(E->Scalars);
+
+      Value *LHS = vectorizeTree(LHSVL);
+      Value *RHS = vectorizeTree(RHSVL);
+
+      if (Value *V = alreadyVectorized(E->Scalars))
+        return V;
+
+      // Create a vector of LHS op1 RHS
+      BinaryOperator *BinOp0 = cast<BinaryOperator>(VL0);
+      Value *V0 = Builder.CreateBinOp(BinOp0->getOpcode(), LHS, RHS);
+
+      // Create a vector of LHS op2 RHS
+      Instruction *VL1 = cast<Instruction>(E->Scalars[1]);
+      BinaryOperator *BinOp1 = cast<BinaryOperator>(VL1);
+      Value *V1 = Builder.CreateBinOp(BinOp1->getOpcode(), LHS, RHS);
+
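
A worked trace of the mask built just below, for a four-element bundle: even positions select from V0 and odd positions from V1, giving the mask <0, 5, 2, 7> (lane values invented for the trace):

    //   V0 = LHS op1 RHS = <A0, A1, A2, A3>   (e.g. the fadd vector)
    //   V1 = LHS op2 RHS = <S0, S1, S2, S3>   (e.g. the fsub vector)
    //   shufflevector V0, V1, <0, 5, 2, 7>  ->  <A0, S1, A2, S3>
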
+      // Create appropriate shuffle to take alternative operations from
+      // the vector.
+      std::vector<Constant *> Mask(E->Scalars.size());
+      unsigned e = E->Scalars.size();
+      for (unsigned i = 0; i < e; ++i) {
+        if (i & 1)
+          Mask[i] = Builder.getInt32(e + i);
+        else
+          Mask[i] = Builder.getInt32(i);
+      }
+
+      Value *ShuffleMask = ConstantVector::get(Mask);
+
+      Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
+      E->VectorizedValue = V;
+      if (Instruction *I = dyn_cast<Instruction>(V))
+        return propagateMetadata(I, E->Scalars);
+
+      return V;
+    }
    default:
      llvm_unreachable("unknown inst");
  }
@@ -1741,7 +2004,6 @@ Value *BoUpSLP::vectorizeTree() {
 
    // For each lane:
    for (int Lane = 0, LE = Entry->Scalars.size(); Lane != LE; ++Lane) {
      Value *Scalar = Entry->Scalars[Lane];
-
      // No need to handle users of gathered values.
      if (Entry->NeedToGather)
        continue;
@@ -1925,7 +2187,6 @@ struct SLPVectorizer : public FunctionPass {
    for (po_iterator<BasicBlock*> it = po_begin(&F.getEntryBlock()),
         e = po_end(&F.getEntryBlock()); it != e; ++it) {
      BasicBlock *BB = *it;
-
      // Vectorize trees that end at stores.
      if (unsigned count = collectStores(BB, R)) {
        (void)count;