From 12ae52767f2d0e4312ba059c0e97ed8beb9777d5 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 19 Oct 2011 18:48:52 +0000 Subject: Fix parsing of a line with only a # in it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142537 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 4e8e15c..d7ee1c4 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -1228,7 +1228,8 @@ bool AsmParser::ParseStatement() { /// EatToEndOfLine uses the Lexer to eat the characters to the end of the line /// since they may not be able to be tokenized to get to the end of line token. void AsmParser::EatToEndOfLine() { - Lexer.LexUntilEndOfLine(); + if (!Lexer.is(AsmToken::EndOfStatement)) + Lexer.LexUntilEndOfLine(); // Eat EOL. Lex(); } -- cgit v1.1 From 815af82b74fa0901e818f5d16ee418675f399101 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 19 Oct 2011 20:43:16 +0000 Subject: Improve code generation for vselect on SSE2: When checking the availability of instructions using the TLI, a 'promoted' instruction IS available. It means that the value is bitcasted to another type for which there is an operation. The correct check for the availability of an instruction is to check if it should be expanded. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142542 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 7fe3530..4e02b90 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -394,10 +394,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // If we can't even use the basic vector operations of // AND,OR,XOR, we will have to scalarize the op. - if (!TLI.isOperationLegalOrCustom(ISD::AND, VT) || - !TLI.isOperationLegalOrCustom(ISD::XOR, VT) || - !TLI.isOperationLegalOrCustom(ISD::OR, VT)) - return DAG.UnrollVectorOp(Op.getNode()); + // Notice that the operation may be 'promoted' which means that it is + // 'bitcasted' to another type which is handled. + if (TLI.getOperationAction(ISD::AND, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::XOR, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::OR, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); assert(VT.getSizeInBits() == Op.getOperand(1).getValueType().getSizeInBits() && "Invalid mask size"); @@ -421,9 +423,9 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { DebugLoc DL = Op.getDebugLoc(); // Make sure that the SINT_TO_FP and SRL instructions are available. 
- if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, VT) || - !TLI.isOperationLegalOrCustom(ISD::SRL, VT)) - return DAG.UnrollVectorOp(Op.getNode()); + if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::SRL, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); EVT SVT = VT.getScalarType(); assert((SVT.getSizeInBits() == 64 || SVT.getSizeInBits() == 32) && -- cgit v1.1 From fd230df463107f6439a866103df6201bb3b0ef40 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 19 Oct 2011 22:22:54 +0000 Subject: Fix TLS lowering bug. The CopyFromReg must be glued to the TLSCALL. rdar://10291355 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142550 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 410cc95..f1b160c 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -7527,7 +7527,8 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { // And our return value (tls address) is in the standard call return value // location. unsigned Reg = Subtarget->is64Bit() ? X86::RAX : X86::EAX; - return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy()); + return DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(), + Chain.getValue(1)); } assert(false && -- cgit v1.1 From 9be72d43948b3bad4a0ac8ea01e24fd36e6db615 Mon Sep 17 00:00:00 2001 From: Devang Patel Date: Wed, 19 Oct 2011 23:56:07 +0000 Subject: Initialize ScalarEvolution dependency. Patch by Pranav Bhandarkar! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142556 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LoopUnrollPass.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index 91395b2..37f4c2c 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -101,6 +101,7 @@ INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false) INITIALIZE_PASS_DEPENDENCY(LoopInfo) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) INITIALIZE_PASS_DEPENDENCY(LCSSA) +INITIALIZE_PASS_DEPENDENCY(ScalarEvolution) INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false) Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { -- cgit v1.1 From 6690bca623d1f6405b95db5b1760f7ba8436e3fb Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 20 Oct 2011 00:07:12 +0000 Subject: Revert 142337. Thumb1 still doesn't support dynamic stack realignment. :( git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142557 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 7 +++++-- lib/Target/ARM/ARMFrameLowering.cpp | 6 ++---- lib/Target/ARM/Thumb1FrameLowering.cpp | 30 ++++-------------------------- 3 files changed, 11 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 48e3c52..7c42342 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -626,10 +626,13 @@ bool ARMBaseRegisterInfo::hasBasePointer(const MachineFunction &MF) const { bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { const MachineFrameInfo *MFI = MF.getFrameInfo(); + const ARMFunctionInfo *AFI = MF.getInfo(); // We can't realign the stack if: // 1. Dynamic stack realignment is explicitly disabled, - // 2. 
There are VLAs in the function and the base pointer is disabled. - return (RealignStack && (!MFI->hasVarSizedObjects() || EnableBasePointer)); + // 2. This is a Thumb1 function (it's not useful, so we don't bother), or + // 3. There are VLAs in the function and the base pointer is disabled. + return (RealignStack && !AFI->isThumb1OnlyFunction() && + (!MFI->hasVarSizedObjects() || EnableBasePointer)); } bool ARMBaseRegisterInfo:: diff --git a/lib/Target/ARM/ARMFrameLowering.cpp b/lib/Target/ARM/ARMFrameLowering.cpp index 4bac6c5..2d1de6f 100644 --- a/lib/Target/ARM/ARMFrameLowering.cpp +++ b/lib/Target/ARM/ARMFrameLowering.cpp @@ -881,12 +881,10 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // for sure what the stack size will be, but for this, an estimate is good // enough. If there anything changes it, it'll be a spill, which implies // we've used all the registers and so R4 is already used, so not marking - // it here will be OK. Also spill R4 if Thumb1 function requires stack - // realignment. + // it here will be OK. // FIXME: It will be better just to find spare register here. unsigned StackSize = estimateStackSize(MF); - if (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF) || - StackSize > 508) + if (MFI->hasVarSizedObjects() || StackSize > 508) MF.getRegInfo().setPhysRegUsed(ARM::R4); } diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index ad1edc8..d848177 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -155,32 +155,10 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); AFI->setDPRCalleeSavedAreaSize(DPRCSSize); - // If we need dynamic stack realignment, do it here. Be paranoid and make - // sure if we also have VLAs, we have a base pointer for frame access. 
- if (RegInfo->needsStackRealignment(MF)) { - // We cannot use sp as source/dest register here, thus we're emitting the - // following sequence: - // mov r4, sp - // lsrs r4, r4, Log2MaxAlign - // lsls r4, r4, Log2MaxAlign - // mov sp, r4 - unsigned MaxAlign = MFI->getMaxAlignment(); - unsigned Log2MaxAlign = Log2_32(MaxAlign); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) - .addReg(ARM::SP, RegState::Kill)); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), - ARM::R4)) - .addReg(ARM::R4, RegState::Kill) - .addImm(Log2MaxAlign)); - AddDefaultPred(AddDefaultT1CC(BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), - ARM::R4)) - .addReg(ARM::R4, RegState::Kill) - .addImm(Log2MaxAlign)); - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) - .addReg(ARM::R4, RegState::Kill)); - - AFI->setShouldRestoreSPFromFP(true); - } + // Thumb1 does not currently support dynamic stack realignment. Report a + // fatal error rather than silently generate bad code. + if (RegInfo->needsStackRealignment(MF)) + report_fatal_error("Dynamic stack realignment not supported for thumb1."); // If we need a base pointer, set it up here. It's whatever the value // of the stack pointer is at this point. Any variable size objects -- cgit v1.1 From 0cd0fee91eadcee37d01398e05176e7c63bda2a7 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 20 Oct 2011 00:34:35 +0000 Subject: "@string = constant i8 0" is a value i8* string of length zero. Analyze that correctly in GetStringLength, fixing PR11181! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142558 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ValueTracking.cpp | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 4d94f61..e80ee65 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -1525,8 +1525,7 @@ Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, /// null-terminated C string pointed to by V. If successful, it returns true /// and returns the string in Str. If unsuccessful, it returns false. bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, - uint64_t Offset, - bool StopAtNul) { + uint64_t Offset, bool StopAtNul) { // If V is NULL then return false; if (V == NULL) return false; @@ -1536,7 +1535,7 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, // If the value is not a GEP instruction nor a constant expression with a // GEP instruction, then return false because ConstantArray can't occur - // any other way + // any other way. const User *GEP = 0; if (const GetElementPtrInst *GEPI = dyn_cast(V)) { GEP = GEPI; @@ -1576,7 +1575,7 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, return GetConstantStringInfo(GEP->getOperand(0), Str, StartIdx+Offset, StopAtNul); } - + // The GEP instruction, constant or instruction, must reference a global // variable that is a constant and is initialized. The referenced constant // initializer is the array that we'll use for optimization. @@ -1585,8 +1584,8 @@ bool llvm::GetConstantStringInfo(const Value *V, std::string &Str, return false; const Constant *GlobalInit = GV->getInitializer(); - // Handle the ConstantAggregateZero case - if (isa(GlobalInit)) { + // Handle the all-zeros case + if (GlobalInit->isNullValue()) { // This is a degenerate case. 
The initializer is constant zero so the // length of the string must be zero. Str.clear(); @@ -1667,6 +1666,14 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { return Len1; } + // As a special-case, "@string = constant i8 0" is also a string with zero + // length, not wrapped in a bitcast or GEP. + if (GlobalVariable *GV = dyn_cast(V)) { + if (GV->isConstant() && GV->hasDefinitiveInitializer()) + if (GV->getInitializer()->isNullValue()) return 1; + return 0; + } + // If the value is not a GEP instruction nor a constant expression with a // GEP instruction, then return unknown. User *GEP = 0; -- cgit v1.1 From 400ea5bde0a1154ba68ef7a0b1af6f0274cd6916 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 20 Oct 2011 03:23:14 +0000 Subject: Simplify; no intended functional change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142567 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/IPA/CallGraph.cpp | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index 2e79eab..0df3e8a 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -127,16 +127,9 @@ private: } } - // Loop over all of the users of the function, looking for non-call uses. - for (Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E; ++I){ - User *U = *I; - if ((!isa(U) && !isa(U)) - || !CallSite(cast(U)).isCallee(I)) { - // Not a call, or being used as a parameter rather than as the callee. - ExternalCallingNode->addCalledFunction(CallSite(), Node); - break; - } - } + // If this function has its address taken, anything could call it. + if (F->hasAddressTaken()) + ExternalCallingNode->addCalledFunction(CallSite(), Node); // If this function is not defined in this translation unit, it could call // anything. 
-- cgit v1.1 From 531afb166c28d20b34de657ef772694e30775ca0 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 20 Oct 2011 04:05:33 +0000 Subject: A FIXME about block addresses and indirectbr. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142569 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InlineCost.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 40ac9a2..1f332e8 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -135,6 +135,12 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // for example) would be referring to the original function, and this indirect // jump would jump from the inlined copy of the function into the original // function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions + // with indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably + // don't want to inline this function. if (isa(BB->getTerminator())) containsIndirectBr = true; -- cgit v1.1 From c66330504c3f433430a28cd7f7f981e555c51bce Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 20 Oct 2011 05:23:42 +0000 Subject: Refactor code from inlining and globalopt that checks whether a function definition is unused, and enhance it so it can tell that functions which are only used by a blockaddress are in fact dead. This probably doesn't happen much on most code, but the Linux kernel's _THIS_IP_ can trigger this issue with blockaddress. (GlobalDCE can also handle the given testcase, but we only run that at -O3.) Found while looking at PR11180. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142572 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/GlobalOpt.cpp | 2 +- lib/Transforms/IPO/Inliner.cpp | 5 +---- lib/Transforms/Utils/BasicInliner.cpp | 4 ++-- lib/VMCore/Function.cpp | 15 +++++++++++++++ 4 files changed, 19 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp index 3552d03..c57e9fc 100644 --- a/lib/Transforms/IPO/GlobalOpt.cpp +++ b/lib/Transforms/IPO/GlobalOpt.cpp @@ -1890,7 +1890,7 @@ bool GlobalOpt::OptimizeFunctions(Module &M) { if (!F->hasName() && !F->isDeclaration()) F->setLinkage(GlobalValue::InternalLinkage); F->removeDeadConstantUsers(); - if (F->use_empty() && (F->hasLocalLinkage() || F->hasLinkOnceLinkage())) { + if (F->isDefTriviallyDead()) { F->eraseFromParent(); Changed = true; ++NumFnDeleted; diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp index f00935b..bdc9fe4 100644 --- a/lib/Transforms/IPO/Inliner.cpp +++ b/lib/Transforms/IPO/Inliner.cpp @@ -533,10 +533,7 @@ bool Inliner::removeDeadFunctions(CallGraph &CG, if (DNR && DNR->count(F)) continue; - if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() && - !F->hasAvailableExternallyLinkage()) - continue; - if (!F->use_empty()) + if (!F->isDefTriviallyDead()) continue; // Remove any call graph edges from the function to its callees. 
diff --git a/lib/Transforms/Utils/BasicInliner.cpp b/lib/Transforms/Utils/BasicInliner.cpp index 23a30cc..50c91b6 100644 --- a/lib/Transforms/Utils/BasicInliner.cpp +++ b/lib/Transforms/Utils/BasicInliner.cpp @@ -131,8 +131,8 @@ void BasicInlinerImpl::inlineFunctions() { // Inline InlineFunctionInfo IFI(0, TD); if (InlineFunction(CS, IFI)) { - if (Callee->use_empty() && (Callee->hasLocalLinkage() || - Callee->hasAvailableExternallyLinkage())) + Callee->removeDeadConstantUsers(); + if (Callee->isDefTriviallyDead()) DeadFunctions.insert(Callee); Changed = true; CallSites.erase(CallSites.begin() + index); diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp index be0f056..bb8f62a 100644 --- a/lib/VMCore/Function.cpp +++ b/lib/VMCore/Function.cpp @@ -402,6 +402,7 @@ Function *Intrinsic::getDeclaration(Module *M, ID id, ArrayRef Tys) { bool Function::hasAddressTaken(const User* *PutOffender) const { for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) { const User *U = *I; + // FIXME: Check for blockaddress, which does not take the address. if (!isa(U) && !isa(U)) return PutOffender ? (*PutOffender = U, true) : true; ImmutableCallSite CS(cast(U)); @@ -411,6 +412,20 @@ bool Function::hasAddressTaken(const User* *PutOffender) const { return false; } +bool Function::isDefTriviallyDead() const { + // Check the linkage + if (!hasLinkOnceLinkage() && !hasLocalLinkage() && + !hasAvailableExternallyLinkage()) + return false; + + // Check if the function is used by anything other than a blockaddress. + for (Value::const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) + if (!isa(*I)) + return false; + + return true; +} + /// callsFunctionThatReturnsTwice - Return true if the function has a call to /// setjmp or other function that gcc recognizes as "returning twice". 
bool Function::callsFunctionThatReturnsTwice() const { -- cgit v1.1 From 28e65b52458f944c8204fecf7eab466c90b6686f Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 20 Oct 2011 13:38:16 +0000 Subject: Fix a type in the legalization of CONCAT_VECTORS. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142579 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a5c4c2d..4553071 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2926,7 +2926,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue Op0 = N->getOperand(1); + SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); assert(Op0.getValueType() == Op1.getValueType() && "Invalid input vector types"); -- cgit v1.1 From d0b614754eb2d5ce9c2b0841270872129f956059 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 20 Oct 2011 14:48:50 +0000 Subject: ARM VTBX (one register) assembly parsing and encoding. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142581 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 412b3ca..7187ab0 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -4906,10 +4906,10 @@ def VTBL4Pseudo // VTBX : Vector Table Extension def VTBX1 : N3V<1,1,0b11,0b1000,1,0, (outs DPR:$Vd), - (ins DPR:$orig, DPR:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1, - "vtbx", "8", "$Vd, \\{$Vn\\}, $Vm", "$orig = $Vd", + (ins DPR:$orig, VecListOneD:$Vn, DPR:$Vm), NVTBLFrm, IIC_VTBX1, + "vtbx", "8", "$Vd, $Vn, $Vm", "$orig = $Vd", [(set DPR:$Vd, (v8i8 (int_arm_neon_vtbx1 - DPR:$orig, DPR:$Vn, DPR:$Vm)))]>; + DPR:$orig, VecListOneD:$Vn, DPR:$Vm)))]>; let hasExtraSrcRegAllocReq = 1 in { def VTBX2 : N3V<1,1,0b11,0b1001,1,0, (outs DPR:$Vd), -- cgit v1.1 From 6b09c77b7a831f57ccedb20c760031492a0af043 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 20 Oct 2011 15:04:25 +0000 Subject: ARM VLD1/VST1 (one register, no writeback) assembly parsing and encoding. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142583 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 7187ab0..d940089 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -245,9 +245,9 @@ let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1 in { // VLD1 : Vector Load (multiple single elements) class VLD1D op7_4, string Dt> - : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd), + : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd), (ins addrmode6:$Rn), IIC_VLD1, - "vld1", Dt, "\\{$Vd\\}, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; @@ -1138,8 +1138,8 @@ class VSTQQQQWBPseudo // VST1 : Vector Store (multiple single elements) class VST1D op7_4, string Dt> - : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd), - IIC_VST1, "vst1", Dt, "\\{$Vd\\}, $Rn", "", []> { + : NLdSt<0,0b00,0b0111,op7_4, (outs), (ins addrmode6:$Rn, VecListOneD:$Vd), + IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVSTInstruction"; -- cgit v1.1 From 01817c39a9d7ff864d0b5de4941eec93d2f9e3a8 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 20 Oct 2011 17:28:20 +0000 Subject: Tidy up. Trailing whitespace. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142591 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 7215ed3..b1f7fd6 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -527,7 +527,7 @@ static bool tryAddingSymbolicOperand(uint64_t Address, int32_t Value, MI.addOperand(MCOperand::CreateExpr(ARMMCExpr::CreateLower16(Expr, *Ctx))); else if (SymbolicOp.VariantKind == LLVMDisassembler_VariantKind_None) MI.addOperand(MCOperand::CreateExpr(Expr)); - else + else assert(0 && "bad SymbolicOp.VariantKind"); return true; @@ -3074,7 +3074,7 @@ static DecodeStatus DecodePostIdxReg(llvm::MCInst &Inst, unsigned Insn, static DecodeStatus DecodeThumbBLXOffset(llvm::MCInst &Inst, unsigned Val, uint64_t Address, const void *Decoder) { - if (!tryAddingSymbolicOperand(Address, + if (!tryAddingSymbolicOperand(Address, (Address & ~2u) + SignExtend32<22>(Val << 1) + 4, true, 4, Inst, Decoder)) Inst.addOperand(MCOperand::CreateImm(SignExtend32<22>(Val << 1))); -- cgit v1.1 From e6de9f30cbdbeca7f6632420f2cd5728d9a2dc1c Mon Sep 17 00:00:00 2001 From: Devang Patel Date: Thu, 20 Oct 2011 17:31:18 +0000 Subject: Add a comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142592 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineLICM.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 969a9b0..c693386 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -788,7 +788,9 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // If it is load then check if it is guaranteed to execute by making sure that // it dominates all exiting blocks. 
If it doesn't, then there is a path out of - // the loop which does not execute this load, so we can't hoist it. + // the loop which does not execute this load, so we can't hoist it. Loads + // from constant memory are not safe to speculate all the time, for example + // indexed load from a jump table. // Stores and side effects are already checked by isSafeToMove. if (I.getDesc().mayLoad() && !isLoadFromGOT(I) && !IsGuaranteedToExecute(I.getParent())) -- cgit v1.1 From 06e16bbec02d289552f942abe7a6353b51cdb5ea Mon Sep 17 00:00:00 2001 From: Devang Patel Date: Thu, 20 Oct 2011 17:42:23 +0000 Subject: As Evan suggested, loads from constant pool are safe to speculate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142593 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineLICM.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index c693386..964e971 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -762,15 +762,15 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI) { } } -/// isLoadFromGOT - Return true if this machine instruction loads from -/// global offset table. -static bool isLoadFromGOT(MachineInstr &MI) { +/// isLoadFromGOTOrConstantPool - Return true if this machine instruction +/// loads from global offset table or constant pool. 
+static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { assert (MI.getDesc().mayLoad() && "Expected MI that loads!"); for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), E = MI.memoperands_end(); I != E; ++I) { if (const Value *V = (*I)->getValue()) { if (const PseudoSourceValue *PSV = dyn_cast(V)) - if (PSV == PSV->getGOT()) + if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool()) return true; } } @@ -792,7 +792,7 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // from constant memory are not safe to speculate all the time, for example // indexed load from a jump table. // Stores and side effects are already checked by isSafeToMove. - if (I.getDesc().mayLoad() && !isLoadFromGOT(I) && + if (I.getDesc().mayLoad() && !isLoadFromGOTOrConstantPool(I) && !IsGuaranteedToExecute(I.getParent())) return false; -- cgit v1.1 From e65177f965063c16321166c13d90a91aa57b5ee8 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 20 Oct 2011 19:19:10 +0000 Subject: Revert r142579, "Fix a type in the legalization of CONCAT_VECTORS". This is causing one of the unit tests to infinitely loop, which resulted in the buildbots stalling. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142604 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 4553071..a5c4c2d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2926,7 +2926,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue Op0 = N->getOperand(0); + SDValue Op0 = N->getOperand(1); SDValue Op1 = N->getOperand(1); assert(Op0.getValueType() == Op1.getValueType() && "Invalid input vector types"); -- cgit v1.1 From 1dda3d511e19918c4487e9d5a45eb5856284494e Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 20 Oct 2011 19:24:44 +0000 Subject: Haven't yet found a nice way to handle TargetData verification in the AsmParser. This patch adds validation for target data layout strings upon construction of TargetData objects. An attempt to construct a TargetData object from a malformed string will trigger an assertion. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142605 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/TargetData.cpp | 113 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 88 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp index bd6a6b6..ff60e0b 100644 --- a/lib/Target/TargetData.cpp +++ b/lib/Target/TargetData.cpp @@ -125,15 +125,15 @@ const TargetAlignElem TargetData::InvalidAlignmentElem = //===----------------------------------------------------------------------===// /// getInt - Get an integer ignoring errors. 
-static unsigned getInt(StringRef R) { - unsigned Result = 0; +static int getInt(StringRef R) { + int Result = 0; R.getAsInteger(10, Result); return Result; } -void TargetData::init(StringRef Desc) { +void TargetData::init() { initializeTargetDataPass(*PassRegistry::getPassRegistry()); - + LayoutMap = 0; LittleEndian = false; PointerMemSize = 8; @@ -152,6 +152,12 @@ void TargetData::init(StringRef Desc) { setAlignment(VECTOR_ALIGN, 8, 8, 64); // v2i32, v1i64, ... setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ... setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct +} + +std::string TargetData::parseSpecifier(StringRef Desc, TargetData *td) { + + if (td) + td->init(); while (!Desc.empty()) { std::pair Split = Desc.split('-'); @@ -169,28 +175,54 @@ void TargetData::init(StringRef Desc) { switch (Specifier[0]) { case 'E': - LittleEndian = false; + if (td) + td->LittleEndian = false; break; case 'e': - LittleEndian = true; + if (td) + td->LittleEndian = true; break; - case 'p': + case 'p': { + // Pointer size. Split = Token.split(':'); - PointerMemSize = getInt(Split.first) / 8; + int PointerMemSizeBits = getInt(Split.first); + if (PointerMemSizeBits < 0 || PointerMemSizeBits % 8 != 0) + return "invalid pointer size, must be a positive 8-bit multiple"; + if (td) + td->PointerMemSize = PointerMemSizeBits / 8; + + // Pointer ABI alignment. Split = Split.second.split(':'); - PointerABIAlign = getInt(Split.first) / 8; + int PointerABIAlignBits = getInt(Split.first); + if (PointerABIAlignBits < 0 || PointerABIAlignBits % 8 != 0) { + return "invalid pointer ABI alignment, " + "must be a positive 8-bit multiple"; + } + if (td) + td->PointerABIAlign = PointerABIAlignBits / 8; + + // Pointer preferred alignment. 
Split = Split.second.split(':'); - PointerPrefAlign = getInt(Split.first) / 8; - if (PointerPrefAlign == 0) - PointerPrefAlign = PointerABIAlign; + int PointerPrefAlignBits = getInt(Split.first); + if (PointerPrefAlignBits < 0 || PointerPrefAlignBits % 8 != 0) { + return "invalid pointer preferred alignment, " + "must be a positive 8-bit multiple"; + } + if (td) { + td->PointerPrefAlign = PointerPrefAlignBits / 8; + if (td->PointerPrefAlign == 0) + td->PointerPrefAlign = td->PointerABIAlign; + } break; + } case 'i': case 'v': case 'f': case 'a': case 's': { AlignTypeEnum AlignType; - switch (Specifier[0]) { + char field = Specifier[0]; + switch (field) { default: case 'i': AlignType = INTEGER_ALIGN; break; case 'v': AlignType = VECTOR_ALIGN; break; @@ -198,37 +230,66 @@ void TargetData::init(StringRef Desc) { case 'a': AlignType = AGGREGATE_ALIGN; break; case 's': AlignType = STACK_ALIGN; break; } - unsigned Size = getInt(Specifier.substr(1)); + int Size = getInt(Specifier.substr(1)); + if (Size < 0) { + return std::string("invalid ") + field + "-size field, " + "must be positive"; + } + Split = Token.split(':'); - unsigned ABIAlign = getInt(Split.first) / 8; + int ABIAlignBits = getInt(Split.first); + if (ABIAlignBits < 0 || ABIAlignBits % 8 != 0) { + return std::string("invalid ") + field +"-abi-alignment field, " + "must be a positive 8-bit multiple"; + } + unsigned ABIAlign = ABIAlignBits / 8; Split = Split.second.split(':'); - unsigned PrefAlign = getInt(Split.first) / 8; + + int PrefAlignBits = getInt(Split.first); + if (PrefAlignBits < 0 || PrefAlignBits % 8 != 0) { + return std::string("invalid ") + field +"-preferred-alignment field, " + "must be a positive 8-bit multiple"; + } + unsigned PrefAlign = PrefAlignBits / 8; if (PrefAlign == 0) PrefAlign = ABIAlign; - setAlignment(AlignType, ABIAlign, PrefAlign, Size); + + if (td) + td->setAlignment(AlignType, ABIAlign, PrefAlign, Size); break; } case 'n': // Native integer types. 
Specifier = Specifier.substr(1); do { - if (unsigned Width = getInt(Specifier)) - LegalIntWidths.push_back(Width); + int Width = getInt(Specifier); + if (Width <= 0) { + return std::string("invalid native integer size \'") + Specifier.str() + + "\', must be a positive integer."; + } + if (td && Width != 0) + td->LegalIntWidths.push_back(Width); Split = Token.split(':'); Specifier = Split.first; Token = Split.second; } while (!Specifier.empty() || !Token.empty()); break; - case 'S': // Stack natural alignment. - StackNaturalAlign = getInt(Specifier.substr(1)); - StackNaturalAlign /= 8; - // FIXME: Should we really be truncating these alingments and - // sizes silently? + case 'S': { // Stack natural alignment. + int StackNaturalAlignBits = getInt(Specifier.substr(1)); + if (StackNaturalAlignBits < 0 || StackNaturalAlignBits % 8 != 0) { + return "invalid natural stack alignment (S-field), " + "must be a positive 8-bit multiple"; + } + if (td) + td->StackNaturalAlign = StackNaturalAlignBits / 8; break; + } default: break; } } + + return ""; } /// Default ctor. @@ -242,7 +303,9 @@ TargetData::TargetData() : ImmutablePass(ID) { TargetData::TargetData(const Module *M) : ImmutablePass(ID) { - init(M->getDataLayout()); + std::string errMsg = parseSpecifier(M->getDataLayout(), this); + assert(errMsg == "" && "Module M has malformed target data layout string."); + (void)errMsg; } void -- cgit v1.1 From 767f8be9eed51f41c8ad03de7684761f82bf26c9 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Thu, 20 Oct 2011 20:37:11 +0000 Subject: Add missing operand. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142615 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 34023af..7df743b 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -5924,7 +5924,8 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { unsigned VReg1 = MRI->createVirtualRegister(TRC); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::LDRcp)) .addReg(VReg1, RegState::Define) - .addConstantPoolIndex(Idx)); + .addConstantPoolIndex(Idx) + .addImm(0)); AddDefaultPred(BuildMI(DispatchBB, dl, TII->get(ARM::CMPrr)) .addReg(NewVReg1) .addReg(VReg1, RegState::Kill)); -- cgit v1.1 From a3a6f215d0a56adb7fee009d3f0f8b55e26137fd Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 20 Oct 2011 21:24:38 +0000 Subject: Separate out ARM MSR instructions into M-class versions and AR-class versions. This fixes some roundtripping failures. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142618 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 46 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b9cbc83..d5f0c0a 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4562,8 +4562,13 @@ def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, // same and the assembly parser has no way to distinguish between them. The mask // operand contains the special register (R Bit) in bit 4 and bits 3-0 contains // the mask with the fields to be accessed in the special register. 
-def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, - "msr", "\t$mask, $Rn", []> { +// +// NOTE: There are separate versions of these instructions for M-class versus +// AR-class processors. M-class processors can accept a wider range of +// mask values than AR-class processors can. +def MSRm : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, + "msr", "\t$mask, $Rn", []>, + Requires<[IsMClass]> { bits<5> mask; bits<4> Rn; @@ -4576,8 +4581,9 @@ def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, let Inst{3-0} = Rn; } -def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, - "msr", "\t$mask, $a", []> { +def MSRmi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, + "msr", "\t$mask, $a", []>, + Requires<[IsMClass]> { bits<5> mask; bits<12> a; @@ -4589,6 +4595,38 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, let Inst{11-0} = a; } +def MSRar : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, + "msr", "\t$mask, $Rn", []>, + Requires<[IsARClass]> { + bits<5> mask; + bits<4> Rn; + + let Inst{23} = 0; + let Inst{22} = 0; + let Inst{21-20} = 0b10; + let Inst{19-18} = mask{3-2}; + let Inst{17-16} = 0b00; + let Inst{15-12} = 0b1111; + let Inst{11-4} = 0b00000000; + let Inst{3-0} = Rn; +} + +def MSRari : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, + "msr", "\t$mask, $a", []>, + Requires<[IsARClass]> { + bits<5> mask; + bits<12> a; + + let Inst{23} = 0; + let Inst{22} = 0; + let Inst{21-20} = 0b10; + let Inst{19-18} = mask{3-2}; + let Inst{17-16} = 0b00; + let Inst{15-12} = 0b1111; + let Inst{11-0} = a; +} + + //===----------------------------------------------------------------------===// // TLS Instructions // -- cgit v1.1 From ff764815e6531be6b2d944bd6a3f1fcfc682db01 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 20 Oct 2011 21:44:34 +0000 Subject: Delete the list-tdrr scheduler. 
Top-down schedulers are going away because they don't support physical register dependencies. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142620 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 214 ++----------------------- 1 file changed, 11 insertions(+), 203 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index e757def..1729910 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -45,10 +45,6 @@ static RegisterScheduler "Bottom-up register reduction list scheduling", createBURRListDAGScheduler); static RegisterScheduler - tdrListrDAGScheduler("list-tdrr", - "Top-down register reduction list scheduling", - createTDRRListDAGScheduler); -static RegisterScheduler sourceListDAGScheduler("source", "Similar to list-burr but schedules in source " "order when possible", @@ -121,10 +117,6 @@ namespace { /// class ScheduleDAGRRList : public ScheduleDAGSDNodes { private: - /// isBottomUp - This is true if the scheduling problem is bottom-up, false if - /// it is top-down. - bool isBottomUp; - /// NeedLatency - True if the scheduler will make use of latency information. 
/// bool NeedLatency; @@ -166,7 +158,7 @@ public: ScheduleDAGRRList(MachineFunction &mf, bool needlatency, SchedulingPriorityQueue *availqueue, CodeGenOpt::Level OptLevel) - : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()), + : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), Topo(SUnits) { @@ -221,8 +213,6 @@ private: void ReleasePred(SUnit *SU, const SDep *PredEdge); void ReleasePredecessors(SUnit *SU); - void ReleaseSucc(SUnit *SU, const SDep *SuccEdge); - void ReleaseSuccessors(SUnit *SU); void ReleasePending(); void AdvanceToCycle(unsigned NextCycle); void AdvancePastStalls(SUnit *SU); @@ -242,10 +232,6 @@ private: SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); - void ScheduleNodeTopDown(SUnit*); - void ListScheduleTopDown(); - - /// CreateNewSUnit - Creates a new SUnit and returns a pointer to it. /// Updates the topological ordering if required. SUnit *CreateNewSUnit(SDNode *N) { @@ -343,11 +329,8 @@ void ScheduleDAGRRList::Schedule() { HazardRec->Reset(); - // Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate. - if (isBottomUp) - ListScheduleBottomUp(); - else - ListScheduleTopDown(); + // Execute the actual scheduling loop. + ListScheduleBottomUp(); #ifndef NDEBUG for (int i = 0; i < NumFactors; ++i) { @@ -457,8 +440,7 @@ void ScheduleDAGRRList::ReleasePending() { // Check to see if any of the pending instructions are ready to issue. If // so, add them to the available queue. for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - unsigned ReadyCycle = - isBottomUp ? 
PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth(); + unsigned ReadyCycle = PendingQueue[i]->getHeight(); if (ReadyCycle < MinAvailableCycle) MinAvailableCycle = ReadyCycle; @@ -487,10 +469,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { } else { for (; CurCycle != NextCycle; ++CurCycle) { - if (isBottomUp) - HazardRec->RecedeCycle(); - else - HazardRec->AdvanceCycle(); + HazardRec->RecedeCycle(); } } // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the @@ -511,7 +490,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { // currently need to treat these nodes like real instructions. // if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return; - unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); + unsigned ReadyCycle = SU->getHeight(); // Bump CurCycle to account for latency. We assume the latency of other // available instructions may be hidden by the stall (not a full pipe stall). @@ -522,7 +501,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { // Calls are scheduled in their preceding cycle, so don't conflict with // hazards from instructions after the call. EmitNode will reset the // scoreboard state before emitting the call. - if (isBottomUp && SU->isCall) + if (SU->isCall) return; // FIXME: For resource conflicts in very long non-pipelined stages, we @@ -530,7 +509,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { int Stalls = 0; while (true) { ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls); + HazardRec->getHazardType(SU, -Stalls); if (HT == ScheduleHazardRecognizer::NoHazard) break; @@ -568,17 +547,13 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) { HazardRec->Reset(); return; } - if (isBottomUp && SU->isCall) { + if (SU->isCall) { // Calls are scheduled with their preceding instructions. For bottom-up // scheduling, clear the pipeline state before emitting. 
HazardRec->Reset(); } HazardRec->EmitInstruction(SU); - - if (!isBottomUp && SU->isCall) { - HazardRec->Reset(); - } } static void resetVRegCycle(SUnit *SU); @@ -1300,100 +1275,11 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { std::reverse(Sequence.begin(), Sequence.end()); #ifndef NDEBUG - VerifySchedule(isBottomUp); + VerifySchedule(/*isBottomUp=*/true); #endif } //===----------------------------------------------------------------------===// -// Top-Down Scheduling -//===----------------------------------------------------------------------===// - -/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to -/// the AvailableQueue if the count reaches zero. Also update its cycle bound. -void ScheduleDAGRRList::ReleaseSucc(SUnit *SU, const SDep *SuccEdge) { - SUnit *SuccSU = SuccEdge->getSUnit(); - -#ifndef NDEBUG - if (SuccSU->NumPredsLeft == 0) { - dbgs() << "*** Scheduling failed! ***\n"; - SuccSU->dump(this); - dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); - } -#endif - --SuccSU->NumPredsLeft; - - // If all the node's predecessors are scheduled, this node is ready - // to be scheduled. Ignore the special ExitSU node. - if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) { - SuccSU->isAvailable = true; - AvailableQueue->push(SuccSU); - } -} - -void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) { - // Top down: release successors - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - assert(!I->isAssignedRegDep() && - "The list-tdrr scheduler doesn't yet support physreg dependencies!"); - - ReleaseSucc(SU, &*I); - } -} - -/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending -/// count of its successors. If a successor pending count is zero, add it to -/// the Available queue. 
-void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); - - assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); - SU->setDepthToAtLeast(CurCycle); - Sequence.push_back(SU); - - ReleaseSuccessors(SU); - SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); -} - -/// ListScheduleTopDown - The main loop of list scheduling for top-down -/// schedulers. -void ScheduleDAGRRList::ListScheduleTopDown() { - AvailableQueue->setCurCycle(CurCycle); - - // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU); - - // All leaves to Available queue. - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - // It is available if it has no predecessors. - if (SUnits[i].Preds.empty()) { - AvailableQueue->push(&SUnits[i]); - SUnits[i].isAvailable = true; - } - } - - // While Available queue is not empty, grab the node with the highest - // priority. If it is not ready put it back. Schedule the node. - Sequence.reserve(SUnits.size()); - while (!AvailableQueue->empty()) { - SUnit *CurSU = AvailableQueue->pop(); - - if (CurSU) - ScheduleNodeTopDown(CurSU); - ++CurCycle; - AvailableQueue->setCurCycle(CurCycle); - } - -#ifndef NDEBUG - VerifySchedule(isBottomUp); -#endif -} - - -//===----------------------------------------------------------------------===// // RegReductionPriorityQueue Definition //===----------------------------------------------------------------------===// // @@ -1437,21 +1323,6 @@ struct bu_ls_rr_sort : public queue_sort { bool operator()(SUnit* left, SUnit* right) const; }; -// td_ls_rr_sort - Priority function for top down register pressure reduction -// scheduler. 
-struct td_ls_rr_sort : public queue_sort { - enum { - IsBottomUp = false, - HasReadyFilter = false - }; - - RegReductionPQBase *SPQ; - td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {} - td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {} - - bool operator()(const SUnit* left, const SUnit* right) const; -}; - // src_ls_rr_sort - Priority function for source order scheduler. struct src_ls_rr_sort : public queue_sort { enum { @@ -1680,10 +1551,7 @@ public: SF DumpPicker = Picker; while (!DumpQueue.empty()) { SUnit *SU = popFromQueue(DumpQueue, DumpPicker, scheduleDAG); - if (isBottomUp()) - dbgs() << "Height " << SU->getHeight() << ": "; - else - dbgs() << "Depth " << SU->getDepth() << ": "; + dbgs() << "Height " << SU->getHeight() << ": "; SU->dump(DAG); } } @@ -1692,9 +1560,6 @@ public: typedef RegReductionPriorityQueue BURegReductionPriorityQueue; -typedef RegReductionPriorityQueue -TDRegReductionPriorityQueue; - typedef RegReductionPriorityQueue SrcRegReductionPriorityQueue; @@ -2907,49 +2772,6 @@ static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, return Sum; } - -// Top down -bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { - if (int res = checkSpecialNodes(left, right)) - return res < 0; - - unsigned LPriority = SPQ->getNodePriority(left); - unsigned RPriority = SPQ->getNodePriority(right); - bool LIsTarget = left->getNode() && left->getNode()->isMachineOpcode(); - bool RIsTarget = right->getNode() && right->getNode()->isMachineOpcode(); - bool LIsFloater = LIsTarget && left->NumPreds == 0; - bool RIsFloater = RIsTarget && right->NumPreds == 0; - unsigned LBonus = (LimitedSumOfUnscheduledPredsOfSuccs(left,1) == 1) ? 2 : 0; - unsigned RBonus = (LimitedSumOfUnscheduledPredsOfSuccs(right,1) == 1) ? 
2 : 0; - - if (left->NumSuccs == 0 && right->NumSuccs != 0) - return false; - else if (left->NumSuccs != 0 && right->NumSuccs == 0) - return true; - - if (LIsFloater) - LBonus -= 2; - if (RIsFloater) - RBonus -= 2; - if (left->NumSuccs == 1) - LBonus += 2; - if (right->NumSuccs == 1) - RBonus += 2; - - if (LPriority+LBonus != RPriority+RBonus) - return LPriority+LBonus < RPriority+RBonus; - - if (left->getDepth() != right->getDepth()) - return left->getDepth() < right->getDepth(); - - if (left->NumSuccsLeft != right->NumSuccsLeft) - return left->NumSuccsLeft > right->NumSuccsLeft; - - assert(left->NodeQueueId && right->NodeQueueId && - "NodeQueueId cannot be zero"); - return (left->NodeQueueId > right->NodeQueueId); -} - //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// @@ -2969,20 +2791,6 @@ llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, } llvm::ScheduleDAGSDNodes * -llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, - CodeGenOpt::Level OptLevel) { - const TargetMachine &TM = IS->TM; - const TargetInstrInfo *TII = TM.getInstrInfo(); - const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - - TDRegReductionPriorityQueue *PQ = - new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0); - ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel); - PQ->setScheduleDAG(SD); - return SD; -} - -llvm::ScheduleDAGSDNodes * llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetMachine &TM = IS->TM; -- cgit v1.1 From 5bdab4a63cab2ed2f96a3490fa2349550da7c7f9 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 20 Oct 2011 21:45:36 +0000 Subject: Disable the PPC hazard recognizer. It currently only supports top-down scheduling and top-down scheduling is going away. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142621 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 649a45a..9517417 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -57,10 +57,16 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( unsigned Directive = TM->getSubtarget().getDarwinDirective(); if (Directive == PPC::DIR_440) { const InstrItineraryData *II = TM->getInstrItineraryData(); - return new PPCHazardRecognizer440(II, DAG); + // Disable the hazard recognizer for now, as it doesn't support + // bottom-up scheduling. + //return new PPCHazardRecognizer440(II, DAG); + return new ScheduleHazardRecognizer(); } else { - return new PPCHazardRecognizer970(*TII); + // Disable the hazard recognizer for now, as it doesn't support + // bottom-up scheduling. + //return new PPCHazardRecognizer970(*TII); + return new ScheduleHazardRecognizer(); } } -- cgit v1.1 From e1d0b4fc252443299db87264aa4f3a889b7480aa Mon Sep 17 00:00:00 2001 From: David Meyer Date: Thu, 20 Oct 2011 21:57:46 +0000 Subject: Remove unused include of sys/uio.h in MemoryBuffer.cpp. It was not correctly protected by ifdef either. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142623 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/MemoryBuffer.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp index 0771af5..cc3f6a8 100644 --- a/lib/Support/MemoryBuffer.cpp +++ b/lib/Support/MemoryBuffer.cpp @@ -29,7 +29,6 @@ #include #if !defined(_MSC_VER) && !defined(__MINGW32__) #include -#include #else #include #endif -- cgit v1.1 From cd20c58e980552daef182247005cf905fe8b06ba Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 20 Oct 2011 22:23:58 +0000 Subject: Revert r142618, r142622, and r142624, which were based on an incorrect reading of the ARMv7 docs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142626 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 46 ++++-------------------------------------- 1 file changed, 4 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index d5f0c0a..b9cbc83 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4562,13 +4562,8 @@ def MRSsys : ABI<0b0001, (outs GPR:$Rd), (ins), NoItinerary, // same and the assembly parser has no way to distinguish between them. The mask // operand contains the special register (R Bit) in bit 4 and bits 3-0 contains // the mask with the fields to be accessed in the special register. -// -// NOTE: There are separate versions of these instructions for M-class versus -// AR-class processors. M-class processors can accept a wider range of -// mask values than AR-class processors can. 
-def MSRm : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, - "msr", "\t$mask, $Rn", []>, - Requires<[IsMClass]> { +def MSR : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, + "msr", "\t$mask, $Rn", []> { bits<5> mask; bits<4> Rn; @@ -4581,9 +4576,8 @@ def MSRm : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, let Inst{3-0} = Rn; } -def MSRmi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, - "msr", "\t$mask, $a", []>, - Requires<[IsMClass]> { +def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, + "msr", "\t$mask, $a", []> { bits<5> mask; bits<12> a; @@ -4595,38 +4589,6 @@ def MSRmi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, let Inst{11-0} = a; } -def MSRar : ABI<0b0001, (outs), (ins msr_mask:$mask, GPR:$Rn), NoItinerary, - "msr", "\t$mask, $Rn", []>, - Requires<[IsARClass]> { - bits<5> mask; - bits<4> Rn; - - let Inst{23} = 0; - let Inst{22} = 0; - let Inst{21-20} = 0b10; - let Inst{19-18} = mask{3-2}; - let Inst{17-16} = 0b00; - let Inst{15-12} = 0b1111; - let Inst{11-4} = 0b00000000; - let Inst{3-0} = Rn; -} - -def MSRari : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, - "msr", "\t$mask, $a", []>, - Requires<[IsARClass]> { - bits<5> mask; - bits<12> a; - - let Inst{23} = 0; - let Inst{22} = 0; - let Inst{21-20} = 0b10; - let Inst{19-18} = mask{3-2}; - let Inst{17-16} = 0b00; - let Inst{15-12} = 0b1111; - let Inst{11-0} = a; -} - - //===----------------------------------------------------------------------===// // TLS Instructions // -- cgit v1.1 From ed8db320af68556de5fcfe2cbec688003acc33f5 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Fri, 21 Oct 2011 01:22:04 +0000 Subject: Fix unused variable warning. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142630 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 9517417..36a10f4 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -56,9 +56,9 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( unsigned Directive = TM->getSubtarget().getDarwinDirective(); if (Directive == PPC::DIR_440) { - const InstrItineraryData *II = TM->getInstrItineraryData(); // Disable the hazard recognizer for now, as it doesn't support // bottom-up scheduling. + //const InstrItineraryData *II = TM->getInstrItineraryData(); //return new PPCHazardRecognizer440(II, DAG); return new ScheduleHazardRecognizer(); } -- cgit v1.1 From ec0e5475fdb9d7d3a4d8206d84f469bc83be1cf1 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 21 Oct 2011 01:23:41 +0000 Subject: Remove a now dead function, fixing -Wunused-function warnings from Clang. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142631 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 1729910..8ecbf34 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -2752,26 +2752,6 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() { } } -/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled -/// predecessors of the successors of the SUnit SU. Stop when the provided -/// limit is exceeded. 
-static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU, - unsigned Limit) { - unsigned Sum = 0; - for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - const SUnit *SuccSU = I->getSUnit(); - for (SUnit::const_pred_iterator II = SuccSU->Preds.begin(), - EE = SuccSU->Preds.end(); II != EE; ++II) { - SUnit *PredSU = II->getSUnit(); - if (!PredSU->isScheduled) - if (++Sum > Limit) - return Sum; - } - } - return Sum; -} - //===----------------------------------------------------------------------===// // Public Constructor Functions //===----------------------------------------------------------------------===// -- cgit v1.1 From db35087d21f09fdde81cab7e12fc0bcd8b7d00e9 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 21 Oct 2011 06:46:38 +0000 Subject: Implement a block placement pass based on the branch probability and block frequency analyses. This differs substantially from the existing block-placement pass in LLVM: 1) It operates on the Machine-IR in the CodeGen layer. This exposes much more (and more precise) information and opportunities. Also, the results are more stable due to fewer transforms occurring after the pass runs. 2) It uses the generalized probability and frequency analyses. These can model static heuristics, code annotation derived heuristics as well as eventual profile loading. By basing the optimization on the analysis interface it can work from any (or a combination) of these inputs. 3) It uses a more aggressive algorithm, both building chains from the bottom up to maximize benefit, and using an SCC-based walk to layout chains of blocks in a profitable ordering without O(N^2) iterations which the old pass involves. The pass is currently gated behind a flag, and not enabled by default because it still needs to grow some important features. Most notably, it needs to support loop aligning and careful layout of loop structures much as done by hand currently in CodePlacementOpt.
Once it supports these, and has sufficient testing and quality tuning, it should replace both of these passes. Thanks to Nick Lewycky and Richard Smith for help authoring & debugging this, and to Jakob, Andy, Eric, Jim, and probably a few others I'm forgetting for reviewing and answering all my questions. Writing a backend pass is *sooo* much better now than it used to be. =D git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142641 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CMakeLists.txt | 1 + lib/CodeGen/CodeGen.cpp | 1 + lib/CodeGen/LLVMTargetMachine.cpp | 14 +- lib/CodeGen/MachineBlockPlacement.cpp | 624 ++++++++++++++++++++++++++++++++++ 4 files changed, 638 insertions(+), 2 deletions(-) create mode 100644 lib/CodeGen/MachineBlockPlacement.cpp (limited to 'lib') diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 9a5e551..1bbe7a0 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -37,6 +37,7 @@ add_llvm_library(LLVMCodeGen LocalStackSlotAllocation.cpp MachineBasicBlock.cpp MachineBlockFrequencyInfo.cpp + MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp MachineCSE.cpp MachineDominators.cpp diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 424535b..a911534 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -28,6 +28,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveStacksPass(Registry); initializeLiveVariablesPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); + initializeMachineBlockPlacementPass(Registry); initializeMachineCSEPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachineLICMPass(Registry); diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 759610a..90501f0 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -53,6 +53,8 @@ static cl::opt DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, 
cl::desc("Disable tail duplication")); static cl::opt DisableEarlyTailDup("disable-early-taildup", cl::Hidden, cl::desc("Disable pre-register allocation tail duplication")); +static cl::opt EnableBlockPlacement("enable-block-placement", + cl::Hidden, cl::desc("Enable probability-driven block placement")); static cl::opt DisableCodePlace("disable-code-place", cl::Hidden, cl::desc("Disable code placement")); static cl::opt DisableSSC("disable-ssc", cl::Hidden, @@ -486,8 +488,16 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createGCInfoPrinter(dbgs())); if (OptLevel != CodeGenOpt::None && !DisableCodePlace) { - PM.add(createCodePlacementOptPass()); - printNoVerify(PM, "After CodePlacementOpt"); + if (EnableBlockPlacement) { + // MachineBlockPlacement is an experimental pass which is disabled by + // default currently. Eventually it should subsume CodePlacementOpt, so + // when enabled, the other is disabled. + PM.add(createMachineBlockPlacementPass()); + printNoVerify(PM, "After MachineBlockPlacement"); + } else { + PM.add(createCodePlacementOptPass()); + printNoVerify(PM, "After CodePlacementOpt"); + } } if (addPreEmitPass(PM, OptLevel)) diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp new file mode 100644 index 0000000..6831c1b --- /dev/null +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -0,0 +1,624 @@ +//===-- MachineBlockPlacement.cpp - Basic Block Code Layout optimization --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements basic block placement transformations using branch +// probability estimates. It is based around "Algo2" from Profile Guided Code +// Positioning [http://portal.acm.org/citation.cfm?id=989433]. 
+// +// We combine the BlockFrequencyInfo with BranchProbabilityInfo to simulate +// measured edge-weights. The BlockFrequencyInfo effectively summarizes the +// probability of starting from any particular block, and the +// BranchProbabilityInfo the probability of exiting the block via a particular +// edge. Combined they form a function-wide ordering of the edges. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "block-placement2" +#include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" +#include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetInstrInfo.h" +#include +using namespace llvm; + +namespace { +/// \brief A structure for storing a weighted edge. +/// +/// This stores an edge and its weight, computed as the product of the +/// frequency that the starting block is entered with the probability of +/// a particular exit block. +struct WeightedEdge { + BlockFrequency EdgeFrequency; + MachineBasicBlock *From, *To; + + bool operator<(const WeightedEdge &RHS) const { + return EdgeFrequency < RHS.EdgeFrequency; + } +}; +} + +namespace { +struct BlockChain; +/// \brief Type for our function-wide basic block -> block chain mapping. +typedef DenseMap BlockToChainMapType; +} + +namespace { +/// \brief A chain of blocks which will be laid out contiguously. +/// +/// This is the datastructure representing a chain of consecutive blocks that +/// are profitable to layout together in order to maximize fallthrough +/// probabilities. 
We also can use a block chain to represent a sequence of +/// basic blocks which have some external (correctness) requirement for +/// sequential layout. +/// +/// Eventually, the block chains will form a directed graph over the function. +/// We provide an SCC-supporting-iterator in order to quicky build and walk the +/// SCCs of block chains within a function. +/// +/// The block chains also have support for calculating and caching probability +/// information related to the chain itself versus other chains. This is used +/// for ranking during the final layout of block chains. +struct BlockChain { + class SuccIterator; + + /// \brief The first and last basic block that from this chain. + /// + /// The chain is stored within the existing function ilist of basic blocks. + /// When merging chains or otherwise manipulating them, we splice the blocks + /// within this ilist, giving us very cheap storage here and constant time + /// merge operations. + /// + /// It is extremely important to note that LastBB is the iterator pointing + /// *at* the last basic block in the chain. That is, the chain consists of + /// the *closed* range [FirstBB, LastBB]. We cannot use half-open ranges + /// because the next basic block may get relocated to a different part of the + /// function at any time during the run of this pass. + MachineFunction::iterator FirstBB, LastBB; + + /// \brief A handle to the function-wide basic block to block chain mapping. + /// + /// This is retained in each block chain to simplify the computation of child + /// block chains for SCC-formation and iteration. We store the edges to child + /// basic blocks, and map them back to their associated chains using this + /// structure. + BlockToChainMapType &BlockToChain; + + /// \brief The weight used to rank two block chains in the same SCC. + /// + /// This is used during SCC layout of block chains to cache and rank the + /// chains. 
It is supposed to represent the expected frequency with which + /// control reaches a block within this chain, has the option of branching to + /// a block in some other chain participating in the SCC, but instead + /// continues within this chain. The higher this is, the more costly we + /// expect mis-predicted branches between this chain and other chains within + /// the SCC to be. Thus, since we expect branches between chains to be + /// predicted when backwards and not predicted when forwards, the higher this + /// is the more important that this chain is laid out first among those + /// chains in the same SCC as it. + BlockFrequency InChainEdgeFrequency; + + /// \brief Construct a new BlockChain. + /// + /// This builds a new block chain representing a single basic block in the + /// function. It also registers itself as the chain that block participates + /// in with the BlockToChain mapping. + BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) + : FirstBB(BB), LastBB(BB), BlockToChain(BlockToChain) { + assert(BB && "Cannot create a chain with a null basic block"); + BlockToChain[BB] = this; + } + + /// \brief Merge another block chain into this one. + /// + /// This routine merges a block chain into this one. It takes care of forming + /// a contiguous sequence of basic blocks, updating the edge list, and + /// updating the block -> chain mapping. It does not free or tear down the + /// old chain, but the old chain's block list is no longer valid. + void merge(BlockChain *Chain) { + assert(Chain && "Cannot merge a null chain"); + MachineFunction::iterator EndBB = llvm::next(LastBB); + MachineFunction::iterator ChainEndBB = llvm::next(Chain->LastBB); + + // Update the incoming blocks to point to this chain. 
+ for (MachineFunction::iterator BI = Chain->FirstBB, BE = ChainEndBB; + BI != BE; ++BI) { + assert(BlockToChain[BI] == Chain && "Incoming blocks not in chain"); + BlockToChain[BI] = this; + } + + // We splice the blocks together within the function (unless they already + // are adjacent) so we can represent the new chain with a pair of pointers + // to basic blocks within the function. This is also useful as each chain + // of blocks will end up being laid out contiguously within the function. + if (EndBB != Chain->FirstBB) + FirstBB->getParent()->splice(EndBB, Chain->FirstBB, ChainEndBB); + LastBB = Chain->LastBB; + } +}; +} + +namespace { +/// \brief Successor iterator for BlockChains. +/// +/// This is an iterator that walks over the successor block chains by looking +/// through its blocks successors and mapping those back to block chains. This +/// iterator is not a fully-functioning iterator, it is designed specifically +/// to support the interface required by SCCIterator when forming and walking +/// SCCs of BlockChains. +/// +/// Note that this iterator cannot be used while the chains are still being +/// formed and/or merged. Unlike the chains themselves, it does store end +/// iterators which could be moved if the chains are re-ordered. Once we begin +/// forming and iterating over an SCC of chains, the order of blocks within the +/// function must not change until we finish using the SCC iterators. +class BlockChain::SuccIterator + : public std::iterator { + BlockChain *Chain; + MachineFunction::iterator BI, BE; + MachineBasicBlock::succ_iterator SI; + +public: + explicit SuccIterator(BlockChain *Chain) + : Chain(Chain), BI(Chain->FirstBB), BE(llvm::next(Chain->LastBB)), + SI(BI->succ_begin()) { + while (BI != BE && BI->succ_begin() == BI->succ_end()) + ++BI; + if (BI != BE) + SI = BI->succ_begin(); + } + + /// \brief Helper function to create an end iterator for a particular chain. + /// + /// The "end" state is extremely arbitrary. 
We chose to have BI == BE, and SI + /// == Chain->FirstBB->succ_begin(). The value of SI doesn't really make any + /// sense, but rather than try to rationalize SI and our increment, when we + /// detect an "end" state, we just immediately call this function to build + /// the canonical end iterator. + static SuccIterator CreateEnd(BlockChain *Chain) { + SuccIterator It(Chain); + It.BI = It.BE; + return It; + } + + bool operator==(const SuccIterator &RHS) const { + return (Chain == RHS.Chain && BI == RHS.BI && SI == RHS.SI); + } + bool operator!=(const SuccIterator &RHS) const { + return !operator==(RHS); + } + + SuccIterator& operator++() { + assert(*this != CreateEnd(Chain) && "Cannot increment the end iterator"); + // There may be null successor pointers, skip over them. + // FIXME: I don't understand *why* there are null successor pointers. + do { + ++SI; + if (SI != BI->succ_end() && *SI) + return *this; + + // There may be a basic block without successors. Skip over them. + do { + ++BI; + if (BI == BE) + return *this = CreateEnd(Chain); + } while (BI->succ_begin() == BI->succ_end()); + SI = BI->succ_begin(); + } while (!*SI); + return *this; + } + SuccIterator operator++(int) { + SuccIterator tmp = *this; + ++*this; + return tmp; + } + + BlockChain *operator*() const { + assert(Chain->BlockToChain.lookup(*SI) && "Missing chain"); + return Chain->BlockToChain.lookup(*SI); + } +}; +} + +namespace { +/// \brief Sorter used with containers of BlockChain pointers. +/// +/// Sorts based on the \see BlockChain::InChainEdgeFrequency -- see its +/// comments for details on what this ordering represents. +struct ChainPtrPrioritySorter { + bool operator()(const BlockChain *LHS, const BlockChain *RHS) const { + assert(LHS && RHS && "Null chain entry"); + return LHS->InChainEdgeFrequency < RHS->InChainEdgeFrequency; + } +}; +} + +namespace { +class MachineBlockPlacement : public MachineFunctionPass { + /// \brief A handle to the branch probability pass. 
+ const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. + const MachineBlockFrequencyInfo *MBFI; + + /// \brief A handle to the target's instruction info. + const TargetInstrInfo *TII; + + /// \brief A prioritized list of edges in the BB-graph. + /// + /// For each function, we insert all control flow edges between BBs, along + /// with their "global" frequency. The Frequency of an edge being taken is + /// defined as the frequency of entering the source BB (from MBFI) times the + /// probability of taking a particular branch out of that block (from MBPI). + /// + /// Once built, this list is sorted in ascending frequency, making the last + /// edge the hottest one in the function. + SmallVector Edges; + + /// \brief Allocator and owner of BlockChain structures. + /// + /// We build BlockChains lazily by merging together high probability BB + /// sequences acording to the "Algo2" in the paper mentioned at the top of + /// the file. To reduce malloc traffic, we allocate them using this slab-like + /// allocator, and destroy them after the pass completes. + SpecificBumpPtrAllocator ChainAllocator; + + /// \brief Function wide BasicBlock to BlockChain mapping. + /// + /// This mapping allows efficiently moving from any given basic block to the + /// BlockChain it participates in, if any. We use it to, among other things, + /// allow implicitly defining edges between chains as the existing edges + /// between basic blocks. + DenseMap BlockToChain; + + /// \brief A prioritized sequence of chains. + /// + /// We build up the ideal sequence of basic block chains in reverse order + /// here, and then walk backwards to arrange the final function ordering. + SmallVector PChains; + +#ifndef NDEBUG + /// \brief A set of active chains used to sanity-check the pass algorithm. + /// + /// All operations on this member should be wrapped in an assert or NDEBUG. 
+ SmallPtrSet ActiveChains; +#endif + + BlockChain *CreateChain(MachineBasicBlock *BB); + void PrioritizeEdges(MachineFunction &F); + void BuildBlockChains(); + void PrioritizeChains(MachineFunction &F); + void PlaceBlockChains(MachineFunction &F); + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacement() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { return "Block Placement"; } +}; +} + +char MachineBlockPlacement::ID = 0; +INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", + "Branch Probability Basic Block Placement", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", + "Branch Probability Basic Block Placement", false, false) + +FunctionPass *llvm::createMachineBlockPlacementPass() { + return new MachineBlockPlacement(); +} + +namespace llvm { +/// \brief GraphTraits specialization for our BlockChain graph. +template <> struct GraphTraits { + typedef BlockChain NodeType; + typedef BlockChain::SuccIterator ChildIteratorType; + + static NodeType *getEntryNode(NodeType *N) { return N; } + static BlockChain::SuccIterator child_begin(NodeType *N) { + return BlockChain::SuccIterator(N); + } + static BlockChain::SuccIterator child_end(NodeType *N) { + return BlockChain::SuccIterator::CreateEnd(N); + } +}; +} + +/// \brief Helper to create a new chain for a single BB. +/// +/// Takes care of growing the Chains, setting up the BlockChain object, and any +/// debug checking logic. +/// \returns A pointer to the new BlockChain. 
+BlockChain *MachineBlockPlacement::CreateChain(MachineBasicBlock *BB) { + BlockChain *Chain = + new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); + assert(ActiveChains.insert(Chain)); + return Chain; +} + +/// \brief Build a prioritized list of edges. +/// +/// The priority is determined by the product of the block frequency (how +/// likely it is to arrive at a particular block) times the probability of +/// taking this particular edge out of the block. This provides a function-wide +/// ordering of the edges. +void MachineBlockPlacement::PrioritizeEdges(MachineFunction &F) { + assert(Edges.empty() && "Already have an edge list"); + SmallVector Cond; // For AnalyzeBranch. + BlockChain *RequiredChain = 0; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *From = &*FI; + // We only consider MBBs with analyzable branches. Even if the analysis + // fails, if there is no fallthrough, we can still work with the MBB. + MachineBasicBlock *TBB = 0, *FBB = 0; + Cond.clear(); + if (TII->AnalyzeBranch(*From, TBB, FBB, Cond) && From->canFallThrough()) { + // We push all unanalyzed blocks onto a chain eagerly to prevent them + // from being split later. Create the chain if needed, otherwise just + // keep track that these blocks reside on it. + if (!RequiredChain) + RequiredChain = CreateChain(From); + else + BlockToChain[From] = RequiredChain; + } else { + // As soon as we find an analyzable branch, add that block to and + // finalize any required chain that has been started. The required chain + // is only modeling potentially inexplicable fallthrough, so the first + // block to have analyzable fallthrough is a known-safe stopping point. 
+ if (RequiredChain) { + BlockToChain[From] = RequiredChain; + RequiredChain->LastBB = FI; + RequiredChain = 0; + } + } + + BlockFrequency BaseFrequency = MBFI->getBlockFreq(From); + for (MachineBasicBlock::succ_iterator SI = From->succ_begin(), + SE = From->succ_end(); + SI != SE; ++SI) { + MachineBasicBlock *To = *SI; + WeightedEdge WE = { BaseFrequency * MBPI->getEdgeProbability(From, To), + From, To }; + Edges.push_back(WE); + } + } + assert(!RequiredChain && "Never found a terminator for a required chain"); + std::stable_sort(Edges.begin(), Edges.end()); +} + +/// \brief Build chains of basic blocks along hot paths. +/// +/// Build chains by trying to merge each pair of blocks from the mostly costly +/// edge first. This is essentially "Algo2" from the Profile Guided Code +/// Placement paper. While each node is considered a chain of one block, this +/// routine lazily build the chain objects themselves so that when possible it +/// can just merge a block into an existing chain. +void MachineBlockPlacement::BuildBlockChains() { + for (SmallVectorImpl::reverse_iterator EI = Edges.rbegin(), + EE = Edges.rend(); + EI != EE; ++EI) { + MachineBasicBlock *SourceB = EI->From, *DestB = EI->To; + if (SourceB == DestB) continue; + + BlockChain *SourceChain = BlockToChain.lookup(SourceB); + if (!SourceChain) SourceChain = CreateChain(SourceB); + BlockChain *DestChain = BlockToChain.lookup(DestB); + if (!DestChain) DestChain = CreateChain(DestB); + if (SourceChain == DestChain) + continue; + + bool IsSourceTail = + SourceChain->LastBB == MachineFunction::iterator(SourceB); + bool IsDestHead = + DestChain->FirstBB == MachineFunction::iterator(DestB); + + if (!IsSourceTail || !IsDestHead) + continue; + + SourceChain->merge(DestChain); + assert(ActiveChains.erase(DestChain)); + } +} + +/// \brief Prioritize the chains to minimize back-edges between chains. +/// +/// This is the trickiest part of the placement algorithm. 
Each chain is +/// a hot-path through a sequence of basic blocks, but there are conditional +/// branches away from this hot path, and to some other chain. Hardware branch +/// predictors favor back edges over forward edges, and so it is desirable to +/// arrange the targets of branches away from a hot path and to some other +/// chain to come later in the function, making them forward branches, and +/// helping the branch predictor to predict fallthrough. +/// +/// In some cases, this is easy. simply topologically walking from the entry +/// chain through its successors in order would work if there were no cycles +/// between the chains of blocks, but often there are. In such a case, we first +/// need to identify the participants in the cycle, and then rank them so that +/// the linearizing of the chains has the lowest *probability* of causing +/// a mispredicted branch. To compute the correct rank for a chain, we take the +/// complement of the branch probability for each branch leading away from the +/// chain and multiply it by the frequency of the source block for that branch. +/// This gives us the probability of that particular branch *not* being taken +/// in this function. The sum of these probabilities for each chain is used as +/// a rank, so that we order the chain with the highest such sum first. +/// FIXME: This seems like a good approximation, but there is probably a known +/// technique for ordering of an SCC given edge weights. It would be good to +/// use that, or even use its code if possible. +/// +/// Also notable is that we prioritize the chains from the bottom up, and so +/// all of the "first" and "before" relationships end up inverted in the code. 
+void MachineBlockPlacement::PrioritizeChains(MachineFunction &F) { + MachineBasicBlock *EntryB = &F.front(); + BlockChain *EntryChain = BlockToChain[EntryB]; + assert(EntryChain && "Missing chain for entry block"); + assert(EntryChain->FirstBB == F.begin() && + "Entry block is not the head of the entry block chain"); + + // Form an SCC and walk it from the bottom up. + SmallPtrSet IsInSCC; + for (scc_iterator I = scc_begin(EntryChain); + !I.isAtEnd(); ++I) { + const std::vector &SCC = *I; + PChains.insert(PChains.end(), SCC.begin(), SCC.end()); + + // If there is only one chain in the SCC, it's trivially sorted so just + // bail out early. Sorting the SCC is expensive. + if (SCC.size() == 1) + continue; + + // We work strictly on the PChains range from here on out to maximize + // locality. + SmallVectorImpl::iterator SCCEnd = PChains.end(), + SCCBegin = SCCEnd - SCC.size(); + IsInSCC.clear(); + IsInSCC.insert(SCCBegin, SCCEnd); + + // Compute the edge frequency of staying in a chain, despite the existency + // of an edge to some other chain within this SCC. + for (SmallVectorImpl::iterator SCCI = SCCBegin; + SCCI != SCCEnd; ++SCCI) { + BlockChain *Chain = *SCCI; + + // Special case the entry chain. Regardless of the weights of other + // chains, the entry chain *must* come first, so move it to the end, and + // avoid processing that chain at all. + if (Chain == EntryChain) { + --SCCEnd; + if (SCCI == SCCEnd) break; + Chain = *SCCI = *SCCEnd; + *SCCEnd = EntryChain; + } + + // Walk over every block in this chain looking for out-bound edges to + // other chains in this SCC. 
+ for (MachineFunction::iterator BI = Chain->FirstBB, + BE = llvm::next(Chain->LastBB); + BI != BE; ++BI) { + MachineBasicBlock *From = &*BI; + for (MachineBasicBlock::succ_iterator SI = BI->succ_begin(), + SE = BI->succ_end(); + SI != SE; ++SI) { + MachineBasicBlock *To = *SI; + if (!To || !IsInSCC.count(BlockToChain[To])) + continue; + BranchProbability ComplEdgeProb = + MBPI->getEdgeProbability(From, To).getCompl(); + Chain->InChainEdgeFrequency += + MBFI->getBlockFreq(From) * ComplEdgeProb; + } + } + } + + // Sort the chains within the SCC according to their edge frequencies, + // which should make the least costly chain of blocks to mis-place be + // ordered first in the prioritized sequence. + std::stable_sort(SCCBegin, SCCEnd, ChainPtrPrioritySorter()); + } +} + +/// \brief Splice the function blocks together based on the chain priorities. +/// +/// Each chain is already represented as a contiguous range of blocks in the +/// function. Simply walk backwards down the prioritized chains and splice in +/// any chains out of order. Note that the first chain we visit is necessarily +/// the entry chain. It has no predecessors and so must be the top of the SCC. +/// Also, we cannot splice any chain prior to the entry chain as we can't +/// splice any blocks prior to the entry block. +void MachineBlockPlacement::PlaceBlockChains(MachineFunction &F) { + assert(!PChains.empty() && "No chains were prioritized"); + assert(PChains.back() == BlockToChain[&F.front()] && + "The entry chain must always be the final chain"); + + MachineFunction::iterator InsertPos = F.begin(); + for (SmallVectorImpl::reverse_iterator CI = PChains.rbegin(), + CE = PChains.rend(); + CI != CE; ++CI) { + BlockChain *Chain = *CI; + // Check that we process this chain only once for debugging. + assert(ActiveChains.erase(Chain) && "Processed a chain twice"); + + // If this chain is already in the right position, just skip past it. + // Otherwise, splice it into position. 
+ if (InsertPos == Chain->FirstBB) + InsertPos = llvm::next(Chain->LastBB); + else + F.splice(InsertPos, Chain->FirstBB, llvm::next(Chain->LastBB)); + } + + // Note that we can't assert this is empty as there may be unreachable blocks + // in the function. +#ifndef NDEBUG + ActiveChains.clear(); +#endif + + // Now that every block is in its final position, update all of the + // terminators. + SmallVector Cond; // For AnalyzeBranch. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + // FIXME: It would be awesome of updateTerminator would just return rather + // than assert when the branch cannot be analyzed in order to remove this + // boiler plate. + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(*FI, TBB, FBB, Cond)) + FI->updateTerminator(); + } +} + +bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (llvm::next(F.begin()) == F.end()) + return false; + + MBPI = &getAnalysis(); + MBFI = &getAnalysis(); + TII = F.getTarget().getInstrInfo(); + assert(Edges.empty()); + assert(BlockToChain.empty()); + assert(PChains.empty()); + assert(ActiveChains.empty()); + + PrioritizeEdges(F); + BuildBlockChains(); + PrioritizeChains(F); + PlaceBlockChains(F); + + Edges.clear(); + BlockToChain.clear(); + PChains.clear(); + ChainAllocator.DestroyAll(); + + // We always return true as we have no way to track whether the final order + // differs from the original order. + return true; +} -- cgit v1.1 From b4c945716f232ee07ec6fd3e1146175801fa1278 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 21 Oct 2011 06:55:01 +0000 Subject: Remove intrinsics for X86 BLSI, BLSMSK, and BLSR intrinsics and replace with custom isel lowering code. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142642 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 54 +++++++++++++++++++++++++++++++++++++- lib/Target/X86/X86ISelLowering.h | 4 +++ lib/Target/X86/X86InstrInfo.td | 24 ++++++++++------- 3 files changed, 71 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f1b160c..96f04e7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1155,6 +1155,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setTargetDAGCombine(ISD::SINT_TO_FP); if (Subtarget->is64Bit()) setTargetDAGCombine(ISD::MUL); + if (Subtarget->hasBMI()) + setTargetDAGCombine(ISD::XOR); computeRegisterProperties(); @@ -13300,7 +13302,9 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, EVT VT = N->getValueType(0); - // Create ANDN instructions + // Create ANDN, BLSI, and BLSR instructions + // BLSI is X & (-X) + // BLSR is X & (X-1) if (Subtarget->hasBMI() && (VT == MVT::i32 || VT == MVT::i64)) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -13313,6 +13317,26 @@ static SDValue PerformAndCombine(SDNode *N, SelectionDAG &DAG, if (N1.getOpcode() == ISD::XOR && isAllOnes(N1.getOperand(1))) return DAG.getNode(X86ISD::ANDN, DL, VT, N1.getOperand(0), N0); + // Check LHS for neg + if (N0.getOpcode() == ISD::SUB && N0.getOperand(1) == N1 && + isZero(N0.getOperand(0))) + return DAG.getNode(X86ISD::BLSI, DL, VT, N1); + + // Check RHS for neg + if (N1.getOpcode() == ISD::SUB && N1.getOperand(1) == N0 && + isZero(N1.getOperand(0))) + return DAG.getNode(X86ISD::BLSI, DL, VT, N0); + + // Check LHS for X-1 + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 && + isAllOnes(N0.getOperand(1))) + return DAG.getNode(X86ISD::BLSR, DL, VT, N1); + + // Check RHS for X-1 + if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 && + isAllOnes(N1.getOperand(1))) + 
return DAG.getNode(X86ISD::BLSR, DL, VT, N0); + return SDValue(); } @@ -13500,6 +13524,33 @@ static SDValue PerformOrCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const X86Subtarget *Subtarget) { + if (DCI.isBeforeLegalizeOps()) + return SDValue(); + + EVT VT = N->getValueType(0); + + if (VT != MVT::i32 && VT != MVT::i64) + return SDValue(); + + // Create BLSMSK instructions by finding X ^ (X-1) + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + DebugLoc DL = N->getDebugLoc(); + + if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1 && + isAllOnes(N0.getOperand(1))) + return DAG.getNode(X86ISD::BLSMSK, DL, VT, N1); + + if (N1.getOpcode() == ISD::ADD && N1.getOperand(0) == N0 && + isAllOnes(N1.getOperand(1))) + return DAG.getNode(X86ISD::BLSMSK, DL, VT, N0); + + return SDValue(); +} + /// PerformLOADCombine - Do target-specific dag combines on LOAD nodes. static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -14180,6 +14231,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case ISD::SRL: return PerformShiftCombine(N, DAG, Subtarget); case ISD::AND: return PerformAndCombine(N, DAG, DCI, Subtarget); case ISD::OR: return PerformOrCombine(N, DAG, DCI, Subtarget); + case ISD::XOR: return PerformXorCombine(N, DAG, DCI, Subtarget); case ISD::LOAD: return PerformLOADCombine(N, DAG, Subtarget); case ISD::STORE: return PerformSTORECombine(N, DAG, Subtarget); case ISD::SINT_TO_FP: return PerformSINT_TO_FPCombine(N, DAG, this); diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 342a5e6..0903b9f 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -230,6 +230,10 @@ namespace llvm { ANDN, // ANDN - Bitwise AND NOT with FLAGS results. 
+ BLSI, // BLSI - Extract lowest set isolated bit + BLSMSK, // BLSMSK - Get mask up to lowest set bit + BLSR, // BLSR - Reset lowest set bit + UMUL, // LOW, HI, FLAGS = umul LHS, RHS // MUL_IMM - X86 specific multiply by immediate. diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0994ab9..aa35cf0 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -226,6 +226,10 @@ def X86and_flag : SDNode<"X86ISD::AND", SDTBinaryArithWithFlags, [SDNPCommutative]>; def X86andn_flag : SDNode<"X86ISD::ANDN", SDTBinaryArithWithFlags>; +def X86blsi_flag : SDNode<"X86ISD::BLSI", SDTUnaryArithWithFlags>; +def X86blsmsk_flag : SDNode<"X86ISD::BLSMSK", SDTUnaryArithWithFlags>; +def X86blsr_flag : SDNode<"X86ISD::BLSR", SDTUnaryArithWithFlags>; + def X86mul_imm : SDNode<"X86ISD::MUL_IMM", SDTIntBinOp>; def X86WinAlloca : SDNode<"X86ISD::WIN_ALLOCA", SDTX86Void, @@ -1401,30 +1405,30 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { } multiclass bmi_bls { + RegisterClass RC, X86MemOperand x86memop, SDNode OpNode, + PatFrag ld_frag> { def rr : I<0xF3, RegMRM, (outs RC:$dst), (ins RC:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (Int RC:$src)), (implicit EFLAGS)]>, T8, VEX_4V; + [(set RC:$dst, EFLAGS, (OpNode RC:$src))]>, T8, VEX_4V; def rm : I<0xF3, MemMRM, (outs RC:$dst), (ins x86memop:$src), !strconcat(mnemonic, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (Int (ld_frag addr:$src))), (implicit EFLAGS)]>, + [(set RC:$dst, EFLAGS, (OpNode (ld_frag addr:$src)))]>, T8, VEX_4V; } let Predicates = [HasBMI], Defs = [EFLAGS] in { defm BLSR32 : bmi_bls<"blsr{l}", MRM1r, MRM1m, GR32, i32mem, - int_x86_bmi_blsr_32, loadi32>; + X86blsr_flag, loadi32>; defm BLSR64 : bmi_bls<"blsr{q}", MRM1r, MRM1m, GR64, i64mem, - int_x86_bmi_blsr_64, loadi64>, VEX_W; + X86blsr_flag, loadi64>, VEX_W; defm BLSMSK32 : bmi_bls<"blsmsk{l}", MRM2r, MRM2m, GR32, i32mem, - int_x86_bmi_blsmsk_32, loadi32>; + X86blsmsk_flag, 
loadi32>; defm BLSMSK64 : bmi_bls<"blsmsk{q}", MRM2r, MRM2m, GR64, i64mem, - int_x86_bmi_blsmsk_64, loadi64>, VEX_W; + X86blsmsk_flag, loadi64>, VEX_W; defm BLSI32 : bmi_bls<"blsi{l}", MRM3r, MRM3m, GR32, i32mem, - int_x86_bmi_blsi_32, loadi32>; + X86blsi_flag, loadi32>; defm BLSI64 : bmi_bls<"blsi{q}", MRM3r, MRM3m, GR64, i64mem, - int_x86_bmi_blsi_64, loadi64>, VEX_W; + X86blsi_flag, loadi64>, VEX_W; } multiclass bmi_bextr_bzhi opc, string mnemonic, RegisterClass RC, -- cgit v1.1 From 4a85cc982a977ddeb0249eb9304326deabe3a7a5 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Fri, 21 Oct 2011 08:57:37 +0000 Subject: Add loop aligning to MachineBlockPlacement based on review discussion so it's a bit more plausible to use this instead of CodePlacementOpt. The code for this was shamelessly stolen from CodePlacementOpt, and then trimmed down a bit. There doesn't seem to be much utility in returning true/false from this pass as we may or may not have rewritten all of the blocks. Also, the statistic of counting how many loops were aligned doesn't seem terribly important so I removed it. If folks would like it to be included, I'm happy to add it back. This was probably the most egregious of the missing features, and now I'm going to start gathering some performance numbers and looking at specific loop structures that have different layout between the two. Test is updated to include both basic loop alignment and nested loop alignment. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142645 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 42 ++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 6831c1b..7700efc 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -20,13 +20,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "block-placement2" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DenseMap.h" @@ -35,6 +36,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include using namespace llvm; @@ -259,9 +261,15 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the function-wide block frequency pass. const MachineBlockFrequencyInfo *MBFI; + /// \brief A handle to the loop info. + const MachineLoopInfo *MLI; + /// \brief A handle to the target's instruction info. const TargetInstrInfo *TII; + /// \brief A handle to the target's lowering info. + const TargetLowering *TLI; + /// \brief A prioritized list of edges in the BB-graph. 
/// /// For each function, we insert all control flow edges between BBs, along @@ -307,6 +315,7 @@ class MachineBlockPlacement : public MachineFunctionPass { void BuildBlockChains(); void PrioritizeChains(MachineFunction &F); void PlaceBlockChains(MachineFunction &F); + void AlignLoops(MachineFunction &F); public: static char ID; // Pass identification, replacement for typeid @@ -319,6 +328,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); + AU.addRequired(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -331,6 +341,7 @@ INITIALIZE_PASS_BEGIN(MachineBlockPlacement, "block-placement2", "Branch Probability Basic Block Placement", false, false) INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_PASS_END(MachineBlockPlacement, "block-placement2", "Branch Probability Basic Block Placement", false, false) @@ -595,6 +606,28 @@ void MachineBlockPlacement::PlaceBlockChains(MachineFunction &F) { } } +/// \brief Recursive helper to align a loop and any nested loops. +static void AlignLoop(MachineFunction &F, MachineLoop *L, unsigned Align) { + // Recurse through nested loops. + for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) + AlignLoop(F, *I, Align); + + L->getTopBlock()->setAlignment(Align); +} + +/// \brief Align loop headers to target preferred alignments. +void MachineBlockPlacement::AlignLoops(MachineFunction &F) { + if (F.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) + return; + + unsigned Align = TLI->getPrefLoopAlignment(); + if (!Align) + return; // Don't care about loop alignment. + + for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I) + AlignLoop(F, *I, Align); +} + bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { // Check for single-block functions and skip them. 
if (llvm::next(F.begin()) == F.end()) @@ -602,7 +635,9 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { MBPI = &getAnalysis(); MBFI = &getAnalysis(); + MLI = &getAnalysis(); TII = F.getTarget().getInstrInfo(); + TLI = F.getTarget().getTargetLowering(); assert(Edges.empty()); assert(BlockToChain.empty()); assert(PChains.empty()); @@ -612,6 +647,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { BuildBlockChains(); PrioritizeChains(F); PlaceBlockChains(F); + AlignLoops(F); Edges.clear(); BlockToChain.clear(); -- cgit v1.1 From cda065df614dbf7ca0f6a169e9536d2241539b75 Mon Sep 17 00:00:00 2001 From: Anton Korobeynikov Date: Fri, 21 Oct 2011 09:38:50 +0000 Subject: Fix build on mingw-w64. Patch by Ruben Van Boxem! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142646 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Windows/Signals.inc | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc index 473168e..0d4b8a2 100644 --- a/lib/Support/Windows/Signals.inc +++ b/lib/Support/Windows/Signals.inc @@ -41,10 +41,11 @@ #pragma GCC diagnostic ignored "-Wformat" #pragma GCC diagnostic ignored "-Wformat-extra-args" - // MinGW does not have updated support for the 64-bit versions of the DebugHlp - // APIs. So we will have to load them manually. The structures and method - // signatures were pulled from DbgHelp.h in the Windows Platform SDK, and - // adjusted for brevity. + #if !defined(__MINGW64_VERSION_MAJOR) + // MinGW.org does not have updated support for the 64-bit versions of the + // DebugHlp APIs. So we will have to load them manually. The structures and + // method signatures were pulled from DbgHelp.h in the Windows Platform SDK, + // and adjusted for brevity. 
typedef struct _IMAGEHLP_LINE64 { DWORD SizeOfStruct; PVOID Key; @@ -147,6 +148,7 @@ static bool load64BitDebugHelp(void) { } return StackWalk64 != NULL; } + #endif // !defined(__MINGW64_VERSION_MAJOR) #endif // __MINGW32__ // Forward declare. @@ -207,8 +209,8 @@ static int CRTReportHook(int ReportType, char *Message, int *Return) { #endif static void RegisterHandler() { -#if __MINGW32__ - // On MinGW, we need to load up the symbols explicitly, because the +#if __MINGW32__ && !defined(__MINGW64_VERSION_MAJOR) + // On MinGW.org, we need to load up the symbols explicitly, because the // Win32 framework they include does not have support for the 64-bit // versions of the APIs we need. If we cannot load up the APIs (which // would be unexpected as they should exist on every version of Windows -- cgit v1.1 From 4bd222ae26d0411d5c67fd0ab5c043422b5f201b Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 21 Oct 2011 11:42:07 +0000 Subject: 1. Fix the widening of SETCC in WidenVecOp_SETCC. Use the correct return CC type. 2. Fix a typo in CONCAT_VECTORS which exposed the bug in #1. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142648 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 2 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 2 +- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 27 +++++++++++++---------- 3 files changed, 17 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a5c4c2d..4553071 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2926,7 +2926,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { DebugLoc dl = N->getDebugLoc(); - SDValue Op0 = N->getOperand(1); + SDValue Op0 = N->getOperand(0); SDValue Op1 = N->getOperand(1); assert(Op0.getValueType() == Op1.getValueType() && "Invalid input vector types"); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 0a8db08..69c2100 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -633,7 +633,7 @@ private: SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N); SDValue WidenVecOp_STORE(SDNode* N); - SDValue WidenVecOp_SETCC(SDNode* N, unsigned ResNo); + SDValue WidenVecOp_SETCC(SDNode* N); SDValue WidenVecOp_Convert(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7fc7525..e950e07 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2033,7 +2033,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) { case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break; case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); 
break; case ISD::STORE: Res = WidenVecOp_STORE(N); break; - case ISD::SETCC: Res = WidenVecOp_SETCC(N, ResNo); break; + case ISD::SETCC: Res = WidenVecOp_SETCC(N); break; case ISD::FP_EXTEND: case ISD::FP_TO_SINT: @@ -2167,27 +2167,30 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { MVT::Other,&StChain[0],StChain.size()); } -SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N, unsigned ResNo) { - assert(ResNo < 2 && "Invalid res num to widen"); +SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); - EVT VT = InOp0.getValueType(); DebugLoc dl = N->getDebugLoc(); // WARNING: In this code we widen the compare instruction with garbage. // This garbage may contain denormal floats which may be slow. Is this a real // concern ? Should we zero the unused lanes if this is a float compare ? - SDValue Zero = DAG.getIntPtrConstant(0); - EVT ResVT = EVT::getVectorVT(*DAG.getContext(), - N->getValueType(0).getVectorElementType(), - VT.getVectorNumElements()); - + // Get a new SETCC node to compare the newly widened operands. + // Only some of the compared elements are legal. + EVT SVT = TLI.getSetCCResultType(InOp0.getValueType()); SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(), - ResVT, InOp0, InOp1, N->getOperand(2)); + SVT, InOp0, InOp1, N->getOperand(2)); + + // Extract the needed results from the result vector. + EVT ResVT = EVT::getVectorVT(*DAG.getContext(), + SVT.getVectorElementType(), + N->getValueType(0).getVectorNumElements()); + SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, + ResVT, WideSETCC, DAG.getIntPtrConstant(0)); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, N->getValueType(0), - WideSETCC, Zero); + // Convert the result mask to the correct kind. 
+ return DAG.getAnyExtOrTrunc(CC, dl, N->getValueType(0)); } -- cgit v1.1 From fe7b4998c603f181cad282894368028709a5c5e7 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 21 Oct 2011 16:14:12 +0000 Subject: Remove some outdated comments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142653 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d940089..502c629 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -309,7 +309,7 @@ def VLD1q16Pseudo_UPD : VLDQWBPseudo; def VLD1q32Pseudo_UPD : VLDQWBPseudo; def VLD1q64Pseudo_UPD : VLDQWBPseudo; -// ...with 3 registers (some of these are only for the disassembler): +// ...with 3 registers class VLD1D3 op7_4, string Dt> : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, @@ -339,7 +339,7 @@ def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">; def VLD1d64TPseudo : VLDQQPseudo; def VLD1d64TPseudo_UPD : VLDQQWBPseudo; -// ...with 4 registers (some of these are only for the disassembler): +// ...with 4 registers class VLD1D4 op7_4, string Dt> : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, @@ -441,7 +441,7 @@ def VLD2q8Pseudo_UPD : VLDQQWBPseudo; def VLD2q16Pseudo_UPD : VLDQQWBPseudo; def VLD2q32Pseudo_UPD : VLDQQWBPseudo; -// ...with double-spaced registers (for disassembly only): +// ...with double-spaced registers def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">; def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">; def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">; @@ -1200,7 +1200,7 @@ def VST1q16Pseudo_UPD : VSTQWBPseudo; def VST1q32Pseudo_UPD : VSTQWBPseudo; def VST1q64Pseudo_UPD : VSTQWBPseudo; -// ...with 3 registers (some of these are only for the disassembler): +// 
...with 3 registers class VST1D3 op7_4, string Dt> : NLdSt<0, 0b00, 0b0110, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3), @@ -1232,7 +1232,7 @@ def VST1d64T_UPD : VST1D3WB<{1,1,0,?}, "64">; def VST1d64TPseudo : VSTQQPseudo; def VST1d64TPseudo_UPD : VSTQQWBPseudo; -// ...with 4 registers (some of these are only for the disassembler): +// ...with 4 registers class VST1D4 op7_4, string Dt> : NLdSt<0, 0b00, 0b0010, op7_4, (outs), (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2, DPR:$src3, DPR:$src4), @@ -1335,7 +1335,7 @@ def VST2q8Pseudo_UPD : VSTQQWBPseudo; def VST2q16Pseudo_UPD : VSTQQWBPseudo; def VST2q32Pseudo_UPD : VSTQQWBPseudo; -// ...with double-spaced registers (for disassembly only): +// ...with double-spaced registers def VST2b8 : VST2D<0b1001, {0,0,?,?}, "8">; def VST2b16 : VST2D<0b1001, {0,1,?,?}, "16">; def VST2b32 : VST2D<0b1001, {1,0,?,?}, "32">; @@ -3942,12 +3942,12 @@ def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, "vbif", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; // VBIT : Vector Bitwise Insert if True // like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst", @@ -3956,12 +3956,12 @@ def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1, (outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm), N3RegFrm, IIC_VBINiD, "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1, (outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm), N3RegFrm, IIC_VBINiQ, "vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd", - [/* For disassembly only; pattern left blank */]>; + []>; // VBIT/VBIF are not yet implemented. 
The TwoAddress pass will not go looking // for equivalent operations with different register constraints; it just @@ -4328,7 +4328,7 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0, IIC_VCNTiQ, "vcnt", "8", v16i8, v16i8, int_arm_neon_vcnt>; -// Vector Swap -- for disassembly only. +// Vector Swap def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0, (outs DPR:$Vd), (ins DPR:$Vm), NoItinerary, "vswp", "$Vd, $Vm", "", []>; -- cgit v1.1 From 293a5f69fad6053a328bf454e3f28d724d989231 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 21 Oct 2011 16:56:40 +0000 Subject: whitespace. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142657 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index eadd77e..b907e57 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -250,7 +250,7 @@ void ARMInstPrinter::printSORegRegOperand(const MCInst *MI, unsigned OpNum, O << ", " << ARM_AM::getShiftOpcStr(ShOpc); if (ShOpc == ARM_AM::rrx) return; - + O << ' ' << getRegisterName(MO2.getReg()); assert(ARM_AM::getSORegOffset(MO3.getImm()) == 0); } -- cgit v1.1 From 7926db82686be283ec4cdb68989806c69f388cb1 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 21 Oct 2011 16:59:08 +0000 Subject: Nuke an #if0 that got accidentally left in. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142658 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 31 ------------------------------- 1 file changed, 31 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 203a311..67a464f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1979,37 +1979,6 @@ int ARMAsmParser::tryParseRegister() { Parser.Lex(); // Eat identifier token. -#if 0 - // Also check for an index operand. This is only legal for vector registers, - // but that'll get caught OK in operand matching, so we don't need to - // explicitly filter everything else out here. - if (Parser.getTok().is(AsmToken::LBrac)) { - SMLoc SIdx = Parser.getTok().getLoc(); - Parser.Lex(); // Eat left bracket token. - - const MCExpr *ImmVal; - if (getParser().ParseExpression(ImmVal)) - return MatchOperand_ParseFail; - const MCConstantExpr *MCE = dyn_cast(ImmVal); - if (!MCE) { - TokError("immediate value expected for vector index"); - return MatchOperand_ParseFail; - } - - SMLoc E = Parser.getTok().getLoc(); - if (Parser.getTok().isNot(AsmToken::RBrac)) { - Error(E, "']' expected"); - return MatchOperand_ParseFail; - } - - Parser.Lex(); // Eat right bracket token. - - Operands.push_back(ARMOperand::CreateVectorIndex(MCE->getValue(), - SIdx, E, - getContext())); - } -#endif - return RegNum; } -- cgit v1.1 From a054bcb4cf26f9710bf6b7b256ba7313260a7335 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Fri, 21 Oct 2011 17:35:19 +0000 Subject: Fix pr11194. When promoting and splitting integers we need to use ZExtPromotedInteger and SExtPromotedInteger based on the operation we legalize. SetCC return type needs to be legalized via PromoteTargetBoolean. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142660 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index e950e07..8916e08 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -787,7 +787,17 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, break; } case TargetLowering::TypePromoteInteger: { - SDValue InOp = GetPromotedInteger(N->getOperand(0)); + SDValue InOp; + if (N->getOpcode() == ISD::SIGN_EXTEND || + N->getOpcode() == ISD::SINT_TO_FP) { + InOp = SExtPromotedInteger(N->getOperand(0)); + } else if ( + N->getOpcode() == ISD::ZERO_EXTEND || + N->getOpcode() == ISD::UINT_TO_FP) { + InOp = ZExtPromotedInteger(N->getOperand(0)); + } else { + InOp = GetPromotedInteger(N->getOperand(0)); + } EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InOp.getValueType().getVectorElementType(), LoVT.getVectorNumElements()); @@ -2189,8 +2199,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC, DAG.getIntPtrConstant(0)); - // Convert the result mask to the correct kind. - return DAG.getAnyExtOrTrunc(CC, dl, N->getValueType(0)); + return PromoteTargetBoolean(CC, N->getValueType(0)); } -- cgit v1.1 From 3cb056797b17785cfd2429ce8232143d5aeb5e42 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Fri, 21 Oct 2011 17:50:59 +0000 Subject: Expand the coverage of the libObject C bindings to include more SectionRef accessors as well as Symbol iterators. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142661 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Object.cpp | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'lib') diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 2ea8db9..2ce98d7 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -18,6 +18,7 @@ using namespace llvm; using namespace object; +// ObjectFile creation LLVMObjectFileRef LLVMCreateObjectFile(LLVMMemoryBufferRef MemBuf) { return wrap(ObjectFile::createObjectFile(unwrap(MemBuf))); } @@ -26,6 +27,7 @@ void LLVMDisposeObjectFile(LLVMObjectFileRef ObjectFile) { delete unwrap(ObjectFile); } +// ObjectFile Section iterators LLVMSectionIteratorRef LLVMGetSections(LLVMObjectFileRef ObjectFile) { section_iterator SI = unwrap(ObjectFile)->begin_sections(); return wrap(new section_iterator(SI)); @@ -46,6 +48,28 @@ void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { if (ec) report_fatal_error("LLVMMoveToNextSection failed: " + ec.message()); } +// ObjectFile Symbol iterators +LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile) { + symbol_iterator SI = unwrap(ObjectFile)->begin_symbols(); + return wrap(new symbol_iterator(SI)); +} + +void LLVMDisposeSymbolIterator(LLVMSymbolIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsSymbolIteratorAtEnd(LLVMObjectFileRef ObjectFile, + LLVMSymbolIteratorRef SI) { + return (*unwrap(SI) == unwrap(ObjectFile)->end_symbols()) ? 
1 : 0; +} + +void LLVMMoveToNextSymbol(LLVMSymbolIteratorRef SI) { + error_code ec; + unwrap(SI)->increment(ec); + if (ec) report_fatal_error("LLVMMoveToNextSymbol failed: " + ec.message()); +} + +// SectionRef accessors const char *LLVMGetSectionName(LLVMSectionIteratorRef SI) { StringRef ret; if (error_code ec = (*unwrap(SI))->getName(ret)) @@ -66,3 +90,48 @@ const char *LLVMGetSectionContents(LLVMSectionIteratorRef SI) { report_fatal_error(ec.message()); return ret.data(); } + +uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) { + uint64_t ret; + if (error_code ec = (*unwrap(SI))->getAddress(ret)) + report_fatal_error(ec.message()); + return ret; +} + +int LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI, + LLVMSymbolIteratorRef Sym) { + bool ret; + if (error_code ec = (*unwrap(SI))->containsSymbol(**unwrap(Sym), ret)) + report_fatal_error(ec.message()); + return ret; +} + +// SymbolRef accessors +const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) { + StringRef ret; + if (error_code ec = (*unwrap(SI))->getName(ret)) + report_fatal_error(ec.message()); + return ret.data(); +} + +uint64_t LLVMGetSymbolAddress(LLVMSymbolIteratorRef SI) { + uint64_t ret; + if (error_code ec = (*unwrap(SI))->getAddress(ret)) + report_fatal_error(ec.message()); + return ret; +} + +uint64_t LLVMGetSymbolOffset(LLVMSymbolIteratorRef SI) { + uint64_t ret; + if (error_code ec = (*unwrap(SI))->getOffset(ret)) + report_fatal_error(ec.message()); + return ret; +} + +uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { + uint64_t ret; + if (error_code ec = (*unwrap(SI))->getSize(ret)) + report_fatal_error(ec.message()); + return ret; +} + -- cgit v1.1 From e2fa64ef22321c5771667a8f00dca737ecb0799c Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Fri, 21 Oct 2011 18:21:22 +0000 Subject: Bind libObject API for obtaining the section containing a Symbol. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142667 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Object.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 2ce98d7..638105b 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -48,6 +48,12 @@ void LLVMMoveToNextSection(LLVMSectionIteratorRef SI) { if (ec) report_fatal_error("LLVMMoveToNextSection failed: " + ec.message()); } +void LLVMMoveToContainingSection(LLVMSectionIteratorRef Sect, + LLVMSymbolIteratorRef Sym) { + if (error_code ec = (*unwrap(Sym))->getSection(*unwrap(Sect))) + report_fatal_error(ec.message()); +} + // ObjectFile Symbol iterators LLVMSymbolIteratorRef LLVMGetSymbols(LLVMObjectFileRef ObjectFile) { symbol_iterator SI = unwrap(ObjectFile)->begin_symbols(); -- cgit v1.1 From 7784f1d2d8b76a7eb9dd9b3fef7213770605532d Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Fri, 21 Oct 2011 18:43:28 +0000 Subject: Don't automatically set the "fc" bits on MSR instructions if the user didn't ask for them. This is a divergence from gas' behavior, but it is correct per the documentation and allows us to forge ahead with roundtrip testing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142669 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 67a464f..0b585c1 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -2588,9 +2588,13 @@ parseMSRMaskOperand(SmallVectorImpl &Operands) { } else // No match for special register. return MatchOperand_NoMatch; - // Special register without flags are equivalent to "fc" flags. - if (!FlagsVal) - FlagsVal = 0x9; + // Special register without flags is NOT equivalent to "fc" flags. 
+ // NOTE: This is a divergence from gas' behavior. Uncommenting the following + // two lines would enable gas compatibility at the expense of breaking + // round-tripping. + // + // if (!FlagsVal) + // FlagsVal = 0x9; // Bit 4: Special Reg (cpsr, apsr => 0; spsr => 1) if (SpecReg == "spsr") -- cgit v1.1 From 280dfad48940a0a51726308dd3daa3b1b0d18705 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 21 Oct 2011 18:54:25 +0000 Subject: ARM VLD parsing and encoding. Next step in the ongoing saga of NEON load/store assembly parsing. Handle VLD1 instructions that take a two-register register list. Adjust the instruction definitions to only have the single encoded register as an operand. The super-register from the pseudo is kept as an implicit def, so passes which come after pseudo-expansion still know that the instruction defines the other subregs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142670 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 477 ++++++++++++------------ lib/Target/ARM/ARMInstrNEON.td | 16 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 12 + lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 8 - lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 9 + lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 1 + 6 files changed, 276 insertions(+), 247 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 7872cb9..2bc6590 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -106,6 +106,12 @@ namespace { NEONRegSpacing RegSpacing; unsigned char NumRegs; // D registers loaded or stored unsigned char RegElts; // elements per D register; used for lane ops + // FIXME: Temporary flag to denote whether the real instruction takes + // a single register (like the encoding) or all of the registers in + // the list (like the asm syntax and the isel DAG).
When all definitions + // are converted to take only the single encoded register, this will + // go away. + bool copyAllListRegs; // Comparison methods for binary search of the table. bool operator<(const NEONLdStTableEntry &TE) const { @@ -122,237 +128,237 @@ namespace { } static const NEONLdStTableEntry NEONLdStTable[] = { -{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4}, -{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4}, -{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2}, -{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2}, -{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8}, -{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8}, - -{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 }, -{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 }, -{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 }, -{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 }, -{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 }, -{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 }, - -{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 }, -{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 }, -{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 }, -{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 }, - -{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 }, -{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 }, -{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 }, -{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 }, -{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 }, -{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 }, -{ 
ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 }, -{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 }, - -{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4}, -{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4}, -{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2}, -{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2}, -{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8}, -{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8}, - -{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 }, -{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 }, -{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 }, -{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 }, -{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 }, -{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 }, -{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 }, -{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 }, -{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 }, -{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 }, - -{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 }, -{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 }, -{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 }, -{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 }, -{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 }, -{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 }, - -{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 }, -{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 }, -{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, 
SingleSpc, 4, 2 }, -{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 }, -{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 }, -{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 }, - -{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4}, -{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4}, -{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2}, -{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2}, -{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8}, -{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8}, - -{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 }, -{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 }, -{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 }, -{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 }, -{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 }, -{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 }, -{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 }, -{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 }, -{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 }, -{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 }, - -{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 }, -{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 }, -{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 }, -{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 }, -{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 }, -{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 }, - -{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 }, -{ 
ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 }, -{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 }, -{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 }, -{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 }, -{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 }, -{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 }, -{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 }, -{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 }, - -{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4}, -{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4}, -{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2}, -{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2}, -{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8}, -{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8}, - -{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 }, -{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 }, -{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 }, -{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 }, -{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 }, -{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 }, -{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 }, -{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 }, -{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 }, -{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 }, - -{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 }, -{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 }, -{ ARM::VLD4d32Pseudo, 
ARM::VLD4d32, true, false, SingleSpc, 4, 2 }, -{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 }, -{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 }, -{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 }, - -{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 }, -{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 }, -{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 }, -{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 }, -{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 }, -{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 }, -{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 }, -{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 }, -{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 }, - -{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 }, -{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 }, -{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 }, -{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 }, -{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 }, -{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 }, - -{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 }, -{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 }, -{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 }, -{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 }, - -{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 }, -{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 }, -{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 }, -{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, 
SingleSpc, 2, 2 }, -{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 }, -{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 }, -{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 }, -{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 }, - -{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 }, -{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 }, -{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 }, -{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 }, -{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 }, -{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 }, -{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4}, -{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4}, -{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2}, -{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2}, - -{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 }, -{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 }, -{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 }, -{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 }, -{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 }, -{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 }, - -{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 }, -{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 }, -{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 }, -{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 }, -{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 }, -{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 }, - -{ ARM::VST3LNd16Pseudo, 
ARM::VST3LNd16, false, false, SingleSpc, 3, 4 }, -{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 }, -{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 }, -{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 }, -{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 }, -{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 }, -{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4}, -{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4}, -{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2}, -{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2}, - -{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 }, -{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 }, -{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 }, -{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 }, -{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 }, -{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 }, - -{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 }, -{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 }, -{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 }, -{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 }, -{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 }, -{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 }, -{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 }, -{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 }, -{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 }, - -{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 }, -{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, 
false, true, SingleSpc, 4, 4 }, -{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 }, -{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 }, -{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 }, -{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 }, -{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4}, -{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4}, -{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2}, -{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2}, - -{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 }, -{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 }, -{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 }, -{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 }, -{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 }, -{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 }, - -{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 }, -{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 }, -{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 }, -{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 }, -{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 }, -{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 }, -{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 }, -{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 }, -{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 } +{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4,true}, +{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4,true}, +{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, 
SingleSpc, 2, 2,true}, +{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2,true}, +{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8,true}, +{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8,true}, + +{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 ,true}, +{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 ,true}, +{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 ,true}, +{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 ,true}, +{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 ,true}, +{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 ,true}, + +{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 ,true}, +{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 ,true}, +{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 ,true}, +{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 ,true}, + +{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 ,false}, +{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 ,false}, +{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 ,false}, +{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 ,false}, +{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 ,false}, +{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 ,false}, + +{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4,true}, +{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4,true}, +{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2,true}, +{ ARM::VLD2DUPd32Pseudo_UPD, 
ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2,true}, +{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8,true}, +{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8,true}, + +{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 ,true}, +{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 ,true}, +{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 ,true}, +{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 ,true}, +{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 ,true}, +{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, SingleSpc, 2, 8 ,true}, +{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 ,true}, +{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 ,true}, +{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 ,true}, +{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 ,true}, + +{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 ,true}, +{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 ,true}, +{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 ,true}, +{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 ,true}, +{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 ,true}, +{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 ,true}, + +{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 ,true}, +{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 ,true}, +{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 ,true}, +{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 ,true}, +{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 ,true}, +{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 ,true}, + +{ ARM::VLD3DUPd16Pseudo, 
ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4,true}, +{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4,true}, +{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2,true}, +{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2,true}, +{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8,true}, +{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8,true}, + +{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 ,true}, +{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 ,true}, +{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 ,true}, +{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 ,true}, +{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 ,true}, +{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 ,true}, + +{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 ,true}, +{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 ,true}, + +{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 ,true}, +{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 ,true}, +{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 ,true}, +{ 
ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 ,true}, +{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 ,true}, +{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 ,true}, +{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 ,true}, +{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 ,true}, +{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 ,true}, + +{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4,true}, +{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4,true}, +{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2,true}, +{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2,true}, +{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8,true}, +{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8,true}, + +{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 ,true}, +{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 ,true}, +{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 ,true}, +{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 ,true}, +{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 ,true}, +{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 ,true}, +{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 ,true}, +{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 ,true}, +{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 ,true}, +{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 ,true}, + +{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 ,true}, +{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 ,true}, +{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, 
SingleSpc, 4, 2 ,true}, +{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 ,true}, +{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 ,true}, +{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 ,true}, + +{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 ,true}, +{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 ,true}, +{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 ,true}, +{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 ,true}, +{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 ,true}, +{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 ,true}, +{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 ,true}, +{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 ,true}, +{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 ,true}, + +{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 ,true}, +{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 ,true}, +{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 ,true}, +{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 ,true}, +{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 ,true}, +{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 ,true}, + +{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 ,true}, +{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 ,true}, +{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 ,true}, +{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 ,true}, + +{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 ,true}, +{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 ,true}, +{ ARM::VST1q32Pseudo, ARM::VST1q32, 
false, false, SingleSpc, 2, 2 ,true}, +{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 ,true}, +{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 ,true}, +{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 ,true}, +{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 ,true}, +{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 ,true}, + +{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 ,true}, +{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 ,true}, +{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 2, 2 ,true}, +{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 ,true}, +{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 ,true}, +{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 ,true}, +{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4,true}, +{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4,true}, +{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2,true}, +{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2,true}, + +{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 ,true}, +{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 ,true}, +{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 ,true}, +{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 ,true}, +{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 ,true}, +{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 ,true}, + +{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 ,true}, +{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 ,true}, +{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 ,true}, +{ ARM::VST2q32Pseudo_UPD, 
ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 ,true}, +{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 ,true}, +{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 ,true}, + +{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 ,true}, +{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 ,true}, +{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 ,true}, +{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 ,true}, +{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 ,true}, +{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, SingleSpc, 3, 8 ,true}, +{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4,true}, +{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4,true}, +{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2,true}, +{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2,true}, + +{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 ,true}, +{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 ,true}, +{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 ,true}, +{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 ,true}, +{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 ,true}, +{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 ,true}, + +{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 ,true}, +{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 ,true}, +{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 ,true}, +{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 ,true}, +{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 ,true}, +{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 ,true}, +{ 
ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 ,true}, +{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 ,true}, +{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 ,true}, + +{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 ,true}, +{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 ,true}, +{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 ,true}, +{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 ,true}, +{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 ,true}, +{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 ,true}, +{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4,true}, +{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4,true}, +{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2,true}, +{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2,true}, + +{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 ,true}, +{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 ,true}, +{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 ,true}, +{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 ,true}, +{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 ,true}, +{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 ,true}, + +{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 ,true}, +{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 ,true}, +{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 ,true}, +{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 ,true}, +{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 ,true}, +{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, 
OddDblSpc, 4, 2 ,true}, +{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 ,true}, +{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 ,true}, +{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 ,true} }; /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON @@ -422,11 +428,12 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { unsigned DstReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); - MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) - .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); - if (NumRegs > 2) + MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 1 && TableEntry->copyAllListRegs) + MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); + if (NumRegs > 2 && TableEntry->copyAllListRegs) MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); - if (NumRegs > 3) + if (NumRegs > 3 && TableEntry->copyAllListRegs) MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); if (TableEntry->HasWriteBack) diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 502c629..d7ebd37 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -77,6 +77,14 @@ def VecListOneDAsmOperand : AsmOperandClass { def VecListOneD : RegisterOperand<DPR, "printVectorListOne"> { let ParserMatchClass = VecListOneDAsmOperand; } +// Register list of two sequential D registers. +def VecListTwoDAsmOperand : AsmOperandClass { + let Name = "VecListTwoD"; + let ParserMethod = "parseVectorList"; +} +def VecListTwoD : RegisterOperand<DPR, "printVectorListTwo"> { + let ParserMatchClass = VecListTwoDAsmOperand; +} //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes.
@@ -253,9 +261,9 @@ class VLD1D<bits<4> op7_4, string Dt> let DecoderMethod = "DecodeVLDInstruction"; } class VLD1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2), + : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd), (ins addrmode6:$Rn), IIC_VLD1x2, - "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { + "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; @@ -286,9 +294,9 @@ class VLD1DWB<bits<4> op7_4, string Dt> let DecoderMethod = "DecodeVLDInstruction"; } class VLD1QWB<bits<4> op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), + : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u, - "vld1", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", + "vld1", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 0b585c1..7ec3c8e 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -915,6 +915,11 @@ public: return VectorList.Count == 1; } + bool isVecListTwoD() const { + if (Kind != k_VectorList) return false; + return VectorList.Count == 2; + } + bool isVectorIndex8() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 8; @@ -1507,6 +1512,13 @@ public: Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); } + void addVecListTwoDOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // Only the first register actually goes on the instruction. The rest + // are implied by the opcode.
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); + } + void addVectorIndex8Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index b1f7fd6..d077d46 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1959,14 +1959,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Second output register switch (Inst.getOpcode()) { - case ARM::VLD1q8: - case ARM::VLD1q16: - case ARM::VLD1q32: - case ARM::VLD1q64: - case ARM::VLD1q8_UPD: - case ARM::VLD1q16_UPD: - case ARM::VLD1q32_UPD: - case ARM::VLD1q64_UPD: case ARM::VLD1d8T: case ARM::VLD1d16T: case ARM::VLD1d32T: diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index b907e57..1a7e170 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -995,3 +995,12 @@ void ARMInstPrinter::printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O) { O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << "}"; } + +void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D<n>.
+ O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "}"; +} diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 3b5e866..1d4bff6 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -130,6 +130,7 @@ public: void printT2LdrLabelOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); }; } // end namespace llvm -- cgit v1.1 From d2822e7572e75287db66acb14b2d988a80faebdd Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 21 Oct 2011 19:06:29 +0000 Subject: Extend instcombine's shufflevector simplification to handle more cases where the input and output vectors have different sizes. Patch by Xiaoyi Guo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142671 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../InstCombine/InstCombineVectorOps.cpp | 256 ++++++++++++++++----- 1 file changed, 195 insertions(+), 61 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 154267c..6dcfa0d 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -55,14 +55,14 @@ static bool CheapToScalarize(Value *V, bool isConstant) { /// getShuffleMask - Read and decode a shufflevector mask. /// Turn undef elements into negative values. 
-static std::vector<int> getShuffleMask(const ShuffleVectorInst *SVI) { +static SmallVector<int, 16> getShuffleMask(const ShuffleVectorInst *SVI) { unsigned NElts = SVI->getType()->getNumElements(); if (isa<ConstantAggregateZero>(SVI->getOperand(2))) - return std::vector<int>(NElts, 0); + return SmallVector<int, 16>(NElts, 0); if (isa<UndefValue>(SVI->getOperand(2))) - return std::vector<int>(NElts, -1); + return SmallVector<int, 16>(NElts, -1); - std::vector<int> Result; + SmallVector<int, 16> Result; const ConstantVector *CP = cast<ConstantVector>(SVI->getOperand(2)); for (User::const_op_iterator i = CP->op_begin(), e = CP->op_end(); i!=e; ++i) if (isa<UndefValue>(*i)) @@ -447,7 +447,7 @@ Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { Value *LHS = SVI.getOperand(0); Value *RHS = SVI.getOperand(1); - std::vector<int> Mask = getShuffleMask(&SVI); + SmallVector<int, 16> Mask = getShuffleMask(&SVI); bool MadeChange = false; @@ -457,9 +457,6 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); - if (VWidth != cast<VectorType>(LHS->getType())->getNumElements()) - return 0; - APInt UndefElts(VWidth, 0); APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { @@ -470,17 +467,21 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { MadeChange = true; } + unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); + // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). if (LHS == RHS || isa<UndefValue>(LHS)) { if (isa<UndefValue>(LHS) && LHS == RHS) { // shuffle(undef,undef,mask) -> undef. - return ReplaceInstUsesWith(SVI, LHS); + Value* result = (VWidth == LHSWidth) + ? LHS : UndefValue::get(SVI.getType()); + return ReplaceInstUsesWith(SVI, result); } // Remap any references to RHS to use LHS.
std::vector Elts; - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { if (Mask[i] < 0) Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext()))); else { @@ -503,72 +504,205 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { MadeChange = true; } - // Analyze the shuffle, are the LHS or RHS and identity shuffles? - bool isLHSID = true, isRHSID = true; + if (VWidth == LHSWidth) { + // Analyze the shuffle, are the LHS or RHS and identity shuffles? + bool isLHSID = true, isRHSID = true; + + for (unsigned i = 0, e = Mask.size(); i != e; ++i) { + if (Mask[i] < 0) continue; // Ignore undef values. + // Is this an identity shuffle of the LHS value? + isLHSID &= (Mask[i] == (int)i); - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - if (Mask[i] < 0) continue; // Ignore undef values. - // Is this an identity shuffle of the LHS value? - isLHSID &= (Mask[i] == (int)i); + // Is this an identity shuffle of the RHS value? + isRHSID &= (Mask[i]-e == i); + } - // Is this an identity shuffle of the RHS value? - isRHSID &= (Mask[i]-e == i); + // Eliminate identity shuffles. + if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); + if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); } - // Eliminate identity shuffles. - if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); - if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); - // If the LHS is a shufflevector itself, see if we can combine it with this - // one without producing an unusual shuffle. Here we are really conservative: + // one without producing an unusual shuffle. + // Cases that might be simplified: + // 1. + // x1=shuffle(v1,v2,mask1) + // x=shuffle(x1,undef,mask) + // ==> + // x=shuffle(v1,undef,newMask) + // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 + // 2. 
+ // x1=shuffle(v1,undef,mask1) + // x=shuffle(x1,x2,mask) + // where v1.size() == mask1.size() + // ==> + // x=shuffle(v1,x2,newMask) + // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] + // 3. + // x2=shuffle(v2,undef,mask2) + // x=shuffle(x1,x2,mask) + // where v2.size() == mask2.size() + // ==> + // x=shuffle(x1,v2,newMask) + // newMask[i] = (mask[i] < x1.size()) + // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() + // 4. + // x1=shuffle(v1,undef,mask1) + // x2=shuffle(v2,undef,mask2) + // x=shuffle(x1,x2,mask) + // where v1.size() == v2.size() + // ==> + // x=shuffle(v1,v2,newMask) + // newMask[i] = (mask[i] < x1.size()) + // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() + // + // Here we are really conservative: // we are absolutely afraid of producing a shuffle mask not in the input // program, because the code gen may not be smart enough to turn a merged // shuffle into two specific shuffles: it may produce worse code. As such, // we only merge two shuffles if the result is either a splat or one of the - // two input shuffle masks. In this case, merging the shuffles just removes + // input shuffle masks. In this case, merging the shuffles just removes // one instruction, which we know is safe. This is good for things like - // turning: (splat(splat)) -> splat. - if (ShuffleVectorInst *LHSSVI = dyn_cast(LHS)) { + // turning: (splat(splat)) -> splat, or + // merge(V[0..n], V[n+1..2n]) -> V[0..2n] + ShuffleVectorInst* LHSShuffle = dyn_cast(LHS); + ShuffleVectorInst* RHSShuffle = dyn_cast(RHS); + if (LHSShuffle) + if (!isa(LHSShuffle->getOperand(1)) && !isa(RHS)) + LHSShuffle = NULL; + if (RHSShuffle) + if (!isa(RHSShuffle->getOperand(1))) + RHSShuffle = NULL; + if (!LHSShuffle && !RHSShuffle) + return MadeChange ? 
&SVI : 0; + + Value* LHSOp0 = NULL; + Value* LHSOp1 = NULL; + Value* RHSOp0 = NULL; + unsigned LHSOp0Width = 0; + unsigned RHSOp0Width = 0; + if (LHSShuffle) { + LHSOp0 = LHSShuffle->getOperand(0); + LHSOp1 = LHSShuffle->getOperand(1); + LHSOp0Width = cast(LHSOp0->getType())->getNumElements(); + } + if (RHSShuffle) { + RHSOp0 = RHSShuffle->getOperand(0); + RHSOp0Width = cast(RHSOp0->getType())->getNumElements(); + } + Value* newLHS = LHS; + Value* newRHS = RHS; + if (LHSShuffle) { + // case 1 if (isa(RHS)) { - std::vector LHSMask = getShuffleMask(LHSSVI); - - if (LHSMask.size() == Mask.size()) { - std::vector NewMask; - bool isSplat = true; - int SplatElt = -1; // undef - for (unsigned i = 0, e = Mask.size(); i != e; ++i) { - int MaskElt; - if (Mask[i] < 0 || Mask[i] >= (int)e) - MaskElt = -1; // undef - else - MaskElt = LHSMask[Mask[i]]; - // Check if this could still be a splat. - if (MaskElt >= 0) { - if (SplatElt >=0 && SplatElt != MaskElt) - isSplat = false; - SplatElt = MaskElt; - } - NewMask.push_back(MaskElt); - } + newLHS = LHSOp0; + newRHS = LHSOp1; + } + // case 2 or 4 + else if (LHSOp0Width == LHSWidth) { + newLHS = LHSOp0; + } + } + // case 3 or 4 + if (RHSShuffle && RHSOp0Width == LHSWidth) { + newRHS = RHSOp0; + } + // case 4 + if (LHSOp0 == RHSOp0) { + newLHS = LHSOp0; + newRHS = NULL; + } - // If the result mask is equal to the src shuffle or this - // shuffle mask, do the replacement. - if (isSplat || NewMask == LHSMask || NewMask == Mask) { - std::vector Elts; - Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); - for (unsigned i = 0, e = NewMask.size(); i != e; ++i) { - if (NewMask[i] < 0) { - Elts.push_back(UndefValue::get(Int32Ty)); - } else { - Elts.push_back(ConstantInt::get(Int32Ty, NewMask[i])); - } - } - return new ShuffleVectorInst(LHSSVI->getOperand(0), - LHSSVI->getOperand(1), - ConstantVector::get(Elts)); + if (newLHS == LHS && newRHS == RHS) + return MadeChange ? 
&SVI : 0; + + SmallVector LHSMask; + SmallVector RHSMask; + if (newLHS != LHS) { + LHSMask = getShuffleMask(LHSShuffle); + } + if (RHSShuffle && newRHS != RHS) { + RHSMask = getShuffleMask(RHSShuffle); + } + unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; + SmallVector newMask; + bool isSplat = true; + int SplatElt = -1; + // Create a new mask for the new ShuffleVectorInst so that the new + // ShuffleVectorInst is equivalent to the original one. + for (unsigned i = 0; i < VWidth; ++i) { + int eltMask; + if (Mask[i] == -1) { + // This element is an undef value. + eltMask = -1; + } else if (Mask[i] < (int)LHSWidth) { + // This element is from left hand side vector operand. + // + // If LHS is going to be replaced (case 1, 2, or 4), calculate the + // new mask value for the element. + if (newLHS != LHS) { + eltMask = LHSMask[Mask[i]]; + // If the value selected is an undef value, explicitly specify it + // with a -1 mask value. + if (eltMask >= (int)LHSOp0Width && isa(LHSOp1)) + eltMask = -1; + } + else + eltMask = Mask[i]; + } else { + // This element is from right hand side vector operand + // + // If the value selected is an undef value, explicitly specify it + // with a -1 mask value. (case 1) + if (isa(RHS)) + eltMask = -1; + // If RHS is going to be replaced (case 3 or 4), calculate the + // new mask value for the element. + else if (newRHS != RHS) { + eltMask = RHSMask[Mask[i]-LHSWidth]; + // If the value selected is an undef value, explicitly specify it + // with a -1 mask value. + if (eltMask >= (int)RHSOp0Width) { + assert(isa(RHSShuffle->getOperand(1)) + && "should have been check above"); + eltMask = -1; } } + else + eltMask = Mask[i]-LHSWidth; + + // If LHS's width is changed, shift the mask value accordingly. + // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any + // references to RHSOp0 to LHSOp0, so we don't need to shift the mask. 
+ if (eltMask >= 0 && newRHS != NULL) + eltMask += newLHSWidth; + } + + // Check if this could still be a splat. + if (eltMask >= 0) { + if (SplatElt >= 0 && SplatElt != eltMask) + isSplat = false; + SplatElt = eltMask; + } + + newMask.push_back(eltMask); + } + + // If the result mask is equal to one of the original shuffle masks, + // or is a splat, do the replacement. + if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { + SmallVector Elts; + Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); + for (unsigned i = 0, e = newMask.size(); i != e; ++i) { + if (newMask[i] < 0) { + Elts.push_back(UndefValue::get(Int32Ty)); + } else { + Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); + } } + if (newRHS == NULL) + newRHS = UndefValue::get(newLHS->getType()); + return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); } return MadeChange ? &SVI : 0; -- cgit v1.1 From 8d992f5c2c90ebc8963679de51f461dc5d54fae1 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 21 Oct 2011 19:11:34 +0000 Subject: Minor simplification: use ShuffleVectorInst::getMaskValue instead of a more expensive helper. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142672 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index 6dcfa0d..0995d46 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -108,7 +108,7 @@ static Value *FindScalarElement(Value *V, unsigned EltNo) { if (ShuffleVectorInst *SVI = dyn_cast(V)) { unsigned LHSWidth = cast(SVI->getOperand(0)->getType())->getNumElements(); - int InEl = getShuffleMask(SVI)[EltNo]; + int InEl = SVI->getMaskValue(EltNo); if (InEl < 0) return UndefValue::get(PTy->getElementType()); if (InEl < (int)LHSWidth) @@ -212,7 +212,7 @@ Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { // If this is extracting an element from a shufflevector, figure out where // it came from and extract from the appropriate input element instead. if (ConstantInt *Elt = dyn_cast(EI.getOperand(1))) { - int SrcIdx = getShuffleMask(SVI)[Elt->getZExtValue()]; + int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); Value *Src; unsigned LHSWidth = cast(SVI->getOperand(0)->getType())->getNumElements(); -- cgit v1.1 From a48aab924d93be3cdb4a230a9158d210b97b3f56 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Fri, 21 Oct 2011 19:26:54 +0000 Subject: STABS symbols are debug symbols. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142673 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 472e072..7c6c232 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -294,8 +294,10 @@ error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, Res = SymbolRef::ST_Other; // If this is a STAB debugging symbol, we can do nothing more. - if (n_type & MachO::NlistMaskStab) + if (n_type & MachO::NlistMaskStab) { + Res = SymbolRef::ST_Debug; return object_error::success; + } switch (n_type & MachO::NlistMaskType) { case MachO::NListTypeUndefined : -- cgit v1.1 From cdcfa280568d5d48ebeba2dcfc87915105e090d1 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 21 Oct 2011 20:02:19 +0000 Subject: Assembly parsing for 3-register variant of VLD1. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142675 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 4 ++-- lib/Target/ARM/ARMInstrNEON.td | 16 ++++++++++++---- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 12 ++++++++++++ lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 16 ---------------- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 10 ++++++++++ lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 1 + 6 files changed, 37 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 2bc6590..a133f7b 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -144,8 +144,8 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 ,true}, { ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 ,true}, -{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, 
SingleSpc, 3, 1 ,true}, -{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 ,true}, +{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 ,false}, +{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 ,false}, { ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 ,false}, { ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 ,false}, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d7ebd37..f917bc042 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -85,6 +85,14 @@ def VecListTwoDAsmOperand : AsmOperandClass { def VecListTwoD : RegisterOperand { let ParserMatchClass = VecListTwoDAsmOperand; } +// Register list of three sequential D registers. +def VecListThreeDAsmOperand : AsmOperandClass { + let Name = "VecListThreeD"; + let ParserMethod = "parseVectorList"; +} +def VecListThreeD : RegisterOperand { + let ParserMatchClass = VecListThreeDAsmOperand; +} //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. 
@@ -319,17 +327,17 @@ def VLD1q64Pseudo_UPD : VLDQWBPseudo; // ...with 3 registers class VLD1D3 op7_4, string Dt> - : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3), + : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd), (ins addrmode6:$Rn), IIC_VLD1x3, "vld1", Dt, - "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { + "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; } class VLD1D3WB op7_4, string Dt> - : NLdSt<0,0b10,0b0110,op7_4, (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, GPR:$wb), + : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt, - "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { + "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 7ec3c8e..1db8268 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -920,6 +920,11 @@ public: return VectorList.Count == 2; } + bool isVecListThreeD() const { + if (Kind != k_VectorList) return false; + return VectorList.Count == 3; + } + bool isVectorIndex8() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 8; @@ -1519,6 +1524,13 @@ public: Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); } + void addVecListThreeDOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // Only the first register actually goes on the instruction. The rest + // are implied by the opcode. 
+ Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); + } + void addVectorIndex8Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index d077d46..361cf91 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1959,14 +1959,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Second output register switch (Inst.getOpcode()) { - case ARM::VLD1d8T: - case ARM::VLD1d16T: - case ARM::VLD1d32T: - case ARM::VLD1d64T: - case ARM::VLD1d8T_UPD: - case ARM::VLD1d16T_UPD: - case ARM::VLD1d32T_UPD: - case ARM::VLD1d64T_UPD: case ARM::VLD1d8Q: case ARM::VLD1d16Q: case ARM::VLD1d32Q: @@ -2028,14 +2020,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Third output register switch(Inst.getOpcode()) { - case ARM::VLD1d8T: - case ARM::VLD1d16T: - case ARM::VLD1d32T: - case ARM::VLD1d64T: - case ARM::VLD1d8T_UPD: - case ARM::VLD1d16T_UPD: - case ARM::VLD1d32T_UPD: - case ARM::VLD1d64T_UPD: case ARM::VLD1d8Q: case ARM::VLD1d16Q: case ARM::VLD1d32Q: diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 1a7e170..df79603 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1004,3 +1004,13 @@ void ARMInstPrinter::printVectorListTwo(const MCInst *MI, unsigned OpNum, O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << "}"; } + +void ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // 
sort order is guaranteed because they're all of the form D. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; +} diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 1d4bff6..7157e7b 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -131,6 +131,7 @@ public: void printVectorIndex(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); }; } // end namespace llvm -- cgit v1.1 From c888aa47bdf4b5766f30e4d1a2c3c2d4e06a18b5 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 21 Oct 2011 20:12:47 +0000 Subject: BranchProbabilityInfo: floating point equality is unlikely. This is from the same paper from Ball and Larus as the rest of the currently implemented heuristics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142677 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/BranchProbabilityInfo.cpp | 36 ++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 52090c9..6e8d7e0 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -76,6 +76,9 @@ class BranchProbabilityAnalysis { static const uint32_t ZH_TAKEN_WEIGHT = 20; static const uint32_t ZH_NONTAKEN_WEIGHT = 12; + static const uint32_t FPH_TAKEN_WEIGHT = 20; + static const uint32_t FPH_NONTAKEN_WEIGHT = 12; + // Standard weight value. Used when none of the heuristics set weight for // the edge. 
static const uint32_t NORMAL_WEIGHT = 16; @@ -131,9 +134,12 @@ public: // Loop Branch Heuristics bool calcLoopBranchHeuristics(BasicBlock *BB); - // Zero Heurestics + // Zero Heuristics bool calcZeroHeuristics(BasicBlock *BB); + // Floating Point Heuristics + bool calcFloatingPointHeuristics(BasicBlock *BB); + bool runOnFunction(Function &F); }; } // end anonymous namespace @@ -378,6 +384,29 @@ bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) { return true; } +bool BranchProbabilityAnalysis::calcFloatingPointHeuristics(BasicBlock *BB) { + BranchInst *BI = dyn_cast(BB->getTerminator()); + if (!BI || !BI->isConditional()) + return false; + + Value *Cond = BI->getCondition(); + FCmpInst *FCmp = dyn_cast(Cond); + if (!FCmp || !FCmp->isEquality()) + return false; + + BasicBlock *Taken = BI->getSuccessor(0); + BasicBlock *NonTaken = BI->getSuccessor(1); + + // f1 == f2 -> Unlikely + // f1 != f2 -> Likely + if (FCmp->isTrueWhenEqual()) + std::swap(Taken, NonTaken); + + BP->setEdgeWeight(BB, Taken, FPH_TAKEN_WEIGHT); + BP->setEdgeWeight(BB, NonTaken, FPH_NONTAKEN_WEIGHT); + + return true; +} bool BranchProbabilityAnalysis::runOnFunction(Function &F) { @@ -396,7 +425,10 @@ bool BranchProbabilityAnalysis::runOnFunction(Function &F) { if (calcPointerHeuristics(BB)) continue; - calcZeroHeuristics(BB); + if (calcZeroHeuristics(BB)) + continue; + + calcFloatingPointHeuristics(BB); } return false; -- cgit v1.1 From b6310316dbaf8716003531d7ed245f77f1a76a11 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 21 Oct 2011 20:35:01 +0000 Subject: Assembly parsing for 4-register variant of VLD1. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142682 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 4 ++-- lib/Target/ARM/ARMInstrNEON.td | 17 ++++++++++++----- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 12 ++++++++++++ lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 24 ------------------------ lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 11 +++++++++++ lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 1 + 6 files changed, 38 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index a133f7b..9cd6894 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -142,8 +142,8 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 ,true}, { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 ,true}, -{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 ,true}, -{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 ,true}, +{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 ,false}, +{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 ,false}, { ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 ,false}, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f917bc042..78a57fb 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -93,6 +93,14 @@ def VecListThreeDAsmOperand : AsmOperandClass { def VecListThreeD : RegisterOperand { let ParserMatchClass = VecListThreeDAsmOperand; } +// Register list of four sequential D registers. 
+def VecListFourDAsmOperand : AsmOperandClass { + let Name = "VecListFourD"; + let ParserMethod = "parseVectorList"; +} +def VecListFourD : RegisterOperand { + let ParserMatchClass = VecListFourDAsmOperand; +} //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -357,18 +365,17 @@ def VLD1d64TPseudo_UPD : VLDQQWBPseudo; // ...with 4 registers class VLD1D4 op7_4, string Dt> - : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd), (ins addrmode6:$Rn), IIC_VLD1x4, "vld1", Dt, - "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } class VLD1D4WB op7_4, string Dt> - : NLdSt<0,0b10,0b0010,op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt, - "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", + "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 1db8268..36438db 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -925,6 +925,11 @@ public: return VectorList.Count == 3; } + bool isVecListFourD() const { + if (Kind != k_VectorList) return false; + return VectorList.Count == 4; + } + bool isVectorIndex8() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 8; @@ -1531,6 +1536,13 @@ public: Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); } + void addVecListFourDOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // Only the first register actually goes on the instruction. 
The rest + // are implied by the opcode. + Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); + } + void addVectorIndex8Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 361cf91..577dd80 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1959,14 +1959,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Second output register switch (Inst.getOpcode()) { - case ARM::VLD1d8Q: - case ARM::VLD1d16Q: - case ARM::VLD1d32Q: - case ARM::VLD1d64Q: - case ARM::VLD1d8Q_UPD: - case ARM::VLD1d16Q_UPD: - case ARM::VLD1d32Q_UPD: - case ARM::VLD1d64Q_UPD: case ARM::VLD2d8: case ARM::VLD2d16: case ARM::VLD2d32: @@ -2020,14 +2012,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Third output register switch(Inst.getOpcode()) { - case ARM::VLD1d8Q: - case ARM::VLD1d16Q: - case ARM::VLD1d32Q: - case ARM::VLD1d64Q: - case ARM::VLD1d8Q_UPD: - case ARM::VLD1d16Q_UPD: - case ARM::VLD1d32Q_UPD: - case ARM::VLD1d64Q_UPD: case ARM::VLD2q8: case ARM::VLD2q16: case ARM::VLD2q32: @@ -2070,14 +2054,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Fourth output register switch (Inst.getOpcode()) { - case ARM::VLD1d8Q: - case ARM::VLD1d16Q: - case ARM::VLD1d32Q: - case ARM::VLD1d64Q: - case ARM::VLD1d8Q_UPD: - case ARM::VLD1d16Q_UPD: - case ARM::VLD1d32Q_UPD: - case ARM::VLD1d64Q_UPD: case ARM::VLD2q8: case ARM::VLD2q16: case ARM::VLD2q32: diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index df79603..e4a56be 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -1014,3 +1014,14 @@ void 
ARMInstPrinter::printVectorListThree(const MCInst *MI, unsigned OpNum, << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", " << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << "}"; } + +void ARMInstPrinter::printVectorListFour(const MCInst *MI, unsigned OpNum, + raw_ostream &O) { + // Normally, it's not safe to use register enum values directly with + // addition to get the next register, but for VFP registers, the + // sort order is guaranteed because they're all of the form D. + O << "{" << getRegisterName(MI->getOperand(OpNum).getReg()) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 1) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 2) << ", " + << getRegisterName(MI->getOperand(OpNum).getReg() + 3) << "}"; +} diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h index 7157e7b..3f38f1a 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.h +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.h @@ -132,6 +132,7 @@ public: void printVectorListOne(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListTwo(const MCInst *MI, unsigned OpNum, raw_ostream &O); void printVectorListThree(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printVectorListFour(const MCInst *MI, unsigned OpNum, raw_ostream &O); }; } // end namespace llvm -- cgit v1.1 From ca7eb3e171f63de3cfc1020d86375ce11d356194 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Fri, 21 Oct 2011 20:35:58 +0000 Subject: Use LLVMBool for a function that logically returns a boolean value. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142683 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Object.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index 638105b..dea1466 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -104,7 +104,7 @@ uint64_t LLVMGetSectionAddress(LLVMSectionIteratorRef SI) { return ret; } -int LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI, +LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI, LLVMSymbolIteratorRef Sym) { bool ret; if (error_code ec = (*unwrap(SI))->containsSymbol(**unwrap(Sym), ret)) -- cgit v1.1 From 4090e1ce91fd5a6a690fd0bd6c9240b69ac1f301 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 21 Oct 2011 20:45:19 +0000 Subject: Remap blockaddress correctly when inlining a function. Fixes PR10162. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142684 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/CloneFunction.cpp | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index cf21f1e..c6dfe73 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -113,8 +113,23 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, // Create a new basic block and copy instructions into it! BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo); - VMap[&BB] = CBB; // Add basic block mapping. + // Add basic block mapping. + VMap[&BB] = CBB; + + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. 
(This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + if (BB.hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast(OldFunc), + const_cast(&BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); + } + + // Note return instructions for the caller. if (ReturnInst *RI = dyn_cast(CBB->getTerminator())) Returns.push_back(RI); } @@ -224,6 +239,22 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB, BBEntry = NewBB = BasicBlock::Create(BB->getContext()); if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix); + // It is only legal to clone a function if a block address within that + // function is never referenced outside of the function. Given that, we + // want to map block addresses from the old function to block addresses in + // the clone. (This is different from the generic ValueMapper + // implementation, which generates an invalid blockaddress when + // cloning a function.) + // + // Note that we don't need to fix the mapping for unreachable blocks; + // the default mapping there is safe. + if (BB->hasAddressTaken()) { + Constant *OldBBAddr = BlockAddress::get(const_cast(OldFunc), + const_cast(BB)); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, NewBB); + } + + bool hasCalls = false, hasDynamicAllocas = false, hasStaticAllocas = false; // Loop over all instructions, and copy them over, DCE'ing as we go. This -- cgit v1.1 From 675c02b0b9967f286b4d5aa8c3f7d8fc9e7988c4 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 21 Oct 2011 21:13:47 +0000 Subject: Extend the floating point heuristic to consider NaN checks unlikely. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142687 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/BranchProbabilityInfo.cpp | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 6e8d7e0..fc69da9 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -391,15 +391,28 @@ bool BranchProbabilityAnalysis::calcFloatingPointHeuristics(BasicBlock *BB) { Value *Cond = BI->getCondition(); FCmpInst *FCmp = dyn_cast(Cond); - if (!FCmp || !FCmp->isEquality()) + if (!FCmp) return false; + bool isProb; + if (FCmp->isEquality()) { + // f1 == f2 -> Unlikely + // f1 != f2 -> Likely + isProb = !FCmp->isTrueWhenEqual(); + } else if (FCmp->getPredicate() == FCmpInst::FCMP_ORD) { + // !isnan -> Likely + isProb = true; + } else if (FCmp->getPredicate() == FCmpInst::FCMP_UNO) { + // isnan -> Unlikely + isProb = false; + } else { + return false; + } + BasicBlock *Taken = BI->getSuccessor(0); BasicBlock *NonTaken = BI->getSuccessor(1); - // f1 == f2 -> Unlikely - // f1 != f2 -> Likely - if (FCmp->isTrueWhenEqual()) + if (!isProb) std::swap(Taken, NonTaken); BP->setEdgeWeight(BB, Taken, FPH_TAKEN_WEIGHT); -- cgit v1.1 From 0ad56122e585d3d27ea852115390a9e53cabc9d5 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 21 Oct 2011 22:08:56 +0000 Subject: Make sure that the landing pads themselves have no PHI instructions in them. The assumption in the back-end is that PHIs are not allowed at the start of the landing pad block for SjLj exceptions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142689 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SjLjEHPrepare.cpp | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 3ccf39b..e5cb1bb 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -908,6 +908,27 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, } } } + + // Go through the landing pads and remove any PHIs there. + for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { + BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); + LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); + + // Place PHIs into a set to avoid invalidating the iterator. + SmallPtrSet PHIsToDemote; + for (BasicBlock::iterator + PN = UnwindBlock->begin(); isa(PN); ++PN) + PHIsToDemote.insert(cast(PN)); + if (PHIsToDemote.empty()) continue; + + // Demote the PHIs to the stack. + for (SmallPtrSet::iterator + I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I) + DemotePHIToStack(*I); + + // Move the landingpad instruction back to the top of the landing pad block. + LPI->moveBefore(UnwindBlock->begin()); + } } /// setupEntryBlockAndCallSites - Setup the entry block by creating and filling -- cgit v1.1 From 4661d4cac3ba7f480a91d0ccd35fb2d22d9692d3 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 21 Oct 2011 22:21:10 +0000 Subject: Assembly parsing for 2-register sequential variant of VLD2. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142691 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 12 +++---- lib/Target/ARM/ARMInstrNEON.td | 44 +++++++++++++++---------- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 14 ++++++++ lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 6 ---- 4 files changed, 46 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 9cd6894..75b48d1 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -174,12 +174,12 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 ,true}, { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 ,true}, -{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 ,true}, -{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 ,true}, -{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 ,true}, -{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 ,true}, -{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 ,true}, -{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 ,true}, +{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 ,false}, +{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 ,false}, +{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 ,false}, +{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 ,false}, { ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 ,true}, { ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 ,true}, diff --git a/lib/Target/ARM/ARMInstrNEON.td 
b/lib/Target/ARM/ARMInstrNEON.td index 78a57fb..190a344 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -101,6 +101,14 @@ def VecListFourDAsmOperand : AsmOperandClass { def VecListFourD : RegisterOperand { let ParserMatchClass = VecListFourDAsmOperand; } +// Register list of two D registers spaced by 2 (two sequential Q registers). +def VecListTwoQAsmOperand : AsmOperandClass { + let Name = "VecListTwoQ"; + let ParserMethod = "parseVectorList"; +} +def VecListTwoQ : RegisterOperand { + let ParserMatchClass = VecListTwoQAsmOperand; +} //===----------------------------------------------------------------------===// // NEON-specific DAG Nodes. @@ -395,10 +403,10 @@ def VLD1d64QPseudo : VLDQQPseudo; def VLD1d64QPseudo_UPD : VLDQQWBPseudo; // VLD2 : Vector Load (multiple 2-element structures) -class VLD2D op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2), +class VLD2D op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> + : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd), (ins addrmode6:$Rn), IIC_VLD2, - "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn", "", []> { + "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; @@ -413,9 +421,9 @@ class VLD2Q op7_4, string Dt> let DecoderMethod = "DecodeVLDInstruction"; } -def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8">; -def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16">; -def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32">; +def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8", VecListTwoD>; +def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16", VecListTwoD>; +def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32", VecListTwoD>; def VLD2q8 : VLD2Q<{0,0,?,?}, "8">; def VLD2q16 : VLD2Q<{0,1,?,?}, "16">; @@ -430,10 +438,10 @@ def VLD2q16Pseudo : VLDQQPseudo; def VLD2q32Pseudo : VLDQQPseudo; // ...with address register writeback: -class VLD2DWB op11_8, bits<4> op7_4, string Dt> - : NLdSt<0, 0b10, op11_8, op7_4, (outs DPR:$Vd, DPR:$dst2, GPR:$wb), 
+class VLD2DWB op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> + : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2u, - "vld2", Dt, "\\{$Vd, $dst2\\}, $Rn$Rm", + "vld2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; @@ -448,9 +456,9 @@ class VLD2QWB op7_4, string Dt> let DecoderMethod = "DecodeVLDInstruction"; } -def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8">; -def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16">; -def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32">; +def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; +def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; +def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">; def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">; @@ -465,12 +473,12 @@ def VLD2q16Pseudo_UPD : VLDQQWBPseudo; def VLD2q32Pseudo_UPD : VLDQQWBPseudo; // ...with double-spaced registers -def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8">; -def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16">; -def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32">; -def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8">; -def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16">; -def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32">; +def VLD2b8 : VLD2D<0b1001, {0,0,?,?}, "8", VecListTwoQ>; +def VLD2b16 : VLD2D<0b1001, {0,1,?,?}, "16", VecListTwoQ>; +def VLD2b32 : VLD2D<0b1001, {1,0,?,?}, "32", VecListTwoQ>; +def VLD2b8_UPD : VLD2DWB<0b1001, {0,0,?,?}, "8", VecListTwoQ>; +def VLD2b16_UPD : VLD2DWB<0b1001, {0,1,?,?}, "16", VecListTwoQ>; +def VLD2b32_UPD : VLD2DWB<0b1001, {1,0,?,?}, "32", VecListTwoQ>; // VLD3 : Vector Load (multiple 3-element structures) class VLD3D op11_8, bits<4> op7_4, string Dt> diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 36438db..3a60ff9 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ 
b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -930,6 +930,13 @@ public: return VectorList.Count == 4; } + bool isVecListTwoQ() const { + if (Kind != k_VectorList) return false; + //FIXME: We haven't taught the parser to handle by-two register lists + // yet, so don't pretend to know one. + return VectorList.Count == 2 && false; + } + bool isVectorIndex8() const { if (Kind != k_VectorIndex) return false; return VectorIndex.Val < 8; @@ -1543,6 +1550,13 @@ public: Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); } + void addVecListTwoQOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // Only the first register actually goes on the instruction. The rest + // are implied by the opcode. + Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); + } + void addVectorIndex8Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(getVectorIndex())); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 577dd80..63ef4af 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1959,12 +1959,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Second output register switch (Inst.getOpcode()) { - case ARM::VLD2d8: - case ARM::VLD2d16: - case ARM::VLD2d32: - case ARM::VLD2d8_UPD: - case ARM::VLD2d16_UPD: - case ARM::VLD2d32_UPD: case ARM::VLD2q8: case ARM::VLD2q16: case ARM::VLD2q32: -- cgit v1.1 From 224180e81b34c99d15e35a4d4de6729357c6d372 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 21 Oct 2011 23:58:57 +0000 Subject: Assembly parsing for 4-register sequential variant of VLD2. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142704 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 12 ++++++------ lib/Target/ARM/ARMInstrNEON.td | 24 ++++++++++++------------ lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 18 ------------------ 3 files changed, 18 insertions(+), 36 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 75b48d1..77bad0c 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -181,12 +181,12 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 ,false}, { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 ,false}, -{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 ,true}, -{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 ,true}, -{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 ,true}, -{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 ,true}, -{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 ,true}, -{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 ,true}, +{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 ,false}, +{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 ,false}, { ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4,true}, { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4,true}, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 190a344..1efe681 
100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -411,11 +411,11 @@ class VLD2D op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } -class VLD2Q op7_4, string Dt> +class VLD2Q op7_4, string Dt, RegisterOperand VdTy> : NLdSt<0, 0b10, 0b0011, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4), + (outs VdTy:$Vd), (ins addrmode6:$Rn), IIC_VLD2x2, - "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { + "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; @@ -425,9 +425,9 @@ def VLD2d8 : VLD2D<0b1000, {0,0,?,?}, "8", VecListTwoD>; def VLD2d16 : VLD2D<0b1000, {0,1,?,?}, "16", VecListTwoD>; def VLD2d32 : VLD2D<0b1000, {1,0,?,?}, "32", VecListTwoD>; -def VLD2q8 : VLD2Q<{0,0,?,?}, "8">; -def VLD2q16 : VLD2Q<{0,1,?,?}, "16">; -def VLD2q32 : VLD2Q<{1,0,?,?}, "32">; +def VLD2q8 : VLD2Q<{0,0,?,?}, "8", VecListFourD>; +def VLD2q16 : VLD2Q<{0,1,?,?}, "16", VecListFourD>; +def VLD2q32 : VLD2Q<{1,0,?,?}, "32", VecListFourD>; def VLD2d8Pseudo : VLDQPseudo; def VLD2d16Pseudo : VLDQPseudo; @@ -446,11 +446,11 @@ class VLD2DWB op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } -class VLD2QWB op7_4, string Dt> +class VLD2QWB op7_4, string Dt, RegisterOperand VdTy> : NLdSt<0, 0b10, 0b0011, op7_4, - (outs DPR:$Vd, DPR:$dst2, DPR:$dst3, DPR:$dst4, GPR:$wb), + (outs VdTy:$Vd, GPR:$wb), (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD2x2u, - "vld2", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", + "vld2", Dt, "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; @@ -460,9 +460,9 @@ def VLD2d8_UPD : VLD2DWB<0b1000, {0,0,?,?}, "8", VecListTwoD>; def VLD2d16_UPD : VLD2DWB<0b1000, {0,1,?,?}, "16", VecListTwoD>; def VLD2d32_UPD : VLD2DWB<0b1000, {1,0,?,?}, "32", VecListTwoD>; -def 
VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8">; -def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16">; -def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32">; +def VLD2q8_UPD : VLD2QWB<{0,0,?,?}, "8", VecListFourD>; +def VLD2q16_UPD : VLD2QWB<{0,1,?,?}, "16", VecListFourD>; +def VLD2q32_UPD : VLD2QWB<{1,0,?,?}, "32", VecListFourD>; def VLD2d8Pseudo_UPD : VLDQWBPseudo; def VLD2d16Pseudo_UPD : VLDQWBPseudo; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 63ef4af..dcdb452 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1959,12 +1959,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Second output register switch (Inst.getOpcode()) { - case ARM::VLD2q8: - case ARM::VLD2q16: - case ARM::VLD2q32: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: case ARM::VLD3d8: case ARM::VLD3d16: case ARM::VLD3d32: @@ -2006,12 +2000,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Third output register switch(Inst.getOpcode()) { - case ARM::VLD2q8: - case ARM::VLD2q16: - case ARM::VLD2q32: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: case ARM::VLD3d8: case ARM::VLD3d16: case ARM::VLD3d32: @@ -2048,12 +2036,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Fourth output register switch (Inst.getOpcode()) { - case ARM::VLD2q8: - case ARM::VLD2q16: - case ARM::VLD2q32: - case ARM::VLD2q8_UPD: - case ARM::VLD2q16_UPD: - case ARM::VLD2q32_UPD: case ARM::VLD4d8: case ARM::VLD4d16: case ARM::VLD4d32: -- cgit v1.1 From b8dcb314f7f9c5c0f068a322c689a64881d78b70 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Sat, 22 Oct 2011 00:29:28 +0000 Subject: The different flavors of ARM have different valid subsets of registers. Check that the set of callee-saved registers is correct for the specific platform. 
& ctor_dtor_count & ctor_dtor_count-2 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142706 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 7df743b..72ea6ac 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -6015,9 +6015,19 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { MachineInstrBuilder MIB(&*II); for (unsigned i = 0; SavedRegs[i] != 0; ++i) { - if (!TRC->contains(SavedRegs[i])) continue; - if (!DefRegs[SavedRegs[i]]) - MIB.addReg(SavedRegs[i], RegState::ImplicitDefine | RegState::Dead); + unsigned Reg = SavedRegs[i]; + if (Subtarget->isThumb2() && + !ARM::tGPRRegisterClass->contains(Reg) && + !ARM::hGPRRegisterClass->contains(Reg)) + continue; + else if (Subtarget->isThumb1Only() && + !ARM::tGPRRegisterClass->contains(Reg)) + continue; + else if (!Subtarget->isThumb() && + !ARM::GPRRegisterClass->contains(Reg)) + continue; + if (!DefRegs[Reg]) + MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); } break; -- cgit v1.1 From 5b2bba61229fb8c262be90ef7705b41cb29edde0 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 22 Oct 2011 12:39:25 +0000 Subject: Fix pr11193. SHL inserts zeros from the right, thus even when the original sign_extend_inreg value was of 1-bit, we need to sra. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142724 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 3 --- 1 file changed, 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 96f04e7..7dec7c4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -10146,9 +10146,6 @@ SDValue X86TargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, SelectionDAG &DAG) DAG.getConstant(SHLIntrinsicsID, MVT::i32), Node->getOperand(0), ShAmt); - // In case of 1 bit sext, no need to shr - if (ExtraVT.getScalarType().getSizeInBits() == 1) return Tmp1; - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(SRAIntrinsicsID, MVT::i32), Tmp1, ShAmt); -- cgit v1.1 From 1a2f9886a2a60dbd41216468a240446bbfed3e76 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 22 Oct 2011 16:50:00 +0000 Subject: Move various generated tables into read-only memory, fixing up const correctness along the way. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142726 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 17 +++++++++-------- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 2 +- lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp | 4 ++-- 3 files changed, 12 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 3a60ff9..a13554f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1886,9 +1886,10 @@ void ARMOperand::print(raw_ostream &OS) const { OS << ""; break; case k_ITCondMask: { - static char MaskStr[][6] = { "()", "(t)", "(e)", "(tt)", "(et)", "(te)", - "(ee)", "(ttt)", "(ett)", "(tet)", "(eet)", "(tte)", "(ete)", - "(tee)", "(eee)" }; + static const char *MaskStr[] = { + "()", "(t)", "(e)", "(tt)", "(et)", "(te)", "(ee)", "(ttt)", "(ett)", + "(tet)", "(eet)", "(tte)", "(ete)", "(tee)", "(eee)" + }; assert((ITMask.Mask & 0xf) == ITMask.Mask); OS << ""; break; @@ -2366,7 +2367,7 @@ parseRegisterList(SmallVectorImpl &Operands) { if (Reg == -1) return Error(RegLoc, "register expected"); - MCRegisterClass *RC; + const MCRegisterClass *RC; if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) RC = &ARMMCRegisterClasses[ARM::GPRRegClassID]; else if (ARMMCRegisterClasses[ARM::DPRRegClassID].contains(Reg)) @@ -4237,9 +4238,9 @@ static bool listContainsReg(MCInst &Inst, unsigned OpNo, unsigned Reg) { // the ARMInsts array) instead. Getting that here requires awkward // API changes, though. Better way? 
namespace llvm { -extern MCInstrDesc ARMInsts[]; +extern const MCInstrDesc ARMInsts[]; } -static MCInstrDesc &getInstDesc(unsigned Opcode) { +static const MCInstrDesc &getInstDesc(unsigned Opcode) { return ARMInsts[Opcode]; } @@ -4247,7 +4248,7 @@ static MCInstrDesc &getInstDesc(unsigned Opcode) { bool ARMAsmParser:: validateInstruction(MCInst &Inst, const SmallVectorImpl &Operands) { - MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); + const MCInstrDesc &MCID = getInstDesc(Inst.getOpcode()); SMLoc Loc = Operands[0]->getStartLoc(); // Check the IT block state first. // NOTE: In Thumb mode, the BKPT instruction has the interesting property of @@ -4605,7 +4606,7 @@ unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { // 16-bit thumb arithmetic instructions either require or preclude the 'S' // suffix depending on whether they're in an IT block or not. unsigned Opc = Inst.getOpcode(); - MCInstrDesc &MCID = getInstDesc(Opc); + const MCInstrDesc &MCID = getInstDesc(Opc); if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) { assert(MCID.hasOptionalDef() && "optionally flag setting instruction missing optional def operand"); diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index dcdb452..1517625 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -415,7 +415,7 @@ DecodeStatus ARMDisassembler::getInstruction(MCInst &MI, uint64_t &Size, } namespace llvm { -extern MCInstrDesc ARMInsts[]; +extern const MCInstrDesc ARMInsts[]; } /// tryAddingSymbolicOperand - trys to add a symbolic operand in place of the diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index fd761f1..c3a3833 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -30,14 +30,14 @@ #include "MBlazeGenEDInfo.inc" namespace llvm { 
-extern MCInstrDesc MBlazeInsts[]; +extern const MCInstrDesc MBlazeInsts[]; } using namespace llvm; const unsigned UNSUPPORTED = -1; -static unsigned mblazeBinary2Opcode[] = { +static const unsigned mblazeBinary2Opcode[] = { MBlaze::ADD, MBlaze::RSUB, MBlaze::ADDC, MBlaze::RSUBC, //00,01,02,03 MBlaze::ADDK, MBlaze::RSUBK, MBlaze::ADDKC, MBlaze::RSUBKC, //04,05,06,07 MBlaze::ADDI, MBlaze::RSUBI, MBlaze::ADDIC, MBlaze::RSUBIC, //08,09,0A,0B -- cgit v1.1 From 614fef6d5a1e24d01ffe7247b1841fd7e08e1e85 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Sat, 22 Oct 2011 19:58:20 +0000 Subject: Make SCEV's brute force analysis stronger in two ways. Firstly, we should be able to constant fold load instructions where the argument is a constant. Second, we should be able to watch multiple PHI nodes through the loop; this patch only supports PHIs in loop headers, more can be done here. With this patch, we now constant evaluate: static const int arr[] = {1, 2, 3, 4, 5}; int test() { int sum = 0; for (int i = 0; i < 5; ++i) sum += arr[i]; return sum; } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142731 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 171 +++++++++++++++++++++++++++++++++------ 1 file changed, 145 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index e0ac56c..2da8e6f 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4658,7 +4658,8 @@ ScalarEvolution::ComputeLoadConstantCompareExitLimit( /// specified type, assuming that all operands were constants. 
static bool CanConstantFold(const Instruction *I) { if (isa(I) || isa(I) || - isa(I) || isa(I) || isa(I)) + isa(I) || isa(I) || isa(I) || + isa(I)) return true; if (const CallInst *CI = dyn_cast(I)) @@ -4751,13 +4752,19 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, const TargetData *TD) { // Convenient constant check, but redundant for recursive calls. if (Constant *C = dyn_cast(V)) return C; + Instruction *I = dyn_cast(V); + if (!I) return 0; - Instruction *I = cast(V); if (Constant *C = Vals.lookup(I)) return C; - assert(!isa(I) && "loop header phis should be mapped to constant"); - assert(canConstantEvolve(I, L) && "cannot evaluate expression in this loop"); - (void)L; + // An instruction inside the loop depends on a value outside the loop that we + // weren't given a mapping for, or a value such as a call inside the loop. + if (!canConstantEvolve(I, L)) return 0; + + // An unmapped PHI can be due to a branch or another loop inside this loop, + // or due to this not being the initial iteration through a loop where we + // couldn't compute the evolution of this particular PHI last time. + if (isa(I)) return 0; std::vector Operands(I->getNumOperands()); @@ -4774,9 +4781,13 @@ static Constant *EvaluateExpression(Value *V, const Loop *L, Operands[i] = C; } - if (const CmpInst *CI = dyn_cast(I)) + if (CmpInst *CI = dyn_cast(I)) return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], Operands[1], TD); + if (LoadInst *LI = dyn_cast(I)) { + if (!LI->isVolatile()) + return ConstantFoldLoadFromConstPtr(Operands[0], TD); + } return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD); } @@ -4798,23 +4809,26 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, Constant *&RetVal = ConstantEvolutionLoopExitValue[PN]; - // FIXME: Nick's fix for PR11034 will seed constants for multiple header phis. 
DenseMap CurrentIterVals; + BasicBlock *Header = L->getHeader(); + assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); // Since the loop is canonicalized, the PHI node must have two entries. One // entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - Constant *StartCST = - dyn_cast(PN->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) - return RetVal = 0; // Must be a constant. - CurrentIterVals[PN] = StartCST; + PHINode *PHI = 0; + for (BasicBlock::iterator I = Header->begin(); + (PHI = dyn_cast(I)); ++I) { + Constant *StartCST = + dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) continue; + CurrentIterVals[PHI] = StartCST; + } + if (!CurrentIterVals.count(PN)) + return RetVal = 0; Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - if (getConstantEvolvingPHI(BEValue, L) != PN && - !isa(BEValue)) - return RetVal = 0; // Not derived from same PHI. // Execute the loop symbolically to determine the exit value. if (BEs.getActiveBits() >= 32) @@ -4826,15 +4840,29 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, if (IterationNum == NumIterations) return RetVal = CurrentIterVals[PN]; // Got exit value! - // Compute the value of the PHI node for the next iteration. + // Compute the value of the PHIs for the next iteration. // EvaluateExpression adds non-phi values to the CurrentIterVals map. + DenseMap NextIterVals; Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); if (NextPHI == CurrentIterVals[PN]) return RetVal = NextPHI; // Stopped evolving! if (NextPHI == 0) return 0; // Couldn't evaluate! - DenseMap NextIterVals; NextIterVals[PN] = NextPHI; + + // Also evaluate the other PHI nodes. 
However, we don't get to stop if we + // cease to be able to evaluate one of them or if they stop evolving, + // because that doesn't necessarily prevent us from computing PN. + for (DenseMap::const_iterator + I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ + PHINode *PHI = dyn_cast(I->first); + if (!PHI || PHI == PN) continue; + Constant *&NextPHI = NextIterVals[PHI]; + if (NextPHI) continue; // Already computed! + + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + } CurrentIterVals.swap(NextIterVals); } } @@ -4844,9 +4872,9 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, /// try to evaluate a few iterations of the loop until we get the exit /// condition gets a value of ExitWhen (true or false). If we cannot /// evaluate the trip count of the loop, return getCouldNotCompute(). -const SCEV * ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, - Value *Cond, - bool ExitWhen) { +const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, + Value *Cond, + bool ExitWhen) { PHINode *PN = getConstantEvolvingPHI(Cond, L); if (PN == 0) return getCouldNotCompute(); @@ -4921,6 +4949,98 @@ const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { return C; } +/// This builds up a Constant using the ConstantExpr interface. That way, we +/// will return Constants for objects which aren't represented by a +/// SCEVConstant, because SCEVConstant is restricted to ConstantInt. +/// Returns NULL if the SCEV isn't representable as a Constant. +static Constant *BuildConstantFromSCEV(const SCEV *V) { + switch (V->getSCEVType()) { + default: // TODO: smax, umax. 
+ case scCouldNotCompute: + case scAddRecExpr: + break; + case scConstant: + return cast(V)->getValue(); + case scUnknown: + return dyn_cast(cast(V)->getValue()); + case scSignExtend: { + const SCEVSignExtendExpr *SS = cast(V); + if (Constant *CastOp = BuildConstantFromSCEV(SS->getOperand())) + return ConstantExpr::getSExt(CastOp, SS->getType()); + break; + } + case scZeroExtend: { + const SCEVZeroExtendExpr *SZ = cast(V); + if (Constant *CastOp = BuildConstantFromSCEV(SZ->getOperand())) + return ConstantExpr::getZExt(CastOp, SZ->getType()); + break; + } + case scTruncate: { + const SCEVTruncateExpr *ST = cast(V); + if (Constant *CastOp = BuildConstantFromSCEV(ST->getOperand())) + return ConstantExpr::getTrunc(CastOp, ST->getType()); + break; + } + case scAddExpr: { + const SCEVAddExpr *SA = cast(V); + if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) { + if (C->getType()->isPointerTy()) + C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext())); + for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { + Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); + if (!C2) return 0; + + // First pointer! + if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { + std::swap(C, C2); + // The offsets have been converted to bytes. We can add bytes to an + // i8* by GEP with the byte count in the first index. + C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext())); + } + + // Don't bother trying to sum two pointers. We probably can't + // statically compute a load that results from it anyway. 
+ if (C2->getType()->isPointerTy()) + return 0; + + if (C->getType()->isPointerTy()) { + if (cast(C->getType())->getElementType()->isStructTy()) + C2 = ConstantExpr::getIntegerCast( + C2, Type::getInt32Ty(C->getContext()), true); + C = ConstantExpr::getGetElementPtr(C, C2); + } else + C = ConstantExpr::getAdd(C, C2); + } + return C; + } + break; + } + case scMulExpr: { + const SCEVMulExpr *SM = cast(V); + if (Constant *C = BuildConstantFromSCEV(SM->getOperand(0))) { + // Don't bother with pointers at all. + if (C->getType()->isPointerTy()) return 0; + for (unsigned i = 1, e = SM->getNumOperands(); i != e; ++i) { + Constant *C2 = BuildConstantFromSCEV(SM->getOperand(i)); + if (!C2 || C2->getType()->isPointerTy()) return 0; + C = ConstantExpr::getMul(C, C2); + } + return C; + } + break; + } + case scUDivExpr: { + const SCEVUDivExpr *SU = cast(V); + if (Constant *LHS = BuildConstantFromSCEV(SU->getLHS())) + if (Constant *RHS = BuildConstantFromSCEV(SU->getRHS())) + if (LHS->getType() == RHS->getType()) + return ConstantExpr::getUDiv(LHS, RHS); + break; + } + } + return 0; +} + const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (isa(V)) return V; @@ -4973,11 +5093,7 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { const SCEV *OpV = getSCEVAtScope(OrigV, L); MadeImprovement |= OrigV != OpV; - Constant *C = 0; - if (const SCEVConstant *SC = dyn_cast(OpV)) - C = SC->getValue(); - if (const SCEVUnknown *SU = dyn_cast(OpV)) - C = dyn_cast(SU->getValue()); + Constant *C = BuildConstantFromSCEV(OpV); if (!C) return V; if (C->getType() != Op->getType()) C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, @@ -4993,7 +5109,10 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { if (const CmpInst *CI = dyn_cast(I)) C = ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0], Operands[1], TD); - else + else if (const LoadInst *LI = dyn_cast(I)) { + if (!LI->isVolatile()) 
+ C = ConstantFoldLoadFromConstPtr(Operands[0], TD); + } else C = ConstantFoldInstOperands(I->getOpcode(), I->getType(), Operands, TD); if (!C) return V; -- cgit v1.1 From 4d882aae2acc5194b47385c7cb2e0e9ddd202927 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Sat, 22 Oct 2011 21:59:35 +0000 Subject: A non-escaping malloc in the entry block is not unlike an alloca. Do dead-store elimination on them too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142735 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/DeadStoreElimination.cpp | 27 ++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index a593d0f..c0738a9 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -24,6 +24,7 @@ #include "llvm/IntrinsicInst.h" #include "llvm/Pass.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" @@ -255,6 +256,14 @@ static Value *getStoredPointerOperand(Instruction *I) { static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) { const TargetData *TD = AA.getTargetData(); + + if (CallInst *CI = dyn_cast(V)) { + assert(isMalloc(CI) && "Expected Malloc call!"); + if (ConstantInt *C = dyn_cast(CI->getArgOperand(0))) + return C->getZExtValue(); + return AliasAnalysis::UnknownSize; + } + if (TD == 0) return AliasAnalysis::UnknownSize; @@ -265,7 +274,7 @@ static uint64_t getPointerSize(Value *V, AliasAnalysis &AA) { return AliasAnalysis::UnknownSize; } - assert(isa(V) && "Expected AllocaInst or Argument!"); + assert(isa(V) && "Expected AllocaInst, malloc call or Argument!"); PointerType *PT = cast(V->getType()); return TD->getTypeAllocSize(PT->getElementType()); } @@ -279,6 +288,8 @@ 
static bool isObjectPointerWithTrustworthySize(const Value *V) { return !GV->mayBeOverridden(); if (const Argument *A = dyn_cast(V)) return A->hasByValAttr(); + if (isMalloc(V)) + return true; return false; } @@ -588,10 +599,17 @@ bool DSE::handleEndBlock(BasicBlock &BB) { // Find all of the alloca'd pointers in the entry block. BasicBlock *Entry = BB.getParent()->begin(); - for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) + for (BasicBlock::iterator I = Entry->begin(), E = Entry->end(); I != E; ++I) { if (AllocaInst *AI = dyn_cast(I)) DeadStackObjects.insert(AI); + // Okay, so these are dead heap objects, but if the pointer never escapes + // then it's leaked by this function anyways. + if (CallInst *CI = extractMallocCall(I)) + if (!PointerMayBeCaptured(CI, true, true)) + DeadStackObjects.insert(CI); + } + // Treat byval arguments the same, stores to them are dead at the end of the // function. for (Function::arg_iterator AI = BB.getParent()->arg_begin(), @@ -637,6 +655,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) { continue; } + if (CallInst *CI = extractMallocCall(BBI)) { + DeadStackObjects.erase(CI); + continue; + } + if (CallSite CS = cast(BBI)) { // If this call does not access memory, it can't be loading any of our // pointers. -- cgit v1.1 From 272895f0ab2752c0d117a60259a9bb82db1fdee5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 22 Oct 2011 23:13:53 +0000 Subject: Remove some duplicate specifying of neverHasSideEffects and mayLoad from X86 multiply instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142737 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 74b647a..6c49b9e 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -86,7 +86,7 @@ let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def MUL32m : I<0xF7, MRM4m, (outs), (ins i32mem:$src), "mul{l}\t$src", []>; // EAX,EDX = EAX*[mem32] -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def MUL64m : RI<0xF7, MRM4m, (outs), (ins i64mem:$src), "mul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] } @@ -101,7 +101,7 @@ def IMUL16r : I<0xF7, MRM5r, (outs), (ins GR16:$src), "imul{w}\t$src", []>, let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32r : I<0xF7, MRM5r, (outs), (ins GR32:$src), "imul{l}\t$src", []>; // EAX,EDX = EAX*GR32 -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64r : RI<0xF7, MRM5r, (outs), (ins GR64:$src), "imul{q}\t$src", []>; // RAX,RDX = RAX*GR64 @@ -115,7 +115,7 @@ def IMUL16m : I<0xF7, MRM5m, (outs), (ins i16mem:$src), let Defs = [EAX,EDX,EFLAGS], Uses = [EAX] in def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src), "imul{l}\t$src", []>; // EAX,EDX = EAX*[mem32] -let Defs = [RAX,RDX,EFLAGS], Uses = [RAX], neverHasSideEffects = 1 in +let Defs = [RAX,RDX,EFLAGS], Uses = [RAX] in def IMUL64m : RI<0xF7, MRM5m, (outs), (ins i64mem:$src), "imul{q}\t$src", []>; // RAX,RDX = RAX*[mem64] } @@ -285,8 +285,8 @@ def IDIV32r: I<0xF7, MRM7r, (outs), (ins GR32:$src), // EDX:EAX/r32 = EAX,EDX let Defs = [RAX,RDX,EFLAGS], Uses = [RAX,RDX] in def IDIV64r: RI<0xF7, MRM7r, (outs), (ins GR64:$src), "idiv{q}\t$src", []>; - -let mayLoad = 1, mayLoad = 1 in { + +let mayLoad = 1 in 
{ let Defs = [AL,EFLAGS,AX], Uses = [AX] in def IDIV8m : I<0xF6, MRM7m, (outs), (ins i8mem:$src), // AX/[mem8] = AL,AH "idiv{b}\t$src", []>; -- cgit v1.1 From 4fea38f7732bccd1781390aedcef2bbf87e25990 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 23 Oct 2011 00:33:32 +0000 Subject: Add X86 MULX instruction for disassembler. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142738 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrArithmetic.td | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrArithmetic.td b/lib/Target/X86/X86InstrArithmetic.td index 6c49b9e..c99c52d 100644 --- a/lib/Target/X86/X86InstrArithmetic.td +++ b/lib/Target/X86/X86InstrArithmetic.td @@ -1171,3 +1171,27 @@ let Predicates = [HasBMI], Defs = [EFLAGS] in { defm ANDN32 : bmi_andn<"andn{l}", GR32, i32mem, loadi32>, T8, VEX_4V; defm ANDN64 : bmi_andn<"andn{q}", GR64, i64mem, loadi64>, T8, VEX_4V, VEX_W; } + +//===----------------------------------------------------------------------===// +// MULX Instruction +// +multiclass bmi_mulx { +let neverHasSideEffects = 1 in { + let isCommutable = 1 in + def rr : I<0xF6, MRMSrcReg, (outs RC:$dst1, RC:$dst2), (ins RC:$src), + !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), + []>, T8XD, VEX_4V; + + let mayLoad = 1 in + def rm : I<0xF6, MRMSrcMem, (outs RC:$dst1, RC:$dst2), (ins x86memop:$src), + !strconcat(mnemonic, "\t{$src, $dst2, $dst1|$dst1, $dst2, $src}"), + []>, T8XD, VEX_4V; +} +} + +let Predicates = [HasBMI2] in { + let Uses = [EDX] in + defm MULX32 : bmi_mulx<"mulx{l}", GR32, i32mem>; + let Uses = [RDX] in + defm MULX64 : bmi_mulx<"mulx{q}", GR64, i64mem>, VEX_W; +} -- cgit v1.1 From 90747e34e6ca7162eaf8dde032649071045f161d Mon Sep 17 00:00:00 2001 From: Cameron Zwarich Date: Sun, 23 Oct 2011 07:02:10 +0000 Subject: The element insertion code in scalar replacement doesn't handle incorrect element types, even though the element 
extraction code does. It is surprising that this bug has been here for so long. Fixes . git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142740 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ScalarReplAggregates.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index c6d9123..b89f730 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -806,8 +806,10 @@ ConvertScalar_InsertValue(Value *SV, Value *Old, return Builder.CreateBitCast(SV, AllocaType); // Must be an element insertion. - assert(SV->getType() == VTy->getElementType()); - uint64_t EltSize = TD.getTypeAllocSizeInBits(VTy->getElementType()); + Type *EltTy = VTy->getElementType(); + if (SV->getType() != EltTy) + SV = Builder.CreateBitCast(SV, EltTy); + uint64_t EltSize = TD.getTypeAllocSizeInBits(EltTy); unsigned Elt = Offset/EltSize; return Builder.CreateInsertElement(Old, SV, Builder.getInt32(Elt)); } -- cgit v1.1 From 75485d6746f8b5b23c17cf6d2364e7e1e0705992 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 23 Oct 2011 07:34:00 +0000 Subject: Add X86 RORX instruction git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142741 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 3 +++ lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 9 +++++++++ lib/Target/X86/X86CodeEmitter.cpp | 2 ++ lib/Target/X86/X86InstrFormats.td | 1 + lib/Target/X86/X86InstrShiftRotate.td | 21 +++++++++++++++++++++ 5 files changed, 36 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h index 007e620..c50f785 100644 --- a/lib/Target/X86/MCTargetDesc/X86BaseInfo.h +++ b/lib/Target/X86/MCTargetDesc/X86BaseInfo.h @@ -301,6 +301,9 @@ namespace X86II { // T8XS - Prefix before and after 0x0F. 
Combination of T8 and XS. T8XS = 18 << Op0Shift, + // TAXD - Prefix before and after 0x0F. Combination of TA and XD. + TAXD = 19 << Op0Shift, + //===------------------------------------------------------------------===// // REX_W - REX prefixes are instruction prefixes used in 64-bit mode. // They are used to specify GPRs and SSE registers, 64-bit operand size, diff --git a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 8ae7a3c..1ab469c 100644 --- a/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -472,6 +472,10 @@ void X86MCCodeEmitter::EmitVEXOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, VEX_PP = 0x3; VEX_5M = 0x2; break; + case X86II::TAXD: // F2 0F 3A + VEX_PP = 0x3; + VEX_5M = 0x3; + break; case X86II::XS: // F3 0F VEX_PP = 0x2; break; @@ -802,6 +806,10 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, EmitByte(0xF2, CurByte, OS); Need0FPrefix = true; break; + case X86II::TAXD: // F2 0F 3A + EmitByte(0xF2, CurByte, OS); + Need0FPrefix = true; + break; case X86II::XS: // F3 0F EmitByte(0xF3, CurByte, OS); Need0FPrefix = true; @@ -838,6 +846,7 @@ void X86MCCodeEmitter::EmitOpcodePrefix(uint64_t TSFlags, unsigned &CurByte, case X86II::T8: // 0F 38 EmitByte(0x38, CurByte, OS); break; + case X86II::TAXD: // F2 0F 3A case X86II::TA: // 0F 3A EmitByte(0x3A, CurByte, OS); break; diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index a150604..d94ba33 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -656,6 +656,7 @@ void Emitter::emitInstruction(MachineInstr &MI, Need0FPrefix = true; break; case X86II::T8XD: // F2 0F 38 + case X86II::TAXD: // F2 0F 3A case X86II::XD: // F2 0F MCE.emitByte(0xF2); Need0FPrefix = true; @@ -686,6 +687,7 @@ void Emitter::emitInstruction(MachineInstr &MI, case X86II::T8: // 0F 38 MCE.emitByte(0x38); break; + case 
X86II::TAXD: // F2 0F 3A case X86II::TA: // 0F 3A MCE.emitByte(0x3A); break; diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 5b7adf3..5236daf 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -109,6 +109,7 @@ class A6 { bits<5> Prefix = 15; } class A7 { bits<5> Prefix = 16; } class T8XD { bits<5> Prefix = 17; } class T8XS { bits<5> Prefix = 18; } +class TAXD { bits<5> Prefix = 19; } class VEX { bit hasVEXPrefix = 1; } class VEX_W { bit hasVEX_WPrefix = 1; } class VEX_4V : VEX { bit hasVEX_4VPrefix = 1; } diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index 8278568..a32f066 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -744,3 +744,24 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem, } // Defs = [EFLAGS] +let Predicates = [HasBMI2], neverHasSideEffects = 1 in { + def RORX32ri : Ii8<0xF0, MRMSrcReg, (outs GR32:$dst), + (ins GR32:$src1, i8imm:$src2), + "rorx{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TAXD, VEX; + let mayLoad = 1 in + def RORX32mi : Ii8<0xF0, MRMSrcMem, (outs GR32:$dst), + (ins i32mem:$src1, i8imm:$src2), + "rorx{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TAXD, VEX; + + def RORX64ri : Ii8<0xF0, MRMSrcReg, (outs GR64:$dst), + (ins GR64:$src1, i8imm:$src2), + "rorx{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TAXD, VEX, VEX_W; + let mayLoad = 1 in + def RORX64mi : Ii8<0xF0, MRMSrcMem, (outs GR64:$dst), + (ins i64mem:$src1, i8imm:$src2), + "rorx{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TAXD, VEX, VEX_W; +} -- cgit v1.1 From 3071363bcdfd75e81326b4033970d8bee5b1b376 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 23 Oct 2011 09:18:45 +0000 Subject: Completely re-write the algorithm behind MachineBlockPlacement based on discussions with Andy.
Fundamentally, the previous algorithm is both counterproductive on several fronts and prioritizing things which aren't necessarily the most important: static branch prediction. The new algorithm uses the existing loop CFG structure information to walk through the CFG itself to lay out blocks. It coalesces adjacent blocks within the loop where the CFG allows based on the most likely path taken. Finally, it topologically orders the block chains that have been formed. This allows it to choose a (mostly) topologically valid ordering which still prioritizes fallthrough within the structural constraints. As a final twist in the algorithm, it does violate the CFG when it discovers a "hot" edge, that is an edge that is more than 4x hotter than the competing edges in the CFG. These are forcibly merged into a fallthrough chain. Future transformations that need to be added are rotation of loop exit conditions to be fallthrough, and better isolation of cold block chains. I'm also planning on adding statistics to model how well the algorithm does at laying out blocks based on the probabilities it receives. The old tests mostly still pass, and I have some new tests to add, but the nested loops are still behaving very strangely. This almost seems like working-as-intended as it rotated the exit branch to be fallthrough, but I'm not convinced this is actually the best layout. It is well supported by the probabilities for loops we currently get, but those are pretty broken for nested loops, so this may change later.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142743 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 626 ++++++++++---------------------- 1 file changed, 227 insertions(+), 399 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 7700efc..4f9958a 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -7,15 +7,21 @@ // //===----------------------------------------------------------------------===// // -// This file implements basic block placement transformations using branch -// probability estimates. It is based around "Algo2" from Profile Guided Code -// Positioning [http://portal.acm.org/citation.cfm?id=989433]. +// This file implements basic block placement transformations using the CFG +// structure and branch probability estimates. // -// We combine the BlockFrequencyInfo with BranchProbabilityInfo to simulate -// measured edge-weights. The BlockFrequencyInfo effectively summarizes the -// probability of starting from any particular block, and the -// BranchProbabilityInfo the probability of exiting the block via a particular -// edge. Combined they form a function-wide ordering of the edges. +// The pass strives to preserve the structure of the CFG (that is, retain +// a topological ordering of basic blocks) in the absence of a *strong* signal +// to the contrary from probabilities. However, within the CFG structure, it +// attempts to choose an ordering which favors placing more likely sequences of +// blocks adjacent to each other. +// +// The algorithm works from the inner-most loop within a function outward, and +// at each stage walks through the basic blocks, trying to coalesce them into +// sequential chains where allowed by the CFG (or demanded by heavy +// probabilities).
Finally, it walks the blocks in topological order, and the +// first time it reaches a chain of basic blocks, it schedules them in the +// function in-order. // //===----------------------------------------------------------------------===// @@ -29,8 +35,10 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" @@ -57,7 +65,7 @@ struct WeightedEdge { } namespace { -struct BlockChain; +class BlockChain; /// \brief Type for our function-wide basic block -> block chain mapping. typedef DenseMap BlockToChainMapType; } @@ -78,22 +86,12 @@ namespace { /// The block chains also have support for calculating and caching probability /// information related to the chain itself versus other chains. This is used /// for ranking during the final layout of block chains. -struct BlockChain { - class SuccIterator; - - /// \brief The first and last basic block that from this chain. - /// - /// The chain is stored within the existing function ilist of basic blocks. - /// When merging chains or otherwise manipulating them, we splice the blocks - /// within this ilist, giving us very cheap storage here and constant time - /// merge operations. +class BlockChain { + /// \brief The sequence of blocks belonging to this chain. /// - /// It is extremely important to note that LastBB is the iterator pointing - /// *at* the last basic block in the chain. That is, the chain consists of - /// the *closed* range [FirstBB, LastBB]. We cannot use half-open ranges - /// because the next basic block may get relocated to a different part of the - /// function at any time during the run of this pass. 
- MachineFunction::iterator FirstBB, LastBB; + /// This is the sequence of blocks for a particular chain. These will be laid + /// out in-order within the function. + SmallVector Blocks; /// \brief A handle to the function-wide basic block to block chain mapping. /// @@ -103,158 +101,66 @@ struct BlockChain { /// structure. BlockToChainMapType &BlockToChain; - /// \brief The weight used to rank two block chains in the same SCC. - /// - /// This is used during SCC layout of block chains to cache and rank the - /// chains. It is supposed to represent the expected frequency with which - /// control reaches a block within this chain, has the option of branching to - /// a block in some other chain participating in the SCC, but instead - /// continues within this chain. The higher this is, the more costly we - /// expect mis-predicted branches between this chain and other chains within - /// the SCC to be. Thus, since we expect branches between chains to be - /// predicted when backwards and not predicted when forwards, the higher this - /// is the more important that this chain is laid out first among those - /// chains in the same SCC as it. - BlockFrequency InChainEdgeFrequency; - +public: /// \brief Construct a new BlockChain. /// /// This builds a new block chain representing a single basic block in the /// function. It also registers itself as the chain that block participates /// in with the BlockToChain mapping. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) - : FirstBB(BB), LastBB(BB), BlockToChain(BlockToChain) { + : Blocks(1, BB), BlockToChain(BlockToChain) { assert(BB && "Cannot create a chain with a null basic block"); BlockToChain[BB] = this; } - /// \brief Merge another block chain into this one. + /// \brief Iterator over blocks within the chain. + typedef SmallVectorImpl::const_iterator iterator; + + /// \brief Beginning of blocks within the chain. 
+ iterator begin() const { return Blocks.begin(); } + + /// \brief End of blocks within the chain. + iterator end() const { return Blocks.end(); } + + /// \brief Merge a block chain into this one. /// /// This routine merges a block chain into this one. It takes care of forming /// a contiguous sequence of basic blocks, updating the edge list, and /// updating the block -> chain mapping. It does not free or tear down the /// old chain, but the old chain's block list is no longer valid. - void merge(BlockChain *Chain) { - assert(Chain && "Cannot merge a null chain"); - MachineFunction::iterator EndBB = llvm::next(LastBB); - MachineFunction::iterator ChainEndBB = llvm::next(Chain->LastBB); - - // Update the incoming blocks to point to this chain. - for (MachineFunction::iterator BI = Chain->FirstBB, BE = ChainEndBB; - BI != BE; ++BI) { - assert(BlockToChain[BI] == Chain && "Incoming blocks not in chain"); - BlockToChain[BI] = this; + void merge(MachineBasicBlock *BB, BlockChain *Chain) { + assert(BB); + assert(!Blocks.empty()); + assert(Blocks.back()->isSuccessor(BB)); + + // Fast path in case we don't have a chain already. + if (!Chain) { + assert(!BlockToChain[BB]); + Blocks.push_back(BB); + BlockToChain[BB] = this; + return; } - // We splice the blocks together within the function (unless they already - // are adjacent) so we can represent the new chain with a pair of pointers - // to basic blocks within the function. This is also useful as each chain - // of blocks will end up being laid out contiguously within the function. - if (EndBB != Chain->FirstBB) - FirstBB->getParent()->splice(EndBB, Chain->FirstBB, ChainEndBB); - LastBB = Chain->LastBB; - } -}; -} - -namespace { -/// \brief Successor iterator for BlockChains. -/// -/// This is an iterator that walks over the successor block chains by looking -/// through its blocks successors and mapping those back to block chains. 
This -/// iterator is not a fully-functioning iterator, it is designed specifically -/// to support the interface required by SCCIterator when forming and walking -/// SCCs of BlockChains. -/// -/// Note that this iterator cannot be used while the chains are still being -/// formed and/or merged. Unlike the chains themselves, it does store end -/// iterators which could be moved if the chains are re-ordered. Once we begin -/// forming and iterating over an SCC of chains, the order of blocks within the -/// function must not change until we finish using the SCC iterators. -class BlockChain::SuccIterator - : public std::iterator { - BlockChain *Chain; - MachineFunction::iterator BI, BE; - MachineBasicBlock::succ_iterator SI; - -public: - explicit SuccIterator(BlockChain *Chain) - : Chain(Chain), BI(Chain->FirstBB), BE(llvm::next(Chain->LastBB)), - SI(BI->succ_begin()) { - while (BI != BE && BI->succ_begin() == BI->succ_end()) - ++BI; - if (BI != BE) - SI = BI->succ_begin(); - } - - /// \brief Helper function to create an end iterator for a particular chain. - /// - /// The "end" state is extremely arbitrary. We chose to have BI == BE, and SI - /// == Chain->FirstBB->succ_begin(). The value of SI doesn't really make any - /// sense, but rather than try to rationalize SI and our increment, when we - /// detect an "end" state, we just immediately call this function to build - /// the canonical end iterator. - static SuccIterator CreateEnd(BlockChain *Chain) { - SuccIterator It(Chain); - It.BI = It.BE; - return It; - } + assert(BB == *Chain->begin()); + assert(Chain->begin() != Chain->end()); - bool operator==(const SuccIterator &RHS) const { - return (Chain == RHS.Chain && BI == RHS.BI && SI == RHS.SI); - } - bool operator!=(const SuccIterator &RHS) const { - return !operator==(RHS); - } - - SuccIterator& operator++() { - assert(*this != CreateEnd(Chain) && "Cannot increment the end iterator"); - // There may be null successor pointers, skip over them. 
- // FIXME: I don't understand *why* there are null successor pointers. - do { - ++SI; - if (SI != BI->succ_end() && *SI) - return *this; - - // There may be a basic block without successors. Skip over them. - do { - ++BI; - if (BI == BE) - return *this = CreateEnd(Chain); - } while (BI->succ_begin() == BI->succ_end()); - SI = BI->succ_begin(); - } while (!*SI); - return *this; - } - SuccIterator operator++(int) { - SuccIterator tmp = *this; - ++*this; - return tmp; - } - - BlockChain *operator*() const { - assert(Chain->BlockToChain.lookup(*SI) && "Missing chain"); - return Chain->BlockToChain.lookup(*SI); - } -}; -} - -namespace { -/// \brief Sorter used with containers of BlockChain pointers. -/// -/// Sorts based on the \see BlockChain::InChainEdgeFrequency -- see its -/// comments for details on what this ordering represents. -struct ChainPtrPrioritySorter { - bool operator()(const BlockChain *LHS, const BlockChain *RHS) const { - assert(LHS && RHS && "Null chain entry"); - return LHS->InChainEdgeFrequency < RHS->InChainEdgeFrequency; + // Update the incoming blocks to point to this chain, and add them to the + // chain structure. + for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); + BI != BE; ++BI) { + Blocks.push_back(*BI); + assert(BlockToChain[*BI] == Chain && "Incoming blocks not in chain"); + BlockToChain[*BI] = this; + } } }; } namespace { class MachineBlockPlacement : public MachineFunctionPass { + /// \brief A typedef for a block filter set. + typedef SmallPtrSet BlockFilterSet; + /// \brief A handle to the branch probability pass. const MachineBranchProbabilityInfo *MBPI; @@ -270,17 +176,6 @@ class MachineBlockPlacement : public MachineFunctionPass { /// \brief A handle to the target's lowering info. const TargetLowering *TLI; - /// \brief A prioritized list of edges in the BB-graph. - /// - /// For each function, we insert all control flow edges between BBs, along - /// with their "global" frequency. 
The Frequency of an edge being taken is - /// defined as the frequency of entering the source BB (from MBFI) times the - /// probability of taking a particular branch out of that block (from MBPI). - /// - /// Once built, this list is sorted in ascending frequency, making the last - /// edge the hottest one in the function. - SmallVector Edges; - /// \brief Allocator and owner of BlockChain structures. /// /// We build BlockChains lazily by merging together high probability BB @@ -297,24 +192,12 @@ class MachineBlockPlacement : public MachineFunctionPass { /// between basic blocks. DenseMap BlockToChain; - /// \brief A prioritized sequence of chains. - /// - /// We build up the ideal sequence of basic block chains in reverse order - /// here, and then walk backwards to arrange the final function ordering. - SmallVector PChains; - -#ifndef NDEBUG - /// \brief A set of active chains used to sanity-check the pass algorithm. - /// - /// All operations on this member should be wrapped in an assert or NDEBUG. - SmallPtrSet ActiveChains; -#endif - BlockChain *CreateChain(MachineBasicBlock *BB); - void PrioritizeEdges(MachineFunction &F); - void BuildBlockChains(); - void PrioritizeChains(MachineFunction &F); - void PlaceBlockChains(MachineFunction &F); + void mergeSuccessor(MachineBasicBlock *BB, BlockChain *Chain, + BlockFilterSet *Filter = 0); + void buildLoopChains(MachineFunction &F, MachineLoop &L); + void buildCFGChains(MachineFunction &F); + void placeChainsTopologically(MachineFunction &F); void AlignLoops(MachineFunction &F); public: @@ -349,21 +232,30 @@ FunctionPass *llvm::createMachineBlockPlacementPass() { return new MachineBlockPlacement(); } -namespace llvm { -/// \brief GraphTraits specialization for our BlockChain graph. -template <> struct GraphTraits { - typedef BlockChain NodeType; - typedef BlockChain::SuccIterator ChildIteratorType; +#ifndef NDEBUG +/// \brief Helper to print the name of a MBB. +/// +/// Only used by debug logging. 
+static std::string getBlockName(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber() + << " (derived from LLVM BB '" << BB->getName() << "')"; + OS.flush(); + return Result; +} - static NodeType *getEntryNode(NodeType *N) { return N; } - static BlockChain::SuccIterator child_begin(NodeType *N) { - return BlockChain::SuccIterator(N); - } - static BlockChain::SuccIterator child_end(NodeType *N) { - return BlockChain::SuccIterator::CreateEnd(N); - } -}; +/// \brief Helper to print the number of a MBB. +/// +/// Only used by debug logging. +static std::string getBlockNum(MachineBasicBlock *BB) { + std::string Result; + raw_string_ostream OS(Result); + OS << "BB#" << BB->getNumber(); + OS.flush(); + return Result; } +#endif /// \brief Helper to create a new chain for a single BB. /// @@ -373,224 +265,168 @@ template <> struct GraphTraits { BlockChain *MachineBlockPlacement::CreateChain(MachineBasicBlock *BB) { BlockChain *Chain = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); - assert(ActiveChains.insert(Chain)); + //assert(ActiveChains.insert(Chain)); return Chain; } -/// \brief Build a prioritized list of edges. +/// \brief Merge a chain with any viable successor. /// -/// The priority is determined by the product of the block frequency (how -/// likely it is to arrive at a particular block) times the probability of -/// taking this particular edge out of the block. This provides a function-wide -/// ordering of the edges. -void MachineBlockPlacement::PrioritizeEdges(MachineFunction &F) { - assert(Edges.empty() && "Already have an edge list"); - SmallVector Cond; // For AnalyzeBranch. - BlockChain *RequiredChain = 0; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { - MachineBasicBlock *From = &*FI; - // We only consider MBBs with analyzable branches. Even if the analysis - // fails, if there is no fallthrough, we can still work with the MBB. 
- MachineBasicBlock *TBB = 0, *FBB = 0; - Cond.clear(); - if (TII->AnalyzeBranch(*From, TBB, FBB, Cond) && From->canFallThrough()) { - // We push all unanalyzed blocks onto a chain eagerly to prevent them - // from being split later. Create the chain if needed, otherwise just - // keep track that these blocks reside on it. - if (!RequiredChain) - RequiredChain = CreateChain(From); - else - BlockToChain[From] = RequiredChain; - } else { - // As soon as we find an analyzable branch, add that block to and - // finalize any required chain that has been started. The required chain - // is only modeling potentially inexplicable fallthrough, so the first - // block to have analyzable fallthrough is a known-safe stopping point. - if (RequiredChain) { - BlockToChain[From] = RequiredChain; - RequiredChain->LastBB = FI; - RequiredChain = 0; - } - } +/// This routine walks the predecessors of the current block, looking for +/// viable merge candidates. It has strict rules it uses to determine when +/// a predecessor can be merged with the current block, which center around +/// preserving the CFG structure. It performs the merge if any viable candidate +/// is found. +void MachineBlockPlacement::mergeSuccessor(MachineBasicBlock *BB, + BlockChain *Chain, + BlockFilterSet *Filter) { + assert(BB); + assert(Chain); + + // If this block is not at the end of its chain, it cannot merge with any + // other chain. + if (Chain && *llvm::prior(Chain->end()) != BB) + return; - BlockFrequency BaseFrequency = MBFI->getBlockFreq(From); - for (MachineBasicBlock::succ_iterator SI = From->succ_begin(), - SE = From->succ_end(); - SI != SE; ++SI) { - MachineBasicBlock *To = *SI; - WeightedEdge WE = { BaseFrequency * MBPI->getEdgeProbability(From, To), - From, To }; - Edges.push_back(WE); + // Walk through the successors looking for the highest probability edge. + // FIXME: This is an annoying way to do the comparison, but it's correct. 
+ // Support should be added to BranchProbability to properly compare two. + MachineBasicBlock *Successor = 0; + BlockFrequency BestFreq; + DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); + SI != SE; ++SI) { + if (BB == *SI || (Filter && !Filter->count(*SI))) + continue; + + BlockFrequency SuccFreq(BlockFrequency::getEntryFrequency()); + SuccFreq *= MBPI->getEdgeProbability(BB, *SI); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccFreq << "\n"); + if (!Successor || SuccFreq > BestFreq || (!(SuccFreq < BestFreq) && + BB->isLayoutSuccessor(*SI))) { + Successor = *SI; + BestFreq = SuccFreq; } } - assert(!RequiredChain && "Never found a terminator for a required chain"); - std::stable_sort(Edges.begin(), Edges.end()); -} + if (!Successor) + return; -/// \brief Build chains of basic blocks along hot paths. -/// -/// Build chains by trying to merge each pair of blocks from the mostly costly -/// edge first. This is essentially "Algo2" from the Profile Guided Code -/// Placement paper. While each node is considered a chain of one block, this -/// routine lazily build the chain objects themselves so that when possible it -/// can just merge a block into an existing chain. -void MachineBlockPlacement::BuildBlockChains() { - for (SmallVectorImpl::reverse_iterator EI = Edges.rbegin(), - EE = Edges.rend(); - EI != EE; ++EI) { - MachineBasicBlock *SourceB = EI->From, *DestB = EI->To; - if (SourceB == DestB) continue; - - BlockChain *SourceChain = BlockToChain.lookup(SourceB); - if (!SourceChain) SourceChain = CreateChain(SourceB); - BlockChain *DestChain = BlockToChain.lookup(DestB); - if (!DestChain) DestChain = CreateChain(DestB); - if (SourceChain == DestChain) - continue; + // Grab a chain if it exists already for this successor and make sure the + // successor is at the start of the chain as we can't merge mid-chain. 
Also, + // if the successor chain is the same as our chain, we're already merged. + BlockChain *SuccChain = BlockToChain[Successor]; + if (SuccChain && (SuccChain == Chain || Successor != *SuccChain->begin())) + return; + + // We only merge chains across a CFG merge when the desired merge path is + // significantly hotter than the incoming edge. We define a hot edge more + // strictly than the BranchProbabilityInfo does, as the two predecessor + // blocks may have dramatically different incoming probabilities we need to + // account for. Therefor we use the "global" edge weight which is the + // branch's probability times the block frequency of the predecessor. + BlockFrequency MergeWeight = MBFI->getBlockFreq(BB); + MergeWeight *= MBPI->getEdgeProbability(BB, Successor); + // We only want to consider breaking the CFG when the merge weight is much + // higher (80% vs. 20%), so multiply it by 1/4. This will require the merged + // edge to be 4x more likely before we disrupt the CFG. This number matches + // the definition of "hot" in BranchProbabilityAnalysis (80% vs. 20%). + MergeWeight *= BranchProbability(1, 4); + for (MachineBasicBlock::pred_iterator PI = Successor->pred_begin(), + PE = Successor->pred_end(); + PI != PE; ++PI) { + if (BB == *PI || Successor == *PI) continue; + BlockFrequency PredWeight = MBFI->getBlockFreq(*PI); + PredWeight *= MBPI->getEdgeProbability(*PI, Successor); + + // Return on the first predecessor we find which outstrips our merge weight. + if (MergeWeight < PredWeight) + return; + DEBUG(dbgs() << "Breaking CFG edge!\n" + << " Edge from " << getBlockNum(BB) << " to " + << getBlockNum(Successor) << ": " << MergeWeight << "\n" + << " vs. 
" << getBlockNum(BB) << " to " + << getBlockNum(*PI) << ": " << PredWeight << "\n"); + } - bool IsSourceTail = - SourceChain->LastBB == MachineFunction::iterator(SourceB); - bool IsDestHead = - DestChain->FirstBB == MachineFunction::iterator(DestB); + DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to " + << getBlockNum(Successor) << "\n"); + Chain->merge(Successor, SuccChain); +} - if (!IsSourceTail || !IsDestHead) - continue; +/// \brief Forms basic block chains from the natural loop structures. +/// +/// These chains are designed to preserve the existing *structure* of the code +/// as much as possible. We can then stitch the chains together in a way which +/// both preserves the topological structure and minimizes taken conditional +/// branches. +void MachineBlockPlacement::buildLoopChains(MachineFunction &F, MachineLoop &L) { + // First recurse through any nested loops, building chains for those inner + // loops. + for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) + buildLoopChains(F, **LI); + + SmallPtrSet LoopBlockSet(L.block_begin(), + L.block_end()); + + // Begin building up a set of chains of blocks within this loop which should + // remain contiguous. Some of the blocks already belong to a chain which + // represents an inner loop. + for (MachineLoop::block_iterator BI = L.block_begin(), BE = L.block_end(); + BI != BE; ++BI) { + MachineBasicBlock *BB = *BI; + BlockChain *Chain = BlockToChain[BB]; + if (!Chain) Chain = CreateChain(BB); + mergeSuccessor(BB, Chain, &LoopBlockSet); + } +} - SourceChain->merge(DestChain); - assert(ActiveChains.erase(DestChain)); +void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { + // First build any loop-based chains. + for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; + ++LI) + buildLoopChains(F, **LI); + + // Now walk the blocks of the function forming chains where they don't + // violate any CFG structure. 
+ for (MachineFunction::iterator BI = F.begin(), BE = F.end(); + BI != BE; ++BI) { + MachineBasicBlock *BB = BI; + BlockChain *Chain = BlockToChain[BB]; + if (!Chain) Chain = CreateChain(BB); + mergeSuccessor(BB, Chain); } } -/// \brief Prioritize the chains to minimize back-edges between chains. -/// -/// This is the trickiest part of the placement algorithm. Each chain is -/// a hot-path through a sequence of basic blocks, but there are conditional -/// branches away from this hot path, and to some other chain. Hardware branch -/// predictors favor back edges over forward edges, and so it is desirable to -/// arrange the targets of branches away from a hot path and to some other -/// chain to come later in the function, making them forward branches, and -/// helping the branch predictor to predict fallthrough. -/// -/// In some cases, this is easy. simply topologically walking from the entry -/// chain through its successors in order would work if there were no cycles -/// between the chains of blocks, but often there are. In such a case, we first -/// need to identify the participants in the cycle, and then rank them so that -/// the linearizing of the chains has the lowest *probability* of causing -/// a mispredicted branch. To compute the correct rank for a chain, we take the -/// complement of the branch probability for each branch leading away from the -/// chain and multiply it by the frequency of the source block for that branch. -/// This gives us the probability of that particular branch *not* being taken -/// in this function. The sum of these probabilities for each chain is used as -/// a rank, so that we order the chain with the highest such sum first. -/// FIXME: This seems like a good approximation, but there is probably a known -/// technique for ordering of an SCC given edge weights. It would be good to -/// use that, or even use its code if possible. 
-/// -/// Also notable is that we prioritize the chains from the bottom up, and so -/// all of the "first" and "before" relationships end up inverted in the code. -void MachineBlockPlacement::PrioritizeChains(MachineFunction &F) { +void MachineBlockPlacement::placeChainsTopologically(MachineFunction &F) { MachineBasicBlock *EntryB = &F.front(); BlockChain *EntryChain = BlockToChain[EntryB]; assert(EntryChain && "Missing chain for entry block"); - assert(EntryChain->FirstBB == F.begin() && + assert(*EntryChain->begin() == EntryB && "Entry block is not the head of the entry block chain"); - // Form an SCC and walk it from the bottom up. - SmallPtrSet IsInSCC; - for (scc_iterator I = scc_begin(EntryChain); - !I.isAtEnd(); ++I) { - const std::vector &SCC = *I; - PChains.insert(PChains.end(), SCC.begin(), SCC.end()); - - // If there is only one chain in the SCC, it's trivially sorted so just - // bail out early. Sorting the SCC is expensive. - if (SCC.size() == 1) + // Walk the blocks in RPO, and insert each block for a chain in order the + // first time we see that chain. + MachineFunction::iterator InsertPos = F.begin(); + SmallPtrSet VisitedChains; + ReversePostOrderTraversal RPOT(EntryB); + typedef ReversePostOrderTraversal::rpo_iterator + rpo_iterator; + for (rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { + BlockChain *Chain = BlockToChain[*I]; + assert(Chain); + if(!VisitedChains.insert(Chain)) continue; - - // We work strictly on the PChains range from here on out to maximize - // locality. - SmallVectorImpl::iterator SCCEnd = PChains.end(), - SCCBegin = SCCEnd - SCC.size(); - IsInSCC.clear(); - IsInSCC.insert(SCCBegin, SCCEnd); - - // Compute the edge frequency of staying in a chain, despite the existency - // of an edge to some other chain within this SCC. - for (SmallVectorImpl::iterator SCCI = SCCBegin; - SCCI != SCCEnd; ++SCCI) { - BlockChain *Chain = *SCCI; - - // Special case the entry chain. 
Regardless of the weights of other - // chains, the entry chain *must* come first, so move it to the end, and - // avoid processing that chain at all. - if (Chain == EntryChain) { - --SCCEnd; - if (SCCI == SCCEnd) break; - Chain = *SCCI = *SCCEnd; - *SCCEnd = EntryChain; - } - - // Walk over every block in this chain looking for out-bound edges to - // other chains in this SCC. - for (MachineFunction::iterator BI = Chain->FirstBB, - BE = llvm::next(Chain->LastBB); - BI != BE; ++BI) { - MachineBasicBlock *From = &*BI; - for (MachineBasicBlock::succ_iterator SI = BI->succ_begin(), - SE = BI->succ_end(); - SI != SE; ++SI) { - MachineBasicBlock *To = *SI; - if (!To || !IsInSCC.count(BlockToChain[To])) - continue; - BranchProbability ComplEdgeProb = - MBPI->getEdgeProbability(From, To).getCompl(); - Chain->InChainEdgeFrequency += - MBFI->getBlockFreq(From) * ComplEdgeProb; - } - } + for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); BI != BE; + ++BI) { + DEBUG(dbgs() << (BI == Chain->begin() ? "Placing chain " + : " ... ") + << getBlockName(*BI) << "\n"); + if (InsertPos != MachineFunction::iterator(*BI)) + F.splice(InsertPos, *BI); + else + ++InsertPos; } - - // Sort the chains within the SCC according to their edge frequencies, - // which should make the least costly chain of blocks to mis-place be - // ordered first in the prioritized sequence. - std::stable_sort(SCCBegin, SCCEnd, ChainPtrPrioritySorter()); } -} - -/// \brief Splice the function blocks together based on the chain priorities. -/// -/// Each chain is already represented as a contiguous range of blocks in the -/// function. Simply walk backwards down the prioritized chains and splice in -/// any chains out of order. Note that the first chain we visit is necessarily -/// the entry chain. It has no predecessors and so must be the top of the SCC. -/// Also, we cannot splice any chain prior to the entry chain as we can't -/// splice any blocks prior to the entry block. 
-void MachineBlockPlacement::PlaceBlockChains(MachineFunction &F) { - assert(!PChains.empty() && "No chains were prioritized"); - assert(PChains.back() == BlockToChain[&F.front()] && - "The entry chain must always be the final chain"); - - MachineFunction::iterator InsertPos = F.begin(); - for (SmallVectorImpl::reverse_iterator CI = PChains.rbegin(), - CE = PChains.rend(); - CI != CE; ++CI) { - BlockChain *Chain = *CI; - // Check that we process this chain only once for debugging. - assert(ActiveChains.erase(Chain) && "Processed a chain twice"); - - // If this chain is already in the right position, just skip past it. - // Otherwise, splice it into position. - if (InsertPos == Chain->FirstBB) - InsertPos = llvm::next(Chain->LastBB); - else - F.splice(InsertPos, Chain->FirstBB, llvm::next(Chain->LastBB)); - } - - // Note that we can't assert this is empty as there may be unreachable blocks - // in the function. -#ifndef NDEBUG - ActiveChains.clear(); -#endif // Now that every block is in its final position, update all of the // terminators. @@ -638,21 +474,13 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { MLI = &getAnalysis(); TII = F.getTarget().getInstrInfo(); TLI = F.getTarget().getTargetLowering(); - assert(Edges.empty()); assert(BlockToChain.empty()); - assert(PChains.empty()); - assert(ActiveChains.empty()); - PrioritizeEdges(F); - BuildBlockChains(); - PrioritizeChains(F); - PlaceBlockChains(F); + buildCFGChains(F); + placeChainsTopologically(F); AlignLoops(F); - Edges.clear(); BlockToChain.clear(); - PChains.clear(); - ChainAllocator.DestroyAll(); // We always return true as we have no way to track whether the final order // differs from the original order. -- cgit v1.1 From 341473c86d19cad5f3be432533ecdb42d9e07044 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 23 Oct 2011 11:19:14 +0000 Subject: Add compare operators to BranchProbability and use it to determine if an edge is hot. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142751 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/BranchProbabilityInfo.cpp | 12 ++++-------- lib/Support/BranchProbability.cpp | 7 ------- 2 files changed, 4 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index fc69da9..4f15858 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -476,12 +476,8 @@ uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { bool BranchProbabilityInfo:: isEdgeHot(const BasicBlock *Src, const BasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% - uint32_t Weight = getEdgeWeight(Src, Dst); - uint32_t Sum = getSumForBlock(Src); - - // FIXME: Implement BranchProbability::compare then change this code to - // compare this BranchProbability against a static "hot" BranchProbability. - return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; + // FIXME: Compare against a static "hot" BranchProbability. + return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); } BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { @@ -503,8 +499,8 @@ BasicBlock *BranchProbabilityInfo::getHotSucc(BasicBlock *BB) const { } } - // FIXME: Use BranchProbability::compare. 
- if ((uint64_t)MaxWeight * 5 > (uint64_t)Sum * 4) + // Hot probability is at least 4/5 = 80% + if (BranchProbability(MaxWeight, Sum) > BranchProbability(4, 5)) return MaxSucc; return 0; diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp index 49d04ed..5a5e075 100644 --- a/lib/Support/BranchProbability.cpp +++ b/lib/Support/BranchProbability.cpp @@ -17,13 +17,6 @@ using namespace llvm; -BranchProbability::BranchProbability(uint32_t n, uint32_t d) { - assert(d > 0 && "Denomiator cannot be 0!"); - assert(n <= d && "Probability cannot be bigger than 1!"); - N = n; - D = d; -} - void BranchProbability::print(raw_ostream &OS) const { OS << N << " / " << D << " = " << ((double)N / D); } -- cgit v1.1 From 7102b617bf69ac24928ce551854a907c649800f9 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 23 Oct 2011 11:32:54 +0000 Subject: Print branch probabilities as percentages. 50% is much more readable than 5.000000e-01. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142752 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/BranchProbability.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Support/BranchProbability.cpp b/lib/Support/BranchProbability.cpp index 5a5e075..e8b83e5 100644 --- a/lib/Support/BranchProbability.cpp +++ b/lib/Support/BranchProbability.cpp @@ -13,17 +13,17 @@ #include "llvm/Support/BranchProbability.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; void BranchProbability::print(raw_ostream &OS) const { - OS << N << " / " << D << " = " << ((double)N / D); + OS << N << " / " << D << " = " << format("%g%%", ((double)N / D) * 100.0); } void BranchProbability::dump() const { - print(dbgs()); - dbgs() << "\n"; + dbgs() << *this << '\n'; } namespace llvm { -- cgit v1.1 From 4f780536953cdd3d92c21111301763ddd57ab720 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 23 Oct 2011 
20:10:30 +0000 Subject: Remove a commented out line of code that snuck by my auditing. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142761 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 4f9958a..043a884 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -265,7 +265,6 @@ static std::string getBlockNum(MachineBasicBlock *BB) { BlockChain *MachineBlockPlacement::CreateChain(MachineBasicBlock *BB) { BlockChain *Chain = new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); - //assert(ActiveChains.insert(Chain)); return Chain; } -- cgit v1.1 From 66d847c8ffff5199248fccc10cb27f80c5cf9ebe Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 23 Oct 2011 20:10:34 +0000 Subject: Now that we have comparison on probabilities, add some static functions to get important constant branch probabilities and use them for finding the best branch out of a set of possibilities. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142762 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 043a884..32eb70e 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -287,10 +287,8 @@ void MachineBlockPlacement::mergeSuccessor(MachineBasicBlock *BB, return; // Walk through the successors looking for the highest probability edge. - // FIXME: This is an annoying way to do the comparison, but it's correct. - // Support should be added to BranchProbability to properly compare two. 
MachineBasicBlock *Successor = 0; - BlockFrequency BestFreq; + BranchProbability BestProb = BranchProbability::getZero(); DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); @@ -298,13 +296,12 @@ void MachineBlockPlacement::mergeSuccessor(MachineBasicBlock *BB, if (BB == *SI || (Filter && !Filter->count(*SI))) continue; - BlockFrequency SuccFreq(BlockFrequency::getEntryFrequency()); - SuccFreq *= MBPI->getEdgeProbability(BB, *SI); - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccFreq << "\n"); - if (!Successor || SuccFreq > BestFreq || (!(SuccFreq < BestFreq) && + BranchProbability SuccProb = MBPI->getEdgeProbability(BB, *SI); + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb << "\n"); + if (!Successor || SuccProb > BestProb || (!(SuccProb < BestProb) && BB->isLayoutSuccessor(*SI))) { Successor = *SI; - BestFreq = SuccFreq; + BestProb = SuccProb; } } if (!Successor) -- cgit v1.1 From 14edd314af99ccaad194d071f23e437a1371f176 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 23 Oct 2011 21:21:50 +0000 Subject: Teach the BranchProbabilityInfo pass to print its results, and use that to bring it under direct test instead of merely indirectly testing it in the BlockFrequencyInfo pass. The next step is to start adding tests for the various heuristics employed, and to start fixing those heuristics once they're under test. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142778 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/BranchProbabilityInfo.cpp | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 4f15858..9f175b0 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -12,11 +12,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Constants.h" +#include "llvm/Function.h" #include "llvm/Instructions.h" #include "llvm/LLVMContext.h" #include "llvm/Metadata.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" using namespace llvm; @@ -453,11 +455,26 @@ void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { } bool BranchProbabilityInfo::runOnFunction(Function &F) { + LastF = &F; // Store the last function we ran on for printing. LoopInfo &LI = getAnalysis(); BranchProbabilityAnalysis BPA(this, &LI); return BPA.runOnFunction(F); } +void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { + OS << "---- Branch Probabilities ----\n"; + // We print the probabilities from the last function the analysis ran over, + // or the function it is currently running over. 
+ assert(LastF && "Cannot print prior to running over a function"); + for (Function::const_iterator BI = LastF->begin(), BE = LastF->end(); + BI != BE; ++BI) { + for (succ_const_iterator SI = succ_begin(BI), SE = succ_end(BI); + SI != SE; ++SI) { + printEdgeProbability(OS << " ", BI, *SI); + } + } +} + uint32_t BranchProbabilityInfo::getSumForBlock(const BasicBlock *BB) const { uint32_t Sum = 0; @@ -537,8 +554,9 @@ getEdgeProbability(const BasicBlock *Src, const BasicBlock *Dst) const { } raw_ostream & -BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, BasicBlock *Src, - BasicBlock *Dst) const { +BranchProbabilityInfo::printEdgeProbability(raw_ostream &OS, + const BasicBlock *Src, + const BasicBlock *Dst) const { const BranchProbability Prob = getEdgeProbability(Src, Dst); OS << "edge " << Src->getNameStr() << " -> " << Dst->getNameStr() -- cgit v1.1 From 5679ec3b528fb897739251b1f66037767ce2f208 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 23 Oct 2011 22:18:24 +0000 Subject: Add X86 SARX, SHRX, and SHLX instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142779 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrShiftRotate.td | 50 ++++++++++++++++++++++------------- 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrShiftRotate.td b/lib/Target/X86/X86InstrShiftRotate.td index a32f066..58cf6e3 100644 --- a/lib/Target/X86/X86InstrShiftRotate.td +++ b/lib/Target/X86/X86InstrShiftRotate.td @@ -744,24 +744,38 @@ def SHRD64mri8 : RIi8<0xAC, MRMDestMem, } // Defs = [EFLAGS] -let Predicates = [HasBMI2], neverHasSideEffects = 1 in { - def RORX32ri : Ii8<0xF0, MRMSrcReg, (outs GR32:$dst), - (ins GR32:$src1, i8imm:$src2), - "rorx{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - TAXD, VEX; +multiclass bmi_rotate { +let neverHasSideEffects = 1 in { + def ri : Ii8<0xF0, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, i8imm:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, TAXD, VEX; let mayLoad = 1 in - def RORX32mi : Ii8<0xF0, MRMSrcMem, (outs GR32:$dst), - (ins i32mem:$src1, i8imm:$src2), - "rorx{l}\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - TAXD, VEX; - - def RORX64ri : Ii8<0xF0, MRMSrcReg, (outs GR64:$dst), - (ins GR64:$src1, i8imm:$src2), - "rorx{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - TAXD, VEX, VEX_W; + def mi : Ii8<0xF0, MRMSrcMem, (outs RC:$dst), + (ins x86memop:$src1, i8imm:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + []>, TAXD, VEX; +} +} + +multiclass bmi_shift { +let neverHasSideEffects = 1 in { + def rr : I<0xF7, MRMSrcReg, (outs RC:$dst), (ins RC:$src1, RC:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, + VEX_4VOp3; let mayLoad = 1 in - def RORX64mi : Ii8<0xF0, MRMSrcMem, (outs GR64:$dst), - (ins i64mem:$src1, i8imm:$src2), - "rorx{q}\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - TAXD, VEX, VEX_W; + def rm : I<0xF7, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src1, RC:$src2), + 
!strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), []>, + VEX_4VOp3; +} +} + +let Predicates = [HasBMI2] in { + defm RORX32 : bmi_rotate<"rorx{l}", GR32, i32mem>; + defm RORX64 : bmi_rotate<"rorx{q}", GR64, i64mem>, VEX_W; + defm SARX32 : bmi_shift<"sarx{l}", GR32, i32mem>, T8XS; + defm SARX64 : bmi_shift<"sarx{q}", GR64, i64mem>, T8XS, VEX_W; + defm SHRX32 : bmi_shift<"shrx{l}", GR32, i32mem>, T8XD; + defm SHRX64 : bmi_shift<"shrx{q}", GR64, i64mem>, T8XD, VEX_W; + defm SHLX32 : bmi_shift<"shlx{l}", GR32, i32mem>, T8, OpSize; + defm SHLX64 : bmi_shift<"shlx{q}", GR64, i64mem>, T8, OpSize, VEX_W; } -- cgit v1.1 From 22c8946239de6d0cd6c51eeea245498e3c95ed87 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 23 Oct 2011 22:40:13 +0000 Subject: Tidy up a loop to be more idiomatic for LLVM's codebase, and remove some extraneous whitespace. Trying to clean-up this pass as much as I can before I start making functional changes. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142780 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/BranchProbabilityInfo.cpp | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 9f175b0..46fe331 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -424,34 +424,25 @@ bool BranchProbabilityAnalysis::calcFloatingPointHeuristics(BasicBlock *BB) { } bool BranchProbabilityAnalysis::runOnFunction(Function &F) { - - for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { - BasicBlock *BB = I++; - - if (calcMetadataWeights(BB)) + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { + if (calcMetadataWeights(I)) continue; - - if (calcLoopBranchHeuristics(BB)) + if (calcLoopBranchHeuristics(I)) continue; - - if (calcReturnHeuristics(BB)) + if (calcReturnHeuristics(I)) continue; - - if 
(calcPointerHeuristics(BB)) + if (calcPointerHeuristics(I)) continue; - - if (calcZeroHeuristics(BB)) + if (calcZeroHeuristics(I)) continue; - - calcFloatingPointHeuristics(BB); + calcFloatingPointHeuristics(I); } - return false; } void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.setPreservesAll(); + AU.addRequired(); + AU.setPreservesAll(); } bool BranchProbabilityInfo::runOnFunction(Function &F) { -- cgit v1.1 From 795cb48f1a1f01ce55b32d3d3caca728a4122d7d Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Sun, 23 Oct 2011 23:43:14 +0000 Subject: Enhance SCEV's brute force loop analysis to handle multiple PHI nodes in the loop header when computing the trip count. With this, we now constant evaluate: struct ListNode { const struct ListNode *next; int i; }; static const struct ListNode node1 = {0, 1}; static const struct ListNode node2 = {&node1, 2}; static const struct ListNode node3 = {&node2, 3}; int test() { int sum = 0; for (const struct ListNode *n = &node3; n != 0; n = n->next) sum += n->i; return sum; } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142781 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 52 ++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 2da8e6f..3d1fa95 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4882,29 +4882,33 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // That's the only form we support here. if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); + DenseMap CurrentIterVals; + BasicBlock *Header = L->getHeader(); + assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); + // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. 
bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - Constant *StartCST = - dyn_cast(PN->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) return getCouldNotCompute(); // Must be a constant. - - Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - if (getConstantEvolvingPHI(BEValue, L) != PN && - !isa(BEValue)) - return getCouldNotCompute(); // Not derived from same PHI. + PHINode *PHI = 0; + for (BasicBlock::iterator I = Header->begin(); + (PHI = dyn_cast(I)); ++I) { + Constant *StartCST = + dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) continue; + CurrentIterVals[PHI] = StartCST; + } + if (!CurrentIterVals.count(PN)) + return getCouldNotCompute(); // Okay, we find a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of // "ExitWhen". - unsigned IterationNum = 0; - unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. - for (Constant *PHIVal = StartCST; - IterationNum != MaxIterations; ++IterationNum) { - DenseMap PHIValMap; - PHIValMap[PN] = PHIVal; + + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. + for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ ConstantInt *CondVal = - dyn_cast_or_null(EvaluateExpression(Cond, L, PHIValMap, TD)); + dyn_cast_or_null(EvaluateExpression(Cond, L, + CurrentIterVals, TD)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -4914,11 +4918,19 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, return getConstant(Type::getInt32Ty(getContext()), IterationNum); } - // Compute the value of the PHI node for the next iteration. - Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD); - if (NextPHI == 0 || NextPHI == PHIVal) - return getCouldNotCompute();// Couldn't evaluate or not making progress... - PHIVal = NextPHI; + // Update all the PHI nodes for the next iteration. 
+ DenseMap NextIterVals; + for (DenseMap::const_iterator + I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ + PHINode *PHI = dyn_cast(I->first); + if (!PHI) continue; + Constant *&NextPHI = NextIterVals[PHI]; + if (NextPHI) continue; // Already computed! + + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + } + CurrentIterVals.swap(NextIterVals); } // Too many iterations were needed to evaluate. -- cgit v1.1 From b068bbbaecf338f481124551a5e6f37484fad800 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 24 Oct 2011 01:40:45 +0000 Subject: Simplify the design of BranchProbabilityInfo by collapsing it into a single class. Previously it was split between two classes, one internal and one external. The concern seemed to center around exposing the weights used, but those can remain confined to the implementation file. Having a single class to maintain the state and analyses in use will also simplify several of the enhancements I want to make to our static heuristics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142783 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/BranchProbabilityInfo.cpp | 225 +++++++++++++-------------------- 1 file changed, 90 insertions(+), 135 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 46fe331..a03d9d8 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -31,124 +31,83 @@ INITIALIZE_PASS_END(BranchProbabilityInfo, "branch-prob", char BranchProbabilityInfo::ID = 0; -namespace { -// Please note that BranchProbabilityAnalysis is not a FunctionPass. -// It is created by BranchProbabilityInfo (which is a FunctionPass), which -// provides a clear interface. Thanks to that, all heuristics and other -// private methods are hidden in the .cpp file. 
-class BranchProbabilityAnalysis { - - typedef std::pair Edge; - - BranchProbabilityInfo *BP; - - LoopInfo *LI; - - - // Weights are for internal use only. They are used by heuristics to help to - // estimate edges' probability. Example: - // - // Using "Loop Branch Heuristics" we predict weights of edges for the - // block BB2. - // ... - // | - // V - // BB1<-+ - // | | - // | | (Weight = 124) - // V | - // BB2--+ - // | - // | (Weight = 4) - // V - // BB3 - // - // Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 - // Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 - - static const uint32_t LBH_TAKEN_WEIGHT = 124; - static const uint32_t LBH_NONTAKEN_WEIGHT = 4; - - static const uint32_t RH_TAKEN_WEIGHT = 24; - static const uint32_t RH_NONTAKEN_WEIGHT = 8; - - static const uint32_t PH_TAKEN_WEIGHT = 20; - static const uint32_t PH_NONTAKEN_WEIGHT = 12; - - static const uint32_t ZH_TAKEN_WEIGHT = 20; - static const uint32_t ZH_NONTAKEN_WEIGHT = 12; - - static const uint32_t FPH_TAKEN_WEIGHT = 20; - static const uint32_t FPH_NONTAKEN_WEIGHT = 12; - - // Standard weight value. Used when none of the heuristics set weight for - // the edge. - static const uint32_t NORMAL_WEIGHT = 16; - - // Minimum weight of an edge. Please note, that weight is NEVER 0. - static const uint32_t MIN_WEIGHT = 1; - - // Return TRUE if BB leads directly to a Return Instruction. - static bool isReturningBlock(BasicBlock *BB) { - SmallPtrSet Visited; - - while (true) { - TerminatorInst *TI = BB->getTerminator(); - if (isa(TI)) - return true; - - if (TI->getNumSuccessors() > 1) - break; - - // It is unreachable block which we can consider as a return instruction. - if (TI->getNumSuccessors() == 0) - return true; - - Visited.insert(BB); - BB = TI->getSuccessor(0); - - // Stop if cycle is detected. - if (Visited.count(BB)) - return false; - } +// Weights are for internal use only. They are used by heuristics to help to +// estimate edges' probability. 
Example: +// +// Using "Loop Branch Heuristics" we predict weights of edges for the +// block BB2. +// ... +// | +// V +// BB1<-+ +// | | +// | | (Weight = 124) +// V | +// BB2--+ +// | +// | (Weight = 4) +// V +// BB3 +// +// Probability of the edge BB2->BB1 = 124 / (124 + 4) = 0.96875 +// Probability of the edge BB2->BB3 = 4 / (124 + 4) = 0.03125 +static const uint32_t LBH_TAKEN_WEIGHT = 124; +static const uint32_t LBH_NONTAKEN_WEIGHT = 4; - return false; - } +static const uint32_t RH_TAKEN_WEIGHT = 24; +static const uint32_t RH_NONTAKEN_WEIGHT = 8; - uint32_t getMaxWeightFor(BasicBlock *BB) const { - return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); - } +static const uint32_t PH_TAKEN_WEIGHT = 20; +static const uint32_t PH_NONTAKEN_WEIGHT = 12; -public: - BranchProbabilityAnalysis(BranchProbabilityInfo *BP, LoopInfo *LI) - : BP(BP), LI(LI) { - } +static const uint32_t ZH_TAKEN_WEIGHT = 20; +static const uint32_t ZH_NONTAKEN_WEIGHT = 12; + +static const uint32_t FPH_TAKEN_WEIGHT = 20; +static const uint32_t FPH_NONTAKEN_WEIGHT = 12; + +// Standard weight value. Used when none of the heuristics set weight for +// the edge. +static const uint32_t NORMAL_WEIGHT = 16; - // Metadata Weights - bool calcMetadataWeights(BasicBlock *BB); +// Minimum weight of an edge. Please note, that weight is NEVER 0. +static const uint32_t MIN_WEIGHT = 1; - // Return Heuristics - bool calcReturnHeuristics(BasicBlock *BB); +// Return TRUE if BB leads directly to a Return Instruction. +static bool isReturningBlock(BasicBlock *BB) { + SmallPtrSet Visited; - // Pointer Heuristics - bool calcPointerHeuristics(BasicBlock *BB); + while (true) { + TerminatorInst *TI = BB->getTerminator(); + if (isa(TI)) + return true; - // Loop Branch Heuristics - bool calcLoopBranchHeuristics(BasicBlock *BB); + if (TI->getNumSuccessors() > 1) + break; + + // It is unreachable block which we can consider as a return instruction. 
+ if (TI->getNumSuccessors() == 0) + return true; + + Visited.insert(BB); + BB = TI->getSuccessor(0); - // Zero Heuristics - bool calcZeroHeuristics(BasicBlock *BB); + // Stop if cycle is detected. + if (Visited.count(BB)) + return false; + } - // Floating Point Heuristics - bool calcFloatingPointHeuristics(BasicBlock *BB); + return false; +} + +static uint32_t getMaxWeightFor(BasicBlock *BB) { + return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); +} - bool runOnFunction(Function &F); -}; -} // end anonymous namespace // Propagate existing explicit probabilities from either profile data or // 'expect' intrinsic processing. -bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) { +bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { TerminatorInst *TI = BB->getTerminator(); if (TI->getNumSuccessors() == 1) return false; @@ -179,14 +138,14 @@ bool BranchProbabilityAnalysis::calcMetadataWeights(BasicBlock *BB) { } assert(Weights.size() == TI->getNumSuccessors() && "Checked above"); for (unsigned i = 0, e = TI->getNumSuccessors(); i != e; ++i) - BP->setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]); + setEdgeWeight(BB, TI->getSuccessor(i), Weights[i]); return true; } // Calculate Edge Weights using "Return Heuristics". Predict a successor which // leads directly to Return Instruction will not be taken. 
-bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ +bool BranchProbabilityInfo::calcReturnHeuristics(BasicBlock *BB){ if (BB->getTerminator()->getNumSuccessors() == 1) return false; @@ -208,7 +167,7 @@ bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ for (SmallPtrSet::iterator I = StayEdges.begin(), E = StayEdges.end(); I != E; ++I) - BP->setEdgeWeight(BB, *I, stayWeight); + setEdgeWeight(BB, *I, stayWeight); } if (uint32_t numRetEdges = ReturningEdges.size()) { @@ -217,7 +176,7 @@ bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ retWeight = MIN_WEIGHT; for (SmallPtrSet::iterator I = ReturningEdges.begin(), E = ReturningEdges.end(); I != E; ++I) { - BP->setEdgeWeight(BB, *I, retWeight); + setEdgeWeight(BB, *I, retWeight); } } @@ -226,7 +185,7 @@ bool BranchProbabilityAnalysis::calcReturnHeuristics(BasicBlock *BB){ // Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion // between two pointer or pointer and NULL will fail. -bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { +bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { BranchInst * BI = dyn_cast(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -254,14 +213,14 @@ bool BranchProbabilityAnalysis::calcPointerHeuristics(BasicBlock *BB) { if (!isProb) std::swap(Taken, NonTaken); - BP->setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT); - BP->setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, Taken, PH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTaken, PH_NONTAKEN_WEIGHT); return true; } // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges // as taken, exiting edges as not-taken. 
-bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { +bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { uint32_t numSuccs = BB->getTerminator()->getNumSuccessors(); Loop *L = LI->getLoopFor(BB); @@ -293,7 +252,7 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { for (SmallPtrSet::iterator EI = BackEdges.begin(), EE = BackEdges.end(); EI != EE; ++EI) { BasicBlock *Back = *EI; - BP->setEdgeWeight(BB, Back, backWeight); + setEdgeWeight(BB, Back, backWeight); } } @@ -305,7 +264,7 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { for (SmallPtrSet::iterator EI = InEdges.begin(), EE = InEdges.end(); EI != EE; ++EI) { BasicBlock *Back = *EI; - BP->setEdgeWeight(BB, Back, inWeight); + setEdgeWeight(BB, Back, inWeight); } } @@ -318,14 +277,14 @@ bool BranchProbabilityAnalysis::calcLoopBranchHeuristics(BasicBlock *BB) { for (SmallPtrSet::iterator EI = ExitingEdges.begin(), EE = ExitingEdges.end(); EI != EE; ++EI) { BasicBlock *Exiting = *EI; - BP->setEdgeWeight(BB, Exiting, exitWeight); + setEdgeWeight(BB, Exiting, exitWeight); } } return true; } -bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) { +bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) { BranchInst * BI = dyn_cast(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -380,13 +339,13 @@ bool BranchProbabilityAnalysis::calcZeroHeuristics(BasicBlock *BB) { if (!isProb) std::swap(Taken, NonTaken); - BP->setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT); - BP->setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, Taken, ZH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTaken, ZH_NONTAKEN_WEIGHT); return true; } -bool BranchProbabilityAnalysis::calcFloatingPointHeuristics(BasicBlock *BB) { +bool BranchProbabilityInfo::calcFloatingPointHeuristics(BasicBlock *BB) { BranchInst *BI = dyn_cast(BB->getTerminator()); if (!BI || !BI->isConditional()) return false; @@ -417,13 +376,21 
@@ bool BranchProbabilityAnalysis::calcFloatingPointHeuristics(BasicBlock *BB) { if (!isProb) std::swap(Taken, NonTaken); - BP->setEdgeWeight(BB, Taken, FPH_TAKEN_WEIGHT); - BP->setEdgeWeight(BB, NonTaken, FPH_NONTAKEN_WEIGHT); + setEdgeWeight(BB, Taken, FPH_TAKEN_WEIGHT); + setEdgeWeight(BB, NonTaken, FPH_NONTAKEN_WEIGHT); return true; } -bool BranchProbabilityAnalysis::runOnFunction(Function &F) { +void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.setPreservesAll(); +} + +bool BranchProbabilityInfo::runOnFunction(Function &F) { + LastF = &F; // Store the last function we ran on for printing. + LI = &getAnalysis(); + for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { if (calcMetadataWeights(I)) continue; @@ -440,18 +407,6 @@ bool BranchProbabilityAnalysis::runOnFunction(Function &F) { return false; } -void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.setPreservesAll(); -} - -bool BranchProbabilityInfo::runOnFunction(Function &F) { - LastF = &F; // Store the last function we ran on for printing. - LoopInfo &LI = getAnalysis(); - BranchProbabilityAnalysis BPA(this, &LI); - return BPA.runOnFunction(F); -} - void BranchProbabilityInfo::print(raw_ostream &OS, const Module *) const { OS << "---- Branch Probabilities ----\n"; // We print the probabilities from the last function the analysis ran over, -- cgit v1.1 From 5d0700786d53046b3d5d7fe0d8d207290a13872c Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Mon, 24 Oct 2011 03:27:19 +0000 Subject: Windows/Path.inc: [PR8460] Get rid of ScopedNullTerminator. Thanks to Zvi Rackover! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142785 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Windows/Path.inc | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 42a92f9..8a5edcc 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -66,29 +66,20 @@ Path::operator=(StringRef that) { return *this; } -// push_back 0 on create, and pop_back on delete. -struct ScopedNullTerminator { - std::string &str; - ScopedNullTerminator(std::string &s) : str(s) { str.push_back(0); } - ~ScopedNullTerminator() { - // str.pop_back(); But wait, C++03 doesn't have this... - assert(!str.empty() && str[str.size() - 1] == 0 - && "Null char not present!"); - str.resize(str.size() - 1); - } -}; - bool Path::isValid() const { if (path.empty()) return false; + size_t len = path.size(); + // If there is a null character, it and all its successors are ignored. + size_t pos = path.find_first_of('\0'); + if (pos != std::string::npos) + len = pos; + // If there is a colon, it must be the second character, preceded by a letter // and followed by something. - size_t len = path.size(); - // This code assumes that path is null terminated, so make sure it is. - ScopedNullTerminator snt(path); - size_t pos = path.rfind(':',len); + pos = path.rfind(':',len); size_t rootslash = 0; if (pos != std::string::npos) { if (pos != 1 || !isalpha(path[0]) || len < 3) @@ -118,13 +109,13 @@ Path::isValid() const { for (pos = 0; pos < len; ++pos) { // A component may not end in a space. if (path[pos] == ' ') { - if (path[pos+1] == '/' || path[pos+1] == '\0') + if (pos+1 == len || path[pos+1] == '/' || path[pos+1] == '\0') return false; } // A component may not end in a period. if (path[pos] == '.') { - if (path[pos+1] == '/' || path[pos+1] == '\0') { + if (pos+1 == len || path[pos+1] == '/') { // Unless it is the pseudo-directory "."... 
if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':') return true; -- cgit v1.1 From 38af3d5a8dbcada6f264d3a268f525a827adaca2 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 24 Oct 2011 04:00:25 +0000 Subject: Speculatively revert r142781. Bots are showing Assertion `i_nocapture < OperandTraits::operands(this) && "getOperand() out of range!"' failed. coming out of indvars. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142786 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 52 ++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 3d1fa95..2da8e6f 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4882,33 +4882,29 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // That's the only form we support here. if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); - DenseMap CurrentIterVals; - BasicBlock *Header = L->getHeader(); - assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); - // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = 0; - for (BasicBlock::iterator I = Header->begin(); - (PHI = dyn_cast(I)); ++I) { - Constant *StartCST = - dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) continue; - CurrentIterVals[PHI] = StartCST; - } - if (!CurrentIterVals.count(PN)) - return getCouldNotCompute(); + Constant *StartCST = + dyn_cast(PN->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) return getCouldNotCompute(); // Must be a constant. 
+ + Value *BEValue = PN->getIncomingValue(SecondIsBackedge); + if (getConstantEvolvingPHI(BEValue, L) != PN && + !isa(BEValue)) + return getCouldNotCompute(); // Not derived from same PHI. // Okay, we find a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of // "ExitWhen". - - unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. - for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ + unsigned IterationNum = 0; + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. + for (Constant *PHIVal = StartCST; + IterationNum != MaxIterations; ++IterationNum) { + DenseMap PHIValMap; + PHIValMap[PN] = PHIVal; ConstantInt *CondVal = - dyn_cast_or_null(EvaluateExpression(Cond, L, - CurrentIterVals, TD)); + dyn_cast_or_null(EvaluateExpression(Cond, L, PHIValMap, TD)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -4918,19 +4914,11 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, return getConstant(Type::getInt32Ty(getContext()), IterationNum); } - // Update all the PHI nodes for the next iteration. - DenseMap NextIterVals; - for (DenseMap::const_iterator - I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ - PHINode *PHI = dyn_cast(I->first); - if (!PHI) continue; - Constant *&NextPHI = NextIterVals[PHI]; - if (NextPHI) continue; // Already computed! - - Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); - } - CurrentIterVals.swap(NextIterVals); + // Compute the value of the PHI node for the next iteration. + Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD); + if (NextPHI == 0 || NextPHI == PHIVal) + return getCouldNotCompute();// Couldn't evaluate or not making progress... + PHIVal = NextPHI; } // Too many iterations were needed to evaluate. 
-- cgit v1.1 From 4a3935c27e5315081844a5b7ae1f7097efc234b0 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 24 Oct 2011 04:35:36 +0000 Subject: A dead malloc, a free(NULL) and a free(undef) are all trivially dead instructions. This doesn't introduce any optimizations we weren't doing before (except potentially due to pass ordering issues), now passes will eliminate them sooner as part of their own cleanups. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142787 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/Local.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index 7034feb..134ab71 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -28,6 +28,7 @@ #include "llvm/Analysis/DIBuilder.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Target/TargetData.h" @@ -257,6 +258,13 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) { II->getIntrinsicID() == Intrinsic::lifetime_end) return isa(II->getArgOperand(1)); } + + if (extractMallocCall(I)) return true; + + if (CallInst *CI = isFreeCall(I)) + if (Constant *C = dyn_cast(CI->getArgOperand(0))) + return C->isNullValue() || isa(C); + return false; } -- cgit v1.1 From 5bef0eb1de5f52662de7b4808bee5e9d643b04db Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 24 Oct 2011 05:51:01 +0000 Subject: PHI nodes not in the loop header aren't part of the loop iteration initial state. Furthermore, they might not have two operands. This fixes the underlying issue behind the crashes introduced in r142781. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142788 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 2da8e6f..1e4bf19 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4856,7 +4856,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, for (DenseMap::const_iterator I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ PHINode *PHI = dyn_cast(I->first); - if (!PHI || PHI == PN) continue; + if (!PHI || PHI == PN || PHI->getParent() != Header) continue; Constant *&NextPHI = NextIterVals[PHI]; if (NextPHI) continue; // Already computed! -- cgit v1.1 From 7c3fc5747284a0c6ca4e370f964082c69b42b8dd Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 24 Oct 2011 06:57:05 +0000 Subject: Reapply r142781 with fix. Original message: Enhance SCEV's brute force loop analysis to handle multiple PHI nodes in the loop header when computing the trip count. 
With this, we now constant evaluate: struct ListNode { const struct ListNode *next; int i; }; static const struct ListNode node1 = {0, 1}; static const struct ListNode node2 = {&node1, 2}; static const struct ListNode node3 = {&node2, 3}; int test() { int sum = 0; for (const struct ListNode *n = &node3; n != 0; n = n->next) sum += n->i; return sum; } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142790 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 52 ++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 1e4bf19..1ab6a40 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4882,29 +4882,33 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // That's the only form we support here. if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); + DenseMap CurrentIterVals; + BasicBlock *Header = L->getHeader(); + assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); + // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - Constant *StartCST = - dyn_cast(PN->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) return getCouldNotCompute(); // Must be a constant. - - Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - if (getConstantEvolvingPHI(BEValue, L) != PN && - !isa(BEValue)) - return getCouldNotCompute(); // Not derived from same PHI. 
+ PHINode *PHI = 0; + for (BasicBlock::iterator I = Header->begin(); + (PHI = dyn_cast(I)); ++I) { + Constant *StartCST = + dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) continue; + CurrentIterVals[PHI] = StartCST; + } + if (!CurrentIterVals.count(PN)) + return getCouldNotCompute(); // Okay, we find a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of // "ExitWhen". - unsigned IterationNum = 0; - unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. - for (Constant *PHIVal = StartCST; - IterationNum != MaxIterations; ++IterationNum) { - DenseMap PHIValMap; - PHIValMap[PN] = PHIVal; + + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. + for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ ConstantInt *CondVal = - dyn_cast_or_null(EvaluateExpression(Cond, L, PHIValMap, TD)); + dyn_cast_or_null(EvaluateExpression(Cond, L, + CurrentIterVals, TD)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -4914,11 +4918,19 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, return getConstant(Type::getInt32Ty(getContext()), IterationNum); } - // Compute the value of the PHI node for the next iteration. - Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD); - if (NextPHI == 0 || NextPHI == PHIVal) - return getCouldNotCompute();// Couldn't evaluate or not making progress... - PHIVal = NextPHI; + // Update all the PHI nodes for the next iteration. + DenseMap NextIterVals; + for (DenseMap::const_iterator + I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ + PHINode *PHI = dyn_cast(I->first); + if (!PHI || PHI->getParent() != Header) continue; + Constant *&NextPHI = NextIterVals[PHI]; + if (NextPHI) continue; // Already computed! 
+ + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + } + CurrentIterVals.swap(NextIterVals); } // Too many iterations were needed to evaluate. -- cgit v1.1 From de1c9bb45017e25b5fc2b77e15d3c377f6572075 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 24 Oct 2011 12:01:08 +0000 Subject: Remove return heuristics from the static branch probabilities, and introduce no-return or unreachable heuristics. The return heuristics from the Ball and Larus paper don't work well in practice as they pessimize early return paths. The only good hitrate return heuristics are those for: - NULL return - Constant return - negative integer return Only the last of these three can possibly require significant code for the returning block, and even the last is fairly rare and usually also a constant. As a consequence, even for the cold return paths, there is little code on that return path, and so little code density to be gained by sinking it. The places where sinking these blocks is valuable (inner loops) will already be weighted appropriately as the edge is a loop-exit branch. All of this aside, early returns are nearly as common as all three of these return categories, and should actually be predicted as taken! Rather than muddy the waters of the static predictions, just remain silent on returns and let the CFG itself dictate any layout or other issues. However, the return heuristic was flagging one very important case: unreachable. Unfortunately it still gave a 1/4 chance of the branch-to-unreachable occurring. It also didn't do a rigorous job of finding those blocks which post-dominate an unreachable block. This patch builds a more powerful analysis that should flag all branches to blocks known to then reach unreachable. It also has better worst-case runtime complexity by not looping through successors for each block.
The previous code would perform an N^2 walk in the event of a single entry block branching to N successors with a switch where each successor falls through to the next and they finally fall through to a return. Test case added for noreturn heuristics. Also doxygen comments improved along the way. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142793 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/BranchProbabilityInfo.cpp | 149 +++++++++++++++++---------------- 1 file changed, 76 insertions(+), 73 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index a03d9d8..0396f99 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -18,6 +18,7 @@ #include "llvm/Metadata.h" #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" @@ -54,8 +55,18 @@ char BranchProbabilityInfo::ID = 0; static const uint32_t LBH_TAKEN_WEIGHT = 124; static const uint32_t LBH_NONTAKEN_WEIGHT = 4; -static const uint32_t RH_TAKEN_WEIGHT = 24; -static const uint32_t RH_NONTAKEN_WEIGHT = 8; +/// \brief Unreachable-terminating branch taken weight. +/// +/// This is the weight for a branch being taken to a block that terminates +/// (eventually) in unreachable. These are predicted as unlikely as possible. +static const uint32_t UR_TAKEN_WEIGHT = 1; + +/// \brief Unreachable-terminating branch not-taken weight. +/// +/// This is the weight for a branch not being taken toward a block that +/// terminates (eventually) in unreachable. Such a branch is essentially never +/// taken. +static const uint32_t UR_NONTAKEN_WEIGHT = 1023; static const uint32_t PH_TAKEN_WEIGHT = 20; static const uint32_t PH_NONTAKEN_WEIGHT = 12; @@ -73,37 +84,61 @@ static const uint32_t NORMAL_WEIGHT = 16; // Minimum weight of an edge. 
Please note, that weight is NEVER 0. static const uint32_t MIN_WEIGHT = 1; -// Return TRUE if BB leads directly to a Return Instruction. -static bool isReturningBlock(BasicBlock *BB) { - SmallPtrSet Visited; - - while (true) { - TerminatorInst *TI = BB->getTerminator(); - if (isa(TI)) - return true; +static uint32_t getMaxWeightFor(BasicBlock *BB) { + return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); +} - if (TI->getNumSuccessors() > 1) - break; - // It is unreachable block which we can consider as a return instruction. - if (TI->getNumSuccessors() == 0) - return true; +/// \brief Calculate edge weights for successors lead to unreachable. +/// +/// Predict that a successor which leads necessarily to an +/// unreachable-terminated block as extremely unlikely. +bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) { + TerminatorInst *TI = BB->getTerminator(); + if (TI->getNumSuccessors() == 0) { + if (isa(TI)) + PostDominatedByUnreachable.insert(BB); + return false; + } - Visited.insert(BB); - BB = TI->getSuccessor(0); + SmallPtrSet UnreachableEdges; + SmallPtrSet ReachableEdges; - // Stop if cycle is detected. - if (Visited.count(BB)) - return false; + for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { + if (PostDominatedByUnreachable.count(*I)) + UnreachableEdges.insert(*I); + else + ReachableEdges.insert(*I); } - return false; -} + // If all successors are in the set of blocks post-dominated by unreachable, + // this block is too. + if (UnreachableEdges.size() == TI->getNumSuccessors()) + PostDominatedByUnreachable.insert(BB); -static uint32_t getMaxWeightFor(BasicBlock *BB) { - return UINT32_MAX / BB->getTerminator()->getNumSuccessors(); -} + // Skip probabilities if this block has a single successor or if all were + // reachable. 
+ if (TI->getNumSuccessors() == 1 || UnreachableEdges.empty()) + return false; + uint32_t UnreachableWeight = + std::max(UR_TAKEN_WEIGHT / UnreachableEdges.size(), MIN_WEIGHT); + for (SmallPtrSet::iterator I = UnreachableEdges.begin(), + E = UnreachableEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, UnreachableWeight); + + if (ReachableEdges.empty()) + return true; + uint32_t ReachableWeight = + std::max(UR_NONTAKEN_WEIGHT / ReachableEdges.size(), NORMAL_WEIGHT); + for (SmallPtrSet::iterator I = ReachableEdges.begin(), + E = ReachableEdges.end(); + I != E; ++I) + setEdgeWeight(BB, *I, ReachableWeight); + + return true; +} // Propagate existing explicit probabilities from either profile data or // 'expect' intrinsic processing. @@ -143,46 +178,6 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) { return true; } -// Calculate Edge Weights using "Return Heuristics". Predict a successor which -// leads directly to Return Instruction will not be taken. -bool BranchProbabilityInfo::calcReturnHeuristics(BasicBlock *BB){ - if (BB->getTerminator()->getNumSuccessors() == 1) - return false; - - SmallPtrSet ReturningEdges; - SmallPtrSet StayEdges; - - for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - BasicBlock *Succ = *I; - if (isReturningBlock(Succ)) - ReturningEdges.insert(Succ); - else - StayEdges.insert(Succ); - } - - if (uint32_t numStayEdges = StayEdges.size()) { - uint32_t stayWeight = RH_TAKEN_WEIGHT / numStayEdges; - if (stayWeight < NORMAL_WEIGHT) - stayWeight = NORMAL_WEIGHT; - - for (SmallPtrSet::iterator I = StayEdges.begin(), - E = StayEdges.end(); I != E; ++I) - setEdgeWeight(BB, *I, stayWeight); - } - - if (uint32_t numRetEdges = ReturningEdges.size()) { - uint32_t retWeight = RH_NONTAKEN_WEIGHT / numRetEdges; - if (retWeight < MIN_WEIGHT) - retWeight = MIN_WEIGHT; - for (SmallPtrSet::iterator I = ReturningEdges.begin(), - E = ReturningEdges.end(); I != E; ++I) { - setEdgeWeight(BB, *I, retWeight); - } - } - - 
return ReturningEdges.size() > 0; -} - // Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion // between two pointer or pointer and NULL will fail. bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { @@ -390,20 +385,28 @@ void BranchProbabilityInfo::getAnalysisUsage(AnalysisUsage &AU) const { bool BranchProbabilityInfo::runOnFunction(Function &F) { LastF = &F; // Store the last function we ran on for printing. LI = &getAnalysis<LoopInfo>(); - - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - if (calcMetadataWeights(I)) + assert(PostDominatedByUnreachable.empty()); + + // Walk the basic blocks in post-order so that we can build up state about + // the successors of a block iteratively. + for (po_iterator<BasicBlock *> I = po_begin(&F.getEntryBlock()), + E = po_end(&F.getEntryBlock()); + I != E; ++I) { + DEBUG(dbgs() << "Computing probabilities for " << I->getName() << "\n"); + if (calcUnreachableHeuristics(*I)) continue; - if (calcLoopBranchHeuristics(I)) + if (calcMetadataWeights(*I)) continue; - if (calcReturnHeuristics(I)) + if (calcLoopBranchHeuristics(*I)) continue; - if (calcPointerHeuristics(I)) + if (calcPointerHeuristics(*I)) continue; - if (calcZeroHeuristics(I)) + if (calcZeroHeuristics(*I)) continue; - calcFloatingPointHeuristics(I); + calcFloatingPointHeuristics(*I); } + + PostDominatedByUnreachable.clear(); return false; } -- cgit v1.1 From e4617c04c863b2fb342d08408d45ba3bf50b97a0 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 24 Oct 2011 16:51:55 +0000 Subject: Sink an otherwise unused variable's initializer into the asserts that used it. Fixes an unused variable warning from GCC on release builds.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142799 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 32eb70e..21582b9 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -395,9 +395,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { void MachineBlockPlacement::placeChainsTopologically(MachineFunction &F) { MachineBasicBlock *EntryB = &F.front(); - BlockChain *EntryChain = BlockToChain[EntryB]; - assert(EntryChain && "Missing chain for entry block"); - assert(*EntryChain->begin() == EntryB && + assert(BlockToChain[EntryB] && "Missing chain for entry block"); + assert(*BlockToChain[EntryB]->begin() == EntryB && "Entry block is not the head of the entry block chain"); // Walk the blocks in RPO, and insert each block for a chain in order the -- cgit v1.1 From d2dae0cfa00ca3c5164b2543ed712ace6c08386b Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 24 Oct 2011 17:12:36 +0000 Subject: Cleanup. Get rid of the old SjLj EH lowering code. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142800 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SjLjEHPrepare.cpp | 594 +----------------------------------------- 1 file changed, 10 insertions(+), 584 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index e5cb1bb..ac88441 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -30,17 +30,13 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/IRBuilder.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include using namespace llvm; -static cl::opt DisableOldSjLjEH("disable-old-sjlj-eh", cl::Hidden, - cl::init(true), - cl::desc("Disable the old SjLj EH preparation pass")); - STATISTIC(NumInvokes, "Number of invokes replaced"); -STATISTIC(NumUnwinds, "Number of unwinds replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { @@ -52,16 +48,12 @@ namespace { Constant *BuiltinSetjmpFn; Constant *FrameAddrFn; Constant *StackAddrFn; - Constant *StackRestoreFn; Constant *LSDAAddrFn; Value *PersonalityFn; - Constant *SelectorFn; - Constant *ExceptionFn; Constant *CallSiteFn; Constant *DispatchSetupFn; Constant *FuncCtxFn; Value *CallSite; - DenseMap LPadSuccMap; public: static char ID; // Pass identification, replacement for typeid explicit SjLjEHPass(const TargetLowering *tli = NULL) @@ -79,13 +71,7 @@ namespace { Value *setupFunctionContext(Function &F, ArrayRef LPads); void lowerIncomingArguments(Function &F); void lowerAcrossUnwindEdges(Function &F, ArrayRef Invokes); - void insertCallSiteStore(Instruction *I, int Number, Value *CallSite); - void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, - SwitchInst *CatchSwitch); - void splitLiveRangesAcrossInvokes(SmallVector &Invokes); - void splitLandingPad(InvokeInst *II); - bool insertSjLjEHSupport(Function &F); }; } // end 
anonymous namespace @@ -121,11 +107,8 @@ bool SjLjEHPass::doInitialization(Module &M) { (Type *)0); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); - StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); - SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); - ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception); CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite); DispatchSetupFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup); @@ -145,572 +128,16 @@ void SjLjEHPass::insertCallSiteStore(Instruction *I, int Number, new StoreInst(CallSiteNoC, CallSite, true, I); // volatile } -/// splitLandingPad - Split a landing pad. This takes considerable care because -/// of PHIs and other nasties. The problem is that the jump table needs to jump -/// to the landing pad block. However, the landing pad block can be jumped to -/// only by an invoke instruction. So we clone the landingpad instruction into -/// its own basic block, have the invoke jump to there. The landingpad -/// instruction's basic block's successor is now the target for the jump table. -/// -/// But because of PHI nodes, we need to create another basic block for the jump -/// table to jump to. This is definitely a hack, because the values for the PHI -/// nodes may not be defined on the edge from the jump table. But that's okay, -/// because the jump table is simply a construct to mimic what is happening in -/// the CFG. So the values are mysteriously there, even though there is no value -/// for the PHI from the jump table's edge (hence calling this a hack). 
-void SjLjEHPass::splitLandingPad(InvokeInst *II) { - SmallVector NewBBs; - SplitLandingPadPredecessors(II->getUnwindDest(), II->getParent(), - ".1", ".2", this, NewBBs); - - // Create an empty block so that the jump table has something to jump to - // which doesn't have any PHI nodes. - BasicBlock *LPad = NewBBs[0]; - BasicBlock *Succ = *succ_begin(LPad); - BasicBlock *JumpTo = BasicBlock::Create(II->getContext(), "jt.land", - LPad->getParent(), Succ); - LPad->getTerminator()->eraseFromParent(); - BranchInst::Create(JumpTo, LPad); - BranchInst::Create(Succ, JumpTo); - LPadSuccMap[II] = JumpTo; - - for (BasicBlock::iterator I = Succ->begin(); isa(I); ++I) { - PHINode *PN = cast(I); - Value *Val = PN->removeIncomingValue(LPad, false); - PN->addIncoming(Val, JumpTo); - } -} - -/// markInvokeCallSite - Insert code to mark the call_site for this invoke -void SjLjEHPass::markInvokeCallSite(InvokeInst *II, int InvokeNo, - Value *CallSite, - SwitchInst *CatchSwitch) { - ConstantInt *CallSiteNoC= ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo); - // The runtime comes back to the dispatcher with the call_site - 1 in - // the context. Odd, but there it is. - ConstantInt *SwitchValC = ConstantInt::get(Type::getInt32Ty(II->getContext()), - InvokeNo - 1); - - // If the unwind edge has phi nodes, split the edge. - if (isa(II->getUnwindDest()->begin())) { - // FIXME: New EH - This if-condition will be always true in the new scheme. - if (II->getUnwindDest()->isLandingPad()) - splitLandingPad(II); - else - SplitCriticalEdge(II, 1, this); - - // If there are any phi nodes left, they must have a single predecessor. - while (PHINode *PN = dyn_cast(II->getUnwindDest()->begin())) { - PN->replaceAllUsesWith(PN->getIncomingValue(0)); - PN->eraseFromParent(); - } - } - - // Insert the store of the call site value - insertCallSiteStore(II, InvokeNo, CallSite); - - // Record the call site value for the back end so it stays associated with - // the invoke. 
- CallInst::Create(CallSiteFn, CallSiteNoC, "", II); - - // Add a switch case to our unwind block. - if (BasicBlock *SuccBB = LPadSuccMap[II]) { - CatchSwitch->addCase(SwitchValC, SuccBB); - } else { - CatchSwitch->addCase(SwitchValC, II->getUnwindDest()); - } - - // We still want this to look like an invoke so we emit the LSDA properly, - // so we don't transform the invoke into a call here. -} - /// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until /// we reach blocks we've already seen. -static void MarkBlocksLiveIn(BasicBlock *BB, std::set &LiveBBs) { - if (!LiveBBs.insert(BB).second) return; // already been here. +static void MarkBlocksLiveIn(BasicBlock *BB, + SmallPtrSet &LiveBBs) { + if (!LiveBBs.insert(BB)) return; // already been here. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) MarkBlocksLiveIn(*PI, LiveBBs); } -/// splitLiveRangesAcrossInvokes - Each value that is live across an unwind edge -/// we spill into a stack location, guaranteeing that there is nothing live -/// across the unwind edge. This process also splits all critical edges -/// coming out of invoke's. -/// FIXME: Move this function to a common utility file (Local.cpp?) so -/// both SjLj and LowerInvoke can use it. -void SjLjEHPass:: -splitLiveRangesAcrossInvokes(SmallVector &Invokes) { - // First step, split all critical edges from invoke instructions. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - InvokeInst *II = Invokes[i]; - SplitCriticalEdge(II, 0, this); - - // FIXME: New EH - This if-condition will be always true in the new scheme. 
- if (II->getUnwindDest()->isLandingPad()) - splitLandingPad(II); - else - SplitCriticalEdge(II, 1, this); - - assert(!isa(II->getNormalDest()) && - !isa(II->getUnwindDest()) && - "Critical edge splitting left single entry phi nodes?"); - } - - Function *F = Invokes.back()->getParent()->getParent(); - - // To avoid having to handle incoming arguments specially, we lower each arg - // to a copy instruction in the entry block. This ensures that the argument - // value itself cannot be live across the entry block. - BasicBlock::iterator AfterAllocaInsertPt = F->begin()->begin(); - while (isa(AfterAllocaInsertPt) && - isa(cast(AfterAllocaInsertPt)->getArraySize())) - ++AfterAllocaInsertPt; - for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); - AI != E; ++AI) { - Type *Ty = AI->getType(); - // Aggregate types can't be cast, but are legal argument types, so we have - // to handle them differently. We use an extract/insert pair as a - // lightweight method to achieve the same goal. - if (isa(Ty) || isa(Ty) || isa(Ty)) { - Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); - Instruction *NI = InsertValueInst::Create(AI, EI, 0); - NI->insertAfter(EI); - AI->replaceAllUsesWith(NI); - // Set the operand of the instructions back to the AllocaInst. - EI->setOperand(0, AI); - NI->setOperand(0, AI); - } else { - // This is always a no-op cast because we're casting AI to AI->getType() - // so src and destination types are identical. BitCast is the only - // possibility. - CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Set the operand of the cast instruction back to the AllocaInst. - // Normally it's forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. However, - // we're replacing it here with the same value it was constructed with. 
- // We do this because the above replaceAllUsesWith() clobbered the - // operand, but we want this one to remain. - NC->setOperand(0, AI); - } - } - - // Finally, scan the code looking for instructions with bad live ranges. - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - // Ignore obvious cases we don't have to handle. In particular, most - // instructions either have no uses or only have a single use inside the - // current block. Ignore them quickly. - Instruction *Inst = II; - if (Inst->use_empty()) continue; - if (Inst->hasOneUse() && - cast(Inst->use_back())->getParent() == BB && - !isa(Inst->use_back())) continue; - - // If this is an alloca in the entry block, it's not a real register - // value. - if (AllocaInst *AI = dyn_cast(Inst)) - if (isa(AI->getArraySize()) && BB == F->begin()) - continue; - - // Avoid iterator invalidation by copying users to a temporary vector. - SmallVector Users; - for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); - UI != E; ++UI) { - Instruction *User = cast(*UI); - if (User->getParent() != BB || isa(User)) - Users.push_back(User); - } - - // Find all of the blocks that this value is live in. - std::set LiveBBs; - LiveBBs.insert(Inst->getParent()); - while (!Users.empty()) { - Instruction *U = Users.back(); - Users.pop_back(); - - if (!isa(U)) { - MarkBlocksLiveIn(U->getParent(), LiveBBs); - } else { - // Uses for a PHI node occur in their predecessor block. - PHINode *PN = cast(U); - for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) - if (PN->getIncomingValue(i) == Inst) - MarkBlocksLiveIn(PN->getIncomingBlock(i), LiveBBs); - } - } - - // Now that we know all of the blocks that this thing is live in, see if - // it includes any of the unwind locations. 
- bool NeedsSpill = false; - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { - BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); - if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) - NeedsSpill = true; - } - - // If we decided we need a spill, do it. - // FIXME: Spilling this way is overkill, as it forces all uses of - // the value to be reloaded from the stack slot, even those that aren't - // in the unwind blocks. We should be more selective. - if (NeedsSpill) { - ++NumSpilled; - DemoteRegToStack(*Inst, true); - } - } -} - -/// CreateLandingPadLoad - Load the exception handling values and insert them -/// into a structure. -static Instruction *CreateLandingPadLoad(Function &F, Value *ExnAddr, - Value *SelAddr, - BasicBlock::iterator InsertPt) { - Value *Exn = new LoadInst(ExnAddr, "exn", false, - InsertPt); - Type *Ty = Type::getInt8PtrTy(F.getContext()); - Exn = CastInst::Create(Instruction::IntToPtr, Exn, Ty, "", InsertPt); - Value *Sel = new LoadInst(SelAddr, "sel", false, InsertPt); - - Ty = StructType::get(Exn->getType(), Sel->getType(), NULL); - InsertValueInst *LPadVal = InsertValueInst::Create(llvm::UndefValue::get(Ty), - Exn, 0, - "lpad.val", InsertPt); - return InsertValueInst::Create(LPadVal, Sel, 1, "lpad.val", InsertPt); -} - -/// ReplaceLandingPadVal - Replace the landingpad instruction's value with a -/// load from the stored values (via CreateLandingPadLoad). This looks through -/// PHI nodes, and removes them if they are dead. 
-static void ReplaceLandingPadVal(Function &F, Instruction *Inst, Value *ExnAddr, - Value *SelAddr) { - if (Inst->use_empty()) return; - - while (!Inst->use_empty()) { - Instruction *I = cast(Inst->use_back()); - - if (PHINode *PN = dyn_cast(I)) { - ReplaceLandingPadVal(F, PN, ExnAddr, SelAddr); - if (PN->use_empty()) PN->eraseFromParent(); - continue; - } - - I->replaceUsesOfWith(Inst, CreateLandingPadLoad(F, ExnAddr, SelAddr, I)); - } -} - -bool SjLjEHPass::insertSjLjEHSupport(Function &F) { - SmallVector Returns; - SmallVector Unwinds; - SmallVector Invokes; - - // Look through the terminators of the basic blocks to find invokes, returns - // and unwinds. - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { - // Remember all return instructions in case we insert an invoke into this - // function. - Returns.push_back(RI); - } else if (InvokeInst *II = dyn_cast(BB->getTerminator())) { - Invokes.push_back(II); - } else if (UnwindInst *UI = dyn_cast(BB->getTerminator())) { - Unwinds.push_back(UI); - } - } - - NumInvokes += Invokes.size(); - NumUnwinds += Unwinds.size(); - - // If we don't have any invokes, there's nothing to do. - if (Invokes.empty()) return false; - - // Find the eh.selector.*, eh.exception and alloca calls. - // - // Remember any allocas() that aren't in the entry block, as the - // jmpbuf saved SP will need to be updated for them. - // - // We'll use the first eh.selector to determine the right personality - // function to use. For SJLJ, we always use the same personality for the - // whole function, not on a per-selector basis. - // FIXME: That's a bit ugly. Better way? - SmallVector EH_Selectors; - SmallVector EH_Exceptions; - SmallVector JmpbufUpdatePoints; - - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - // Note: Skip the entry block since there's nothing there that interests - // us. 
eh.selector and eh.exception shouldn't ever be there, and we - // want to disregard any allocas that are there. - // - // FIXME: This is awkward. The new EH scheme won't need to skip the entry - // block. - if (BB == F.begin()) { - if (InvokeInst *II = dyn_cast(F.begin()->getTerminator())) { - // FIXME: This will be always non-NULL in the new EH. - if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn(); - } - - continue; - } - - for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (CallInst *CI = dyn_cast(I)) { - if (CI->getCalledFunction() == SelectorFn) { - if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1); - EH_Selectors.push_back(CI); - } else if (CI->getCalledFunction() == ExceptionFn) { - EH_Exceptions.push_back(CI); - } else if (CI->getCalledFunction() == StackRestoreFn) { - JmpbufUpdatePoints.push_back(CI); - } - } else if (AllocaInst *AI = dyn_cast(I)) { - JmpbufUpdatePoints.push_back(AI); - } else if (InvokeInst *II = dyn_cast(I)) { - // FIXME: This will be always non-NULL in the new EH. - if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - if (!PersonalityFn) PersonalityFn = LPI->getPersonalityFn(); - } - } - } - - // If we don't have any eh.selector calls, we can't determine the personality - // function. Without a personality function, we can't process exceptions. - if (!PersonalityFn) return false; - - // We have invokes, so we need to add register/unregister calls to get this - // function onto the global unwind stack. - // - // First thing we need to do is scan the whole function for values that are - // live across unwind edges. Each value that is live across an unwind edge we - // spill into a stack location, guaranteeing that there is nothing live across - // the unwind edge. This process also splits all critical edges coming out of - // invoke's. 
- splitLiveRangesAcrossInvokes(Invokes); - - - SmallVector LandingPads; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { - if (InvokeInst *II = dyn_cast(BB->getTerminator())) - // FIXME: This will be always non-NULL in the new EH. - if (LandingPadInst *LPI = II->getUnwindDest()->getLandingPadInst()) - LandingPads.push_back(LPI); - } - - - BasicBlock *EntryBB = F.begin(); - // Create an alloca for the incoming jump buffer ptr and the new jump buffer - // that needs to be restored on all exits from the function. This is an - // alloca because the value needs to be added to the global context list. - unsigned Align = 4; // FIXME: Should be a TLI check? - AllocaInst *FunctionContext = - new AllocaInst(FunctionContextTy, 0, Align, - "fcn_context", F.begin()->begin()); - - Value *Idxs[2]; - Type *Int32Ty = Type::getInt32Ty(F.getContext()); - Value *Zero = ConstantInt::get(Int32Ty, 0); - // We need to also keep around a reference to the call_site field - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 1); - CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, "call_site", - EntryBB->getTerminator()); - - // The exception selector comes back in context->data[1] - Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, "fc_data", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 1); - Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, - "exc_selector_gep", - EntryBB->getTerminator()); - // The exception value comes back in context->data[0] - Idxs[1] = Zero; - Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, - "exception_gep", - EntryBB->getTerminator()); - - // The result of the eh.selector call will be replaced with a a reference to - // the selector value returned in the function context. We leave the selector - // itself so the EH analysis later can use it. 
- for (int i = 0, e = EH_Selectors.size(); i < e; ++i) { - CallInst *I = EH_Selectors[i]; - Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I); - I->replaceAllUsesWith(SelectorVal); - } - - // eh.exception calls are replaced with references to the proper location in - // the context. Unlike eh.selector, the eh.exception calls are removed - // entirely. - for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) { - CallInst *I = EH_Exceptions[i]; - // Possible for there to be duplicates, so check to make sure the - // instruction hasn't already been removed. - if (!I->getParent()) continue; - Value *Val = new LoadInst(ExceptionAddr, "exception", true, I); - Type *Ty = Type::getInt8PtrTy(F.getContext()); - Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I); - - I->replaceAllUsesWith(Val); - I->eraseFromParent(); - } - - for (unsigned i = 0, e = LandingPads.size(); i != e; ++i) - ReplaceLandingPadVal(F, LandingPads[i], ExceptionAddr, SelectorAddr); - - // The entry block changes to have the eh.sjlj.setjmp, with a conditional - // branch to a dispatch block for non-zero returns. If we return normally, - // we're not handling an exception and just register the function context and - // continue. - - // Create the dispatch block. The dispatch block is basically a big switch - // statement that goes to all of the invoke landing pads. - BasicBlock *DispatchBlock = - BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F); - - // Insert a load of the callsite in the dispatch block, and a switch on its - // value. By default, we issue a trap statement. 
- BasicBlock *TrapBlock = - BasicBlock::Create(F.getContext(), "trapbb", &F); - CallInst::Create(Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap), - "", TrapBlock); - new UnreachableInst(F.getContext(), TrapBlock); - - Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true, - DispatchBlock); - SwitchInst *DispatchSwitch = - SwitchInst::Create(DispatchLoad, TrapBlock, Invokes.size(), - DispatchBlock); - // Split the entry block to insert the conditional branch for the setjmp. - BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(), - "eh.sjlj.setjmp.cont"); - - // Populate the Function Context - // 1. LSDA address - // 2. Personality function address - // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp) - - // LSDA address - Idxs[0] = Zero; - Idxs[1] = ConstantInt::get(Int32Ty, 4); - Value *LSDAFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep", - EntryBB->getTerminator()); - Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr", - EntryBB->getTerminator()); - new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator()); - - Idxs[1] = ConstantInt::get(Int32Ty, 3); - Value *PersonalityFieldPtr = - GetElementPtrInst::Create(FunctionContext, Idxs, "lsda_gep", - EntryBB->getTerminator()); - new StoreInst(PersonalityFn, PersonalityFieldPtr, true, - EntryBB->getTerminator()); - - // Save the frame pointer. - Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *JBufPtr - = GetElementPtrInst::Create(FunctionContext, Idxs, "jbuf_gep", - EntryBB->getTerminator()); - Idxs[1] = ConstantInt::get(Int32Ty, 0); - Value *FramePtr = - GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_fp_gep", - EntryBB->getTerminator()); - - Value *Val = CallInst::Create(FrameAddrFn, - ConstantInt::get(Int32Ty, 0), - "fp", - EntryBB->getTerminator()); - new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); - - // Save the stack pointer. 
- Idxs[1] = ConstantInt::get(Int32Ty, 2); - Value *StackPtr = - GetElementPtrInst::Create(JBufPtr, Idxs, "jbuf_sp_gep", - EntryBB->getTerminator()); - - Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); - new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); - - // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. - Value *SetjmpArg = - CastInst::Create(Instruction::BitCast, JBufPtr, - Type::getInt8PtrTy(F.getContext()), "", - EntryBB->getTerminator()); - Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, - "", - EntryBB->getTerminator()); - - // Add a call to dispatch_setup after the setjmp call. This is expanded to any - // target-specific setup that needs to be done. - CallInst::Create(DispatchSetupFn, DispatchVal, "", EntryBB->getTerminator()); - - // check the return value of the setjmp. non-zero goes to dispatcher. - Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), - ICmpInst::ICMP_EQ, DispatchVal, Zero, - "notunwind"); - // Nuke the uncond branch. - EntryBB->getTerminator()->eraseFromParent(); - - // Put in a new condbranch in its place. - BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB); - - // Register the function context and make sure it's known to not throw - CallInst *Register = - CallInst::Create(RegisterFn, FunctionContext, "", - ContBlock->getTerminator()); - Register->setDoesNotThrow(); - - // At this point, we are all set up, update the invoke instructions to mark - // their call_site values, and fill in the dispatch switch accordingly. - for (unsigned i = 0, e = Invokes.size(); i != e; ++i) - markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch); - - // Mark call instructions that aren't nounwind as no-action (call_site == - // -1). 
Skip the entry block, as prior to then, no function context has been - // created for this function and any unexpected exceptions thrown will go - // directly to the caller's context, which is what we want anyway, so no need - // to do anything here. - for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { - for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I) - if (CallInst *CI = dyn_cast(I)) { - // Ignore calls to the EH builtins (eh.selector, eh.exception) - Constant *Callee = CI->getCalledFunction(); - if (Callee != SelectorFn && Callee != ExceptionFn - && !CI->doesNotThrow()) - insertCallSiteStore(CI, -1, CallSite); - } else if (ResumeInst *RI = dyn_cast(I)) { - insertCallSiteStore(RI, -1, CallSite); - } - } - - // Replace all unwinds with a branch to the unwind handler. - // ??? Should this ever happen with sjlj exceptions? - for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) { - BranchInst::Create(TrapBlock, Unwinds[i]); - Unwinds[i]->eraseFromParent(); - } - - // Following any allocas not in the entry block, update the saved SP in the - // jmpbuf to the new value. - for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) { - Instruction *AI = JmpbufUpdatePoints[i]; - Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); - StackAddr->insertAfter(AI); - Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); - StoreStackAddr->insertAfter(StackAddr); - } - - // Finally, for any returns from this function, if this function contains an - // invoke, add a call to unregister the function context. - for (unsigned i = 0, e = Returns.size(); i != e; ++i) - CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]); - - return true; -} - /// setupFunctionContext - Allocate the function context on the stack and fill /// it with all of the data that we know at this point. 
Value *SjLjEHPass:: @@ -871,7 +298,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, } // Find all of the blocks that this value is live in. - std::set LiveBBs; + SmallPtrSet LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); @@ -895,6 +322,7 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { NeedsSpill = true; + break; } } @@ -903,8 +331,8 @@ void SjLjEHPass::lowerAcrossUnwindEdges(Function &F, // the value to be reloaded from the stack slot, even those that aren't // in the unwind blocks. We should be more selective. if (NeedsSpill) { - ++NumSpilled; DemoteRegToStack(*Inst, true); + ++NumSpilled; } } } @@ -950,6 +378,8 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { if (Invokes.empty()) return false; + NumInvokes += Invokes.size(); + lowerIncomingArguments(F); lowerAcrossUnwindEdges(F, Invokes); @@ -1039,10 +469,6 @@ bool SjLjEHPass::setupEntryBlockAndCallSites(Function &F) { } bool SjLjEHPass::runOnFunction(Function &F) { - bool Res = false; - if (!DisableOldSjLjEH) - Res = insertSjLjEHSupport(F); - else - Res = setupEntryBlockAndCallSites(F); + bool Res = setupEntryBlockAndCallSites(F); return Res; } -- cgit v1.1 From f8e74f816df2d0b83e3fe08da3dff4e8c2421e5e Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 24 Oct 2011 17:16:24 +0000 Subject: Thumb2 LDM instructions can target PC. Make sure to encode it. 
PR11220 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142801 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 9d85055..a65a75f 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1543,8 +1543,7 @@ multiclass thumb2_ld_mult Date: Mon, 24 Oct 2011 17:45:02 +0000 Subject: Change the default scheduler from Latency to ILP, since Latency is going away. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142810 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 57cc398..f1807ca 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -610,7 +610,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm, ExceptionSelectorRegister = 0; BooleanContents = UndefinedBooleanContent; BooleanVectorContents = UndefinedBooleanContent; - SchedPreferenceInfo = Sched::Latency; + SchedPreferenceInfo = Sched::ILP; JumpBufSize = 0; JumpBufAlignment = 0; MinFunctionAlignment = 0; -- cgit v1.1 From 334190e47eb28fe749380594992a7ae2e1c7737f Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 24 Oct 2011 17:53:16 +0000 Subject: Remove the explicit request for "Latency" scheduling from MSP430, as the Latency scheduler is going away. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142811 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MSP430/MSP430ISelLowering.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index dc37431..e837ef8 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -80,7 +80,6 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) : setStackPointerRegisterToSaveRestore(MSP430::SPW); setBooleanContents(ZeroOrOneBooleanContent); setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - setSchedulingPreference(Sched::Latency); // We have post-incremented loads / stores. setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal); -- cgit v1.1 From 692c1d85353249124caa1885cfeda513146c6d81 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 24 Oct 2011 17:55:11 +0000 Subject: Change this overloaded use of Sched::Latency to be an overloaded use of Sched::ILP instead, as Sched::Latency is going away. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142813 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 8 ++++---- lib/Target/ARM/ARMISelLowering.cpp | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 8ecbf34..a1abdb4 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -2100,9 +2100,9 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, int LHeight = (int)left->getHeight() + LPenalty; int RHeight = (int)right->getHeight() + RPenalty; - bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) && + bool LStall = (!checkPref || left->SchedulingPref == Sched::ILP) && BUHasStall(left, LHeight, SPQ); - bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) && + bool RStall = (!checkPref || right->SchedulingPref == Sched::ILP) && BUHasStall(right, RHeight, SPQ); // If scheduling one of the node will cause a pipeline stall, delay it. @@ -2124,8 +2124,8 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, // If either node is scheduling for latency, sort them by height/depth // and latency. 
- if (!checkPref || (left->SchedulingPref == Sched::Latency || - right->SchedulingPref == Sched::Latency)) { + if (!checkPref || (left->SchedulingPref == Sched::ILP || + right->SchedulingPref == Sched::ILP)) { if (DisableSchedCycles) { if (LHeight != RHeight) { DEBUG(++FactorCount[FactHeight]); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 72ea6ac..422a40f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -986,7 +986,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { if (VT == MVT::Glue || VT == MVT::Other) continue; if (VT.isFloatingPoint() || VT.isVector()) - return Sched::Latency; + return Sched::ILP; } if (!N->isMachineOpcode()) @@ -1001,7 +1001,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { return Sched::RegPressure; if (!Itins->isEmpty() && Itins->getOperandCycle(MCID.getSchedClass(), 0) > 2) - return Sched::Latency; + return Sched::ILP; return Sched::RegPressure; } -- cgit v1.1 From d5333d6922fa5ce8954df600a61605e4ca1d92f6 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 24 Oct 2011 17:56:48 +0000 Subject: Delete the Latency scheduling preference. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142815 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 68b9146..2964bd3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -144,8 +144,6 @@ namespace llvm { if (OptLevel == CodeGenOpt::None) return createSourceListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::Latency) - return createTDListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::Hybrid) -- cgit v1.1 From 83dae4466e3abfd1493453588809e9f9ff10ebca Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 24 Oct 2011 18:01:06 +0000 Subject: Delete the top-down "Latency" scheduler. Top-down scheduling doesn't handle physreg dependencies, and upcoming codegen changes will require proper physreg dependence handling. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142816 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 265 --------------------------- 1 file changed, 265 deletions(-) delete mode 100644 lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp deleted file mode 100644 index 430283d..0000000 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp +++ /dev/null @@ -1,265 +0,0 @@ -//===---- ScheduleDAGList.cpp - Implement a list scheduler for isel DAG ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This implements a top-down list scheduler, using standard algorithms. -// The basic approach uses a priority queue of available nodes to schedule. -// One at a time, nodes are taken from the priority queue (thus in priority -// order), checked for legality to schedule, and emitted if legal. -// -// Nodes may not be legal to schedule either due to structural hazards (e.g. -// pipeline or resource constraints) or because an input to the instruction has -// not completed execution. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "pre-RA-sched" -#include "ScheduleDAGSDNodes.h" -#include "llvm/CodeGen/LatencyPriorityQueue.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include -using namespace llvm; - -STATISTIC(NumNoops , "Number of noops inserted"); -STATISTIC(NumStalls, "Number of pipeline stalls"); - -static RegisterScheduler - tdListDAGScheduler("list-td", "Top-down list scheduler", - createTDListDAGScheduler); - -namespace { -//===----------------------------------------------------------------------===// -/// ScheduleDAGList - The actual list scheduler implementation. This supports -/// top-down scheduling. -/// -class ScheduleDAGList : public ScheduleDAGSDNodes { -private: - /// AvailableQueue - The priority queue to use for the available SUnits. 
- /// - SchedulingPriorityQueue *AvailableQueue; - - /// PendingQueue - This contains all of the instructions whose operands have - /// been issued, but their results are not ready yet (due to the latency of - /// the operation). Once the operands become available, the instruction is - /// added to the AvailableQueue. - std::vector PendingQueue; - - /// HazardRec - The hazard recognizer to use. - ScheduleHazardRecognizer *HazardRec; - -public: - ScheduleDAGList(MachineFunction &mf, - SchedulingPriorityQueue *availqueue) - : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) { - - const TargetMachine &tm = mf.getTarget(); - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); - } - - ~ScheduleDAGList() { - delete HazardRec; - delete AvailableQueue; - } - - void Schedule(); - -private: - void ReleaseSucc(SUnit *SU, const SDep &D); - void ReleaseSuccessors(SUnit *SU); - void ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle); - void ListScheduleTopDown(); -}; -} // end anonymous namespace - -/// Schedule - Schedule the DAG using list scheduling. -void ScheduleDAGList::Schedule() { - DEBUG(dbgs() << "********** List Scheduling **********\n"); - - // Build the scheduling graph. - BuildSchedGraph(NULL); - - AvailableQueue->initNodes(SUnits); - - ListScheduleTopDown(); - - AvailableQueue->releaseState(); -} - -//===----------------------------------------------------------------------===// -// Top-Down Scheduling -//===----------------------------------------------------------------------===// - -/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to -/// the PendingQueue if the count reaches zero. Also update its cycle bound. -void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) { - SUnit *SuccSU = D.getSUnit(); - -#ifndef NDEBUG - if (SuccSU->NumPredsLeft == 0) { - dbgs() << "*** Scheduling failed! 
***\n"; - SuccSU->dump(this); - dbgs() << " has been released too many times!\n"; - llvm_unreachable(0); - } -#endif - --SuccSU->NumPredsLeft; - - SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); - - // If all the node's predecessors are scheduled, this node is ready - // to be scheduled. Ignore the special ExitSU node. - if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU) - PendingQueue.push_back(SuccSU); -} - -void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) { - // Top down: release successors. - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); - I != E; ++I) { - assert(!I->isAssignedRegDep() && - "The list-td scheduler doesn't yet support physreg dependencies!"); - - ReleaseSucc(SU, *I); - } -} - -/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending -/// count of its successors. If a successor pending count is zero, add it to -/// the Available queue. -void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) { - DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: "); - DEBUG(SU->dump(this)); - - Sequence.push_back(SU); - assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!"); - SU->setDepthToAtLeast(CurCycle); - - ReleaseSuccessors(SU); - SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); -} - -/// ListScheduleTopDown - The main loop of list scheduling for top-down -/// schedulers. -void ScheduleDAGList::ListScheduleTopDown() { - unsigned CurCycle = 0; - - // Release any successors of the special Entry node. - ReleaseSuccessors(&EntrySU); - - // All leaves to Available queue. - for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { - // It is available if it has no predecessors. - if (SUnits[i].Preds.empty()) { - AvailableQueue->push(&SUnits[i]); - SUnits[i].isAvailable = true; - } - } - - // While Available queue is not empty, grab the node with the highest - // priority. If it is not ready put it back. Schedule the node. 
- std::vector NotReady; - Sequence.reserve(SUnits.size()); - while (!AvailableQueue->empty() || !PendingQueue.empty()) { - // Check to see if any of the pending instructions are ready to issue. If - // so, add them to the available queue. - for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) { - if (PendingQueue[i]->getDepth() == CurCycle) { - AvailableQueue->push(PendingQueue[i]); - PendingQueue[i]->isAvailable = true; - PendingQueue[i] = PendingQueue.back(); - PendingQueue.pop_back(); - --i; --e; - } else { - assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?"); - } - } - - // If there are no instructions available, don't try to issue anything, and - // don't advance the hazard recognizer. - if (AvailableQueue->empty()) { - ++CurCycle; - continue; - } - - SUnit *FoundSUnit = 0; - - bool HasNoopHazards = false; - while (!AvailableQueue->empty()) { - SUnit *CurSUnit = AvailableQueue->pop(); - - ScheduleHazardRecognizer::HazardType HT = - HazardRec->getHazardType(CurSUnit, 0/*no stalls*/); - if (HT == ScheduleHazardRecognizer::NoHazard) { - FoundSUnit = CurSUnit; - break; - } - - // Remember if this is a noop hazard. - HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard; - - NotReady.push_back(CurSUnit); - } - - // Add the nodes that aren't ready back onto the available list. - if (!NotReady.empty()) { - AvailableQueue->push_all(NotReady); - NotReady.clear(); - } - - // If we found a node to schedule, do it now. - if (FoundSUnit) { - ScheduleNodeTopDown(FoundSUnit, CurCycle); - HazardRec->EmitInstruction(FoundSUnit); - - // If this is a pseudo-op node, we don't want to increment the current - // cycle. - if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! - ++CurCycle; - } else if (!HasNoopHazards) { - // Otherwise, we have a pipeline stall, but no other problem, just advance - // the current cycle and try again. 
- DEBUG(dbgs() << "*** Advancing cycle, no work to do\n"); - HazardRec->AdvanceCycle(); - ++NumStalls; - ++CurCycle; - } else { - // Otherwise, we have no instructions to issue and we have instructions - // that will fault if we don't do this right. This is the case for - // processors without pipeline interlocks and other cases. - DEBUG(dbgs() << "*** Emitting noop\n"); - HazardRec->EmitNoop(); - Sequence.push_back(0); // NULL here means noop - ++NumNoops; - ++CurCycle; - } - } - -#ifndef NDEBUG - VerifySchedule(/*isBottomUp=*/false); -#endif -} - -//===----------------------------------------------------------------------===// -// Public Constructor Functions -//===----------------------------------------------------------------------===// - -/// createTDListDAGScheduler - This creates a top-down list scheduler. -ScheduleDAGSDNodes * -llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) { - return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue()); -} -- cgit v1.1 From a7c98f58ea939e1dfe40bba725fbac698f36c0bb Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Mon, 24 Oct 2011 18:04:29 +0000 Subject: Fix a NEON disassembly case that was broken in the recent refactorings. As more of this code gets refactored, a lot of these manual decoding hooks should get smaller and/or go away entirely. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142817 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 6 ------ 1 file changed, 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 1517625..bc9a09f 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1974,12 +1974,6 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, if (!Check(S, DecodeDPRRegisterClass(Inst, (Rd+1)%32, Address, Decoder))) return MCDisassembler::Fail; break; - case ARM::VLD2b8: - case ARM::VLD2b16: - case ARM::VLD2b32: - case ARM::VLD2b8_UPD: - case ARM::VLD2b16_UPD: - case ARM::VLD2b32_UPD: case ARM::VLD3q8: case ARM::VLD3q16: case ARM::VLD3q32: -- cgit v1.1 From 91336cde455c21429ae89c075a435503d1e84686 Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Mon, 24 Oct 2011 18:09:23 +0000 Subject: Unbreak CMake build git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142821 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 2282f0e..2c8d81a 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -17,6 +17,7 @@ add_llvm_library(LLVMSelectionDAG SelectionDAG.cpp SelectionDAGBuilder.cpp SelectionDAGISel.cpp + SelectionDAGList.cpp SelectionDAGPrinter.cpp TargetLowering.cpp TargetSelectionDAGInfo.cpp -- cgit v1.1 From 88d734de43984df03ed909f9163986fe873b8d1e Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Mon, 24 Oct 2011 18:10:52 +0000 Subject: Really unbreak CMake build git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142822 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/CMakeLists.txt | 4 +--- 1 
file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 2c8d81a..ff4184f 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -11,13 +11,11 @@ add_llvm_library(LLVMSelectionDAG LegalizeVectorOps.cpp LegalizeVectorTypes.cpp ScheduleDAGFast.cpp - ScheduleDAGList.cpp - ScheduleDAGRRList.cpp + ScheduleDAGRRList.cpp ScheduleDAGSDNodes.cpp SelectionDAG.cpp SelectionDAGBuilder.cpp SelectionDAGISel.cpp - SelectionDAGList.cpp SelectionDAGPrinter.cpp TargetLowering.cpp TargetSelectionDAGInfo.cpp -- cgit v1.1 From 5f4e02cbf1d0fa0d8048369264201c258f2ad717 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Mon, 24 Oct 2011 20:19:18 +0000 Subject: Stub out some of the MachO relocation decoding hooks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142840 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 7c6c232..19396be 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -641,6 +641,8 @@ error_code MachOObjectFile::getRelocationType(DataRefImpl Rel, } error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, SmallVectorImpl &Result) const { + StringRef res = "Unknown"; + Result.append(res.begin(), res.end()); return object_error::success; } error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel, @@ -666,6 +668,8 @@ error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel, } error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, SmallVectorImpl &Result) const { + StringRef res = "Unknown"; + Result.append(res.begin(), res.end()); return object_error::success; } -- cgit v1.1 From 3f3f6b067c62d512e4e0b0c54bfc331bd34be338 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: 
Mon, 24 Oct 2011 20:24:21 +0000 Subject: Add support to the old JIT for acquire/release loads and stores on x86. PR11207. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142841 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86CodeEmitter.cpp | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp index d94ba33..ba615a8 100644 --- a/lib/Target/X86/X86CodeEmitter.cpp +++ b/lib/Target/X86/X86CodeEmitter.cpp @@ -589,6 +589,13 @@ void Emitter::emitMemModRMByte(const MachineInstr &MI, } } +static const MCInstrDesc *UpdateOp(MachineInstr &MI, const X86InstrInfo *II, + unsigned Opcode) { + const MCInstrDesc *Desc = &II->get(Opcode); + MI.setDesc(*Desc); + return Desc; +} + template void Emitter::emitInstruction(MachineInstr &MI, const MCInstrDesc *Desc) { @@ -596,15 +603,23 @@ void Emitter::emitInstruction(MachineInstr &MI, // If this is a pseudo instruction, lower it. 
switch (Desc->getOpcode()) { - case X86::ADD16rr_DB: Desc = &II->get(X86::OR16rr); MI.setDesc(*Desc);break; - case X86::ADD32rr_DB: Desc = &II->get(X86::OR32rr); MI.setDesc(*Desc);break; - case X86::ADD64rr_DB: Desc = &II->get(X86::OR64rr); MI.setDesc(*Desc);break; - case X86::ADD16ri_DB: Desc = &II->get(X86::OR16ri); MI.setDesc(*Desc);break; - case X86::ADD32ri_DB: Desc = &II->get(X86::OR32ri); MI.setDesc(*Desc);break; - case X86::ADD64ri32_DB:Desc = &II->get(X86::OR64ri32);MI.setDesc(*Desc);break; - case X86::ADD16ri8_DB: Desc = &II->get(X86::OR16ri8);MI.setDesc(*Desc);break; - case X86::ADD32ri8_DB: Desc = &II->get(X86::OR32ri8);MI.setDesc(*Desc);break; - case X86::ADD64ri8_DB: Desc = &II->get(X86::OR64ri8);MI.setDesc(*Desc);break; + case X86::ADD16rr_DB: Desc = UpdateOp(MI, II, X86::OR16rr); break; + case X86::ADD32rr_DB: Desc = UpdateOp(MI, II, X86::OR32rr); break; + case X86::ADD64rr_DB: Desc = UpdateOp(MI, II, X86::OR64rr); break; + case X86::ADD16ri_DB: Desc = UpdateOp(MI, II, X86::OR16ri); break; + case X86::ADD32ri_DB: Desc = UpdateOp(MI, II, X86::OR32ri); break; + case X86::ADD64ri32_DB: Desc = UpdateOp(MI, II, X86::OR64ri32); break; + case X86::ADD16ri8_DB: Desc = UpdateOp(MI, II, X86::OR16ri8); break; + case X86::ADD32ri8_DB: Desc = UpdateOp(MI, II, X86::OR32ri8); break; + case X86::ADD64ri8_DB: Desc = UpdateOp(MI, II, X86::OR64ri8); break; + case X86::ACQUIRE_MOV8rm: Desc = UpdateOp(MI, II, X86::MOV8rm); break; + case X86::ACQUIRE_MOV16rm: Desc = UpdateOp(MI, II, X86::MOV16rm); break; + case X86::ACQUIRE_MOV32rm: Desc = UpdateOp(MI, II, X86::MOV32rm); break; + case X86::ACQUIRE_MOV64rm: Desc = UpdateOp(MI, II, X86::MOV64rm); break; + case X86::RELEASE_MOV8mr: Desc = UpdateOp(MI, II, X86::MOV8mr); break; + case X86::RELEASE_MOV16mr: Desc = UpdateOp(MI, II, X86::MOV16mr); break; + case X86::RELEASE_MOV32mr: Desc = UpdateOp(MI, II, X86::MOV32mr); break; + case X86::RELEASE_MOV64mr: Desc = UpdateOp(MI, II, X86::MOV64mr); break; } -- cgit v1.1 From 
3846163aee1b14545057c8664f80a18c55309462 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 24 Oct 2011 21:02:38 +0000 Subject: Now that we look at all the header PHIs, we need to consider all the header PHIs when deciding that the loop has stopped evolving. Fixes miscompile in the gcc torture testsuite! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142843 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 1ab6a40..f65cf34 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4844,12 +4844,12 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // EvaluateExpression adds non-phi values to the CurrentIterVals map. DenseMap NextIterVals; Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); - if (NextPHI == CurrentIterVals[PN]) - return RetVal = NextPHI; // Stopped evolving! if (NextPHI == 0) return 0; // Couldn't evaluate! NextIterVals[PN] = NextPHI; + bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; + // Also evaluate the other PHI nodes. However, we don't get to stop if we // cease to be able to evaluate one of them or if they stop evolving, // because that doesn't necessarily prevent us from computing PN. @@ -4858,11 +4858,19 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, PHINode *PHI = dyn_cast(I->first); if (!PHI || PHI == PN || PHI->getParent() != Header) continue; Constant *&NextPHI = NextIterVals[PHI]; - if (NextPHI) continue; // Already computed! - - Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + if (!NextPHI) { // Not already computed. 
+ Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + } + if (NextPHI != I->second) + StoppedEvolving = false; } + + // If all entries in CurrentIterVals == NextIterVals then we can stop + // iterating, the loop can't continue to change. + if (StoppedEvolving) + return RetVal = CurrentIterVals[PN]; + CurrentIterVals.swap(NextIterVals); } } -- cgit v1.1 From 0135fe1854bef035efea1e3113dc6a127ef6e51e Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Mon, 24 Oct 2011 21:44:00 +0000 Subject: Get relocation parsing/dumping to a mostly-working state for MachO files. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142852 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 133 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 127 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 19396be..5bf03ff 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -16,6 +16,7 @@ #include "llvm/Object/MachO.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" #include #include @@ -596,15 +597,15 @@ error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel, } error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const { - const uint8_t* sectAddress = base(); + const uint8_t* sectAddress = 0; if (MachOObj->is64Bit()) { InMemoryStruct Sect; getSection64(Sections[Rel.d.b], Sect); - sectAddress += Sect->Offset; + sectAddress += Sect->Address; } else { InMemoryStruct Sect; getSection(Sections[Rel.d.b], Sect); - sectAddress += Sect->Offset; + sectAddress += Sect->Address; } InMemoryStruct RE; getRelocation(Rel, RE); @@ -641,7 +642,88 @@ error_code MachOObjectFile::getRelocationType(DataRefImpl Rel, } error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, SmallVectorImpl 
&Result) const { - StringRef res = "Unknown"; + // TODO: Support scattered relocations. + StringRef res; + InMemoryStruct RE; + getRelocation(Rel, RE); + unsigned r_type = (RE->Word1 >> 28) & 0xF; + + unsigned Arch = getArch(); + switch (Arch) { + case Triple::x86: { + const char* Table[] = { + "GENERIC_RELOC_VANILLA", + "GENERIC_RELOC_PAIR", + "GENERIC_RELOC_SECTDIFF", + "GENERIC_RELOC_LOCAL_SECTDIFF", + "GENERIC_RELOC_PB_LA_PTR" }; + + if (r_type > 4) + res = "Unknown"; + else + res = Table[r_type]; + break; + } + case Triple::x86_64: { + const char* Table[] = { + "X86_64_RELOC_BRANCH", + "X86_64_RELOC_GOT_LOAD", + "X86_64_RELOC_GOT", + "X86_64_RELOC_SIGNED", + "X86_64_RELOC_UNSIGNED", + "X86_64_RELOC_SUBTRACTOR" }; + + if (r_type > 5) + res = "Unknown"; + else + res = Table[r_type]; + break; + } + case Triple::arm: { + const char* Table[] = { + "ARM_RELOC_VANILLA", + "ARM_RELOC_PAIR", + "ARM_RELOC_SECTDIFF", + "ARM_RELOC_LOCAL_SECTDIFF", + "ARM_RELOC_PB_LA_PTR", + "ARM_RELOC_BR24", + "ARM_THUMB_RELOC_BR22", + "ARM_THUMB_32BIT_BRANCH", + "ARM_RELOC_HALF", + "ARM_RELOC_HALF_SECTDIFF" }; + + if (r_type > 9) + res = "Unknown"; + else + res = Table[r_type]; + break; + } + case Triple::ppc: { + const char* Table[] = { + "PPC_RELOC_VANILLA", + "PPC_RELOC_PAIR", + "PPC_RELOC_BR14", + "PPC_RELOC_BR24", + "PPC_RELOC_HI16", + "PPC_RELOC_LO16", + "PPC_RELOC_HA16", + "PPC_RELOC_LO14", + "PPC_RELOC_SECTDIFF", + "PPC_RELOC_PB_LA_PTR", + "PPC_RELOC_HI16_SECTDIFF", + "PPC_RELOC_LO16_SECTDIFF", + "PPC_RELOC_HA16_SECTDIFF", + "PPC_RELOC_JBSR", + "PPC_RELOC_LO14_SECTDIFF", + "PPC_RELOC_LOCAL_SECTDIFF" }; + + res = Table[r_type]; + break; + } + case Triple::UnknownArch: + res = "Unknown"; + break; + } Result.append(res.begin(), res.end()); return object_error::success; } @@ -668,8 +750,47 @@ error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel, } error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, SmallVectorImpl &Result) const { - StringRef res 
= "Unknown"; - Result.append(res.begin(), res.end()); + InMemoryStruct RE; + getRelocation(Rel, RE); + + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + + bool isExtern = (RE->Word1 >> 27) & 1; + if (isExtern) { + uint32_t Val = (RE->Word1 & 0xFFFFFF); + symbol_iterator SI = begin_symbols(); + + error_code ec; + while (Val--) { + SI.increment(ec); + if (ec) report_fatal_error(ec.message()); + } + + StringRef SymName; + if ((ec = SI->getName(SymName))) + report_fatal_error(ec.message()); + + fmt << SymName; + } else { + uint32_t Val = (RE->Word1 & 0xFFFFFF); + section_iterator SI = begin_sections(); + + error_code ec; + while (Val--) { + SI.increment(ec); + if (ec) report_fatal_error(ec.message()); + } + + StringRef SectName; + if ((ec = SI->getName(SectName))) + report_fatal_error(ec.message()); + + fmt << SectName; + } + + fmt.flush(); + Result.append(fmtbuf.begin(), fmtbuf.end()); return object_error::success; } -- cgit v1.1 From 10b90a9bbf7dcae1568c03a03f9606f5395f2144 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 24 Oct 2011 21:45:13 +0000 Subject: ARM refactor am6offset usage for VLD1. Split am6offset into fixed and register offset variants so the instruction encodings are explicit rather than relying on a magic reg0 marker. Needed to be able to parse these. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142853 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 24 ++++--- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 38 ++++++----- lib/Target/ARM/ARMISelDAGToDAG.cpp | 42 +++++++++--- lib/Target/ARM/ARMInstrNEON.td | 85 +++++++++++++++++-------- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 63 ++++++++++++++---- 5 files changed, 181 insertions(+), 71 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 24cd228..211f937 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2401,10 +2401,14 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD1q16: case ARM::VLD1q32: case ARM::VLD1q64: - case ARM::VLD1q8_UPD: - case ARM::VLD1q16_UPD: - case ARM::VLD1q32_UPD: - case ARM::VLD1q64_UPD: + case ARM::VLD1q8wb_fixed: + case ARM::VLD1q16wb_fixed: + case ARM::VLD1q32wb_fixed: + case ARM::VLD1q64wb_fixed: + case ARM::VLD1q8wb_register: + case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_register: case ARM::VLD2d8: case ARM::VLD2d16: case ARM::VLD2d32: @@ -2562,10 +2566,14 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD1q16Pseudo: case ARM::VLD1q32Pseudo: case ARM::VLD1q64Pseudo: - case ARM::VLD1q8Pseudo_UPD: - case ARM::VLD1q16Pseudo_UPD: - case ARM::VLD1q32Pseudo_UPD: - case ARM::VLD1q64Pseudo_UPD: + case ARM::VLD1q8PseudoWB_register: + case ARM::VLD1q16PseudoWB_register: + case ARM::VLD1q32PseudoWB_register: + case ARM::VLD1q64PseudoWB_register: + case ARM::VLD1q8PseudoWB_fixed: + case ARM::VLD1q16PseudoWB_fixed: + case ARM::VLD1q32PseudoWB_fixed: + case ARM::VLD1q64PseudoWB_fixed: case ARM::VLD2d8Pseudo: case ARM::VLD2d16Pseudo: case ARM::VLD2d32Pseudo: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 77bad0c..c8f9756 
100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -102,7 +102,7 @@ namespace { unsigned PseudoOpc; unsigned RealOpc; bool IsLoad; - bool HasWriteBack; + bool HasWritebackOperand; NEONRegSpacing RegSpacing; unsigned char NumRegs; // D registers loaded or stored unsigned char RegElts; // elements per D register; used for lane ops @@ -148,13 +148,17 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 ,false}, { ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q16Pseudo_UPD, ARM::VLD1q16_UPD, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VLD1q16PseudoWB_fixed, ARM::VLD1q16wb_fixed,true,false,SingleSpc, 2, 4 ,false}, +{ ARM::VLD1q16PseudoWB_register, ARM::VLD1q16wb_register, true, true, SingleSpc, 2, 4 ,false}, { ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q32Pseudo_UPD, ARM::VLD1q32_UPD, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VLD1q32PseudoWB_fixed, ARM::VLD1q32wb_fixed,true,false,SingleSpc, 2, 2 ,false}, +{ ARM::VLD1q32PseudoWB_register, ARM::VLD1q32wb_register, true, true, SingleSpc, 2, 2 ,false}, { ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 ,false}, -{ ARM::VLD1q64Pseudo_UPD, ARM::VLD1q64_UPD, true, true, SingleSpc, 2, 1 ,false}, +{ ARM::VLD1q64PseudoWB_fixed, ARM::VLD1q64wb_fixed,true,false,SingleSpc, 2, 2 ,false}, +{ ARM::VLD1q64PseudoWB_register, ARM::VLD1q64wb_register, true, true, SingleSpc, 2, 1 ,false}, { ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD1q8Pseudo_UPD, ARM::VLD1q8_UPD, true, true, SingleSpc, 2, 8 ,false}, +{ ARM::VLD1q8PseudoWB_fixed, ARM::VLD1q8wb_fixed,true,false, SingleSpc, 2, 8 ,false}, +{ ARM::VLD1q8PseudoWB_register, ARM::VLD1q8wb_register,true,true,SingleSpc,2,8,false}, { ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4,true}, { ARM::VLD2DUPd16Pseudo_UPD, 
ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4,true}, @@ -436,14 +440,14 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { if (NumRegs > 3 && TableEntry->copyAllListRegs) MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); - if (TableEntry->HasWriteBack) + if (TableEntry->HasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the am6offset operand. - if (TableEntry->HasWriteBack) + if (TableEntry->HasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); // For an instruction writing double-spaced subregs, the pseudo instruction @@ -488,14 +492,14 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(TableEntry->RealOpc)); unsigned OpIdx = 0; - if (TableEntry->HasWriteBack) + if (TableEntry->HasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the am6offset operand. - if (TableEntry->HasWriteBack) + if (TableEntry->HasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); @@ -565,14 +569,14 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); } - if (TableEntry->HasWriteBack) + if (TableEntry->HasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the am6offset operand. - if (TableEntry->HasWriteBack) + if (TableEntry->HasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); // Grab the super-register source. 
@@ -1068,10 +1072,14 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD1q16Pseudo: case ARM::VLD1q32Pseudo: case ARM::VLD1q64Pseudo: - case ARM::VLD1q8Pseudo_UPD: - case ARM::VLD1q16Pseudo_UPD: - case ARM::VLD1q32Pseudo_UPD: - case ARM::VLD1q64Pseudo_UPD: + case ARM::VLD1q8PseudoWB_register: + case ARM::VLD1q16PseudoWB_register: + case ARM::VLD1q32PseudoWB_register: + case ARM::VLD1q64PseudoWB_register: + case ARM::VLD1q8PseudoWB_fixed: + case ARM::VLD1q16PseudoWB_fixed: + case ARM::VLD1q32PseudoWB_fixed: + case ARM::VLD1q64PseudoWB_fixed: case ARM::VLD2d8Pseudo: case ARM::VLD2d16Pseudo: case ARM::VLD2d32Pseudo: diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index ca777bd..8a1b618 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1549,6 +1549,23 @@ SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, unsigned NumVecs, return CurDAG->getTargetConstant(Alignment, MVT::i32); } +// Get the register stride update opcode of a VLD/VST instruction that +// is otherwise equivalent to the given fixed stride updating instruction. +static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { + switch (Opc) { + default: break; + case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; + case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; + case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; + case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; + case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; + case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; + case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; + case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; + } + return Opc; // If not one we handle, return it unchanged. 
+} + SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, unsigned *DOpcodes, unsigned *QOpcodes0, unsigned *QOpcodes1) { @@ -1612,7 +1629,14 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + // FIXME: VLD1 fixed increment doesn't need Reg0. Remove the reg0 + // case entirely when the rest are updated to that form, too. + // Do that before committing this change. Likewise, the opcode + // update call will become unconditional. + if (NumVecs == 1 && !isa(Inc.getNode())) + Opc = getVLDSTRegisterUpdateOpcode(Opc); + if (NumVecs != 1 || !isa(Inc.getNode())) + Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); } Ops.push_back(Pred); Ops.push_back(Reg0); @@ -2750,16 +2774,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VLD1_UPD: { - unsigned DOpcodes[] = { ARM::VLD1d8_UPD, ARM::VLD1d16_UPD, - ARM::VLD1d32_UPD, ARM::VLD1d64_UPD }; - unsigned QOpcodes[] = { ARM::VLD1q8Pseudo_UPD, ARM::VLD1q16Pseudo_UPD, - ARM::VLD1q32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VLD1d8wb_fixed, ARM::VLD1d16wb_fixed, + ARM::VLD1d32wb_fixed, ARM::VLD1d64wb_fixed }; + unsigned QOpcodes[] = { ARM::VLD1q8PseudoWB_fixed, + ARM::VLD1q16PseudoWB_fixed, + ARM::VLD1q32PseudoWB_fixed, + ARM::VLD1q64PseudoWB_fixed }; return SelectVLD(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VLD2_UPD: { unsigned DOpcodes[] = { ARM::VLD2d8Pseudo_UPD, ARM::VLD2d16Pseudo_UPD, - ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64Pseudo_UPD }; + ARM::VLD2d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; unsigned QOpcodes[] = { ARM::VLD2q8Pseudo_UPD, ARM::VLD2q16Pseudo_UPD, ARM::VLD2q32Pseudo_UPD }; return SelectVLD(N, true, 2, DOpcodes, QOpcodes, 0); @@ -2767,7 +2793,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD3_UPD: { unsigned DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 
ARM::VLD3d16Pseudo_UPD, - ARM::VLD3d32Pseudo_UPD, ARM::VLD1d64TPseudo_UPD }; + ARM::VLD3d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; unsigned QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q16Pseudo_UPD, ARM::VLD3q32Pseudo_UPD }; @@ -2779,7 +2805,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ARMISD::VLD4_UPD: { unsigned DOpcodes[] = { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, - ARM::VLD4d32Pseudo_UPD, ARM::VLD1d64QPseudo_UPD }; + ARM::VLD4d32Pseudo_UPD, ARM::VLD1q64PseudoWB_fixed}; unsigned QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q16Pseudo_UPD, ARM::VLD4q32Pseudo_UPD }; diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 1efe681..9bea53b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -259,6 +259,14 @@ class VLDQWBPseudo : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), (ins addrmode6:$addr, am6offset:$offset), itin, "$addr.addr = $wb">; +class VLDQWBfixedPseudo + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr), itin, + "$addr.addr = $wb">; +class VLDQWBregisterPseudo + : PseudoNLdSt<(outs QPR:$dst, GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset), itin, + "$addr.addr = $wb">; class VLDQQPseudo : PseudoNLdSt<(outs QQPR:$dst), (ins addrmode6:$addr), itin, "">; class VLDQQWBPseudo @@ -309,37 +317,58 @@ def VLD1q32Pseudo : VLDQPseudo; def VLD1q64Pseudo : VLDQPseudo; // ...with address register writeback: -class VLD1DWB op7_4, string Dt> - : NLdSt<0,0b10,0b0111,op7_4, (outs DPR:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1u, - "vld1", Dt, "\\{$Vd\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1DWB op7_4, string Dt> { + def _fixed : NLdSt<0,0b10, 0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; + } + def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVLDInstruction"; + } } -class VLD1QWB op7_4, string Dt> - : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x2u, - "vld1", Dt, "$Vd, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1QWB op7_4, string Dt> { + def _fixed : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + } + def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + } } -def VLD1d8_UPD : VLD1DWB<{0,0,0,?}, "8">; -def VLD1d16_UPD : VLD1DWB<{0,1,0,?}, "16">; -def VLD1d32_UPD : VLD1DWB<{1,0,0,?}, "32">; -def VLD1d64_UPD : VLD1DWB<{1,1,0,?}, "64">; - -def VLD1q8_UPD : VLD1QWB<{0,0,?,?}, "8">; -def VLD1q16_UPD : VLD1QWB<{0,1,?,?}, "16">; -def VLD1q32_UPD : VLD1QWB<{1,0,?,?}, "32">; -def VLD1q64_UPD : VLD1QWB<{1,1,?,?}, "64">; - -def VLD1q8Pseudo_UPD : VLDQWBPseudo; -def VLD1q16Pseudo_UPD : VLDQWBPseudo; -def VLD1q32Pseudo_UPD : VLDQWBPseudo; -def VLD1q64Pseudo_UPD : VLDQWBPseudo; +defm VLD1d8wb : VLD1DWB<{0,0,0,?}, "8">; +defm VLD1d16wb : VLD1DWB<{0,1,0,?}, "16">; +defm VLD1d32wb : VLD1DWB<{1,0,0,?}, "32">; +defm VLD1d64wb : VLD1DWB<{1,1,0,?}, "64">; +defm VLD1q8wb : VLD1QWB<{0,0,?,?}, "8">; +defm VLD1q16wb : VLD1QWB<{0,1,?,?}, "16">; +defm 
VLD1q32wb : VLD1QWB<{1,0,?,?}, "32">; +defm VLD1q64wb : VLD1QWB<{1,1,?,?}, "64">; + +def VLD1q8PseudoWB_fixed : VLDQWBfixedPseudo; +def VLD1q16PseudoWB_fixed : VLDQWBfixedPseudo; +def VLD1q32PseudoWB_fixed : VLDQWBfixedPseudo; +def VLD1q64PseudoWB_fixed : VLDQWBfixedPseudo; +def VLD1q8PseudoWB_register : VLDQWBregisterPseudo; +def VLD1q16PseudoWB_register : VLDQWBregisterPseudo; +def VLD1q32PseudoWB_register : VLDQWBregisterPseudo; +def VLD1q64PseudoWB_register : VLDQWBregisterPseudo; // ...with 3 registers class VLD1D3 op7_4, string Dt> diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index bc9a09f..d6b9260 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2054,14 +2054,22 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, // Writeback operand switch (Inst.getOpcode()) { - case ARM::VLD1d8_UPD: - case ARM::VLD1d16_UPD: - case ARM::VLD1d32_UPD: - case ARM::VLD1d64_UPD: - case ARM::VLD1q8_UPD: - case ARM::VLD1q16_UPD: - case ARM::VLD1q32_UPD: - case ARM::VLD1q64_UPD: + case ARM::VLD1d8wb_fixed: + case ARM::VLD1d16wb_fixed: + case ARM::VLD1d32wb_fixed: + case ARM::VLD1d64wb_fixed: + case ARM::VLD1d8wb_register: + case ARM::VLD1d16wb_register: + case ARM::VLD1d32wb_register: + case ARM::VLD1d64wb_register: + case ARM::VLD1q8wb_fixed: + case ARM::VLD1q16wb_fixed: + case ARM::VLD1q32wb_fixed: + case ARM::VLD1q64wb_fixed: + case ARM::VLD1q8wb_register: + case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_register: case ARM::VLD1d8T_UPD: case ARM::VLD1d16T_UPD: case ARM::VLD1d32T_UPD: @@ -2103,11 +2111,42 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; // AddrMode6 Offset (register) - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, 
Decoder))) + switch (Inst.getOpcode()) { + default: + // The below have been updated to have explicit am6offset split + // between fixed and register offset. For those instructions not + // yet updated, we need to add an additional reg0 operand for the + // fixed variant. + // + // The fixed offset encodes as Rm == 0xd, so we check for that. + if (Rm == 0xd) { + Inst.addOperand(MCOperand::CreateReg(0)); + break; + } + // Fall through to handle the register offset variant. + case ARM::VLD1d8wb_fixed: + case ARM::VLD1d16wb_fixed: + case ARM::VLD1d32wb_fixed: + case ARM::VLD1d64wb_fixed: + case ARM::VLD1d8wb_register: + case ARM::VLD1d16wb_register: + case ARM::VLD1d32wb_register: + case ARM::VLD1d64wb_register: + case ARM::VLD1q8wb_fixed: + case ARM::VLD1q16wb_fixed: + case ARM::VLD1q32wb_fixed: + case ARM::VLD1q64wb_fixed: + case ARM::VLD1q8wb_register: + case ARM::VLD1q16wb_register: + case ARM::VLD1q32wb_register: + case ARM::VLD1q64wb_register: + // The fixed offset post-increment encodes Rm == 0xd. The no-writeback + // variant encodes Rm == 0xf. Anything else is a register offset post- + // increment and we need to add the register operand to the instruction. + if (Rm != 0xD && Rm != 0xF && + !Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) return MCDisassembler::Fail; + break; } return S; -- cgit v1.1 From 12431329d617064d6e72dd040a58c1635cc261ab Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 24 Oct 2011 22:16:58 +0000 Subject: ARM assembly parsing and encoding for VLD1 w/ writeback. One and two length register list variants. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142861 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 4 ++++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 34 +++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 9bea53b..ff1a09a 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -325,6 +325,7 @@ multiclass VLD1DWB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, @@ -332,6 +333,7 @@ multiclass VLD1DWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; } } multiclass VLD1QWB op7_4, string Dt> { @@ -342,6 +344,7 @@ multiclass VLD1QWB op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListTwoD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, @@ -349,6 +352,7 @@ multiclass VLD1QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; } } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index a13554f..a278f6f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -198,6 +198,10 @@ class ARMAsmParser : public MCTargetAsmParser { const SmallVectorImpl &); bool cvtThumbMultiply(MCInst &Inst, unsigned Opcode, const SmallVectorImpl &); + bool cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &); + bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &); bool validateInstruction(MCInst &Inst, const SmallVectorImpl &Ops); @@ -3326,6 +3330,36 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode, return true; } +bool ARMAsmParser:: +cvtVLDwbFixed(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &Operands) { + // Vd + ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // Vn + ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +bool ARMAsmParser:: +cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &Operands) { + // Vd + ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + // Create a writeback register dummy placeholder. 
+ Inst.addOperand(MCOperand::CreateImm(0)); + // Vn + ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); + // Vm + ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. bool ARMAsmParser:: -- cgit v1.1 From aed4952753e2af9e93dde0b18a4ca8af73ec6db5 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 24 Oct 2011 23:01:03 +0000 Subject: ARMConstantPoolMBB::print should print BB number. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142867 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMConstantPoolValue.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp index aadfd47..9576283 100644 --- a/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -315,5 +315,6 @@ void ARMConstantPoolMBB::addSelectionDAGCSEId(FoldingSetNodeID &ID) { } void ARMConstantPoolMBB::print(raw_ostream &O) const { + O << "BB#" << MBB->getNumber(); ARMConstantPoolValue::print(O); } -- cgit v1.1 From 671215464c8335e383fa2db90df2eab73ef83b10 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 24 Oct 2011 23:05:43 +0000 Subject: Check the visibility of the global variable before placing it into the stubs table. A hidden variable could potentially end up in both lists. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142869 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index fb87154..3848f4d 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -506,7 +506,9 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); - MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); + MachineModuleInfoImpl::StubValueTy &StubSym = + GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : + MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { MCSymbol *Sym = Mang->getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); @@ -534,7 +536,9 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang, // Add information about the stub reference to MachOMMI so that the stub // gets emitted by the asmprinter. MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); - MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym); + MachineModuleInfoImpl::StubValueTy &StubSym = + GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) : + MachOMMI.getGVStubEntry(SSym); if (StubSym.getPointer() == 0) { MCSymbol *Sym = Mang->getSymbol(GV); StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); -- cgit v1.1 From 5c89cb8cd613c5a9d2bb2d6ab68afe8c2b41db70 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 24 Oct 2011 23:08:52 +0000 Subject: Don't crash on variable insertelement on ARM. PR10258. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142871 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 422a40f..3f913de 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -108,6 +108,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, EVT ElemTy = VT.getVectorElementType(); if (ElemTy != MVT::i64 && ElemTy != MVT::f64) setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); + setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); if (ElemTy != MVT::i32) { setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); @@ -4453,6 +4454,15 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { return SDValue(); } +static SDValue LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { + // INSERT_VECTOR_ELT is legal only for immediate indexes. + SDValue Lane = Op.getOperand(2); + if (!isa(Lane)) + return SDValue(); + + return Op; +} + static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { // EXTRACT_VECTOR_ELT is legal only for immediate indexes. 
SDValue Lane = Op.getOperand(1); @@ -4975,6 +4985,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SETCC: return LowerVSETCC(Op, DAG); case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG, Subtarget); case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); + case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); -- cgit v1.1 From d8fa76d4bed067cd8662c3196211bc90cc8d4470 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Mon, 24 Oct 2011 23:20:07 +0000 Subject: More fixes and improvements to MachO relocation pretty-printing, particular for x86 and x86_64 relocations with addends. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142875 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 158 +++++++++++++++++++++++++++++++++-------- 1 file changed, 127 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 5bf03ff..0950020 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -666,14 +666,18 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, } case Triple::x86_64: { const char* Table[] = { + "X86_64_RELOC_UNSIGNED", + "X86_64_RELOC_SIGNED", "X86_64_RELOC_BRANCH", "X86_64_RELOC_GOT_LOAD", "X86_64_RELOC_GOT", - "X86_64_RELOC_SIGNED", - "X86_64_RELOC_UNSIGNED", - "X86_64_RELOC_SUBTRACTOR" }; + "X86_64_RELOC_SUBTRACTOR", + "X86_64_RELOC_SIGNED_1", + "X86_64_RELOC_SIGNED_2", + "X86_64_RELOC_SIGNED_4", + "X86_64_RELOC_TLV" }; - if (r_type > 5) + if (r_type > 9) res = "Unknown"; else res = Table[r_type]; @@ -748,46 +752,138 @@ error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel, } return object_error::success; } + +// Helper to advance a 
section or symbol iterator multiple increments at a time. +template +error_code advance(T &it, size_t Val) { + error_code ec; + while (Val--) { + it.increment(ec); + } + return ec; +} + +template +void advanceTo(T &it, size_t Val) { + if (error_code ec = advance(it, Val)) + report_fatal_error(ec.message()); +} + +error_code +MachOObjectFile::getRelocationTargetName(uint32_t Idx, StringRef &S) const { + bool isExtern = (Idx >> 27) & 1; + uint32_t Val = Idx & 0xFFFFFF; + error_code ec; + + if (isExtern) { + symbol_iterator SI = begin_symbols(); + advanceTo(SI, Val); + ec = SI->getName(S); + } else { + section_iterator SI = begin_sections(); + advanceTo(SI, Val); + ec = SI->getName(S); + } + + return ec; +} + error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, SmallVectorImpl &Result) const { InMemoryStruct RE; getRelocation(Rel, RE); - std::string fmtbuf; - raw_string_ostream fmt(fmtbuf); + std::string addend; + raw_string_ostream addend_fmt(addend); - bool isExtern = (RE->Word1 >> 27) & 1; - if (isExtern) { - uint32_t Val = (RE->Word1 & 0xFFFFFF); - symbol_iterator SI = begin_symbols(); + bool isPCRel = (RE->Word1 >> 25) & 1; + unsigned Type = (RE->Word1 >> 28) & 0xF; + + // Determine any addends that should be displayed with the relocation. + // These require decoding the relocation type, which is triple-specific. + unsigned Arch = getArch(); - error_code ec; - while (Val--) { - SI.increment(ec); - if (ec) report_fatal_error(ec.message()); + // X86_64 has entirely custom relocation types. + if (Arch == Triple::x86_64) { + switch (Type) { + case 5: { // X86_64_RELOC_SUBTRACTOR + RelocationRef NextReloc; + if (error_code ec = getRelocationNext(Rel, NextReloc)) + report_fatal_error(ec.message()); + + uint32_t SucessorType; + if (error_code ec = NextReloc.getType(SucessorType)) + report_fatal_error(ec.message()); + + // X86_64_SUBTRACTOR must be followed by a relocation of type + // X86_64_RELOC_UNSIGNED. 
+ unsigned RType = (SucessorType >> 28) & 0xF; + if (RType != 0) + report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " + "X86_64_RELOC_SUBTRACTOR."); + + StringRef Name; + if (error_code ec = getRelocationTargetName(SucessorType, Name)) + report_fatal_error(ec.message()); + + addend_fmt << "-" << Name; + } + case 6: // X86_64_RELOC_SIGNED1 + addend_fmt << "-1"; + break; + case 7: // X86_64_RELOC_SIGNED2 + addend_fmt << "-2"; + break; + case 8: // X86_64_RELOC_SIGNED4 + addend_fmt << "-4"; + break; } + } - StringRef SymName; - if ((ec = SI->getName(SymName))) - report_fatal_error(ec.message()); + // X86 and ARM share some relocation types in common. + if (Arch == Triple::x86 || Arch == Triple::arm) { + switch (Type) { + case 1: // GENERIC_RELOC_PAIR - prints no info + return object_error::success; + case 2: // GENERIC_RELOC_SECTDIFF + case 4: { // GENERIC_RELOC_LOCAL_SECTDIFF + RelocationRef NextReloc; + if (error_code ec = getRelocationNext(Rel, NextReloc)) + report_fatal_error(ec.message()); + + uint32_t SucessorType; + if (error_code ec = NextReloc.getType(SucessorType)) + report_fatal_error(ec.message()); + + // X86 sect diff's must be followed by a relocation of type + // GENERIC_RELOC_PAIR. 
+ unsigned RType = (SucessorType >> 28) & 0xF; + if (RType != 1) + report_fatal_error("Expected GENERIC_RELOC_PAIR after " + "GENERIC_RELOC_SECTDIFF or " + "GENERIC_RELOC_LOCAL_SECTDIFF."); + + StringRef Name; + if (error_code ec = getRelocationTargetName(SucessorType, Name)) + report_fatal_error(ec.message()); + + addend_fmt << "-" << Name; + + } + } + } - fmt << SymName; - } else { - uint32_t Val = (RE->Word1 & 0xFFFFFF); - section_iterator SI = begin_sections(); + addend_fmt.flush(); - error_code ec; - while (Val--) { - SI.increment(ec); - if (ec) report_fatal_error(ec.message()); - } + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); - StringRef SectName; - if ((ec = SI->getName(SectName))) - report_fatal_error(ec.message()); + StringRef Name; + if (error_code ec = getRelocationTargetName(RE->Word1, Name)) + report_fatal_error(ec.message()); - fmt << SectName; - } + fmt << Name << addend; + if (isPCRel) fmt << "-P"; fmt.flush(); Result.append(fmtbuf.begin(), fmtbuf.end()); -- cgit v1.1 From 5921675ff5ea632ab1e6d7aa5d1f263b858bbafa Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 24 Oct 2011 23:26:05 +0000 Subject: ARM assembly parsing and encoding for VLD1 w/ writeback. Three entry register list variation. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142876 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 4 +-- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 3 --- lib/Target/ARM/ARMInstrNEON.td | 35 ++++++++++++++++--------- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 12 ++++++--- 4 files changed, 33 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 211f937..caa500c 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2428,7 +2428,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD3d8_UPD: case ARM::VLD3d16_UPD: case ARM::VLD3d32_UPD: - case ARM::VLD1d64T_UPD: + case ARM::VLD1d64Twb_fixed: + case ARM::VLD1d64Twb_register: case ARM::VLD3q8_UPD: case ARM::VLD3q16_UPD: case ARM::VLD3q32_UPD: @@ -2593,7 +2594,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: - case ARM::VLD1d64TPseudo_UPD: case ARM::VLD3q8Pseudo_UPD: case ARM::VLD3q16Pseudo_UPD: case ARM::VLD3q32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index c8f9756..459ba0a 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -145,8 +145,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 ,false}, -{ ARM::VLD1d64TPseudo_UPD, ARM::VLD1d64T_UPD, true, true, SingleSpc, 3, 1 ,false}, - { ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 ,false}, { ARM::VLD1q16PseudoWB_fixed, ARM::VLD1q16wb_fixed,true,false,SingleSpc, 2, 4 ,false}, { 
ARM::VLD1q16PseudoWB_register, ARM::VLD1q16wb_register, true, true, SingleSpc, 2, 4 ,false}, @@ -1099,7 +1097,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD3d8Pseudo_UPD: case ARM::VLD3d16Pseudo_UPD: case ARM::VLD3d32Pseudo_UPD: - case ARM::VLD1d64TPseudo_UPD: case ARM::VLD3q8Pseudo_UPD: case ARM::VLD3q16Pseudo_UPD: case ARM::VLD3q32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index ff1a09a..c280d61 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -383,12 +383,24 @@ class VLD1D3 op7_4, string Dt> let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; } -class VLD1D3WB op7_4, string Dt> - : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x3u, "vld1", Dt, - "$Vd, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1D3WB op7_4, string Dt> { + def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } def VLD1d8T : VLD1D3<{0,0,0,?}, "8">; @@ -396,13 +408,12 @@ def VLD1d16T : VLD1D3<{0,1,0,?}, "16">; def VLD1d32T : VLD1D3<{1,0,0,?}, "32">; def VLD1d64T : VLD1D3<{1,1,0,?}, "64">; -def VLD1d8T_UPD : VLD1D3WB<{0,0,0,?}, "8">; -def VLD1d16T_UPD : VLD1D3WB<{0,1,0,?}, "16">; -def VLD1d32T_UPD : VLD1D3WB<{1,0,0,?}, "32">; -def VLD1d64T_UPD : VLD1D3WB<{1,1,0,?}, "64">; +defm VLD1d8Twb : VLD1D3WB<{0,0,0,?}, "8">; +defm VLD1d16Twb : VLD1D3WB<{0,1,0,?}, "16">; +defm VLD1d32Twb : VLD1D3WB<{1,0,0,?}, "32">; +defm VLD1d64Twb : VLD1D3WB<{1,1,0,?}, "64">; -def VLD1d64TPseudo : VLDQQPseudo; -def VLD1d64TPseudo_UPD : VLDQQWBPseudo; +def VLD1d64TPseudo : VLDQQPseudo; // ...with 4 registers class VLD1D4 op7_4, string Dt> diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index d6b9260..ddc5c99 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2070,10 +2070,14 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VLD1q16wb_register: case ARM::VLD1q32wb_register: case ARM::VLD1q64wb_register: - case ARM::VLD1d8T_UPD: - case ARM::VLD1d16T_UPD: - case ARM::VLD1d32T_UPD: - case ARM::VLD1d64T_UPD: + case ARM::VLD1d8Twb_fixed: + case ARM::VLD1d8Twb_register: + case ARM::VLD1d16Twb_fixed: + case ARM::VLD1d16Twb_register: + case ARM::VLD1d32Twb_fixed: + case ARM::VLD1d32Twb_register: + case ARM::VLD1d64Twb_fixed: + case ARM::VLD1d64Twb_register: case ARM::VLD1d8Q_UPD: case ARM::VLD1d16Q_UPD: case 
ARM::VLD1d32Q_UPD: -- cgit v1.1 From b36e03d987c843ccb731627ffd2b1db17bd72e39 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 24 Oct 2011 23:40:46 +0000 Subject: Nuke dead code. Nothing generates the VLD1d64QPseudo_UPD instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142877 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 1 - lib/Target/ARM/ARMExpandPseudoInsts.cpp | 2 -- lib/Target/ARM/ARMInstrNEON.td | 1 - 3 files changed, 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index caa500c..23fae3e 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2610,7 +2610,6 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: - case ARM::VLD1d64QPseudo_UPD: case ARM::VLD4q8Pseudo_UPD: case ARM::VLD4q16Pseudo_UPD: case ARM::VLD4q32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 459ba0a..d1ee635 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -143,7 +143,6 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 ,true}, { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 ,false}, -{ ARM::VLD1d64QPseudo_UPD, ARM::VLD1d64Q_UPD, true, true, SingleSpc, 4, 1 ,false}, { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 1 ,false}, { ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 ,false}, { ARM::VLD1q16PseudoWB_fixed, ARM::VLD1q16wb_fixed,true,false,SingleSpc, 2, 4 ,false}, @@ -1113,7 +1112,6 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VLD4d8Pseudo_UPD: case ARM::VLD4d16Pseudo_UPD: case ARM::VLD4d32Pseudo_UPD: - case ARM::VLD1d64QPseudo_UPD: case 
ARM::VLD4q8Pseudo_UPD: case ARM::VLD4q16Pseudo_UPD: case ARM::VLD4q32Pseudo_UPD: diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index c280d61..9d0350b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -444,7 +444,6 @@ def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">; def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">; def VLD1d64QPseudo : VLDQQPseudo; -def VLD1d64QPseudo_UPD : VLDQQWBPseudo; // VLD2 : Vector Load (multiple 2-element structures) class VLD2D op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> -- cgit v1.1 From 29074ccf6cb00a3cbe32a3b7809d970ecaf8c9bf Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Mon, 24 Oct 2011 23:48:32 +0000 Subject: Remove the SystemZ backend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142878 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Triple.cpp | 5 - lib/Target/SystemZ/CMakeLists.txt | 36 - lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt | 14 - lib/Target/SystemZ/MCTargetDesc/Makefile | 16 - .../SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 32 - lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h | 30 - .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 81 -- .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 38 - lib/Target/SystemZ/Makefile | 22 - lib/Target/SystemZ/SystemZ.h | 52 - lib/Target/SystemZ/SystemZ.td | 61 -- lib/Target/SystemZ/SystemZAsmPrinter.cpp | 221 ---- lib/Target/SystemZ/SystemZCallingConv.td | 46 - lib/Target/SystemZ/SystemZFrameLowering.cpp | 386 ------- lib/Target/SystemZ/SystemZFrameLowering.h | 57 - lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 779 ------------- lib/Target/SystemZ/SystemZISelLowering.cpp | 868 --------------- lib/Target/SystemZ/SystemZISelLowering.h | 145 --- lib/Target/SystemZ/SystemZInstrBuilder.h | 128 --- lib/Target/SystemZ/SystemZInstrFP.td | 340 ------ lib/Target/SystemZ/SystemZInstrFormats.td | 133 --- lib/Target/SystemZ/SystemZInstrInfo.cpp | 439 -------- lib/Target/SystemZ/SystemZInstrInfo.h | 113 -- 
lib/Target/SystemZ/SystemZInstrInfo.td | 1147 -------------------- lib/Target/SystemZ/SystemZMachineFunctionInfo.h | 51 - lib/Target/SystemZ/SystemZOperands.td | 325 ------ lib/Target/SystemZ/SystemZRegisterInfo.cpp | 143 --- lib/Target/SystemZ/SystemZRegisterInfo.h | 60 - lib/Target/SystemZ/SystemZRegisterInfo.td | 205 ---- lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 23 - lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 31 - lib/Target/SystemZ/SystemZSubtarget.cpp | 54 - lib/Target/SystemZ/SystemZSubtarget.h | 48 - lib/Target/SystemZ/SystemZTargetMachine.cpp | 40 - lib/Target/SystemZ/SystemZTargetMachine.h | 68 -- lib/Target/SystemZ/TargetInfo/CMakeLists.txt | 13 - lib/Target/SystemZ/TargetInfo/Makefile | 15 - .../SystemZ/TargetInfo/SystemZTargetInfo.cpp | 19 - 38 files changed, 6284 deletions(-) delete mode 100644 lib/Target/SystemZ/CMakeLists.txt delete mode 100644 lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt delete mode 100644 lib/Target/SystemZ/MCTargetDesc/Makefile delete mode 100644 lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp delete mode 100644 lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h delete mode 100644 lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp delete mode 100644 lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h delete mode 100644 lib/Target/SystemZ/Makefile delete mode 100644 lib/Target/SystemZ/SystemZ.h delete mode 100644 lib/Target/SystemZ/SystemZ.td delete mode 100644 lib/Target/SystemZ/SystemZAsmPrinter.cpp delete mode 100644 lib/Target/SystemZ/SystemZCallingConv.td delete mode 100644 lib/Target/SystemZ/SystemZFrameLowering.cpp delete mode 100644 lib/Target/SystemZ/SystemZFrameLowering.h delete mode 100644 lib/Target/SystemZ/SystemZISelDAGToDAG.cpp delete mode 100644 lib/Target/SystemZ/SystemZISelLowering.cpp delete mode 100644 lib/Target/SystemZ/SystemZISelLowering.h delete mode 100644 lib/Target/SystemZ/SystemZInstrBuilder.h delete mode 100644 lib/Target/SystemZ/SystemZInstrFP.td delete mode 100644 
lib/Target/SystemZ/SystemZInstrFormats.td delete mode 100644 lib/Target/SystemZ/SystemZInstrInfo.cpp delete mode 100644 lib/Target/SystemZ/SystemZInstrInfo.h delete mode 100644 lib/Target/SystemZ/SystemZInstrInfo.td delete mode 100644 lib/Target/SystemZ/SystemZMachineFunctionInfo.h delete mode 100644 lib/Target/SystemZ/SystemZOperands.td delete mode 100644 lib/Target/SystemZ/SystemZRegisterInfo.cpp delete mode 100644 lib/Target/SystemZ/SystemZRegisterInfo.h delete mode 100644 lib/Target/SystemZ/SystemZRegisterInfo.td delete mode 100644 lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp delete mode 100644 lib/Target/SystemZ/SystemZSelectionDAGInfo.h delete mode 100644 lib/Target/SystemZ/SystemZSubtarget.cpp delete mode 100644 lib/Target/SystemZ/SystemZSubtarget.h delete mode 100644 lib/Target/SystemZ/SystemZTargetMachine.cpp delete mode 100644 lib/Target/SystemZ/SystemZTargetMachine.h delete mode 100644 lib/Target/SystemZ/TargetInfo/CMakeLists.txt delete mode 100644 lib/Target/SystemZ/TargetInfo/Makefile delete mode 100644 lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp (limited to 'lib') diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index c61af37..2554d64 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -31,7 +31,6 @@ const char *Triple::getArchTypeName(ArchType Kind) { case ppc: return "powerpc"; case sparc: return "sparc"; case sparcv9: return "sparcv9"; - case systemz: return "s390x"; case tce: return "tce"; case thumb: return "thumb"; case x86: return "i386"; @@ -165,8 +164,6 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return sparc; if (Name == "sparcv9") return sparcv9; - if (Name == "systemz") - return systemz; if (Name == "tce") return tce; if (Name == "thumb") @@ -316,8 +313,6 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) { return sparc; else if (ArchName == "sparcv9") return sparcv9; - else if (ArchName == "s390x") - return systemz; else if (ArchName == "tce") return tce; else if 
(ArchName == "xcore") diff --git a/lib/Target/SystemZ/CMakeLists.txt b/lib/Target/SystemZ/CMakeLists.txt deleted file mode 100644 index 7c09c0e..0000000 --- a/lib/Target/SystemZ/CMakeLists.txt +++ /dev/null @@ -1,36 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS SystemZ.td) - -llvm_tablegen(SystemZGenRegisterInfo.inc -gen-register-info) -llvm_tablegen(SystemZGenInstrInfo.inc -gen-instr-info) -llvm_tablegen(SystemZGenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(SystemZGenDAGISel.inc -gen-dag-isel) -llvm_tablegen(SystemZGenCallingConv.inc -gen-callingconv) -llvm_tablegen(SystemZGenSubtargetInfo.inc -gen-subtarget) -add_public_tablegen_target(SystemZCommonTableGen) - -add_llvm_target(SystemZCodeGen - SystemZAsmPrinter.cpp - SystemZISelDAGToDAG.cpp - SystemZISelLowering.cpp - SystemZInstrInfo.cpp - SystemZFrameLowering.cpp - SystemZRegisterInfo.cpp - SystemZSubtarget.cpp - SystemZTargetMachine.cpp - SystemZSelectionDAGInfo.cpp - ) - -add_llvm_library_dependencies(LLVMSystemZCodeGen - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMSystemZDesc - LLVMSystemZInfo - LLVMTarget - ) - -add_subdirectory(TargetInfo) -add_subdirectory(MCTargetDesc) diff --git a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt b/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 822df09..0000000 --- a/lib/Target/SystemZ/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -add_llvm_library(LLVMSystemZDesc - SystemZMCTargetDesc.cpp - SystemZMCAsmInfo.cpp - ) - -add_llvm_library_dependencies(LLVMSystemZDesc - LLVMMC - LLVMSystemZInfo - ) - -add_dependencies(LLVMSystemZDesc SystemZCommonTableGen) - -# Hack: we need to include 'main' target directory to grab private headers -include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) 
diff --git a/lib/Target/SystemZ/MCTargetDesc/Makefile b/lib/Target/SystemZ/MCTargetDesc/Makefile deleted file mode 100644 index 08f1a9d..0000000 --- a/lib/Target/SystemZ/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/SystemZ/TargetDesc/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMSystemZDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp deleted file mode 100644 index 8540546..0000000 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp +++ /dev/null @@ -1,32 +0,0 @@ -//===-- SystemZMCAsmInfo.cpp - SystemZ asm properties ---------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the SystemZMCAsmInfo properties. 
-// -//===----------------------------------------------------------------------===// - -#include "SystemZMCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSectionELF.h" -#include "llvm/Support/ELF.h" -using namespace llvm; - -SystemZMCAsmInfo::SystemZMCAsmInfo(const Target &T, StringRef TT) { - IsLittleEndian = false; - PointerSize = 8; - PrivateGlobalPrefix = ".L"; - WeakRefDirective = "\t.weak\t"; - PCSymbol = "."; -} - -const MCSection *SystemZMCAsmInfo:: -getNonexecutableStackSection(MCContext &Ctx) const{ - return Ctx.getELFSection(".note.GNU-stack", ELF::SHT_PROGBITS, - 0, SectionKind::getMetadata()); -} diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h deleted file mode 100644 index a6a27e2..0000000 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h +++ /dev/null @@ -1,30 +0,0 @@ -//====-- SystemZMCAsmInfo.h - SystemZ asm properties -----------*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the SystemZMCAsmInfo class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef SystemZTARGETASMINFO_H -#define SystemZTARGETASMINFO_H - -#include "llvm/MC/MCAsmInfo.h" - -namespace llvm { - class Target; - class StringRef; - - struct SystemZMCAsmInfo : public MCAsmInfo { - explicit SystemZMCAsmInfo(const Target &T, StringRef TT); - virtual const MCSection *getNonexecutableStackSection(MCContext &Ctx) const; - }; - -} // namespace llvm - -#endif diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp deleted file mode 100644 index 23fb1e0..0000000 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +++ /dev/null @@ -1,81 +0,0 @@ -//===-- SystemZMCTargetDesc.cpp - SystemZ Target Descriptions ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides SystemZ specific target descriptions. 
-// -//===----------------------------------------------------------------------===// - -#include "SystemZMCTargetDesc.h" -#include "SystemZMCAsmInfo.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_MC_DESC -#include "SystemZGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "SystemZGenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "SystemZGenRegisterInfo.inc" - -using namespace llvm; - -static MCInstrInfo *createSystemZMCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitSystemZMCInstrInfo(X); - return X; -} - -static MCRegisterInfo *createSystemZMCRegisterInfo(StringRef TT) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitSystemZMCRegisterInfo(X, 0); - return X; -} - -static MCSubtargetInfo *createSystemZMCSubtargetInfo(StringRef TT, - StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitSystemZMCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCCodeGenInfo *createSystemZMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - if (RM == Reloc::Default) - RM = Reloc::Static; - X->InitMCCodeGenInfo(RM, CM); - return X; -} - -extern "C" void LLVMInitializeSystemZTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfo X(TheSystemZTarget); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheSystemZTarget, - createSystemZMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheSystemZTarget, - createSystemZMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheSystemZTarget, - createSystemZMCRegisterInfo); - - // Register the MC subtarget info. 
- TargetRegistry::RegisterMCSubtargetInfo(TheSystemZTarget, - createSystemZMCSubtargetInfo); -} diff --git a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h b/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h deleted file mode 100644 index e2ad5af..0000000 --- a/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +++ /dev/null @@ -1,38 +0,0 @@ -//===-- SystemZMCTargetDesc.h - SystemZ Target Descriptions -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides SystemZ specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#ifndef SYSTEMZMCTARGETDESC_H -#define SYSTEMZMCTARGETDESC_H - -namespace llvm { -class MCSubtargetInfo; -class Target; -class StringRef; - -extern Target TheSystemZTarget; - -} // End llvm namespace - -// Defines symbolic names for SystemZ registers. -// This defines a mapping from register name to register number. -#define GET_REGINFO_ENUM -#include "SystemZGenRegisterInfo.inc" - -// Defines symbolic names for the SystemZ instructions. -#define GET_INSTRINFO_ENUM -#include "SystemZGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "SystemZGenSubtargetInfo.inc" - -#endif diff --git a/lib/Target/SystemZ/Makefile b/lib/Target/SystemZ/Makefile deleted file mode 100644 index 6356491..0000000 --- a/lib/Target/SystemZ/Makefile +++ /dev/null @@ -1,22 +0,0 @@ -##===- lib/Target/SystemZ/Makefile ---------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMSystemZCodeGen -TARGET = SystemZ - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = SystemZGenRegisterInfo.inc SystemZGenInstrInfo.inc \ - SystemZGenAsmWriter.inc SystemZGenDAGISel.inc \ - SystemZGenSubtargetInfo.inc SystemZGenCallingConv.inc - -DIRS = TargetInfo MCTargetDesc - -include $(LEVEL)/Makefile.common - diff --git a/lib/Target/SystemZ/SystemZ.h b/lib/Target/SystemZ/SystemZ.h deleted file mode 100644 index 88960b9..0000000 --- a/lib/Target/SystemZ/SystemZ.h +++ /dev/null @@ -1,52 +0,0 @@ -//=-- SystemZ.h - Top-level interface for SystemZ representation -*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in -// the LLVM SystemZ backend. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_SystemZ_H -#define LLVM_TARGET_SystemZ_H - -#include "MCTargetDesc/SystemZMCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - class SystemZTargetMachine; - class FunctionPass; - class formatted_raw_ostream; - - namespace SystemZCC { - // SystemZ specific condition code. These correspond to SYSTEMZ_*_COND in - // SystemZInstrInfo.td. They must be kept in synch. 
- enum CondCodes { - O = 0, - H = 1, - NLE = 2, - L = 3, - NHE = 4, - LH = 5, - NE = 6, - E = 7, - NLH = 8, - HE = 9, - NL = 10, - LE = 11, - NH = 12, - NO = 13, - INVALID = -1 - }; - } - - FunctionPass *createSystemZISelDag(SystemZTargetMachine &TM, - CodeGenOpt::Level OptLevel); - -} // end namespace llvm; -#endif diff --git a/lib/Target/SystemZ/SystemZ.td b/lib/Target/SystemZ/SystemZ.td deleted file mode 100644 index 4c08c08..0000000 --- a/lib/Target/SystemZ/SystemZ.td +++ /dev/null @@ -1,61 +0,0 @@ -//===- SystemZ.td - Describe the SystemZ Target Machine ------*- tblgen -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This is the top level entry point for the SystemZ target. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// Subtarget Features. -//===----------------------------------------------------------------------===// -def FeatureZ10 : SubtargetFeature<"z10", "HasZ10Insts", "true", - "Support Z10 instructions">; - -//===----------------------------------------------------------------------===// -// SystemZ supported processors. 
-//===----------------------------------------------------------------------===// -class Proc Features> - : Processor; - -def : Proc<"z9", []>; -def : Proc<"z10", [FeatureZ10]>; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "SystemZRegisterInfo.td" - -//===----------------------------------------------------------------------===// -// Calling Convention Description -//===----------------------------------------------------------------------===// - -include "SystemZCallingConv.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "SystemZInstrInfo.td" -include "SystemZInstrFP.td" - -def SystemZInstrInfo : InstrInfo {} - -//===----------------------------------------------------------------------===// -// Target Declaration -//===----------------------------------------------------------------------===// - -def SystemZ : Target { - let InstructionSet = SystemZInstrInfo; -} - diff --git a/lib/Target/SystemZ/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/SystemZAsmPrinter.cpp deleted file mode 100644 index 43dcdfc..0000000 --- a/lib/Target/SystemZ/SystemZAsmPrinter.cpp +++ /dev/null @@ -1,221 +0,0 @@ -//===-- SystemZAsmPrinter.cpp - SystemZ LLVM assembly writer ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to the SystemZ assembly language. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "SystemZ.h" -#include "SystemZInstrInfo.h" -#include "SystemZTargetMachine.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class SystemZAsmPrinter : public AsmPrinter { - public: - SystemZAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) {} - - virtual const char *getPassName() const { - return "SystemZ Assembly Printer"; - } - - void printOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, - const char* Modifier = 0); - void printPCRelImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O); - void printRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, - const char* Modifier = 0); - void printRRIAddrOperand(const MachineInstr *MI, int OpNum, raw_ostream &O, - const char* Modifier = 0); - void printS16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) { - O << (int16_t)MI->getOperand(OpNum).getImm(); - } - void printU16ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) { - O << (uint16_t)MI->getOperand(OpNum).getImm(); - } - void printS32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) { - O << (int32_t)MI->getOperand(OpNum).getImm(); - } - void printU32ImmOperand(const MachineInstr *MI, int OpNum, raw_ostream &O) { - O << (uint32_t)MI->getOperand(OpNum).getImm(); - } - - void printInstruction(const MachineInstr 
*MI, raw_ostream &O); - static const char *getRegisterName(unsigned RegNo); - - void EmitInstruction(const MachineInstr *MI); - }; -} // end of anonymous namespace - -#include "SystemZGenAsmWriter.inc" - -void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); -} - -void SystemZAsmPrinter::printPCRelImmOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(OpNum); - switch (MO.getType()) { - case MachineOperand::MO_Immediate: - O << MO.getImm(); - return; - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_GlobalAddress: { - const GlobalValue *GV = MO.getGlobal(); - O << *Mang->getSymbol(GV); - - // Assemble calls via PLT for externally visible symbols if PIC. - if (TM.getRelocationModel() == Reloc::PIC_ && - !GV->hasHiddenVisibility() && !GV->hasProtectedVisibility() && - !GV->hasLocalLinkage()) - O << "@PLT"; - - printOffset(MO.getOffset(), O); - return; - } - case MachineOperand::MO_ExternalSymbol: { - std::string Name(MAI->getGlobalPrefix()); - Name += MO.getSymbolName(); - O << Name; - - if (TM.getRelocationModel() == Reloc::PIC_) - O << "@PLT"; - - return; - } - default: - assert(0 && "Not implemented yet!"); - } -} - - -void SystemZAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, const char *Modifier) { - const MachineOperand &MO = MI->getOperand(OpNum); - switch (MO.getType()) { - case MachineOperand::MO_Register: { - assert (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && - "Virtual registers should be already mapped!"); - unsigned Reg = MO.getReg(); - if (Modifier && strncmp(Modifier, "subreg", 6) == 0) { - if (strncmp(Modifier + 7, "even", 4) == 0) - Reg = TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_32bit); - else if (strncmp(Modifier + 7, "odd", 3) == 0) - Reg = 
TM.getRegisterInfo()->getSubReg(Reg, SystemZ::subreg_odd32); - else - assert(0 && "Invalid subreg modifier"); - } - - O << '%' << getRegisterName(Reg); - return; - } - case MachineOperand::MO_Immediate: - O << MO.getImm(); - return; - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() << '_' - << MO.getIndex(); - - return; - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << '_' - << MO.getIndex(); - - printOffset(MO.getOffset(), O); - break; - case MachineOperand::MO_GlobalAddress: - O << *Mang->getSymbol(MO.getGlobal()); - break; - case MachineOperand::MO_ExternalSymbol: { - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - break; - } - default: - assert(0 && "Not implemented yet!"); - } - - switch (MO.getTargetFlags()) { - default: assert(0 && "Unknown target flag on GV operand"); - case SystemZII::MO_NO_FLAG: - break; - case SystemZII::MO_GOTENT: O << "@GOTENT"; break; - case SystemZII::MO_PLT: O << "@PLT"; break; - } - - printOffset(MO.getOffset(), O); -} - -void SystemZAsmPrinter::printRIAddrOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, - const char *Modifier) { - const MachineOperand &Base = MI->getOperand(OpNum); - - // Print displacement operand. - printOperand(MI, OpNum+1, O); - - // Print base operand (if any) - if (Base.getReg()) { - O << '('; - printOperand(MI, OpNum, O); - O << ')'; - } -} - -void SystemZAsmPrinter::printRRIAddrOperand(const MachineInstr *MI, int OpNum, - raw_ostream &O, - const char *Modifier) { - const MachineOperand &Base = MI->getOperand(OpNum); - const MachineOperand &Index = MI->getOperand(OpNum+2); - - // Print displacement operand. 
- printOperand(MI, OpNum+1, O); - - // Print base operand (if any) - if (Base.getReg()) { - O << '('; - printOperand(MI, OpNum, O); - if (Index.getReg()) { - O << ','; - printOperand(MI, OpNum+2, O); - } - O << ')'; - } else - assert(!Index.getReg() && "Should allocate base register first!"); -} - -// Force static initialization. -extern "C" void LLVMInitializeSystemZAsmPrinter() { - RegisterAsmPrinter X(TheSystemZTarget); -} diff --git a/lib/Target/SystemZ/SystemZCallingConv.td b/lib/Target/SystemZ/SystemZCallingConv.td deleted file mode 100644 index c799a9e..0000000 --- a/lib/Target/SystemZ/SystemZCallingConv.td +++ /dev/null @@ -1,46 +0,0 @@ -//=- SystemZCallingConv.td - Calling Conventions for SystemZ -*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This describes the calling conventions for SystemZ architecture. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// SystemZ Return Value Calling Convention -//===----------------------------------------------------------------------===// -def RetCC_SystemZ : CallingConv<[ - // Promote i8/i16/i32 arguments to i64. - CCIfType<[i8, i16, i32], CCPromoteToType>, - - // i64 is returned in register R2 - CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D]>>, - - // f32 / f64 are returned in F0 - CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, - CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>> -]>; - -//===----------------------------------------------------------------------===// -// SystemZ Argument Calling Conventions -//===----------------------------------------------------------------------===// -def CC_SystemZ : CallingConv<[ - // Promote i8/i16/i32 arguments to i64. 
- CCIfType<[i8, i16, i32], CCPromoteToType>, - - // The first 5 integer arguments of non-varargs functions are passed in - // integer registers. - CCIfType<[i64], CCAssignToReg<[R2D, R3D, R4D, R5D, R6D]>>, - - // The first 4 floating point arguments of non-varargs functions are passed - // in FP registers. - CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, - CCIfType<[f64], CCAssignToReg<[F0L, F2L, F4L, F6L]>>, - - // Integer values get stored in stack slots that are 8 bytes in - // size and 8-byte aligned. - CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>> -]>; diff --git a/lib/Target/SystemZ/SystemZFrameLowering.cpp b/lib/Target/SystemZ/SystemZFrameLowering.cpp deleted file mode 100644 index 2ad84a2..0000000 --- a/lib/Target/SystemZ/SystemZFrameLowering.cpp +++ /dev/null @@ -1,386 +0,0 @@ -//=====- SystemZFrameLowering.cpp - SystemZ Frame Information ------*- C++ -*-====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the SystemZ implementation of TargetFrameLowering class. 
-// -//===----------------------------------------------------------------------===// - -#include "SystemZFrameLowering.h" -#include "SystemZInstrBuilder.h" -#include "SystemZInstrInfo.h" -#include "SystemZMachineFunctionInfo.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" - -using namespace llvm; - -SystemZFrameLowering::SystemZFrameLowering(const SystemZSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 8, -160), STI(sti) { - // Fill the spill offsets map - static const unsigned SpillOffsTab[][2] = { - { SystemZ::R2D, 0x10 }, - { SystemZ::R3D, 0x18 }, - { SystemZ::R4D, 0x20 }, - { SystemZ::R5D, 0x28 }, - { SystemZ::R6D, 0x30 }, - { SystemZ::R7D, 0x38 }, - { SystemZ::R8D, 0x40 }, - { SystemZ::R9D, 0x48 }, - { SystemZ::R10D, 0x50 }, - { SystemZ::R11D, 0x58 }, - { SystemZ::R12D, 0x60 }, - { SystemZ::R13D, 0x68 }, - { SystemZ::R14D, 0x70 }, - { SystemZ::R15D, 0x78 } - }; - - RegSpillOffsets.grow(SystemZ::NUM_TARGET_REGS); - - for (unsigned i = 0, e = array_lengthof(SpillOffsTab); i != e; ++i) - RegSpillOffsets[SpillOffsTab[i][0]] = SpillOffsTab[i][1]; -} - -/// needsFP - Return true if the specified function should have a dedicated -/// frame pointer register. This is true if the function has variable sized -/// allocas or if frame pointer elimination is disabled. -bool SystemZFrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return DisableFramePointerElim(MF) || MFI->hasVarSizedObjects(); -} - -/// emitSPUpdate - Emit a series of instructions to increment / decrement the -/// stack pointer by a constant value. 
-static -void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - int64_t NumBytes, const TargetInstrInfo &TII) { - unsigned Opc; uint64_t Chunk; - bool isSub = NumBytes < 0; - uint64_t Offset = isSub ? -NumBytes : NumBytes; - - if (Offset >= (1LL << 15) - 1) { - Opc = SystemZ::ADD64ri32; - Chunk = (1LL << 31) - 1; - } else { - Opc = SystemZ::ADD64ri16; - Chunk = (1LL << 15) - 1; - } - - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - while (Offset) { - uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset; - MachineInstr *MI = - BuildMI(MBB, MBBI, DL, TII.get(Opc), SystemZ::R15D) - .addReg(SystemZ::R15D).addImm(isSub ? -ThisVal : ThisVal); - // The PSW implicit def is dead. - MI->getOperand(3).setIsDead(); - Offset -= ThisVal; - } -} - -void SystemZFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB - MachineFrameInfo *MFI = MF.getFrameInfo(); - const SystemZInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - SystemZMachineFunctionInfo *SystemZMFI = - MF.getInfo(); - MachineBasicBlock::iterator MBBI = MBB.begin(); - DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - // Get the number of bytes to allocate from the FrameInfo. - // Note that area for callee-saved stuff is already allocated, thus we need to - // 'undo' the stack movement. - uint64_t StackSize = MFI->getStackSize(); - StackSize -= SystemZMFI->getCalleeSavedFrameSize(); - - uint64_t NumBytes = StackSize - getOffsetOfLocalArea(); - - // Skip the callee-saved push instructions. 
- while (MBBI != MBB.end() && - (MBBI->getOpcode() == SystemZ::MOV64mr || - MBBI->getOpcode() == SystemZ::MOV64mrm)) - ++MBBI; - - if (MBBI != MBB.end()) - DL = MBBI->getDebugLoc(); - - // adjust stack pointer: R15 -= numbytes - if (StackSize || MFI->hasCalls()) { - assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) && - "Invalid stack frame calculation!"); - emitSPUpdate(MBB, MBBI, -(int64_t)NumBytes, TII); - } - - if (hasFP(MF)) { - // Update R11 with the new base value... - BuildMI(MBB, MBBI, DL, TII.get(SystemZ::MOV64rr), SystemZ::R11D) - .addReg(SystemZ::R15D); - - // Mark the FramePtr as live-in in every block except the entry. - for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end(); - I != E; ++I) - I->addLiveIn(SystemZ::R11D); - - } -} - -void SystemZFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - const SystemZInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - SystemZMachineFunctionInfo *SystemZMFI = - MF.getInfo(); - unsigned RetOpcode = MBBI->getOpcode(); - - switch (RetOpcode) { - case SystemZ::RET: break; // These are ok - default: - assert(0 && "Can only insert epilog into returning blocks"); - } - - // Get the number of bytes to allocate from the FrameInfo - // Note that area for callee-saved stuff is already allocated, thus we need to - // 'undo' the stack movement. - uint64_t StackSize = - MFI->getStackSize() - SystemZMFI->getCalleeSavedFrameSize(); - uint64_t NumBytes = StackSize - getOffsetOfLocalArea(); - - // Skip the final terminator instruction. - while (MBBI != MBB.begin()) { - MachineBasicBlock::iterator PI = prior(MBBI); - --MBBI; - if (!PI->getDesc().isTerminator()) - break; - } - - // During callee-saved restores emission stack frame was not yet finialized - // (and thus - the stack size was unknown). Tune the offset having full stack - // size in hands. 
- if (StackSize || MFI->hasCalls()) { - assert((MBBI->getOpcode() == SystemZ::MOV64rmm || - MBBI->getOpcode() == SystemZ::MOV64rm) && - "Expected to see callee-save register restore code"); - assert(MF.getRegInfo().isPhysRegUsed(SystemZ::R15D) && - "Invalid stack frame calculation!"); - - unsigned i = 0; - MachineInstr &MI = *MBBI; - while (!MI.getOperand(i).isImm()) { - ++i; - assert(i < MI.getNumOperands() && "Unexpected restore code!"); - } - - uint64_t Offset = NumBytes + MI.getOperand(i).getImm(); - // If Offset does not fit into 20-bit signed displacement field we need to - // emit some additional code... - if (Offset > 524287) { - // Fold the displacement into load instruction as much as possible. - NumBytes = Offset - 524287; - Offset = 524287; - emitSPUpdate(MBB, MBBI, NumBytes, TII); - } - - MI.getOperand(i).ChangeToImmediate(Offset); - } -} - -int SystemZFrameLowering::getFrameIndexOffset(const MachineFunction &MF, - int FI) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - const SystemZMachineFunctionInfo *SystemZMFI = - MF.getInfo(); - int Offset = MFI->getObjectOffset(FI) + MFI->getOffsetAdjustment(); - uint64_t StackSize = MFI->getStackSize(); - - // Fixed objects are really located in the "previous" frame. - if (FI < 0) - StackSize -= SystemZMFI->getCalleeSavedFrameSize(); - - Offset += StackSize - getOffsetOfLocalArea(); - - // Skip the register save area if we generated the stack frame. 
- if (StackSize || MFI->hasCalls()) - Offset -= getOffsetOfLocalArea(); - - return Offset; -} - -bool -SystemZFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - SystemZMachineFunctionInfo *MFI = MF.getInfo(); - unsigned CalleeFrameSize = 0; - - // Scan the callee-saved and find the bounds of register spill area. - unsigned LowReg = 0, HighReg = 0, StartOffset = -1U, EndOffset = 0; - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (!SystemZ::FP64RegClass.contains(Reg)) { - unsigned Offset = RegSpillOffsets[Reg]; - CalleeFrameSize += 8; - if (StartOffset > Offset) { - LowReg = Reg; StartOffset = Offset; - } - if (EndOffset < Offset) { - HighReg = Reg; EndOffset = RegSpillOffsets[Reg]; - } - } - } - - // Save information for epilogue inserter. - MFI->setCalleeSavedFrameSize(CalleeFrameSize); - MFI->setLowReg(LowReg); MFI->setHighReg(HighReg); - - // Save GPRs - if (StartOffset) { - // Build a store instruction. Use STORE MULTIPLE instruction if there are many - // registers to store, otherwise - just STORE. - MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ? - SystemZ::MOV64mr : SystemZ::MOV64mrm))); - - // Add store operands. - MIB.addReg(SystemZ::R15D).addImm(StartOffset); - if (LowReg == HighReg) - MIB.addReg(0); - MIB.addReg(LowReg, RegState::Kill); - if (LowReg != HighReg) - MIB.addReg(HighReg, RegState::Kill); - - // Do a second scan adding regs as being killed by instruction - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - // Add the callee-saved register as live-in. It's killed at the spill. 
- MBB.addLiveIn(Reg); - if (Reg != LowReg && Reg != HighReg) - MIB.addReg(Reg, RegState::ImplicitKill); - } - } - - // Save FPRs - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (SystemZ::FP64RegClass.contains(Reg)) { - MBB.addLiveIn(Reg); - TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i].getFrameIdx(), - &SystemZ::FP64RegClass, TRI); - } - } - - return true; -} - -bool -SystemZFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const { - if (CSI.empty()) - return false; - - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - - MachineFunction &MF = *MBB.getParent(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - SystemZMachineFunctionInfo *MFI = MF.getInfo(); - - // Restore FP registers - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (SystemZ::FP64RegClass.contains(Reg)) - TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), - &SystemZ::FP64RegClass, TRI); - } - - // Restore GP registers - unsigned LowReg = MFI->getLowReg(), HighReg = MFI->getHighReg(); - unsigned StartOffset = RegSpillOffsets[LowReg]; - - if (StartOffset) { - // Build a load instruction. Use LOAD MULTIPLE instruction if there are many - // registers to load, otherwise - just LOAD. - MachineInstrBuilder MIB = - BuildMI(MBB, MI, DL, TII.get((LowReg == HighReg ? - SystemZ::MOV64rm : SystemZ::MOV64rmm))); - // Add store operands. - MIB.addReg(LowReg, RegState::Define); - if (LowReg != HighReg) - MIB.addReg(HighReg, RegState::Define); - - MIB.addReg(hasFP(MF) ? 
SystemZ::R11D : SystemZ::R15D); - MIB.addImm(StartOffset); - if (LowReg == HighReg) - MIB.addReg(0); - - // Do a second scan adding regs as being defined by instruction - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - if (Reg != LowReg && Reg != HighReg) - MIB.addReg(Reg, RegState::ImplicitDefine); - } - } - - return true; -} - -void -SystemZFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - // Determine whether R15/R14 will ever be clobbered inside the function. And - // if yes - mark it as 'callee' saved. - MachineFrameInfo *FFI = MF.getFrameInfo(); - MachineRegisterInfo &MRI = MF.getRegInfo(); - - // Check whether high FPRs are ever used, if yes - we need to save R15 as - // well. - static const unsigned HighFPRs[] = { - SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L, - SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L, - SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, - SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S, - }; - - bool HighFPRsUsed = false; - for (unsigned i = 0, e = array_lengthof(HighFPRs); i != e; ++i) - HighFPRsUsed |= MRI.isPhysRegUsed(HighFPRs[i]); - - if (FFI->hasCalls()) - /* FIXME: function is varargs */ - /* FIXME: function grabs RA */ - /* FIXME: function calls eh_return */ - MRI.setPhysRegUsed(SystemZ::R14D); - - if (HighFPRsUsed || - FFI->hasCalls() || - FFI->getObjectIndexEnd() != 0 || // Contains automatic variables - FFI->hasVarSizedObjects() // Function calls dynamic alloca's - /* FIXME: function is varargs */) - MRI.setPhysRegUsed(SystemZ::R15D); -} diff --git a/lib/Target/SystemZ/SystemZFrameLowering.h b/lib/Target/SystemZ/SystemZFrameLowering.h deleted file mode 100644 index 1284b68..0000000 --- a/lib/Target/SystemZ/SystemZFrameLowering.h +++ /dev/null @@ -1,57 +0,0 @@ -//=- SystemZFrameLowering.h - Define frame lowering for z/System -*- C++ -*--=// -// -// The LLVM Compiler Infrastructure -// -// 
This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// - -#ifndef SYSTEMZ_FRAMEINFO_H -#define SYSTEMZ_FRAMEINFO_H - -#include "SystemZ.h" -#include "SystemZSubtarget.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/ADT/IndexedMap.h" - -namespace llvm { - class SystemZSubtarget; - -class SystemZFrameLowering : public TargetFrameLowering { - IndexedMap RegSpillOffsets; -protected: - const SystemZSubtarget &STI; - -public: - explicit SystemZFrameLowering(const SystemZSubtarget &sti); - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const; - bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - const std::vector &CSI, - const TargetRegisterInfo *TRI) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const; - - bool hasReservedCallFrame(const MachineFunction &MF) const { return true; } - bool hasFP(const MachineFunction &MF) const; - int getFrameIndexOffset(const MachineFunction &MF, int FI) const; -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp deleted file mode 100644 index 2186ff1..0000000 --- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +++ /dev/null @@ -1,779 +0,0 @@ -//==-- SystemZISelDAGToDAG.cpp - A dag to dag inst selector for SystemZ ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed 
under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the SystemZ target. -// -//===----------------------------------------------------------------------===// - -#include "SystemZ.h" -#include "SystemZTargetMachine.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - /// SystemZRRIAddressMode - This corresponds to rriaddr, but uses SDValue's - /// instead of register numbers for the leaves of the matched tree. - struct SystemZRRIAddressMode { - enum { - RegBase, - FrameIndexBase - } BaseType; - - struct { // This is really a union, discriminated by BaseType! 
- SDValue Reg; - int FrameIndex; - } Base; - - SDValue IndexReg; - int64_t Disp; - bool isRI; - - SystemZRRIAddressMode(bool RI = false) - : BaseType(RegBase), IndexReg(), Disp(0), isRI(RI) { - } - - void dump() { - errs() << "SystemZRRIAddressMode " << this << '\n'; - if (BaseType == RegBase) { - errs() << "Base.Reg "; - if (Base.Reg.getNode() != 0) - Base.Reg.getNode()->dump(); - else - errs() << "nul"; - errs() << '\n'; - } else { - errs() << " Base.FrameIndex " << Base.FrameIndex << '\n'; - } - if (!isRI) { - errs() << "IndexReg "; - if (IndexReg.getNode() != 0) IndexReg.getNode()->dump(); - else errs() << "nul"; - } - errs() << " Disp " << Disp << '\n'; - } - }; -} - -/// SystemZDAGToDAGISel - SystemZ specific code to select SystemZ machine -/// instructions for SelectionDAG operations. -/// -namespace { - class SystemZDAGToDAGISel : public SelectionDAGISel { - const SystemZTargetLowering &Lowering; - const SystemZSubtarget &Subtarget; - - void getAddressOperandsRI(const SystemZRRIAddressMode &AM, - SDValue &Base, SDValue &Disp); - void getAddressOperands(const SystemZRRIAddressMode &AM, - SDValue &Base, SDValue &Disp, - SDValue &Index); - - public: - SystemZDAGToDAGISel(SystemZTargetMachine &TM, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(TM, OptLevel), - Lowering(*TM.getTargetLowering()), - Subtarget(*TM.getSubtargetImpl()) { } - - virtual const char *getPassName() const { - return "SystemZ DAG->DAG Pattern Instruction Selection"; - } - - /// getI8Imm - Return a target constant with the specified value, of type - /// i8. - inline SDValue getI8Imm(uint64_t Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i8); - } - - /// getI16Imm - Return a target constant with the specified value, of type - /// i16. - inline SDValue getI16Imm(uint64_t Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i16); - } - - /// getI32Imm - Return a target constant with the specified value, of type - /// i32. 
- inline SDValue getI32Imm(uint64_t Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i32); - } - - // Include the pieces autogenerated from the target description. - #include "SystemZGenDAGISel.inc" - - private: - bool SelectAddrRI12Only(SDValue& Addr, - SDValue &Base, SDValue &Disp); - bool SelectAddrRI12(SDValue& Addr, - SDValue &Base, SDValue &Disp, - bool is12BitOnly = false); - bool SelectAddrRI(SDValue& Addr, SDValue &Base, SDValue &Disp); - bool SelectAddrRRI12(SDValue Addr, - SDValue &Base, SDValue &Disp, SDValue &Index); - bool SelectAddrRRI20(SDValue Addr, - SDValue &Base, SDValue &Disp, SDValue &Index); - bool SelectLAAddr(SDValue Addr, - SDValue &Base, SDValue &Disp, SDValue &Index); - - SDNode *Select(SDNode *Node); - - bool TryFoldLoad(SDNode *P, SDValue N, - SDValue &Base, SDValue &Disp, SDValue &Index); - - bool MatchAddress(SDValue N, SystemZRRIAddressMode &AM, - bool is12Bit, unsigned Depth = 0); - bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM); - }; -} // end anonymous namespace - -/// createSystemZISelDag - This pass converts a legalized DAG into a -/// SystemZ-specific DAG, ready for instruction scheduling. -/// -FunctionPass *llvm::createSystemZISelDag(SystemZTargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new SystemZDAGToDAGISel(TM, OptLevel); -} - -/// isImmSExt20 - This method tests to see if the node is either a 32-bit -/// or 64-bit immediate, and if the value can be accurately represented as a -/// sign extension from a 20-bit value. If so, this returns true and the -/// immediate. -static bool isImmSExt20(int64_t Val, int64_t &Imm) { - if (Val >= -524288 && Val <= 524287) { - Imm = Val; - return true; - } - return false; -} - -/// isImmZExt12 - This method tests to see if the node is either a 32-bit -/// or 64-bit immediate, and if the value can be accurately represented as a -/// zero extension from a 12-bit value. If so, this returns true and the -/// immediate. 
-static bool isImmZExt12(int64_t Val, int64_t &Imm) { - if (Val >= 0 && Val <= 0xFFF) { - Imm = Val; - return true; - } - return false; -} - -/// MatchAddress - Add the specified node to the specified addressing mode, -/// returning true if it cannot be done. This just pattern matches for the -/// addressing mode. -bool SystemZDAGToDAGISel::MatchAddress(SDValue N, SystemZRRIAddressMode &AM, - bool is12Bit, unsigned Depth) { - DebugLoc dl = N.getDebugLoc(); - DEBUG(errs() << "MatchAddress: "; AM.dump()); - // Limit recursion. - if (Depth > 5) - return MatchAddressBase(N, AM); - - // FIXME: We can perform better here. If we have something like - // (shift (add A, imm), N), we can try to reassociate stuff and fold shift of - // imm into addressing mode. - switch (N.getOpcode()) { - default: break; - case ISD::Constant: { - int64_t Val = cast(N)->getSExtValue(); - int64_t Imm = 0; - bool Match = (is12Bit ? - isImmZExt12(AM.Disp + Val, Imm) : - isImmSExt20(AM.Disp + Val, Imm)); - if (Match) { - AM.Disp = Imm; - return false; - } - break; - } - - case ISD::FrameIndex: - if (AM.BaseType == SystemZRRIAddressMode::RegBase && - AM.Base.Reg.getNode() == 0) { - AM.BaseType = SystemZRRIAddressMode::FrameIndexBase; - AM.Base.FrameIndex = cast(N)->getIndex(); - return false; - } - break; - - case ISD::SUB: { - // Given A-B, if A can be completely folded into the address and - // the index field with the index field unused, use -B as the index. - // This is a win if a has multiple parts that can be folded into - // the address. Also, this saves a mov if the base register has - // other uses, since it avoids a two-address sub instruction, however - // it costs an additional mov if the index register has other uses. - - // Test if the LHS of the sub can be folded. - SystemZRRIAddressMode Backup = AM; - if (MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) { - AM = Backup; - break; - } - // Test if the index field is free for use. 
- if (AM.IndexReg.getNode() || AM.isRI) { - AM = Backup; - break; - } - - // If the base is a register with multiple uses, this transformation may - // save a mov. Otherwise it's probably better not to do it. - if (AM.BaseType == SystemZRRIAddressMode::RegBase && - (!AM.Base.Reg.getNode() || AM.Base.Reg.getNode()->hasOneUse())) { - AM = Backup; - break; - } - - // Ok, the transformation is legal and appears profitable. Go for it. - SDValue RHS = N.getNode()->getOperand(1); - SDValue Zero = CurDAG->getConstant(0, N.getValueType()); - SDValue Neg = CurDAG->getNode(ISD::SUB, dl, N.getValueType(), Zero, RHS); - AM.IndexReg = Neg; - - // Insert the new nodes into the topological ordering. - if (Zero.getNode()->getNodeId() == -1 || - Zero.getNode()->getNodeId() > N.getNode()->getNodeId()) { - CurDAG->RepositionNode(N.getNode(), Zero.getNode()); - Zero.getNode()->setNodeId(N.getNode()->getNodeId()); - } - if (Neg.getNode()->getNodeId() == -1 || - Neg.getNode()->getNodeId() > N.getNode()->getNodeId()) { - CurDAG->RepositionNode(N.getNode(), Neg.getNode()); - Neg.getNode()->setNodeId(N.getNode()->getNodeId()); - } - return false; - } - - case ISD::ADD: { - SystemZRRIAddressMode Backup = AM; - if (!MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1) && - !MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1)) - return false; - AM = Backup; - if (!MatchAddress(N.getNode()->getOperand(1), AM, is12Bit, Depth+1) && - !MatchAddress(N.getNode()->getOperand(0), AM, is12Bit, Depth+1)) - return false; - AM = Backup; - - // If we couldn't fold both operands into the address at the same time, - // see if we can just put each operand into a register and fold at least - // the add. 
- if (!AM.isRI && - AM.BaseType == SystemZRRIAddressMode::RegBase && - !AM.Base.Reg.getNode() && !AM.IndexReg.getNode()) { - AM.Base.Reg = N.getNode()->getOperand(0); - AM.IndexReg = N.getNode()->getOperand(1); - return false; - } - break; - } - - case ISD::OR: - // Handle "X | C" as "X + C" iff X is known to have C bits clear. - if (ConstantSDNode *CN = dyn_cast(N.getOperand(1))) { - SystemZRRIAddressMode Backup = AM; - int64_t Offset = CN->getSExtValue(); - int64_t Imm = 0; - bool MatchOffset = (is12Bit ? - isImmZExt12(AM.Disp + Offset, Imm) : - isImmSExt20(AM.Disp + Offset, Imm)); - // The resultant disp must fit in 12 or 20-bits. - if (MatchOffset && - // LHS should be an addr mode. - !MatchAddress(N.getOperand(0), AM, is12Bit, Depth+1) && - // Check to see if the LHS & C is zero. - CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) { - AM.Disp = Imm; - return false; - } - AM = Backup; - } - break; - } - - return MatchAddressBase(N, AM); -} - -/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the -/// specified addressing mode without any further recursion. -bool SystemZDAGToDAGISel::MatchAddressBase(SDValue N, - SystemZRRIAddressMode &AM) { - // Is the base register already occupied? - if (AM.BaseType != SystemZRRIAddressMode::RegBase || AM.Base.Reg.getNode()) { - // If so, check to see if the index register is set. - if (AM.IndexReg.getNode() == 0 && !AM.isRI) { - AM.IndexReg = N; - return false; - } - - // Otherwise, we cannot select it. - return true; - } - - // Default, generate it as a register. 
- AM.BaseType = SystemZRRIAddressMode::RegBase; - AM.Base.Reg = N; - return false; -} - -void SystemZDAGToDAGISel::getAddressOperandsRI(const SystemZRRIAddressMode &AM, - SDValue &Base, SDValue &Disp) { - if (AM.BaseType == SystemZRRIAddressMode::RegBase) - Base = AM.Base.Reg; - else - Base = CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()); - Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i64); -} - -void SystemZDAGToDAGISel::getAddressOperands(const SystemZRRIAddressMode &AM, - SDValue &Base, SDValue &Disp, - SDValue &Index) { - getAddressOperandsRI(AM, Base, Disp); - Index = AM.IndexReg; -} - -/// Returns true if the address can be represented by a base register plus -/// an unsigned 12-bit displacement [r+imm]. -bool SystemZDAGToDAGISel::SelectAddrRI12Only(SDValue &Addr, - SDValue &Base, SDValue &Disp) { - return SelectAddrRI12(Addr, Base, Disp, /*is12BitOnly*/true); -} - -bool SystemZDAGToDAGISel::SelectAddrRI12(SDValue &Addr, - SDValue &Base, SDValue &Disp, - bool is12BitOnly) { - SystemZRRIAddressMode AM20(/*isRI*/true), AM12(/*isRI*/true); - bool Done = false; - - if (!Addr.hasOneUse()) { - unsigned Opcode = Addr.getOpcode(); - if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) { - // If we are able to fold N into addressing mode, then we'll allow it even - // if N has multiple uses. In general, addressing computation is used as - // addresses by all of its uses. But watch out for CopyToReg uses, that - // means the address computation is liveout. It will be computed by a LA - // so we want to avoid computing the address twice. 
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), - UE = Addr.getNode()->use_end(); UI != UE; ++UI) { - if (UI->getOpcode() == ISD::CopyToReg) { - MatchAddressBase(Addr, AM12); - Done = true; - break; - } - } - } - } - if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true)) - return false; - - // Check, whether we can match stuff using 20-bit displacements - if (!Done && !is12BitOnly && - !MatchAddress(Addr, AM20, /* is12Bit */ false)) - if (AM12.Disp == 0 && AM20.Disp != 0) - return false; - - DEBUG(errs() << "MatchAddress (final): "; AM12.dump()); - - EVT VT = Addr.getValueType(); - if (AM12.BaseType == SystemZRRIAddressMode::RegBase) { - if (!AM12.Base.Reg.getNode()) - AM12.Base.Reg = CurDAG->getRegister(0, VT); - } - - assert(AM12.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!"); - - getAddressOperandsRI(AM12, Base, Disp); - - return true; -} - -/// Returns true if the address can be represented by a base register plus -/// a signed 20-bit displacement [r+imm]. -bool SystemZDAGToDAGISel::SelectAddrRI(SDValue& Addr, - SDValue &Base, SDValue &Disp) { - SystemZRRIAddressMode AM(/*isRI*/true); - bool Done = false; - - if (!Addr.hasOneUse()) { - unsigned Opcode = Addr.getOpcode(); - if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) { - // If we are able to fold N into addressing mode, then we'll allow it even - // if N has multiple uses. In general, addressing computation is used as - // addresses by all of its uses. But watch out for CopyToReg uses, that - // means the address computation is liveout. It will be computed by a LA - // so we want to avoid computing the address twice. 
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), - UE = Addr.getNode()->use_end(); UI != UE; ++UI) { - if (UI->getOpcode() == ISD::CopyToReg) { - MatchAddressBase(Addr, AM); - Done = true; - break; - } - } - } - } - if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false)) - return false; - - DEBUG(errs() << "MatchAddress (final): "; AM.dump()); - - EVT VT = Addr.getValueType(); - if (AM.BaseType == SystemZRRIAddressMode::RegBase) { - if (!AM.Base.Reg.getNode()) - AM.Base.Reg = CurDAG->getRegister(0, VT); - } - - assert(AM.IndexReg.getNode() == 0 && "Invalid reg-imm address mode!"); - - getAddressOperandsRI(AM, Base, Disp); - - return true; -} - -/// Returns true if the address can be represented by a base register plus -/// index register plus an unsigned 12-bit displacement [base + idx + imm]. -bool SystemZDAGToDAGISel::SelectAddrRRI12(SDValue Addr, - SDValue &Base, SDValue &Disp, SDValue &Index) { - SystemZRRIAddressMode AM20, AM12; - bool Done = false; - - if (!Addr.hasOneUse()) { - unsigned Opcode = Addr.getOpcode(); - if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) { - // If we are able to fold N into addressing mode, then we'll allow it even - // if N has multiple uses. In general, addressing computation is used as - // addresses by all of its uses. But watch out for CopyToReg uses, that - // means the address computation is liveout. It will be computed by a LA - // so we want to avoid computing the address twice. 
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), - UE = Addr.getNode()->use_end(); UI != UE; ++UI) { - if (UI->getOpcode() == ISD::CopyToReg) { - MatchAddressBase(Addr, AM12); - Done = true; - break; - } - } - } - } - if (!Done && MatchAddress(Addr, AM12, /* is12Bit */ true)) - return false; - - // Check, whether we can match stuff using 20-bit displacements - if (!Done && !MatchAddress(Addr, AM20, /* is12Bit */ false)) - if (AM12.Disp == 0 && AM20.Disp != 0) - return false; - - DEBUG(errs() << "MatchAddress (final): "; AM12.dump()); - - EVT VT = Addr.getValueType(); - if (AM12.BaseType == SystemZRRIAddressMode::RegBase) { - if (!AM12.Base.Reg.getNode()) - AM12.Base.Reg = CurDAG->getRegister(0, VT); - } - - if (!AM12.IndexReg.getNode()) - AM12.IndexReg = CurDAG->getRegister(0, VT); - - getAddressOperands(AM12, Base, Disp, Index); - - return true; -} - -/// Returns true if the address can be represented by a base register plus -/// index register plus a signed 20-bit displacement [base + idx + imm]. -bool SystemZDAGToDAGISel::SelectAddrRRI20(SDValue Addr, - SDValue &Base, SDValue &Disp, SDValue &Index) { - SystemZRRIAddressMode AM; - bool Done = false; - - if (!Addr.hasOneUse()) { - unsigned Opcode = Addr.getOpcode(); - if (Opcode != ISD::Constant && Opcode != ISD::FrameIndex) { - // If we are able to fold N into addressing mode, then we'll allow it even - // if N has multiple uses. In general, addressing computation is used as - // addresses by all of its uses. But watch out for CopyToReg uses, that - // means the address computation is liveout. It will be computed by a LA - // so we want to avoid computing the address twice. 
- for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), - UE = Addr.getNode()->use_end(); UI != UE; ++UI) { - if (UI->getOpcode() == ISD::CopyToReg) { - MatchAddressBase(Addr, AM); - Done = true; - break; - } - } - } - } - if (!Done && MatchAddress(Addr, AM, /* is12Bit */ false)) - return false; - - DEBUG(errs() << "MatchAddress (final): "; AM.dump()); - - EVT VT = Addr.getValueType(); - if (AM.BaseType == SystemZRRIAddressMode::RegBase) { - if (!AM.Base.Reg.getNode()) - AM.Base.Reg = CurDAG->getRegister(0, VT); - } - - if (!AM.IndexReg.getNode()) - AM.IndexReg = CurDAG->getRegister(0, VT); - - getAddressOperands(AM, Base, Disp, Index); - - return true; -} - -/// SelectLAAddr - it calls SelectAddr and determines if the maximal addressing -/// mode it matches can be cost effectively emitted as an LA/LAY instruction. -bool SystemZDAGToDAGISel::SelectLAAddr(SDValue Addr, - SDValue &Base, SDValue &Disp, SDValue &Index) { - SystemZRRIAddressMode AM; - - if (MatchAddress(Addr, AM, false)) - return false; - - EVT VT = Addr.getValueType(); - unsigned Complexity = 0; - if (AM.BaseType == SystemZRRIAddressMode::RegBase) - if (AM.Base.Reg.getNode()) - Complexity = 1; - else - AM.Base.Reg = CurDAG->getRegister(0, VT); - else if (AM.BaseType == SystemZRRIAddressMode::FrameIndexBase) - Complexity = 4; - - if (AM.IndexReg.getNode()) - Complexity += 1; - else - AM.IndexReg = CurDAG->getRegister(0, VT); - - if (AM.Disp && (AM.Base.Reg.getNode() || AM.IndexReg.getNode())) - Complexity += 1; - - if (Complexity > 2) { - getAddressOperands(AM, Base, Disp, Index); - return true; - } - - return false; -} - -bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N, - SDValue &Base, SDValue &Disp, SDValue &Index) { - if (ISD::isNON_EXTLoad(N.getNode()) && - IsLegalToFold(N, P, P, OptLevel)) - return SelectAddrRRI20(N.getOperand(1), Base, Disp, Index); - return false; -} - -SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) { - EVT NVT = Node->getValueType(0); - DebugLoc dl = 
Node->getDebugLoc(); - unsigned Opcode = Node->getOpcode(); - - // Dump information about the Node being selected - DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n"); - - // If we have a custom node, we already have selected! - if (Node->isMachineOpcode()) { - DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); - return NULL; // Already selected. - } - - switch (Opcode) { - default: break; - case ISD::SDIVREM: { - unsigned Opc, MOpc; - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - - EVT ResVT; - bool is32Bit = false; - switch (NVT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported VT!"); - case MVT::i32: - Opc = SystemZ::SDIVREM32r; MOpc = SystemZ::SDIVREM32m; - ResVT = MVT::v2i64; - is32Bit = true; - break; - case MVT::i64: - Opc = SystemZ::SDIVREM64r; MOpc = SystemZ::SDIVREM64m; - ResVT = MVT::v2i64; - break; - } - - SDValue Tmp0, Tmp1, Tmp2; - bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2); - - // Prepare the dividend - SDNode *Dividend; - if (is32Bit) - Dividend = CurDAG->getMachineNode(SystemZ::MOVSX64rr32, dl, MVT::i64, N0); - else - Dividend = N0.getNode(); - - // Insert prepared dividend into suitable 'subreg' - SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, ResVT); - Dividend = - CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT, - SDValue(Tmp, 0), SDValue(Dividend, 0), - CurDAG->getTargetConstant(SystemZ::subreg_odd, MVT::i32)); - - SDNode *Result; - SDValue DivVal = SDValue(Dividend, 0); - if (foldedLoad) { - SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) }; - Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other, - Ops, array_lengthof(Ops)); - // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(Result, 1)); - } else { - Result = CurDAG->getMachineNode(Opc, dl, ResVT, SDValue(Dividend, 0), N1); - } - - // Copy the division (odd subreg) result, if it is needed. 
- if (!SDValue(Node, 0).use_empty()) { - unsigned SubRegIdx = (is32Bit ? - SystemZ::subreg_odd32 : SystemZ::subreg_odd); - SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - dl, NVT, - SDValue(Result, 0), - CurDAG->getTargetConstant(SubRegIdx, - MVT::i32)); - - ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); - DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); - } - - // Copy the remainder (even subreg) result, if it is needed. - if (!SDValue(Node, 1).use_empty()) { - unsigned SubRegIdx = (is32Bit ? - SystemZ::subreg_32bit : SystemZ::subreg_even); - SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - dl, NVT, - SDValue(Result, 0), - CurDAG->getTargetConstant(SubRegIdx, - MVT::i32)); - - ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); - DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); - } - - return NULL; - } - case ISD::UDIVREM: { - unsigned Opc, MOpc, ClrOpc; - SDValue N0 = Node->getOperand(0); - SDValue N1 = Node->getOperand(1); - EVT ResVT; - - bool is32Bit = false; - switch (NVT.getSimpleVT().SimpleTy) { - default: assert(0 && "Unsupported VT!"); - case MVT::i32: - Opc = SystemZ::UDIVREM32r; MOpc = SystemZ::UDIVREM32m; - ClrOpc = SystemZ::MOV64Pr0_even; - ResVT = MVT::v2i32; - is32Bit = true; - break; - case MVT::i64: - Opc = SystemZ::UDIVREM64r; MOpc = SystemZ::UDIVREM64m; - ClrOpc = SystemZ::MOV128r0_even; - ResVT = MVT::v2i64; - break; - } - - SDValue Tmp0, Tmp1, Tmp2; - bool foldedLoad = TryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2); - - // Prepare the dividend - SDNode *Dividend = N0.getNode(); - - // Insert prepared dividend into suitable 'subreg' - SDNode *Tmp = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, - dl, ResVT); - { - unsigned SubRegIdx = (is32Bit ? 
- SystemZ::subreg_odd32 : SystemZ::subreg_odd); - Dividend = - CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, dl, ResVT, - SDValue(Tmp, 0), SDValue(Dividend, 0), - CurDAG->getTargetConstant(SubRegIdx, MVT::i32)); - } - - // Zero out even subreg - Dividend = CurDAG->getMachineNode(ClrOpc, dl, ResVT, SDValue(Dividend, 0)); - - SDValue DivVal = SDValue(Dividend, 0); - SDNode *Result; - if (foldedLoad) { - SDValue Ops[] = { DivVal, Tmp0, Tmp1, Tmp2, N1.getOperand(0) }; - Result = CurDAG->getMachineNode(MOpc, dl, ResVT, MVT::Other, - Ops, array_lengthof(Ops)); - // Update the chain. - ReplaceUses(N1.getValue(1), SDValue(Result, 1)); - } else { - Result = CurDAG->getMachineNode(Opc, dl, ResVT, DivVal, N1); - } - - // Copy the division (odd subreg) result, if it is needed. - if (!SDValue(Node, 0).use_empty()) { - unsigned SubRegIdx = (is32Bit ? - SystemZ::subreg_odd32 : SystemZ::subreg_odd); - SDNode *Div = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - dl, NVT, - SDValue(Result, 0), - CurDAG->getTargetConstant(SubRegIdx, - MVT::i32)); - ReplaceUses(SDValue(Node, 0), SDValue(Div, 0)); - DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); - } - - // Copy the remainder (even subreg) result, if it is needed. - if (!SDValue(Node, 1).use_empty()) { - unsigned SubRegIdx = (is32Bit ? 
- SystemZ::subreg_32bit : SystemZ::subreg_even); - SDNode *Rem = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, - dl, NVT, - SDValue(Result, 0), - CurDAG->getTargetConstant(SubRegIdx, - MVT::i32)); - ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0)); - DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n"); - } - - return NULL; - } - } - - // Select the default instruction - SDNode *ResNode = SelectCode(Node); - - DEBUG(errs() << "=> "; - if (ResNode == NULL || ResNode == Node) - Node->dump(CurDAG); - else - ResNode->dump(CurDAG); - errs() << "\n"; - ); - return ResNode; -} diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp deleted file mode 100644 index 48ca99f..0000000 --- a/lib/Target/SystemZ/SystemZISelLowering.cpp +++ /dev/null @@ -1,868 +0,0 @@ -//===-- SystemZISelLowering.cpp - SystemZ DAG Lowering Implementation -----==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SystemZTargetLowering class. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "systemz-lower" - -#include "SystemZISelLowering.h" -#include "SystemZ.h" -#include "SystemZTargetMachine.h" -#include "SystemZSubtarget.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/CallingConv.h" -#include "llvm/GlobalVariable.h" -#include "llvm/GlobalAlias.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/VectorExtras.h" -using namespace llvm; - -SystemZTargetLowering::SystemZTargetLowering(SystemZTargetMachine &tm) : - TargetLowering(tm, new TargetLoweringObjectFileELF()), - Subtarget(*tm.getSubtargetImpl()), TM(tm) { - - RegInfo = TM.getRegisterInfo(); - - // Set up the register classes. 
- addRegisterClass(MVT::i32, SystemZ::GR32RegisterClass); - addRegisterClass(MVT::i64, SystemZ::GR64RegisterClass); - addRegisterClass(MVT::v2i32,SystemZ::GR64PRegisterClass); - addRegisterClass(MVT::v2i64,SystemZ::GR128RegisterClass); - - if (!UseSoftFloat) { - addRegisterClass(MVT::f32, SystemZ::FP32RegisterClass); - addRegisterClass(MVT::f64, SystemZ::FP64RegisterClass); - } - - // Compute derived properties from the register classes - computeRegisterProperties(); - - // Provide all sorts of operation actions - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - - setLoadExtAction(ISD::SEXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::f32, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - - setLoadExtAction(ISD::SEXTLOAD, MVT::f64, Expand); - setLoadExtAction(ISD::ZEXTLOAD, MVT::f64, Expand); - setLoadExtAction(ISD::EXTLOAD, MVT::f64, Expand); - - setStackPointerRegisterToSaveRestore(SystemZ::R15D); - - // TODO: It may be better to default to latency-oriented scheduling, however - // LLVM's current latency-oriented scheduler can't handle physreg definitions - // such as SystemZ has with PSW, so set this to the register-pressure - // scheduler, because it can. - setSchedulingPreference(Sched::RegPressure); - - setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? 
- - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BRCOND, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::i32, Custom); - setOperationAction(ISD::BR_CC, MVT::i64, Custom); - setOperationAction(ISD::BR_CC, MVT::f32, Custom); - setOperationAction(ISD::BR_CC, MVT::f64, Custom); - setOperationAction(ISD::ConstantPool, MVT::i32, Custom); - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); - setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - setOperationAction(ISD::JumpTable, MVT::i64, Custom); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIV, MVT::i32, Expand); - setOperationAction(ISD::SDIV, MVT::i64, Expand); - setOperationAction(ISD::UDIV, MVT::i64, Expand); - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i64, Expand); - setOperationAction(ISD::UREM, MVT::i64, Expand); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - - setOperationAction(ISD::CTPOP, MVT::i32, Expand); - setOperationAction(ISD::CTPOP, MVT::i64, Expand); - setOperationAction(ISD::CTTZ, MVT::i32, Expand); - setOperationAction(ISD::CTTZ, MVT::i64, Expand); - setOperationAction(ISD::CTLZ, MVT::i32, Promote); - setOperationAction(ISD::CTLZ, MVT::i64, Legal); - - // FIXME: Can we lower these 2 efficiently? 
- setOperationAction(ISD::SETCC, MVT::i32, Expand); - setOperationAction(ISD::SETCC, MVT::i64, Expand); - setOperationAction(ISD::SETCC, MVT::f32, Expand); - setOperationAction(ISD::SETCC, MVT::f64, Expand); - setOperationAction(ISD::SELECT, MVT::i32, Expand); - setOperationAction(ISD::SELECT, MVT::i64, Expand); - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); - setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::i64, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); - setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); - - setOperationAction(ISD::MULHS, MVT::i64, Expand); - setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand); - - // FIXME: Can we support these natively? - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i64, Expand); - setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); - setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); - - // Lower some FP stuff - setOperationAction(ISD::FSIN, MVT::f32, Expand); - setOperationAction(ISD::FSIN, MVT::f64, Expand); - setOperationAction(ISD::FCOS, MVT::f32, Expand); - setOperationAction(ISD::FCOS, MVT::f64, Expand); - setOperationAction(ISD::FREM, MVT::f32, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f32, Expand); - setOperationAction(ISD::FMA, MVT::f64, Expand); - - // We have only 64-bit bitconverts - setOperationAction(ISD::BITCAST, MVT::f32, Expand); - setOperationAction(ISD::BITCAST, MVT::i32, Expand); - - setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand); - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - - setMinFunctionAlignment(1); -} - -SDValue SystemZTargetLowering::LowerOperation(SDValue Op, - SelectionDAG 
&DAG) const { - switch (Op.getOpcode()) { - case ISD::BR_CC: return LowerBR_CC(Op, DAG); - case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); - case ISD::ConstantPool: return LowerConstantPool(Op, DAG); - default: - llvm_unreachable("Should not custom lower this!"); - return SDValue(); - } -} - -bool SystemZTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { - if (UseSoftFloat || (VT != MVT::f32 && VT != MVT::f64)) - return false; - - // +0.0 lzer - // +0.0f lzdr - // -0.0 lzer + lner - // -0.0f lzdr + lndr - return Imm.isZero() || Imm.isNegZero(); -} - -//===----------------------------------------------------------------------===// -// SystemZ Inline Assembly Support -//===----------------------------------------------------------------------===// - -/// getConstraintType - Given a constraint letter, return the type of -/// constraint it is for this target. 
-TargetLowering::ConstraintType -SystemZTargetLowering::getConstraintType(const std::string &Constraint) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - case 'r': - return C_RegisterClass; - default: - break; - } - } - return TargetLowering::getConstraintType(Constraint); -} - -std::pair -SystemZTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { - if (Constraint.size() == 1) { - // GCC Constraint Letters - switch (Constraint[0]) { - default: break; - case 'r': // GENERAL_REGS - if (VT == MVT::i32) - return std::make_pair(0U, SystemZ::GR32RegisterClass); - else if (VT == MVT::i128) - return std::make_pair(0U, SystemZ::GR128RegisterClass); - - return std::make_pair(0U, SystemZ::GR64RegisterClass); - } - } - - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); -} - -//===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// - -#include "SystemZGenCallingConv.inc" - -SDValue -SystemZTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl - &Ins, - DebugLoc dl, - SelectionDAG &DAG, - SmallVectorImpl &InVals) - const { - - switch (CallConv) { - default: - llvm_unreachable("Unsupported calling convention"); - case CallingConv::C: - case CallingConv::Fast: - return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); - } -} - -SDValue -SystemZTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - // SystemZ target does not yet support tail call optimization. 
- isTailCall = false; - - switch (CallConv) { - default: - llvm_unreachable("Unsupported calling convention"); - case CallingConv::Fast: - case CallingConv::C: - return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, OutVals, Ins, dl, DAG, InVals); - } -} - -/// LowerCCCArguments - transform physical registers into virtual registers and -/// generate load operations for arguments places on the stack. -// FIXME: struct return stuff -// FIXME: varargs -SDValue -SystemZTargetLowering::LowerCCCArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl - &Ins, - DebugLoc dl, - SelectionDAG &DAG, - SmallVectorImpl &InVals) - const { - - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineRegisterInfo &RegInfo = MF.getRegInfo(); - - // Assign locations to all of the incoming arguments. - SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); - - if (isVarArg) - report_fatal_error("Varargs not supported yet"); - - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - SDValue ArgValue; - CCValAssign &VA = ArgLocs[i]; - EVT LocVT = VA.getLocVT(); - if (VA.isRegLoc()) { - // Arguments passed in registers - TargetRegisterClass *RC; - switch (LocVT.getSimpleVT().SimpleTy) { - default: -#ifndef NDEBUG - errs() << "LowerFormalArguments Unhandled argument type: " - << LocVT.getSimpleVT().SimpleTy - << "\n"; -#endif - llvm_unreachable(0); - case MVT::i64: - RC = SystemZ::GR64RegisterClass; - break; - case MVT::f32: - RC = SystemZ::FP32RegisterClass; - break; - case MVT::f64: - RC = SystemZ::FP64RegisterClass; - break; - } - - unsigned VReg = RegInfo.createVirtualRegister(RC); - RegInfo.addLiveIn(VA.getLocReg(), VReg); - ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT); - } else { - // Sanity check - assert(VA.isMemLoc()); - - // Create the 
nodes corresponding to a load from this parameter slot. - // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8, - VA.getLocMemOffset(), true); - - // Create the SelectionDAG nodes corresponding to a load - // from this parameter - SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); - ArgValue = DAG.getLoad(LocVT, dl, Chain, FIN, - MachinePointerInfo::getFixedStack(FI), - false, false, 0); - } - - // If this is an 8/16/32-bit value, it is really passed promoted to 64 - // bits. Insert an assert[sz]ext to capture this, then truncate to the - // right size. - if (VA.getLocInfo() == CCValAssign::SExt) - ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue, - DAG.getValueType(VA.getValVT())); - else if (VA.getLocInfo() == CCValAssign::ZExt) - ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue, - DAG.getValueType(VA.getValVT())); - - if (VA.getLocInfo() != CCValAssign::Full) - ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); - - InVals.push_back(ArgValue); - } - - return Chain; -} - -/// LowerCCCCallTo - functions arguments are copied from virtual regs to -/// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. -/// TODO: sret. -SDValue -SystemZTargetLowering::LowerCCCCallTo(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, - const SmallVectorImpl - &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - MachineFunction &MF = DAG.getMachineFunction(); - const TargetFrameLowering *TFI = TM.getFrameLowering(); - - // Offset to first argument stack slot. - const unsigned FirstArgOffset = 160; - - // Analyze operands of the call, assigning locations to each operand. 
- SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - - CCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); - - // Get a count of how many bytes are to be pushed on the stack. - unsigned NumBytes = CCInfo.getNextStackOffset(); - - Chain = DAG.getCALLSEQ_START(Chain ,DAG.getConstant(NumBytes, - getPointerTy(), true)); - - SmallVector, 4> RegsToPass; - SmallVector MemOpChains; - SDValue StackPtr; - - // Walk the register/memloc assignments, inserting copies/loads. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - - SDValue Arg = OutVals[i]; - - // Promote the value if needed. - switch (VA.getLocInfo()) { - default: assert(0 && "Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::SExt: - Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); - break; - case CCValAssign::ZExt: - Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); - break; - case CCValAssign::AExt: - Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); - break; - } - - // Arguments that can be passed on register must be kept at RegsToPass - // vector - if (VA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { - assert(VA.isMemLoc()); - - if (StackPtr.getNode() == 0) - StackPtr = - DAG.getCopyFromReg(Chain, dl, - (TFI->hasFP(MF) ? - SystemZ::R11D : SystemZ::R15D), - getPointerTy()); - - unsigned Offset = FirstArgOffset + VA.getLocMemOffset(); - SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), - StackPtr, - DAG.getIntPtrConstant(Offset)); - - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(), - false, false, 0)); - } - } - - // Transform all store nodes into one single node because all store nodes are - // independent of each other. 
- if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); - - // Build a sequence of copy-to-reg nodes chained together with token chain and - // flag operands which copy the outgoing args into registers. The InFlag in - // necessary since all emitted instructions must be stuck together. - SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // If the callee is a GlobalAddress node (quite common, every direct call is) - // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. - // Likewise ExternalSymbol -> TargetExternalSymbol. - if (GlobalAddressSDNode *G = dyn_cast(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy()); - else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) - Callee = DAG.getTargetExternalSymbol(E->getSymbol(), getPointerTy()); - - // Returns a chain & a flag for retval copy to use. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - SmallVector Ops; - Ops.push_back(Chain); - Ops.push_back(Callee); - - // Add argument registers to the end of the list so that they are - // known live into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - if (InFlag.getNode()) - Ops.push_back(InFlag); - - Chain = DAG.getNode(SystemZISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - - // Create the CALLSEQ_END node. - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getConstant(NumBytes, getPointerTy(), true), - DAG.getConstant(0, getPointerTy(), true), - InFlag); - InFlag = Chain.getValue(1); - - // Handle result values, copying them out of physregs into vregs that we - // return. 
- return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, - DAG, InVals); -} - -/// LowerCallResult - Lower the result values of a call into the -/// appropriate copies out of appropriate physical registers. -/// -SDValue -SystemZTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl - &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - - // Assign locations to each value returned by this call. - SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - - CCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ); - - // Copy all of the result registers out of their specified physreg. - for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign &VA = RVLocs[i]; - - Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), - VA.getLocVT(), InFlag).getValue(1); - SDValue RetValue = Chain.getValue(0); - InFlag = Chain.getValue(2); - - // If this is an 8/16/32-bit value, it is really passed promoted to 64 - // bits. Insert an assert[sz]ext to capture this, then truncate to the - // right size. 
- if (VA.getLocInfo() == CCValAssign::SExt) - RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue, - DAG.getValueType(VA.getValVT())); - else if (VA.getLocInfo() == CCValAssign::ZExt) - RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue, - DAG.getValueType(VA.getValVT())); - - if (VA.getLocInfo() != CCValAssign::Full) - RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue); - - InVals.push_back(RetValue); - } - - return Chain; -} - - -SDValue -SystemZTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { - - // CCValAssign - represent the assignment of the return value to a location - SmallVector RVLocs; - - // CCState - Info about the registers and stack slot. - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - - // Analize return values. - CCInfo.AnalyzeReturn(Outs, RetCC_SystemZ); - - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - if (RVLocs[i].isRegLoc()) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - - SDValue Flag; - - // Copy the result values into the output registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign &VA = RVLocs[i]; - SDValue ResValue = OutVals[i]; - assert(VA.isRegLoc() && "Can only return in registers!"); - - // If this is an 8/16/32-bit value, it is really should be passed promoted - // to 64 bits. 
- if (VA.getLocInfo() == CCValAssign::SExt) - ResValue = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ResValue); - else if (VA.getLocInfo() == CCValAssign::ZExt) - ResValue = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ResValue); - else if (VA.getLocInfo() == CCValAssign::AExt) - ResValue = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ResValue); - - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), ResValue, Flag); - - // Guarantee that all emitted copies are stuck together, - // avoiding something bad. - Flag = Chain.getValue(1); - } - - if (Flag.getNode()) - return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - - // Return Void - return DAG.getNode(SystemZISD::RET_FLAG, dl, MVT::Other, Chain); -} - -SDValue SystemZTargetLowering::EmitCmp(SDValue LHS, SDValue RHS, - ISD::CondCode CC, SDValue &SystemZCC, - SelectionDAG &DAG) const { - // FIXME: Emit a test if RHS is zero - - bool isUnsigned = false; - SystemZCC::CondCodes TCC; - switch (CC) { - default: - llvm_unreachable("Invalid integer condition!"); - case ISD::SETEQ: - case ISD::SETOEQ: - TCC = SystemZCC::E; - break; - case ISD::SETUEQ: - TCC = SystemZCC::NLH; - break; - case ISD::SETNE: - case ISD::SETONE: - TCC = SystemZCC::NE; - break; - case ISD::SETUNE: - TCC = SystemZCC::LH; - break; - case ISD::SETO: - TCC = SystemZCC::O; - break; - case ISD::SETUO: - TCC = SystemZCC::NO; - break; - case ISD::SETULE: - if (LHS.getValueType().isFloatingPoint()) { - TCC = SystemZCC::NH; - break; - } - isUnsigned = true; // FALLTHROUGH - case ISD::SETLE: - case ISD::SETOLE: - TCC = SystemZCC::LE; - break; - case ISD::SETUGE: - if (LHS.getValueType().isFloatingPoint()) { - TCC = SystemZCC::NL; - break; - } - isUnsigned = true; // FALLTHROUGH - case ISD::SETGE: - case ISD::SETOGE: - TCC = SystemZCC::HE; - break; - case ISD::SETUGT: - if (LHS.getValueType().isFloatingPoint()) { - TCC = SystemZCC::NLE; - break; - } - isUnsigned = true; // FALLTHROUGH - case ISD::SETGT: - case ISD::SETOGT: - TCC 
= SystemZCC::H; - break; - case ISD::SETULT: - if (LHS.getValueType().isFloatingPoint()) { - TCC = SystemZCC::NHE; - break; - } - isUnsigned = true; // FALLTHROUGH - case ISD::SETLT: - case ISD::SETOLT: - TCC = SystemZCC::L; - break; - } - - SystemZCC = DAG.getConstant(TCC, MVT::i32); - - DebugLoc dl = LHS.getDebugLoc(); - return DAG.getNode((isUnsigned ? SystemZISD::UCMP : SystemZISD::CMP), - dl, MVT::i64, LHS, RHS); -} - - -SDValue SystemZTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - SDValue Chain = Op.getOperand(0); - ISD::CondCode CC = cast(Op.getOperand(1))->get(); - SDValue LHS = Op.getOperand(2); - SDValue RHS = Op.getOperand(3); - SDValue Dest = Op.getOperand(4); - DebugLoc dl = Op.getDebugLoc(); - - SDValue SystemZCC; - SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG); - return DAG.getNode(SystemZISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, SystemZCC, Flag); -} - -SDValue SystemZTargetLowering::LowerSELECT_CC(SDValue Op, - SelectionDAG &DAG) const { - SDValue LHS = Op.getOperand(0); - SDValue RHS = Op.getOperand(1); - SDValue TrueV = Op.getOperand(2); - SDValue FalseV = Op.getOperand(3); - ISD::CondCode CC = cast(Op.getOperand(4))->get(); - DebugLoc dl = Op.getDebugLoc(); - - SDValue SystemZCC; - SDValue Flag = EmitCmp(LHS, RHS, CC, SystemZCC, DAG); - - SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Glue); - SmallVector Ops; - Ops.push_back(TrueV); - Ops.push_back(FalseV); - Ops.push_back(SystemZCC); - Ops.push_back(Flag); - - return DAG.getNode(SystemZISD::SELECT, dl, VTs, &Ops[0], Ops.size()); -} - -SDValue SystemZTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); - const GlobalValue *GV = cast(Op)->getGlobal(); - int64_t Offset = cast(Op)->getOffset(); - - bool IsPic = getTargetMachine().getRelocationModel() == Reloc::PIC_; - bool ExtraLoadRequired = - Subtarget.GVRequiresExtraLoad(GV, getTargetMachine(), false); - - SDValue Result; - if (!IsPic && 
!ExtraLoadRequired) { - Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), Offset); - Offset = 0; - } else { - unsigned char OpFlags = 0; - if (ExtraLoadRequired) - OpFlags = SystemZII::MO_GOTENT; - - Result = DAG.getTargetGlobalAddress(GV, dl, getPointerTy(), 0, OpFlags); - } - - Result = DAG.getNode(SystemZISD::PCRelativeWrapper, dl, - getPointerTy(), Result); - - if (ExtraLoadRequired) - Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, 0); - - // If there was a non-zero offset that we didn't fold, create an explicit - // addition for it. - if (Offset != 0) - Result = DAG.getNode(ISD::ADD, dl, getPointerTy(), Result, - DAG.getConstant(Offset, getPointerTy())); - - return Result; -} - -// FIXME: PIC here -SDValue SystemZTargetLowering::LowerJumpTable(SDValue Op, - SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); - JumpTableSDNode *JT = cast(Op); - SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), getPointerTy()); - - return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result); -} - - -// FIXME: PIC here -// FIXME: This is just dirty hack. 
We need to lower cpool properly -SDValue SystemZTargetLowering::LowerConstantPool(SDValue Op, - SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); - ConstantPoolSDNode *CP = cast(Op); - - SDValue Result = DAG.getTargetConstantPool(CP->getConstVal(), getPointerTy(), - CP->getAlignment(), - CP->getOffset()); - - return DAG.getNode(SystemZISD::PCRelativeWrapper, dl, getPointerTy(), Result); -} - -const char *SystemZTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - case SystemZISD::RET_FLAG: return "SystemZISD::RET_FLAG"; - case SystemZISD::CALL: return "SystemZISD::CALL"; - case SystemZISD::BRCOND: return "SystemZISD::BRCOND"; - case SystemZISD::CMP: return "SystemZISD::CMP"; - case SystemZISD::UCMP: return "SystemZISD::UCMP"; - case SystemZISD::SELECT: return "SystemZISD::SELECT"; - case SystemZISD::PCRelativeWrapper: return "SystemZISD::PCRelativeWrapper"; - default: return NULL; - } -} - -//===----------------------------------------------------------------------===// -// Other Lowering Code -//===----------------------------------------------------------------------===// - -MachineBasicBlock* -SystemZTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { - const SystemZInstrInfo &TII = *TM.getInstrInfo(); - DebugLoc dl = MI->getDebugLoc(); - assert((MI->getOpcode() == SystemZ::Select32 || - MI->getOpcode() == SystemZ::SelectF32 || - MI->getOpcode() == SystemZ::Select64 || - MI->getOpcode() == SystemZ::SelectF64) && - "Unexpected instr type to insert"); - - // To "insert" a SELECT instruction, we actually have to insert the diamond - // control-flow pattern. The incoming instruction knows the destination vreg - // to set, the condition code register to branch on, the true/false values to - // select between, and a branch opcode to use. - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - MachineFunction::iterator I = BB; - ++I; - - // thisMBB: - // ... - // TrueVal = ... 
- // cmpTY ccX, r1, r2 - // jCC copy1MBB - // fallthrough --> copy0MBB - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *copy1MBB = F->CreateMachineBasicBlock(LLVM_BB); - SystemZCC::CondCodes CC = (SystemZCC::CondCodes)MI->getOperand(3).getImm(); - F->insert(I, copy0MBB); - F->insert(I, copy1MBB); - // Update machine-CFG edges by transferring all successors of the current - // block to the new block which will contain the Phi node for the select. - copy1MBB->splice(copy1MBB->begin(), BB, - llvm::next(MachineBasicBlock::iterator(MI)), - BB->end()); - copy1MBB->transferSuccessorsAndUpdatePHIs(BB); - // Next, add the true and fallthrough blocks as its successors. - BB->addSuccessor(copy0MBB); - BB->addSuccessor(copy1MBB); - - BuildMI(BB, dl, TII.getBrCond(CC)).addMBB(copy1MBB); - - // copy0MBB: - // %FalseValue = ... - // # fallthrough to copy1MBB - BB = copy0MBB; - - // Update machine-CFG edges - BB->addSuccessor(copy1MBB); - - // copy1MBB: - // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ] - // ... - BB = copy1MBB; - BuildMI(*BB, BB->begin(), dl, TII.get(SystemZ::PHI), - MI->getOperand(0).getReg()) - .addReg(MI->getOperand(2).getReg()).addMBB(copy0MBB) - .addReg(MI->getOperand(1).getReg()).addMBB(thisMBB); - - MI->eraseFromParent(); // The pseudo instruction is gone now. - return BB; -} diff --git a/lib/Target/SystemZ/SystemZISelLowering.h b/lib/Target/SystemZ/SystemZISelLowering.h deleted file mode 100644 index bab3dc2..0000000 --- a/lib/Target/SystemZ/SystemZISelLowering.h +++ /dev/null @@ -1,145 +0,0 @@ -//==-- SystemZISelLowering.h - SystemZ DAG Lowering Interface ----*- C++ -*-==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that SystemZ uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_SystemZ_ISELLOWERING_H -#define LLVM_TARGET_SystemZ_ISELLOWERING_H - -#include "SystemZ.h" -#include "SystemZRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLowering.h" - -namespace llvm { - namespace SystemZISD { - enum { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - - /// Return with a flag operand. Operand 0 is the chain operand. - RET_FLAG, - - /// CALL - These operations represent an abstract call - /// instruction, which includes a bunch of information. - CALL, - - /// PCRelativeWrapper - PC relative address - PCRelativeWrapper, - - /// CMP, UCMP - Compare instruction - CMP, - UCMP, - - /// BRCOND - Conditional branch. Operand 0 is chain operand, operand 1 is - /// the block to branch if condition is true, operand 2 is condition code - /// and operand 3 is the flag operand produced by a CMP instruction. - BRCOND, - - /// SELECT - Operands 0 and 1 are selection variables, operand 2 is - /// condition code and operand 3 is the flag operand. - SELECT - }; - } - - class SystemZSubtarget; - class SystemZTargetMachine; - - class SystemZTargetLowering : public TargetLowering { - public: - explicit SystemZTargetLowering(SystemZTargetMachine &TM); - - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; } - - /// LowerOperation - Provide custom lowering hooks for some operations. - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - - /// getTargetNodeName - This method returns the name of a target specific - /// DAG node. 
- virtual const char *getTargetNodeName(unsigned Opcode) const; - - std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - TargetLowering::ConstraintType - getConstraintType(const std::string &Constraint) const; - - SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const; - - SDValue EmitCmp(SDValue LHS, SDValue RHS, - ISD::CondCode CC, SDValue &SystemZCC, - SelectionDAG &DAG) const; - - - MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const; - - /// isFPImmLegal - Returns true if the target can instruction select the - /// specified FP immediate natively. If false, the legalizer will - /// materialize the FP immediate as a load from a constant pool. - virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; - - private: - SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - SDValue LowerCCCArguments(SDValue Chain, - CallingConv::ID CallConv, - bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, - SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - SDValue LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - virtual SDValue - LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - virtual SDValue - LowerCall(SDValue Chain, SDValue 
Callee, - CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - virtual SDValue - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; - - const SystemZSubtarget &Subtarget; - const SystemZTargetMachine &TM; - const SystemZRegisterInfo *RegInfo; - }; -} // namespace llvm - -#endif // LLVM_TARGET_SystemZ_ISELLOWERING_H diff --git a/lib/Target/SystemZ/SystemZInstrBuilder.h b/lib/Target/SystemZ/SystemZInstrBuilder.h deleted file mode 100644 index ab45ec5..0000000 --- a/lib/Target/SystemZ/SystemZInstrBuilder.h +++ /dev/null @@ -1,128 +0,0 @@ -//===- SystemZInstrBuilder.h - Functions to aid building insts -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file exposes functions that may be used with BuildMI from the -// MachineInstrBuilder.h file to handle SystemZ'isms in a clean way. -// -// The BuildMem function may be used with the BuildMI function to add entire -// memory references in a single, typed, function call. -// -// For reference, the order of operands for memory references is: -// (Operand), Base, Displacement, Index. -// -//===----------------------------------------------------------------------===// - -#ifndef SYSTEMZINSTRBUILDER_H -#define SYSTEMZINSTRBUILDER_H - -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/PseudoSourceValue.h" - -namespace llvm { - -/// SystemZAddressMode - This struct holds a generalized full x86 address mode. 
-/// The base register can be a frame index, which will eventually be replaced -/// with R15 or R11 and Disp being offsetted accordingly. -struct SystemZAddressMode { - enum { - RegBase, - FrameIndexBase - } BaseType; - - union { - unsigned Reg; - int FrameIndex; - } Base; - - unsigned IndexReg; - int32_t Disp; - const GlobalValue *GV; - - SystemZAddressMode() : BaseType(RegBase), IndexReg(0), Disp(0) { - Base.Reg = 0; - } -}; - -/// addDirectMem - This function is used to add a direct memory reference to the -/// current instruction -- that is, a dereference of an address in a register, -/// with no index or displacement. -/// -static inline const MachineInstrBuilder & -addDirectMem(const MachineInstrBuilder &MIB, unsigned Reg) { - // Because memory references are always represented with 3 - // values, this adds: Reg, [0, NoReg] to the instruction. - return MIB.addReg(Reg).addImm(0).addReg(0); -} - -static inline const MachineInstrBuilder & -addOffset(const MachineInstrBuilder &MIB, int Offset) { - return MIB.addImm(Offset).addReg(0); -} - -/// addRegOffset - This function is used to add a memory reference of the form -/// [Reg + Offset], i.e., one with no or index, but with a -/// displacement. An example is: 10(%r15). -/// -static inline const MachineInstrBuilder & -addRegOffset(const MachineInstrBuilder &MIB, - unsigned Reg, bool isKill, int Offset) { - return addOffset(MIB.addReg(Reg, getKillRegState(isKill)), Offset); -} - -/// addRegReg - This function is used to add a memory reference of the form: -/// [Reg + Reg]. 
-static inline const MachineInstrBuilder & -addRegReg(const MachineInstrBuilder &MIB, - unsigned Reg1, bool isKill1, unsigned Reg2, bool isKill2) { - return MIB.addReg(Reg1, getKillRegState(isKill1)).addImm(0) - .addReg(Reg2, getKillRegState(isKill2)); -} - -static inline const MachineInstrBuilder & -addFullAddress(const MachineInstrBuilder &MIB, const SystemZAddressMode &AM) { - if (AM.BaseType == SystemZAddressMode::RegBase) - MIB.addReg(AM.Base.Reg); - else if (AM.BaseType == SystemZAddressMode::FrameIndexBase) - MIB.addFrameIndex(AM.Base.FrameIndex); - else - assert(0); - - return MIB.addImm(AM.Disp).addReg(AM.IndexReg); -} - -/// addFrameReference - This function is used to add a reference to the base of -/// an abstract object on the stack frame of the current function. This -/// reference has base register as the FrameIndex offset until it is resolved. -/// This allows a constant offset to be specified as well... -/// -static inline const MachineInstrBuilder & -addFrameReference(const MachineInstrBuilder &MIB, int FI, int Offset = 0) { - MachineInstr *MI = MIB; - MachineFunction &MF = *MI->getParent()->getParent(); - MachineFrameInfo &MFI = *MF.getFrameInfo(); - const MCInstrDesc &MCID = MI->getDesc(); - unsigned Flags = 0; - if (MCID.mayLoad()) - Flags |= MachineMemOperand::MOLoad; - if (MCID.mayStore()) - Flags |= MachineMemOperand::MOStore; - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo( - PseudoSourceValue::getFixedStack(FI), Offset), - Flags, MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); - return addOffset(MIB.addFrameIndex(FI), Offset) - .addMemOperand(MMO); -} - -} // End llvm namespace - -#endif diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td deleted file mode 100644 index a658280..0000000 --- a/lib/Target/SystemZ/SystemZInstrFP.td +++ /dev/null @@ -1,340 +0,0 @@ -//===- SystemZInstrFP.td - SystemZ FP Instruction defs --------*- tblgen-*-===// -// -// The LLVM Compiler 
Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the SystemZ (binary) floating point instructions in -// TableGen format. -// -//===----------------------------------------------------------------------===// - -// FIXME: multiclassify! - -//===----------------------------------------------------------------------===// -// FP Pattern fragments - -def fpimm0 : PatLeaf<(fpimm), [{ - return N->isExactlyValue(+0.0); -}]>; - -def fpimmneg0 : PatLeaf<(fpimm), [{ - return N->isExactlyValue(-0.0); -}]>; - -let Uses = [PSW], usesCustomInserter = 1 in { - def SelectF32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, i8imm:$cc), - "# SelectF32 PSEUDO", - [(set FP32:$dst, - (SystemZselect FP32:$src1, FP32:$src2, imm:$cc, PSW))]>; - def SelectF64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, i8imm:$cc), - "# SelectF64 PSEUDO", - [(set FP64:$dst, - (SystemZselect FP64:$src1, FP64:$src2, imm:$cc, PSW))]>; -} - -//===----------------------------------------------------------------------===// -// Move Instructions - -// Floating point constant loads. 
-let isReMaterializable = 1, isAsCheapAsAMove = 1 in { -def LD_Fp032 : Pseudo<(outs FP32:$dst), (ins), - "lzer\t{$dst}", - [(set FP32:$dst, fpimm0)]>; -def LD_Fp064 : Pseudo<(outs FP64:$dst), (ins), - "lzdr\t{$dst}", - [(set FP64:$dst, fpimm0)]>; -} - -let neverHasSideEffects = 1 in { -def FMOV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), - "ler\t{$dst, $src}", - []>; -def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), - "ldr\t{$dst, $src}", - []>; -} - -let canFoldAsLoad = 1, isReMaterializable = 1 in { -def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src), - "le\t{$dst, $src}", - [(set FP32:$dst, (load rriaddr12:$src))]>; -def FMOV32rmy : Pseudo<(outs FP32:$dst), (ins rriaddr:$src), - "ley\t{$dst, $src}", - [(set FP32:$dst, (load rriaddr:$src))]>; -def FMOV64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src), - "ld\t{$dst, $src}", - [(set FP64:$dst, (load rriaddr12:$src))]>; -def FMOV64rmy : Pseudo<(outs FP64:$dst), (ins rriaddr:$src), - "ldy\t{$dst, $src}", - [(set FP64:$dst, (load rriaddr:$src))]>; -} - -def FMOV32mr : Pseudo<(outs), (ins rriaddr12:$dst, FP32:$src), - "ste\t{$src, $dst}", - [(store FP32:$src, rriaddr12:$dst)]>; -def FMOV32mry : Pseudo<(outs), (ins rriaddr:$dst, FP32:$src), - "stey\t{$src, $dst}", - [(store FP32:$src, rriaddr:$dst)]>; -def FMOV64mr : Pseudo<(outs), (ins rriaddr12:$dst, FP64:$src), - "std\t{$src, $dst}", - [(store FP64:$src, rriaddr12:$dst)]>; -def FMOV64mry : Pseudo<(outs), (ins rriaddr:$dst, FP64:$src), - "stdy\t{$src, $dst}", - [(store FP64:$src, rriaddr:$dst)]>; - -def FCOPYSIGN32 : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), - "cpsdr\t{$dst, $src2, $src1}", - [(set FP32:$dst, (fcopysign FP32:$src1, FP32:$src2))]>; -def FCOPYSIGN64 : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2), - "cpsdr\t{$dst, $src2, $src1}", - [(set FP64:$dst, (fcopysign FP64:$src1, FP64:$src2))]>; - -//===----------------------------------------------------------------------===// -// Arithmetic Instructions - - -let 
Defs = [PSW] in { -def FNEG32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), - "lcebr\t{$dst, $src}", - [(set FP32:$dst, (fneg FP32:$src)), - (implicit PSW)]>; -def FNEG64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), - "lcdbr\t{$dst, $src}", - [(set FP64:$dst, (fneg FP64:$src)), - (implicit PSW)]>; - -def FABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), - "lpebr\t{$dst, $src}", - [(set FP32:$dst, (fabs FP32:$src)), - (implicit PSW)]>; -def FABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), - "lpdbr\t{$dst, $src}", - [(set FP64:$dst, (fabs FP64:$src)), - (implicit PSW)]>; - -def FNABS32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), - "lnebr\t{$dst, $src}", - [(set FP32:$dst, (fneg(fabs FP32:$src))), - (implicit PSW)]>; -def FNABS64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), - "lndbr\t{$dst, $src}", - [(set FP64:$dst, (fneg(fabs FP64:$src))), - (implicit PSW)]>; -} - -let Constraints = "$src1 = $dst" in { -let Defs = [PSW] in { -let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y -def FADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), - "aebr\t{$dst, $src2}", - [(set FP32:$dst, (fadd FP32:$src1, FP32:$src2)), - (implicit PSW)]>; -def FADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2), - "adbr\t{$dst, $src2}", - [(set FP64:$dst, (fadd FP64:$src1, FP64:$src2)), - (implicit PSW)]>; -} - -def FADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2), - "aeb\t{$dst, $src2}", - [(set FP32:$dst, (fadd FP32:$src1, (load rriaddr12:$src2))), - (implicit PSW)]>; -def FADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2), - "adb\t{$dst, $src2}", - [(set FP64:$dst, (fadd FP64:$src1, (load rriaddr12:$src2))), - (implicit PSW)]>; - -def FSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), - "sebr\t{$dst, $src2}", - [(set FP32:$dst, (fsub FP32:$src1, FP32:$src2)), - (implicit PSW)]>; -def FSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2), - "sdbr\t{$dst, $src2}", - [(set FP64:$dst, 
(fsub FP64:$src1, FP64:$src2)), - (implicit PSW)]>; - -def FSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2), - "seb\t{$dst, $src2}", - [(set FP32:$dst, (fsub FP32:$src1, (load rriaddr12:$src2))), - (implicit PSW)]>; -def FSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2), - "sdb\t{$dst, $src2}", - [(set FP64:$dst, (fsub FP64:$src1, (load rriaddr12:$src2))), - (implicit PSW)]>; -} // Defs = [PSW] - -let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y -def FMUL32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), - "meebr\t{$dst, $src2}", - [(set FP32:$dst, (fmul FP32:$src1, FP32:$src2))]>; -def FMUL64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2), - "mdbr\t{$dst, $src2}", - [(set FP64:$dst, (fmul FP64:$src1, FP64:$src2))]>; -} - -def FMUL32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2), - "meeb\t{$dst, $src2}", - [(set FP32:$dst, (fmul FP32:$src1, (load rriaddr12:$src2)))]>; -def FMUL64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2), - "mdb\t{$dst, $src2}", - [(set FP64:$dst, (fmul FP64:$src1, (load rriaddr12:$src2)))]>; - -def FMADD32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2, FP32:$src3), - "maebr\t{$dst, $src3, $src2}", - [(set FP32:$dst, (fadd (fmul FP32:$src2, FP32:$src3), - FP32:$src1))]>; -def FMADD32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3), - "maeb\t{$dst, $src3, $src2}", - [(set FP32:$dst, (fadd (fmul (load rriaddr12:$src2), - FP32:$src3), - FP32:$src1))]>; - -def FMADD64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3), - "madbr\t{$dst, $src3, $src2}", - [(set FP64:$dst, (fadd (fmul FP64:$src2, FP64:$src3), - FP64:$src1))]>; -def FMADD64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3), - "madb\t{$dst, $src3, $src2}", - [(set FP64:$dst, (fadd (fmul (load rriaddr12:$src2), - FP64:$src3), - FP64:$src1))]>; - -def FMSUB32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, 
FP32:$src2, FP32:$src3), - "msebr\t{$dst, $src3, $src2}", - [(set FP32:$dst, (fsub (fmul FP32:$src2, FP32:$src3), - FP32:$src1))]>; -def FMSUB32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2, FP32:$src3), - "mseb\t{$dst, $src3, $src2}", - [(set FP32:$dst, (fsub (fmul (load rriaddr12:$src2), - FP32:$src3), - FP32:$src1))]>; - -def FMSUB64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2, FP64:$src3), - "msdbr\t{$dst, $src3, $src2}", - [(set FP64:$dst, (fsub (fmul FP64:$src2, FP64:$src3), - FP64:$src1))]>; -def FMSUB64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2, FP64:$src3), - "msdb\t{$dst, $src3, $src2}", - [(set FP64:$dst, (fsub (fmul (load rriaddr12:$src2), - FP64:$src3), - FP64:$src1))]>; - -def FDIV32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src1, FP32:$src2), - "debr\t{$dst, $src2}", - [(set FP32:$dst, (fdiv FP32:$src1, FP32:$src2))]>; -def FDIV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src1, FP64:$src2), - "ddbr\t{$dst, $src2}", - [(set FP64:$dst, (fdiv FP64:$src1, FP64:$src2))]>; - -def FDIV32rm : Pseudo<(outs FP32:$dst), (ins FP32:$src1, rriaddr12:$src2), - "deb\t{$dst, $src2}", - [(set FP32:$dst, (fdiv FP32:$src1, (load rriaddr12:$src2)))]>; -def FDIV64rm : Pseudo<(outs FP64:$dst), (ins FP64:$src1, rriaddr12:$src2), - "ddb\t{$dst, $src2}", - [(set FP64:$dst, (fdiv FP64:$src1, (load rriaddr12:$src2)))]>; - -} // Constraints = "$src1 = $dst" - -def FSQRT32rr : Pseudo<(outs FP32:$dst), (ins FP32:$src), - "sqebr\t{$dst, $src}", - [(set FP32:$dst, (fsqrt FP32:$src))]>; -def FSQRT64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src), - "sqdbr\t{$dst, $src}", - [(set FP64:$dst, (fsqrt FP64:$src))]>; - -def FSQRT32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src), - "sqeb\t{$dst, $src}", - [(set FP32:$dst, (fsqrt (load rriaddr12:$src)))]>; -def FSQRT64rm : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src), - "sqdb\t{$dst, $src}", - [(set FP64:$dst, (fsqrt (load rriaddr12:$src)))]>; - -def FROUND64r32 : Pseudo<(outs FP32:$dst), (ins 
FP64:$src), - "ledbr\t{$dst, $src}", - [(set FP32:$dst, (fround FP64:$src))]>; - -def FEXT32r64 : Pseudo<(outs FP64:$dst), (ins FP32:$src), - "ldebr\t{$dst, $src}", - [(set FP64:$dst, (fextend FP32:$src))]>; -def FEXT32m64 : Pseudo<(outs FP64:$dst), (ins rriaddr12:$src), - "ldeb\t{$dst, $src}", - [(set FP64:$dst, (fextend (load rriaddr12:$src)))]>; - -let Defs = [PSW] in { -def FCONVFP32 : Pseudo<(outs FP32:$dst), (ins GR32:$src), - "cefbr\t{$dst, $src}", - [(set FP32:$dst, (sint_to_fp GR32:$src)), - (implicit PSW)]>; -def FCONVFP32r64: Pseudo<(outs FP32:$dst), (ins GR64:$src), - "cegbr\t{$dst, $src}", - [(set FP32:$dst, (sint_to_fp GR64:$src)), - (implicit PSW)]>; - -def FCONVFP64r32: Pseudo<(outs FP64:$dst), (ins GR32:$src), - "cdfbr\t{$dst, $src}", - [(set FP64:$dst, (sint_to_fp GR32:$src)), - (implicit PSW)]>; -def FCONVFP64 : Pseudo<(outs FP64:$dst), (ins GR64:$src), - "cdgbr\t{$dst, $src}", - [(set FP64:$dst, (sint_to_fp GR64:$src)), - (implicit PSW)]>; - -def FCONVGR32 : Pseudo<(outs GR32:$dst), (ins FP32:$src), - "cfebr\t{$dst, 5, $src}", - [(set GR32:$dst, (fp_to_sint FP32:$src)), - (implicit PSW)]>; -def FCONVGR32r64: Pseudo<(outs GR32:$dst), (ins FP64:$src), - "cfdbr\t{$dst, 5, $src}", - [(set GR32:$dst, (fp_to_sint FP64:$src)), - (implicit PSW)]>; - -def FCONVGR64r32: Pseudo<(outs GR64:$dst), (ins FP32:$src), - "cgebr\t{$dst, 5, $src}", - [(set GR64:$dst, (fp_to_sint FP32:$src)), - (implicit PSW)]>; -def FCONVGR64 : Pseudo<(outs GR64:$dst), (ins FP64:$src), - "cgdbr\t{$dst, 5, $src}", - [(set GR64:$dst, (fp_to_sint FP64:$src)), - (implicit PSW)]>; -} // Defs = [PSW] - -def FBCONVG64 : Pseudo<(outs GR64:$dst), (ins FP64:$src), - "lgdr\t{$dst, $src}", - [(set GR64:$dst, (bitconvert FP64:$src))]>; -def FBCONVF64 : Pseudo<(outs FP64:$dst), (ins GR64:$src), - "ldgr\t{$dst, $src}", - [(set FP64:$dst, (bitconvert GR64:$src))]>; - -//===----------------------------------------------------------------------===// -// Test instructions (like AND but do not produce 
any result) - -// Integer comparisons -let Defs = [PSW] in { -def FCMP32rr : Pseudo<(outs), (ins FP32:$src1, FP32:$src2), - "cebr\t$src1, $src2", - [(set PSW, (SystemZcmp FP32:$src1, FP32:$src2))]>; -def FCMP64rr : Pseudo<(outs), (ins FP64:$src1, FP64:$src2), - "cdbr\t$src1, $src2", - [(set PSW, (SystemZcmp FP64:$src1, FP64:$src2))]>; - -def FCMP32rm : Pseudo<(outs), (ins FP32:$src1, rriaddr12:$src2), - "ceb\t$src1, $src2", - [(set PSW, (SystemZcmp FP32:$src1, - (load rriaddr12:$src2)))]>; -def FCMP64rm : Pseudo<(outs), (ins FP64:$src1, rriaddr12:$src2), - "cdb\t$src1, $src2", - [(set PSW, (SystemZcmp FP64:$src1, - (load rriaddr12:$src2)))]>; -} // Defs = [PSW] - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns -//===----------------------------------------------------------------------===// - -// Floating point constant -0.0 -def : Pat<(f32 fpimmneg0), (FNEG32rr (LD_Fp032))>; -def : Pat<(f64 fpimmneg0), (FNEG64rr (LD_Fp064))>; diff --git a/lib/Target/SystemZ/SystemZInstrFormats.td b/lib/Target/SystemZ/SystemZInstrFormats.td deleted file mode 100644 index b4a8993..0000000 --- a/lib/Target/SystemZ/SystemZInstrFormats.td +++ /dev/null @@ -1,133 +0,0 @@ -//===- SystemZInstrFormats.td - SystemZ Instruction Formats ----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -// Format specifies the encoding used by the instruction. This is part of the -// ad-hoc solution used to emit machine instruction encodings by our machine -// code emitter. 
-class Format val> { - bits<5> Value = val; -} - -def Pseudo : Format<0>; -def EForm : Format<1>; -def IForm : Format<2>; -def RIForm : Format<3>; -def RIEForm : Format<4>; -def RILForm : Format<5>; -def RISForm : Format<6>; -def RRForm : Format<7>; -def RREForm : Format<8>; -def RRFForm : Format<9>; -def RRRForm : Format<10>; -def RRSForm : Format<11>; -def RSForm : Format<12>; -def RSIForm : Format<13>; -def RSILForm : Format<14>; -def RSYForm : Format<15>; -def RXForm : Format<16>; -def RXEForm : Format<17>; -def RXFForm : Format<18>; -def RXYForm : Format<19>; -def SForm : Format<20>; -def SIForm : Format<21>; -def SILForm : Format<22>; -def SIYForm : Format<23>; -def SSForm : Format<24>; -def SSEForm : Format<25>; -def SSFForm : Format<26>; - -class InstSystemZ op, Format f, dag outs, dag ins> : Instruction { - let Namespace = "SystemZ"; - - bits<16> Opcode = op; - - Format Form = f; - bits<5> FormBits = Form.Value; - - dag OutOperandList = outs; - dag InOperandList = ins; -} - -class I8 op, Format f, dag outs, dag ins, string asmstr, - list pattern> - : InstSystemZ<0, f, outs, ins> { - let Opcode{0-7} = op; - let Opcode{8-15} = 0; - - let Pattern = pattern; - let AsmString = asmstr; -} - -class I12 op, Format f, dag outs, dag ins, string asmstr, - list pattern> - : InstSystemZ<0, f, outs, ins> { - let Opcode{0-11} = op; - let Opcode{12-15} = 0; - - let Pattern = pattern; - let AsmString = asmstr; -} - -class I16 op, Format f, dag outs, dag ins, string asmstr, - list pattern> - : InstSystemZ { - let Pattern = pattern; - let AsmString = asmstr; -} - -class RRI op, dag outs, dag ins, string asmstr, list pattern> - : I8; - -class RII op, dag outs, dag ins, string asmstr, list pattern> - : I12; - -class RILI op, dag outs, dag ins, string asmstr, list pattern> - : I12; - -class RREI op, dag outs, dag ins, string asmstr, list pattern> - : I16; - -class RXI op, dag outs, dag ins, string asmstr, list pattern> - : I8 { - let AddedComplexity = 1; -} - -class RXYI op, 
dag outs, dag ins, string asmstr, list pattern> - : I16; - -class RSI op, dag outs, dag ins, string asmstr, list pattern> - : I8 { - let AddedComplexity = 1; -} - -class RSYI op, dag outs, dag ins, string asmstr, list pattern> - : I16; - -class SII op, dag outs, dag ins, string asmstr, list pattern> - : I8 { - let AddedComplexity = 1; -} - -class SIYI op, dag outs, dag ins, string asmstr, list pattern> - : I16; - -class SILI op, dag outs, dag ins, string asmstr, list pattern> - : I16; - - -//===----------------------------------------------------------------------===// -// Pseudo instructions -//===----------------------------------------------------------------------===// - -class Pseudo pattern> - : InstSystemZ<0, Pseudo, outs, ins> { - - let Pattern = pattern; - let AsmString = asmstr; -} diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp deleted file mode 100644 index 5f3dd80..0000000 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ /dev/null @@ -1,439 +0,0 @@ -//===- SystemZInstrInfo.cpp - SystemZ Instruction Information --------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the SystemZ implementation of the TargetInstrInfo class. 
-// -//===----------------------------------------------------------------------===// - -#include "SystemZ.h" -#include "SystemZInstrBuilder.h" -#include "SystemZInstrInfo.h" -#include "SystemZMachineFunctionInfo.h" -#include "SystemZTargetMachine.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_CTOR -#include "SystemZGenInstrInfo.inc" - -using namespace llvm; - -SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm) - : SystemZGenInstrInfo(SystemZ::ADJCALLSTACKUP, SystemZ::ADJCALLSTACKDOWN), - RI(tm, *this), TM(tm) { -} - -/// isGVStub - Return true if the GV requires an extra load to get the -/// real address. -static inline bool isGVStub(GlobalValue *GV, SystemZTargetMachine &TM) { - return TM.getSubtarget().GVRequiresExtraLoad(GV, TM, false); -} - -void SystemZInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - - unsigned Opc = 0; - if (RC == &SystemZ::GR32RegClass || - RC == &SystemZ::ADDR32RegClass) - Opc = SystemZ::MOV32mr; - else if (RC == &SystemZ::GR64RegClass || - RC == &SystemZ::ADDR64RegClass) { - Opc = SystemZ::MOV64mr; - } else if (RC == &SystemZ::FP32RegClass) { - Opc = SystemZ::FMOV32mr; - } else if (RC == &SystemZ::FP64RegClass) { - Opc = SystemZ::FMOV64mr; - } else if (RC == &SystemZ::GR64PRegClass) { - Opc = SystemZ::MOV64Pmr; - } else if (RC == &SystemZ::GR128RegClass) { - Opc = SystemZ::MOV128mr; - } else - llvm_unreachable("Unsupported regclass to store"); - - addFrameReference(BuildMI(MBB, MI, DL, get(Opc)), FrameIdx) - .addReg(SrcReg, 
getKillRegState(isKill)); -} - -void SystemZInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const{ - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - - unsigned Opc = 0; - if (RC == &SystemZ::GR32RegClass || - RC == &SystemZ::ADDR32RegClass) - Opc = SystemZ::MOV32rm; - else if (RC == &SystemZ::GR64RegClass || - RC == &SystemZ::ADDR64RegClass) { - Opc = SystemZ::MOV64rm; - } else if (RC == &SystemZ::FP32RegClass) { - Opc = SystemZ::FMOV32rm; - } else if (RC == &SystemZ::FP64RegClass) { - Opc = SystemZ::FMOV64rm; - } else if (RC == &SystemZ::GR64PRegClass) { - Opc = SystemZ::MOV64Prm; - } else if (RC == &SystemZ::GR128RegClass) { - Opc = SystemZ::MOV128rm; - } else - llvm_unreachable("Unsupported regclass to load"); - - addFrameReference(BuildMI(MBB, MI, DL, get(Opc), DestReg), FrameIdx); -} - -void SystemZInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - unsigned Opc; - if (SystemZ::GR64RegClass.contains(DestReg, SrcReg)) - Opc = SystemZ::MOV64rr; - else if (SystemZ::GR32RegClass.contains(DestReg, SrcReg)) - Opc = SystemZ::MOV32rr; - else if (SystemZ::GR64PRegClass.contains(DestReg, SrcReg)) - Opc = SystemZ::MOV64rrP; - else if (SystemZ::GR128RegClass.contains(DestReg, SrcReg)) - Opc = SystemZ::MOV128rr; - else if (SystemZ::FP32RegClass.contains(DestReg, SrcReg)) - Opc = SystemZ::FMOV32rr; - else if (SystemZ::FP64RegClass.contains(DestReg, SrcReg)) - Opc = SystemZ::FMOV64rr; - else - llvm_unreachable("Impossible reg-to-reg copy"); - - BuildMI(MBB, I, DL, get(Opc), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); -} - -unsigned SystemZInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case SystemZ::MOV32rm: - case 
SystemZ::MOV32rmy: - case SystemZ::MOV64rm: - case SystemZ::MOVSX32rm8: - case SystemZ::MOVSX32rm16y: - case SystemZ::MOVSX64rm8: - case SystemZ::MOVSX64rm16: - case SystemZ::MOVSX64rm32: - case SystemZ::MOVZX32rm8: - case SystemZ::MOVZX32rm16: - case SystemZ::MOVZX64rm8: - case SystemZ::MOVZX64rm16: - case SystemZ::MOVZX64rm32: - case SystemZ::FMOV32rm: - case SystemZ::FMOV32rmy: - case SystemZ::FMOV64rm: - case SystemZ::FMOV64rmy: - case SystemZ::MOV64Prm: - case SystemZ::MOV64Prmy: - case SystemZ::MOV128rm: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && MI->getOperand(3).isReg() && - MI->getOperand(2).getImm() == 0 && MI->getOperand(3).getReg() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; -} - -unsigned SystemZInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case SystemZ::MOV32mr: - case SystemZ::MOV32mry: - case SystemZ::MOV64mr: - case SystemZ::MOV32m8r: - case SystemZ::MOV32m8ry: - case SystemZ::MOV32m16r: - case SystemZ::MOV32m16ry: - case SystemZ::MOV64m8r: - case SystemZ::MOV64m8ry: - case SystemZ::MOV64m16r: - case SystemZ::MOV64m16ry: - case SystemZ::MOV64m32r: - case SystemZ::MOV64m32ry: - case SystemZ::FMOV32mr: - case SystemZ::FMOV32mry: - case SystemZ::FMOV64mr: - case SystemZ::FMOV64mry: - case SystemZ::MOV64Pmr: - case SystemZ::MOV64Pmry: - case SystemZ::MOV128mr: - if (MI->getOperand(0).isFI() && - MI->getOperand(1).isImm() && MI->getOperand(2).isReg() && - MI->getOperand(1).getImm() == 0 && MI->getOperand(2).getReg() == 0) { - FrameIndex = MI->getOperand(0).getIndex(); - return MI->getOperand(3).getReg(); - } - break; - } - return 0; -} - -bool SystemZInstrInfo:: -ReverseBranchCondition(SmallVectorImpl &Cond) const { - assert(Cond.size() == 1 && "Invalid Xbranch condition!"); - - SystemZCC::CondCodes CC = static_cast(Cond[0].getImm()); - 
Cond[0].setImm(getOppositeCondition(CC)); - return false; -} - -bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isTerminator()) return false; - - // Conditional branch is a special case. - if (MCID.isBranch() && !MCID.isBarrier()) - return true; - if (!MCID.isPredicable()) - return true; - return !isPredicated(MI); -} - -bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const { - // Start from the bottom of the block and work up, examining the - // terminator instructions. - MachineBasicBlock::iterator I = MBB.end(); - while (I != MBB.begin()) { - --I; - if (I->isDebugValue()) - continue; - // Working from the bottom, when we see a non-terminator - // instruction, we're done. - if (!isUnpredicatedTerminator(I)) - break; - - // A terminator that isn't a branch can't easily be handled - // by this analysis. - if (!I->getDesc().isBranch()) - return true; - - // Handle unconditional branches. - if (I->getOpcode() == SystemZ::JMP) { - if (!AllowModify) { - TBB = I->getOperand(0).getMBB(); - continue; - } - - // If the block has any instructions after a JMP, delete them. - while (llvm::next(I) != MBB.end()) - llvm::next(I)->eraseFromParent(); - Cond.clear(); - FBB = 0; - - // Delete the JMP if it's equivalent to a fall-through. - if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) { - TBB = 0; - I->eraseFromParent(); - I = MBB.end(); - continue; - } - - // TBB is used to indicate the unconditinal destination. - TBB = I->getOperand(0).getMBB(); - continue; - } - - // Handle conditional branches. - SystemZCC::CondCodes BranchCode = getCondFromBranchOpc(I->getOpcode()); - if (BranchCode == SystemZCC::INVALID) - return true; // Can't handle indirect branch. - - // Working from the bottom, handle the first conditional branch. 
- if (Cond.empty()) { - FBB = TBB; - TBB = I->getOperand(0).getMBB(); - Cond.push_back(MachineOperand::CreateImm(BranchCode)); - continue; - } - - // Handle subsequent conditional branches. Only handle the case where all - // conditional branches branch to the same destination. - assert(Cond.size() == 1); - assert(TBB); - - // Only handle the case where all conditional branches branch to - // the same destination. - if (TBB != I->getOperand(0).getMBB()) - return true; - - SystemZCC::CondCodes OldBranchCode = (SystemZCC::CondCodes)Cond[0].getImm(); - // If the conditions are the same, we can leave them alone. - if (OldBranchCode == BranchCode) - continue; - - return true; - } - - return false; -} - -unsigned SystemZInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - unsigned Count = 0; - - while (I != MBB.begin()) { - --I; - if (I->isDebugValue()) - continue; - if (I->getOpcode() != SystemZ::JMP && - getCondFromBranchOpc(I->getOpcode()) == SystemZCC::INVALID) - break; - // Remove the branch. - I->eraseFromParent(); - I = MBB.end(); - ++Count; - } - - return Count; -} - -unsigned -SystemZInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const { - // Shouldn't be a fall through. - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 1 || Cond.size() == 0) && - "SystemZ branch conditions have one component!"); - - if (Cond.empty()) { - // Unconditional branch? - assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(TBB); - return 1; - } - - // Conditional branch. - unsigned Count = 0; - SystemZCC::CondCodes CC = (SystemZCC::CondCodes)Cond[0].getImm(); - BuildMI(&MBB, DL, getBrCond(CC)).addMBB(TBB); - ++Count; - - if (FBB) { - // Two-way Conditional branch. Insert the second branch. 
- BuildMI(&MBB, DL, get(SystemZ::JMP)).addMBB(FBB); - ++Count; - } - return Count; -} - -const MCInstrDesc& -SystemZInstrInfo::getBrCond(SystemZCC::CondCodes CC) const { - switch (CC) { - default: - llvm_unreachable("Unknown condition code!"); - case SystemZCC::O: return get(SystemZ::JO); - case SystemZCC::H: return get(SystemZ::JH); - case SystemZCC::NLE: return get(SystemZ::JNLE); - case SystemZCC::L: return get(SystemZ::JL); - case SystemZCC::NHE: return get(SystemZ::JNHE); - case SystemZCC::LH: return get(SystemZ::JLH); - case SystemZCC::NE: return get(SystemZ::JNE); - case SystemZCC::E: return get(SystemZ::JE); - case SystemZCC::NLH: return get(SystemZ::JNLH); - case SystemZCC::HE: return get(SystemZ::JHE); - case SystemZCC::NL: return get(SystemZ::JNL); - case SystemZCC::LE: return get(SystemZ::JLE); - case SystemZCC::NH: return get(SystemZ::JNH); - case SystemZCC::NO: return get(SystemZ::JNO); - } -} - -SystemZCC::CondCodes -SystemZInstrInfo::getCondFromBranchOpc(unsigned Opc) const { - switch (Opc) { - default: return SystemZCC::INVALID; - case SystemZ::JO: return SystemZCC::O; - case SystemZ::JH: return SystemZCC::H; - case SystemZ::JNLE: return SystemZCC::NLE; - case SystemZ::JL: return SystemZCC::L; - case SystemZ::JNHE: return SystemZCC::NHE; - case SystemZ::JLH: return SystemZCC::LH; - case SystemZ::JNE: return SystemZCC::NE; - case SystemZ::JE: return SystemZCC::E; - case SystemZ::JNLH: return SystemZCC::NLH; - case SystemZ::JHE: return SystemZCC::HE; - case SystemZ::JNL: return SystemZCC::NL; - case SystemZ::JLE: return SystemZCC::LE; - case SystemZ::JNH: return SystemZCC::NH; - case SystemZ::JNO: return SystemZCC::NO; - } -} - -SystemZCC::CondCodes -SystemZInstrInfo::getOppositeCondition(SystemZCC::CondCodes CC) const { - switch (CC) { - default: - llvm_unreachable("Invalid condition!"); - case SystemZCC::O: return SystemZCC::NO; - case SystemZCC::H: return SystemZCC::NH; - case SystemZCC::NLE: return SystemZCC::LE; - case SystemZCC::L: return 
SystemZCC::NL; - case SystemZCC::NHE: return SystemZCC::HE; - case SystemZCC::LH: return SystemZCC::NLH; - case SystemZCC::NE: return SystemZCC::E; - case SystemZCC::E: return SystemZCC::NE; - case SystemZCC::NLH: return SystemZCC::LH; - case SystemZCC::HE: return SystemZCC::NHE; - case SystemZCC::NL: return SystemZCC::L; - case SystemZCC::LE: return SystemZCC::NLE; - case SystemZCC::NH: return SystemZCC::H; - case SystemZCC::NO: return SystemZCC::O; - } -} - -const MCInstrDesc& -SystemZInstrInfo::getLongDispOpc(unsigned Opc) const { - switch (Opc) { - default: - llvm_unreachable("Don't have long disp version of this instruction"); - case SystemZ::MOV32mr: return get(SystemZ::MOV32mry); - case SystemZ::MOV32rm: return get(SystemZ::MOV32rmy); - case SystemZ::MOVSX32rm16: return get(SystemZ::MOVSX32rm16y); - case SystemZ::MOV32m8r: return get(SystemZ::MOV32m8ry); - case SystemZ::MOV32m16r: return get(SystemZ::MOV32m16ry); - case SystemZ::MOV64m8r: return get(SystemZ::MOV64m8ry); - case SystemZ::MOV64m16r: return get(SystemZ::MOV64m16ry); - case SystemZ::MOV64m32r: return get(SystemZ::MOV64m32ry); - case SystemZ::MOV8mi: return get(SystemZ::MOV8miy); - case SystemZ::MUL32rm: return get(SystemZ::MUL32rmy); - case SystemZ::CMP32rm: return get(SystemZ::CMP32rmy); - case SystemZ::UCMP32rm: return get(SystemZ::UCMP32rmy); - case SystemZ::FMOV32mr: return get(SystemZ::FMOV32mry); - case SystemZ::FMOV64mr: return get(SystemZ::FMOV64mry); - case SystemZ::FMOV32rm: return get(SystemZ::FMOV32rmy); - case SystemZ::FMOV64rm: return get(SystemZ::FMOV64rmy); - case SystemZ::MOV64Pmr: return get(SystemZ::MOV64Pmry); - case SystemZ::MOV64Prm: return get(SystemZ::MOV64Prmy); - } -} diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h deleted file mode 100644 index 6a31e94..0000000 --- a/lib/Target/SystemZ/SystemZInstrInfo.h +++ /dev/null @@ -1,113 +0,0 @@ -//===- SystemZInstrInfo.h - SystemZ Instruction Information -------*- C++ -*-===// -// -// 
The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the SystemZ implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_SYSTEMZINSTRINFO_H -#define LLVM_TARGET_SYSTEMZINSTRINFO_H - -#include "SystemZ.h" -#include "SystemZRegisterInfo.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/Target/TargetInstrInfo.h" - -#define GET_INSTRINFO_HEADER -#include "SystemZGenInstrInfo.inc" - -namespace llvm { - -class SystemZTargetMachine; - -/// SystemZII - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace SystemZII { - enum { - //===------------------------------------------------------------------===// - // SystemZ Specific MachineOperand flags. - - MO_NO_FLAG = 0, - - /// MO_GOTENT - On a symbol operand this indicates that the immediate is - /// the offset to the location of the symbol name from the base of the GOT. - /// - /// SYMBOL_LABEL @GOTENT - MO_GOTENT = 1, - - /// MO_PLT - On a symbol operand this indicates that the immediate is - /// offset to the PLT entry of symbol name from the current code location. - /// - /// SYMBOL_LABEL @PLT - MO_PLT = 2 - }; -} - -class SystemZInstrInfo : public SystemZGenInstrInfo { - const SystemZRegisterInfo RI; - SystemZTargetMachine &TM; -public: - explicit SystemZInstrInfo(SystemZTargetMachine &TM); - - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). 
- /// - virtual const SystemZRegisterInfo &getRegisterInfo() const { return RI; } - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const; - unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const; - - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, - int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - bool ReverseBranchCondition(SmallVectorImpl &Cond) const; - virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const; - virtual bool AnalyzeBranch(MachineBasicBlock &MBB, - MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const; - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const; - virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const; - - SystemZCC::CondCodes getOppositeCondition(SystemZCC::CondCodes CC) const; - SystemZCC::CondCodes getCondFromBranchOpc(unsigned Opc) const; - const MCInstrDesc& getBrCond(SystemZCC::CondCodes CC) const; - const MCInstrDesc& getLongDispOpc(unsigned Opc) const; - - const MCInstrDesc& getMemoryInstr(unsigned Opc, int64_t Offset = 0) const { - if (Offset < 0 || Offset >= 4096) - return getLongDispOpc(Opc); - else - return get(Opc); - } -}; - -} - -#endif diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td deleted file mode 100644 index 580d65b..0000000 --- a/lib/Target/SystemZ/SystemZInstrInfo.td +++ /dev/null @@ 
-1,1147 +0,0 @@ -//===- SystemZInstrInfo.td - SystemZ Instruction defs ---------*- tblgen-*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the SystemZ instructions in TableGen format. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// SystemZ Instruction Predicate Definitions. -def IsZ10 : Predicate<"Subtarget.isZ10()">; - -include "SystemZInstrFormats.td" - -//===----------------------------------------------------------------------===// -// Type Constraints. -//===----------------------------------------------------------------------===// -class SDTCisI8 : SDTCisVT; -class SDTCisI16 : SDTCisVT; -class SDTCisI32 : SDTCisVT; -class SDTCisI64 : SDTCisVT; - -//===----------------------------------------------------------------------===// -// Type Profiles. -//===----------------------------------------------------------------------===// -def SDT_SystemZCall : SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>; -def SDT_SystemZCallSeqStart : SDCallSeqStart<[SDTCisI64<0>]>; -def SDT_SystemZCallSeqEnd : SDCallSeqEnd<[SDTCisI64<0>, SDTCisI64<1>]>; -def SDT_CmpTest : SDTypeProfile<1, 2, [SDTCisI64<0>, - SDTCisSameAs<1, 2>]>; -def SDT_BrCond : SDTypeProfile<0, 3, - [SDTCisVT<0, OtherVT>, - SDTCisI8<1>, SDTCisVT<2, i64>]>; -def SDT_SelectCC : SDTypeProfile<1, 4, - [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>, - SDTCisI8<3>, SDTCisVT<4, i64>]>; -def SDT_Address : SDTypeProfile<1, 1, - [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>; - -//===----------------------------------------------------------------------===// -// SystemZ Specific Node Definitions. 
-//===----------------------------------------------------------------------===// -def SystemZretflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; -def SystemZcall : SDNode<"SystemZISD::CALL", SDT_SystemZCall, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>; -def SystemZcallseq_start : - SDNode<"ISD::CALLSEQ_START", SDT_SystemZCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; -def SystemZcallseq_end : - SDNode<"ISD::CALLSEQ_END", SDT_SystemZCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; -def SystemZcmp : SDNode<"SystemZISD::CMP", SDT_CmpTest>; -def SystemZucmp : SDNode<"SystemZISD::UCMP", SDT_CmpTest>; -def SystemZbrcond : SDNode<"SystemZISD::BRCOND", SDT_BrCond, - [SDNPHasChain]>; -def SystemZselect : SDNode<"SystemZISD::SELECT", SDT_SelectCC>; -def SystemZpcrelwrapper : SDNode<"SystemZISD::PCRelativeWrapper", SDT_Address, []>; - - -include "SystemZOperands.td" - -//===----------------------------------------------------------------------===// -// Instruction list.. - -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i64imm:$amt), - "#ADJCALLSTACKDOWN", - [(SystemZcallseq_start timm:$amt)]>; -def ADJCALLSTACKUP : Pseudo<(outs), (ins i64imm:$amt1, i64imm:$amt2), - "#ADJCALLSTACKUP", - [(SystemZcallseq_end timm:$amt1, timm:$amt2)]>; - -let Uses = [PSW], usesCustomInserter = 1 in { - def Select32 : Pseudo<(outs GR32:$dst), (ins GR32:$src1, GR32:$src2, i8imm:$cc), - "# Select32 PSEUDO", - [(set GR32:$dst, - (SystemZselect GR32:$src1, GR32:$src2, imm:$cc, PSW))]>; - def Select64 : Pseudo<(outs GR64:$dst), (ins GR64:$src1, GR64:$src2, i8imm:$cc), - "# Select64 PSEUDO", - [(set GR64:$dst, - (SystemZselect GR64:$src1, GR64:$src2, imm:$cc, PSW))]>; -} - - -//===----------------------------------------------------------------------===// -// Control Flow Instructions... -// - -// FIXME: Provide proper encoding! 
-let isReturn = 1, isTerminator = 1, isBarrier = 1, hasCtrlDep = 1 in { - def RET : Pseudo<(outs), (ins), "br\t%r14", [(SystemZretflag)]>; -} - -let isBranch = 1, isTerminator = 1 in { - let isBarrier = 1 in { - def JMP : Pseudo<(outs), (ins brtarget:$dst), "j\t{$dst}", [(br bb:$dst)]>; - - let isIndirectBranch = 1 in - def JMPr : Pseudo<(outs), (ins GR64:$dst), "br\t{$dst}", [(brind GR64:$dst)]>; - } - - let Uses = [PSW] in { - def JO : Pseudo<(outs), (ins brtarget:$dst), - "jo\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_O, PSW)]>; - def JH : Pseudo<(outs), (ins brtarget:$dst), - "jh\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_H, PSW)]>; - def JNLE: Pseudo<(outs), (ins brtarget:$dst), - "jnle\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLE, PSW)]>; - def JL : Pseudo<(outs), (ins brtarget:$dst), - "jl\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_L, PSW)]>; - def JNHE: Pseudo<(outs), (ins brtarget:$dst), - "jnhe\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NHE, PSW)]>; - def JLH : Pseudo<(outs), (ins brtarget:$dst), - "jlh\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LH, PSW)]>; - def JNE : Pseudo<(outs), (ins brtarget:$dst), - "jne\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NE, PSW)]>; - def JE : Pseudo<(outs), (ins brtarget:$dst), - "je\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_E, PSW)]>; - def JNLH: Pseudo<(outs), (ins brtarget:$dst), - "jnlh\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NLH, PSW)]>; - def JHE : Pseudo<(outs), (ins brtarget:$dst), - "jhe\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_HE, PSW)]>; - def JNL : Pseudo<(outs), (ins brtarget:$dst), - "jnl\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NL, PSW)]>; - def JLE : Pseudo<(outs), (ins brtarget:$dst), - "jle\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_LE, PSW)]>; - def JNH : Pseudo<(outs), (ins brtarget:$dst), - "jnh\t$dst", - [(SystemZbrcond bb:$dst, SYSTEMZ_COND_NH, PSW)]>; - def JNO : Pseudo<(outs), (ins brtarget:$dst), - "jno\t$dst", - 
[(SystemZbrcond bb:$dst, SYSTEMZ_COND_NO, PSW)]>; - } // Uses = [PSW] -} // isBranch = 1 - -//===----------------------------------------------------------------------===// -// Call Instructions... -// - -let isCall = 1 in - // All calls clobber the non-callee saved registers. Uses for argument - // registers are added manually. - let Defs = [R0D, R1D, R2D, R3D, R4D, R5D, R14D, - F0L, F1L, F2L, F3L, F4L, F5L, F6L, F7L] in { - def CALLi : Pseudo<(outs), (ins imm_pcrel:$dst, variable_ops), - "brasl\t%r14, $dst", [(SystemZcall imm:$dst)]>; - def CALLr : Pseudo<(outs), (ins ADDR64:$dst, variable_ops), - "basr\t%r14, $dst", [(SystemZcall ADDR64:$dst)]>; - } - -//===----------------------------------------------------------------------===// -// Miscellaneous Instructions. -// - -let isReMaterializable = 1 in -// FIXME: Provide imm12 variant -// FIXME: Address should be halfword aligned... -def LA64r : RXI<0x47, - (outs GR64:$dst), (ins laaddr:$src), - "lay\t{$dst, $src}", - [(set GR64:$dst, laaddr:$src)]>; -def LA64rm : RXYI<0x71E3, - (outs GR64:$dst), (ins i64imm:$src), - "larl\t{$dst, $src}", - [(set GR64:$dst, - (SystemZpcrelwrapper tglobaladdr:$src))]>; - -let neverHasSideEffects = 1 in -def NOP : Pseudo<(outs), (ins), "# no-op", []>; - -//===----------------------------------------------------------------------===// -// Move Instructions - -let neverHasSideEffects = 1 in { -def MOV32rr : RRI<0x18, - (outs GR32:$dst), (ins GR32:$src), - "lr\t{$dst, $src}", - []>; -def MOV64rr : RREI<0xB904, - (outs GR64:$dst), (ins GR64:$src), - "lgr\t{$dst, $src}", - []>; -def MOV128rr : Pseudo<(outs GR128:$dst), (ins GR128:$src), - "# MOV128 PSEUDO!\n" - "\tlgr\t${dst:subreg_odd}, ${src:subreg_odd}\n" - "\tlgr\t${dst:subreg_even}, ${src:subreg_even}", - []>; -def MOV64rrP : Pseudo<(outs GR64P:$dst), (ins GR64P:$src), - "# MOV64P PSEUDO!\n" - "\tlr\t${dst:subreg_odd}, ${src:subreg_odd}\n" - "\tlr\t${dst:subreg_even}, ${src:subreg_even}", - []>; -} - -def MOVSX64rr32 : RREI<0xB914, - 
(outs GR64:$dst), (ins GR32:$src), - "lgfr\t{$dst, $src}", - [(set GR64:$dst, (sext GR32:$src))]>; -def MOVZX64rr32 : RREI<0xB916, - (outs GR64:$dst), (ins GR32:$src), - "llgfr\t{$dst, $src}", - [(set GR64:$dst, (zext GR32:$src))]>; - -let isReMaterializable = 1, isAsCheapAsAMove = 1 in { -def MOV32ri16 : RII<0x8A7, - (outs GR32:$dst), (ins s16imm:$src), - "lhi\t{$dst, $src}", - [(set GR32:$dst, immSExt16:$src)]>; -def MOV64ri16 : RII<0x9A7, - (outs GR64:$dst), (ins s16imm64:$src), - "lghi\t{$dst, $src}", - [(set GR64:$dst, immSExt16:$src)]>; - -def MOV64rill16 : RII<0xFA5, - (outs GR64:$dst), (ins u16imm:$src), - "llill\t{$dst, $src}", - [(set GR64:$dst, i64ll16:$src)]>; -def MOV64rilh16 : RII<0xEA5, - (outs GR64:$dst), (ins u16imm:$src), - "llilh\t{$dst, $src}", - [(set GR64:$dst, i64lh16:$src)]>; -def MOV64rihl16 : RII<0xDA5, - (outs GR64:$dst), (ins u16imm:$src), - "llihl\t{$dst, $src}", - [(set GR64:$dst, i64hl16:$src)]>; -def MOV64rihh16 : RII<0xCA5, - (outs GR64:$dst), (ins u16imm:$src), - "llihh\t{$dst, $src}", - [(set GR64:$dst, i64hh16:$src)]>; - -def MOV64ri32 : RILI<0x1C0, - (outs GR64:$dst), (ins s32imm64:$src), - "lgfi\t{$dst, $src}", - [(set GR64:$dst, immSExt32:$src)]>; -def MOV64rilo32 : RILI<0xFC0, - (outs GR64:$dst), (ins u32imm:$src), - "llilf\t{$dst, $src}", - [(set GR64:$dst, i64lo32:$src)]>; -def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins u32imm:$src), - "llihf\t{$dst, $src}", - [(set GR64:$dst, i64hi32:$src)]>; -} - -let canFoldAsLoad = 1, isReMaterializable = 1 in { -def MOV32rm : RXI<0x58, - (outs GR32:$dst), (ins rriaddr12:$src), - "l\t{$dst, $src}", - [(set GR32:$dst, (load rriaddr12:$src))]>; -def MOV32rmy : RXYI<0x58E3, - (outs GR32:$dst), (ins rriaddr:$src), - "ly\t{$dst, $src}", - [(set GR32:$dst, (load rriaddr:$src))]>; -def MOV64rm : RXYI<0x04E3, - (outs GR64:$dst), (ins rriaddr:$src), - "lg\t{$dst, $src}", - [(set GR64:$dst, (load rriaddr:$src))]>; -def MOV64Prm : Pseudo<(outs GR64P:$dst), (ins rriaddr12:$src), - "# MOV64P 
PSEUDO!\n" - "\tl\t${dst:subreg_odd}, $src\n" - "\tl\t${dst:subreg_even}, 4+$src", - [(set GR64P:$dst, (load rriaddr12:$src))]>; -def MOV64Prmy : Pseudo<(outs GR64P:$dst), (ins rriaddr:$src), - "# MOV64P PSEUDO!\n" - "\tly\t${dst:subreg_odd}, $src\n" - "\tly\t${dst:subreg_even}, 4+$src", - [(set GR64P:$dst, (load rriaddr:$src))]>; -def MOV128rm : Pseudo<(outs GR128:$dst), (ins rriaddr:$src), - "# MOV128 PSEUDO!\n" - "\tlg\t${dst:subreg_odd}, $src\n" - "\tlg\t${dst:subreg_even}, 8+$src", - [(set GR128:$dst, (load rriaddr:$src))]>; -} - -def MOV32mr : RXI<0x50, - (outs), (ins rriaddr12:$dst, GR32:$src), - "st\t{$src, $dst}", - [(store GR32:$src, rriaddr12:$dst)]>; -def MOV32mry : RXYI<0x50E3, - (outs), (ins rriaddr:$dst, GR32:$src), - "sty\t{$src, $dst}", - [(store GR32:$src, rriaddr:$dst)]>; -def MOV64mr : RXYI<0x24E3, - (outs), (ins rriaddr:$dst, GR64:$src), - "stg\t{$src, $dst}", - [(store GR64:$src, rriaddr:$dst)]>; -def MOV64Pmr : Pseudo<(outs), (ins rriaddr12:$dst, GR64P:$src), - "# MOV64P PSEUDO!\n" - "\tst\t${src:subreg_odd}, $dst\n" - "\tst\t${src:subreg_even}, 4+$dst", - [(store GR64P:$src, rriaddr12:$dst)]>; -def MOV64Pmry : Pseudo<(outs), (ins rriaddr:$dst, GR64P:$src), - "# MOV64P PSEUDO!\n" - "\tsty\t${src:subreg_odd}, $dst\n" - "\tsty\t${src:subreg_even}, 4+$dst", - [(store GR64P:$src, rriaddr:$dst)]>; -def MOV128mr : Pseudo<(outs), (ins rriaddr:$dst, GR128:$src), - "# MOV128 PSEUDO!\n" - "\tstg\t${src:subreg_odd}, $dst\n" - "\tstg\t${src:subreg_even}, 8+$dst", - [(store GR128:$src, rriaddr:$dst)]>; - -def MOV8mi : SII<0x92, - (outs), (ins riaddr12:$dst, i32i8imm:$src), - "mvi\t{$dst, $src}", - [(truncstorei8 (i32 i32immSExt8:$src), riaddr12:$dst)]>; -def MOV8miy : SIYI<0x52EB, - (outs), (ins riaddr:$dst, i32i8imm:$src), - "mviy\t{$dst, $src}", - [(truncstorei8 (i32 i32immSExt8:$src), riaddr:$dst)]>; - -let AddedComplexity = 2 in { -def MOV16mi : SILI<0xE544, - (outs), (ins riaddr12:$dst, s16imm:$src), - "mvhhi\t{$dst, $src}", - [(truncstorei16 (i32 
i32immSExt16:$src), riaddr12:$dst)]>, - Requires<[IsZ10]>; -def MOV32mi16 : SILI<0xE54C, - (outs), (ins riaddr12:$dst, s32imm:$src), - "mvhi\t{$dst, $src}", - [(store (i32 immSExt16:$src), riaddr12:$dst)]>, - Requires<[IsZ10]>; -def MOV64mi16 : SILI<0xE548, - (outs), (ins riaddr12:$dst, s32imm64:$src), - "mvghi\t{$dst, $src}", - [(store (i64 immSExt16:$src), riaddr12:$dst)]>, - Requires<[IsZ10]>; -} - -// sexts -def MOVSX32rr8 : RREI<0xB926, - (outs GR32:$dst), (ins GR32:$src), - "lbr\t{$dst, $src}", - [(set GR32:$dst, (sext_inreg GR32:$src, i8))]>; -def MOVSX64rr8 : RREI<0xB906, - (outs GR64:$dst), (ins GR64:$src), - "lgbr\t{$dst, $src}", - [(set GR64:$dst, (sext_inreg GR64:$src, i8))]>; -def MOVSX32rr16 : RREI<0xB927, - (outs GR32:$dst), (ins GR32:$src), - "lhr\t{$dst, $src}", - [(set GR32:$dst, (sext_inreg GR32:$src, i16))]>; -def MOVSX64rr16 : RREI<0xB907, - (outs GR64:$dst), (ins GR64:$src), - "lghr\t{$dst, $src}", - [(set GR64:$dst, (sext_inreg GR64:$src, i16))]>; - -// extloads -def MOVSX32rm8 : RXYI<0x76E3, - (outs GR32:$dst), (ins rriaddr:$src), - "lb\t{$dst, $src}", - [(set GR32:$dst, (sextloadi32i8 rriaddr:$src))]>; -def MOVSX32rm16 : RXI<0x48, - (outs GR32:$dst), (ins rriaddr12:$src), - "lh\t{$dst, $src}", - [(set GR32:$dst, (sextloadi32i16 rriaddr12:$src))]>; -def MOVSX32rm16y : RXYI<0x78E3, - (outs GR32:$dst), (ins rriaddr:$src), - "lhy\t{$dst, $src}", - [(set GR32:$dst, (sextloadi32i16 rriaddr:$src))]>; -def MOVSX64rm8 : RXYI<0x77E3, - (outs GR64:$dst), (ins rriaddr:$src), - "lgb\t{$dst, $src}", - [(set GR64:$dst, (sextloadi64i8 rriaddr:$src))]>; -def MOVSX64rm16 : RXYI<0x15E3, - (outs GR64:$dst), (ins rriaddr:$src), - "lgh\t{$dst, $src}", - [(set GR64:$dst, (sextloadi64i16 rriaddr:$src))]>; -def MOVSX64rm32 : RXYI<0x14E3, - (outs GR64:$dst), (ins rriaddr:$src), - "lgf\t{$dst, $src}", - [(set GR64:$dst, (sextloadi64i32 rriaddr:$src))]>; - -def MOVZX32rm8 : RXYI<0x94E3, - (outs GR32:$dst), (ins rriaddr:$src), - "llc\t{$dst, $src}", - [(set GR32:$dst, 
(zextloadi32i8 rriaddr:$src))]>; -def MOVZX32rm16 : RXYI<0x95E3, - (outs GR32:$dst), (ins rriaddr:$src), - "llh\t{$dst, $src}", - [(set GR32:$dst, (zextloadi32i16 rriaddr:$src))]>; -def MOVZX64rm8 : RXYI<0x90E3, - (outs GR64:$dst), (ins rriaddr:$src), - "llgc\t{$dst, $src}", - [(set GR64:$dst, (zextloadi64i8 rriaddr:$src))]>; -def MOVZX64rm16 : RXYI<0x91E3, - (outs GR64:$dst), (ins rriaddr:$src), - "llgh\t{$dst, $src}", - [(set GR64:$dst, (zextloadi64i16 rriaddr:$src))]>; -def MOVZX64rm32 : RXYI<0x16E3, - (outs GR64:$dst), (ins rriaddr:$src), - "llgf\t{$dst, $src}", - [(set GR64:$dst, (zextloadi64i32 rriaddr:$src))]>; - -// truncstores -def MOV32m8r : RXI<0x42, - (outs), (ins rriaddr12:$dst, GR32:$src), - "stc\t{$src, $dst}", - [(truncstorei8 GR32:$src, rriaddr12:$dst)]>; - -def MOV32m8ry : RXYI<0x72E3, - (outs), (ins rriaddr:$dst, GR32:$src), - "stcy\t{$src, $dst}", - [(truncstorei8 GR32:$src, rriaddr:$dst)]>; - -def MOV32m16r : RXI<0x40, - (outs), (ins rriaddr12:$dst, GR32:$src), - "sth\t{$src, $dst}", - [(truncstorei16 GR32:$src, rriaddr12:$dst)]>; - -def MOV32m16ry : RXYI<0x70E3, - (outs), (ins rriaddr:$dst, GR32:$src), - "sthy\t{$src, $dst}", - [(truncstorei16 GR32:$src, rriaddr:$dst)]>; - -def MOV64m8r : RXI<0x42, - (outs), (ins rriaddr12:$dst, GR64:$src), - "stc\t{$src, $dst}", - [(truncstorei8 GR64:$src, rriaddr12:$dst)]>; - -def MOV64m8ry : RXYI<0x72E3, - (outs), (ins rriaddr:$dst, GR64:$src), - "stcy\t{$src, $dst}", - [(truncstorei8 GR64:$src, rriaddr:$dst)]>; - -def MOV64m16r : RXI<0x40, - (outs), (ins rriaddr12:$dst, GR64:$src), - "sth\t{$src, $dst}", - [(truncstorei16 GR64:$src, rriaddr12:$dst)]>; - -def MOV64m16ry : RXYI<0x70E3, - (outs), (ins rriaddr:$dst, GR64:$src), - "sthy\t{$src, $dst}", - [(truncstorei16 GR64:$src, rriaddr:$dst)]>; - -def MOV64m32r : RXI<0x50, - (outs), (ins rriaddr12:$dst, GR64:$src), - "st\t{$src, $dst}", - [(truncstorei32 GR64:$src, rriaddr12:$dst)]>; - -def MOV64m32ry : RXYI<0x50E3, - (outs), (ins rriaddr:$dst, GR64:$src), - 
"sty\t{$src, $dst}", - [(truncstorei32 GR64:$src, rriaddr:$dst)]>; - -// multiple regs moves -// FIXME: should we use multiple arg nodes? -def MOV32mrm : RSYI<0x90EB, - (outs), (ins riaddr:$dst, GR32:$from, GR32:$to), - "stmy\t{$from, $to, $dst}", - []>; -def MOV64mrm : RSYI<0x24EB, - (outs), (ins riaddr:$dst, GR64:$from, GR64:$to), - "stmg\t{$from, $to, $dst}", - []>; -def MOV32rmm : RSYI<0x90EB, - (outs GR32:$from, GR32:$to), (ins riaddr:$dst), - "lmy\t{$from, $to, $dst}", - []>; -def MOV64rmm : RSYI<0x04EB, - (outs GR64:$from, GR64:$to), (ins riaddr:$dst), - "lmg\t{$from, $to, $dst}", - []>; - -let isReMaterializable = 1, neverHasSideEffects = 1, isAsCheapAsAMove = 1, - Constraints = "$src = $dst" in { -def MOV64Pr0_even : Pseudo<(outs GR64P:$dst), (ins GR64P:$src), - "lhi\t${dst:subreg_even}, 0", - []>; -def MOV128r0_even : Pseudo<(outs GR128:$dst), (ins GR128:$src), - "lghi\t${dst:subreg_even}, 0", - []>; -} - -// Byte swaps -def BSWAP32rr : RREI<0xB91F, - (outs GR32:$dst), (ins GR32:$src), - "lrvr\t{$dst, $src}", - [(set GR32:$dst, (bswap GR32:$src))]>; -def BSWAP64rr : RREI<0xB90F, - (outs GR64:$dst), (ins GR64:$src), - "lrvgr\t{$dst, $src}", - [(set GR64:$dst, (bswap GR64:$src))]>; - -// FIXME: this is invalid pattern for big-endian -//def BSWAP16rm : RXYI<0x1FE3, (outs GR32:$dst), (ins rriaddr:$src), -// "lrvh\t{$dst, $src}", -// [(set GR32:$dst, (bswap (extloadi32i16 rriaddr:$src)))]>; -def BSWAP32rm : RXYI<0x1EE3, (outs GR32:$dst), (ins rriaddr:$src), - "lrv\t{$dst, $src}", - [(set GR32:$dst, (bswap (load rriaddr:$src)))]>; -def BSWAP64rm : RXYI<0x0FE3, (outs GR64:$dst), (ins rriaddr:$src), - "lrvg\t{$dst, $src}", - [(set GR64:$dst, (bswap (load rriaddr:$src)))]>; - -//def BSWAP16mr : RXYI<0xE33F, (outs), (ins rriaddr:$dst, GR32:$src), -// "strvh\t{$src, $dst}", -// [(truncstorei16 (bswap GR32:$src), rriaddr:$dst)]>; -def BSWAP32mr : RXYI<0xE33E, (outs), (ins rriaddr:$dst, GR32:$src), - "strv\t{$src, $dst}", - [(store (bswap GR32:$src), rriaddr:$dst)]>; 
-def BSWAP64mr : RXYI<0xE32F, (outs), (ins rriaddr:$dst, GR64:$src), - "strvg\t{$src, $dst}", - [(store (bswap GR64:$src), rriaddr:$dst)]>; - -//===----------------------------------------------------------------------===// -// Arithmetic Instructions - -let Defs = [PSW] in { -def NEG32rr : RRI<0x13, - (outs GR32:$dst), (ins GR32:$src), - "lcr\t{$dst, $src}", - [(set GR32:$dst, (ineg GR32:$src)), - (implicit PSW)]>; -def NEG64rr : RREI<0xB903, (outs GR64:$dst), (ins GR64:$src), - "lcgr\t{$dst, $src}", - [(set GR64:$dst, (ineg GR64:$src)), - (implicit PSW)]>; -def NEG64rr32 : RREI<0xB913, (outs GR64:$dst), (ins GR32:$src), - "lcgfr\t{$dst, $src}", - [(set GR64:$dst, (ineg (sext GR32:$src))), - (implicit PSW)]>; -} - -let Constraints = "$src1 = $dst" in { - -let Defs = [PSW] in { - -let isCommutable = 1 in { // X = ADD Y, Z == X = ADD Z, Y -def ADD32rr : RRI<0x1A, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "ar\t{$dst, $src2}", - [(set GR32:$dst, (add GR32:$src1, GR32:$src2)), - (implicit PSW)]>; -def ADD64rr : RREI<0xB908, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "agr\t{$dst, $src2}", - [(set GR64:$dst, (add GR64:$src1, GR64:$src2)), - (implicit PSW)]>; -} - -def ADD32rm : RXI<0x5A, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), - "a\t{$dst, $src2}", - [(set GR32:$dst, (add GR32:$src1, (load rriaddr12:$src2))), - (implicit PSW)]>; -def ADD32rmy : RXYI<0xE35A, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2), - "ay\t{$dst, $src2}", - [(set GR32:$dst, (add GR32:$src1, (load rriaddr:$src2))), - (implicit PSW)]>; -def ADD64rm : RXYI<0xE308, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), - "ag\t{$dst, $src2}", - [(set GR64:$dst, (add GR64:$src1, (load rriaddr:$src2))), - (implicit PSW)]>; - - -def ADD32ri16 : RII<0xA7A, - (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2), - "ahi\t{$dst, $src2}", - [(set GR32:$dst, (add GR32:$src1, immSExt16:$src2)), - (implicit PSW)]>; -def ADD32ri : RILI<0xC29, - (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2), 
- "afi\t{$dst, $src2}", - [(set GR32:$dst, (add GR32:$src1, imm:$src2)), - (implicit PSW)]>; -def ADD64ri16 : RILI<0xA7B, - (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2), - "aghi\t{$dst, $src2}", - [(set GR64:$dst, (add GR64:$src1, immSExt16:$src2)), - (implicit PSW)]>; -def ADD64ri32 : RILI<0xC28, - (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2), - "agfi\t{$dst, $src2}", - [(set GR64:$dst, (add GR64:$src1, immSExt32:$src2)), - (implicit PSW)]>; - -let isCommutable = 1 in { // X = ADC Y, Z == X = ADC Z, Y -def ADC32rr : RRI<0x1E, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "alr\t{$dst, $src2}", - [(set GR32:$dst, (addc GR32:$src1, GR32:$src2))]>; -def ADC64rr : RREI<0xB90A, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "algr\t{$dst, $src2}", - [(set GR64:$dst, (addc GR64:$src1, GR64:$src2))]>; -} - -def ADC32ri : RILI<0xC2B, - (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2), - "alfi\t{$dst, $src2}", - [(set GR32:$dst, (addc GR32:$src1, imm:$src2))]>; -def ADC64ri32 : RILI<0xC2A, - (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2), - "algfi\t{$dst, $src2}", - [(set GR64:$dst, (addc GR64:$src1, immSExt32:$src2))]>; - -let Uses = [PSW] in { -def ADDE32rr : RREI<0xB998, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "alcr\t{$dst, $src2}", - [(set GR32:$dst, (adde GR32:$src1, GR32:$src2)), - (implicit PSW)]>; -def ADDE64rr : RREI<0xB988, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "alcgr\t{$dst, $src2}", - [(set GR64:$dst, (adde GR64:$src1, GR64:$src2)), - (implicit PSW)]>; -} - -let isCommutable = 1 in { // X = AND Y, Z == X = AND Z, Y -def AND32rr : RRI<0x14, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "nr\t{$dst, $src2}", - [(set GR32:$dst, (and GR32:$src1, GR32:$src2))]>; -def AND64rr : RREI<0xB980, - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "ngr\t{$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, GR64:$src2))]>; -} - -def AND32rm : RXI<0x54, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), - "n\t{$dst, $src2}", - 
[(set GR32:$dst, (and GR32:$src1, (load rriaddr12:$src2))), - (implicit PSW)]>; -def AND32rmy : RXYI<0xE354, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2), - "ny\t{$dst, $src2}", - [(set GR32:$dst, (and GR32:$src1, (load rriaddr:$src2))), - (implicit PSW)]>; -def AND64rm : RXYI<0xE360, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), - "ng\t{$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, (load rriaddr:$src2))), - (implicit PSW)]>; - -def AND32rill16 : RII<0xA57, - (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2), - "nill\t{$dst, $src2}", - [(set GR32:$dst, (and GR32:$src1, i32ll16c:$src2))]>; -def AND64rill16 : RII<0xA57, - (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), - "nill\t{$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, i64ll16c:$src2))]>; - -def AND32rilh16 : RII<0xA56, - (outs GR32:$dst), (ins GR32:$src1, u16imm:$src2), - "nilh\t{$dst, $src2}", - [(set GR32:$dst, (and GR32:$src1, i32lh16c:$src2))]>; -def AND64rilh16 : RII<0xA56, - (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), - "nilh\t{$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, i64lh16c:$src2))]>; - -def AND64rihl16 : RII<0xA55, - (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), - "nihl\t{$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, i64hl16c:$src2))]>; -def AND64rihh16 : RII<0xA54, - (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), - "nihh\t{$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, i64hh16c:$src2))]>; - -def AND32ri : RILI<0xC0B, - (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2), - "nilf\t{$dst, $src2}", - [(set GR32:$dst, (and GR32:$src1, imm:$src2))]>; -def AND64rilo32 : RILI<0xC0B, - (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2), - "nilf\t{$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, i64lo32c:$src2))]>; -def AND64rihi32 : RILI<0xC0A, - (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2), - "nihf\t{$dst, $src2}", - [(set GR64:$dst, (and GR64:$src1, i64hi32c:$src2))]>; - -let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y -def OR32rr : RRI<0x16, - 
(outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "or\t{$dst, $src2}", - [(set GR32:$dst, (or GR32:$src1, GR32:$src2))]>; -def OR64rr : RREI<0xB981, - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "ogr\t{$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, GR64:$src2))]>; -} - -def OR32rm : RXI<0x56, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), - "o\t{$dst, $src2}", - [(set GR32:$dst, (or GR32:$src1, (load rriaddr12:$src2))), - (implicit PSW)]>; -def OR32rmy : RXYI<0xE356, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2), - "oy\t{$dst, $src2}", - [(set GR32:$dst, (or GR32:$src1, (load rriaddr:$src2))), - (implicit PSW)]>; -def OR64rm : RXYI<0xE381, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), - "og\t{$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, (load rriaddr:$src2))), - (implicit PSW)]>; - - // FIXME: Provide proper encoding! -def OR32ri16 : RII<0xA5B, - (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2), - "oill\t{$dst, $src2}", - [(set GR32:$dst, (or GR32:$src1, i32ll16:$src2))]>; -def OR32ri16h : RII<0xA5A, - (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2), - "oilh\t{$dst, $src2}", - [(set GR32:$dst, (or GR32:$src1, i32lh16:$src2))]>; -def OR32ri : RILI<0xC0D, - (outs GR32:$dst), (ins GR32:$src1, u32imm:$src2), - "oilf\t{$dst, $src2}", - [(set GR32:$dst, (or GR32:$src1, imm:$src2))]>; - -def OR64rill16 : RII<0xA5B, - (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), - "oill\t{$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64ll16:$src2))]>; -def OR64rilh16 : RII<0xA5A, - (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), - "oilh\t{$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64lh16:$src2))]>; -def OR64rihl16 : RII<0xA59, - (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), - "oihl\t{$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64hl16:$src2))]>; -def OR64rihh16 : RII<0xA58, - (outs GR64:$dst), (ins GR64:$src1, u16imm:$src2), - "oihh\t{$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64hh16:$src2))]>; - -def OR64rilo32 : 
RILI<0xC0D, - (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2), - "oilf\t{$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64lo32:$src2))]>; -def OR64rihi32 : RILI<0xC0C, - (outs GR64:$dst), (ins GR64:$src1, u32imm:$src2), - "oihf\t{$dst, $src2}", - [(set GR64:$dst, (or GR64:$src1, i64hi32:$src2))]>; - -def SUB32rr : RRI<0x1B, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "sr\t{$dst, $src2}", - [(set GR32:$dst, (sub GR32:$src1, GR32:$src2))]>; -def SUB64rr : RREI<0xB909, - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "sgr\t{$dst, $src2}", - [(set GR64:$dst, (sub GR64:$src1, GR64:$src2))]>; - -def SUB32rm : RXI<0x5B, (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), - "s\t{$dst, $src2}", - [(set GR32:$dst, (sub GR32:$src1, (load rriaddr12:$src2))), - (implicit PSW)]>; -def SUB32rmy : RXYI<0xE35B, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2), - "sy\t{$dst, $src2}", - [(set GR32:$dst, (sub GR32:$src1, (load rriaddr:$src2))), - (implicit PSW)]>; -def SUB64rm : RXYI<0xE309, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), - "sg\t{$dst, $src2}", - [(set GR64:$dst, (sub GR64:$src1, (load rriaddr:$src2))), - (implicit PSW)]>; - -def SBC32rr : RRI<0x1F, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "slr\t{$dst, $src2}", - [(set GR32:$dst, (subc GR32:$src1, GR32:$src2))]>; -def SBC64rr : RREI<0xB90B, - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "slgr\t{$dst, $src2}", - [(set GR64:$dst, (subc GR64:$src1, GR64:$src2))]>; - -def SBC32ri : RILI<0xC25, - (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2), - "sllfi\t{$dst, $src2}", - [(set GR32:$dst, (subc GR32:$src1, imm:$src2))]>; -def SBC64ri32 : RILI<0xC24, - (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2), - "slgfi\t{$dst, $src2}", - [(set GR64:$dst, (subc GR64:$src1, immSExt32:$src2))]>; - -let Uses = [PSW] in { -def SUBE32rr : RREI<0xB999, (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "slbr\t{$dst, $src2}", - [(set GR32:$dst, (sube GR32:$src1, GR32:$src2)), - (implicit PSW)]>; -def 
SUBE64rr : RREI<0xB989, (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "slbgr\t{$dst, $src2}", - [(set GR64:$dst, (sube GR64:$src1, GR64:$src2)), - (implicit PSW)]>; -} - -let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y -def XOR32rr : RRI<0x17, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "xr\t{$dst, $src2}", - [(set GR32:$dst, (xor GR32:$src1, GR32:$src2))]>; -def XOR64rr : RREI<0xB982, - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "xgr\t{$dst, $src2}", - [(set GR64:$dst, (xor GR64:$src1, GR64:$src2))]>; -} - -def XOR32rm : RXI<0x57,(outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), - "x\t{$dst, $src2}", - [(set GR32:$dst, (xor GR32:$src1, (load rriaddr12:$src2))), - (implicit PSW)]>; -def XOR32rmy : RXYI<0xE357, (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2), - "xy\t{$dst, $src2}", - [(set GR32:$dst, (xor GR32:$src1, (load rriaddr:$src2))), - (implicit PSW)]>; -def XOR64rm : RXYI<0xE382, (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), - "xg\t{$dst, $src2}", - [(set GR64:$dst, (xor GR64:$src1, (load rriaddr:$src2))), - (implicit PSW)]>; - -def XOR32ri : RILI<0xC07, - (outs GR32:$dst), (ins GR32:$src1, i32imm:$src2), - "xilf\t{$dst, $src2}", - [(set GR32:$dst, (xor GR32:$src1, imm:$src2))]>; - -} // Defs = [PSW] - -let isCommutable = 1 in { // X = MUL Y, Z == X = MUL Z, Y -def MUL32rr : RREI<0xB252, - (outs GR32:$dst), (ins GR32:$src1, GR32:$src2), - "msr\t{$dst, $src2}", - [(set GR32:$dst, (mul GR32:$src1, GR32:$src2))]>; -def MUL64rr : RREI<0xB90C, - (outs GR64:$dst), (ins GR64:$src1, GR64:$src2), - "msgr\t{$dst, $src2}", - [(set GR64:$dst, (mul GR64:$src1, GR64:$src2))]>; -} - -def MUL64rrP : RRI<0x1C, - (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2), - "mr\t{$dst, $src2}", - []>; -def UMUL64rrP : RREI<0xB996, - (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2), - "mlr\t{$dst, $src2}", - []>; -def UMUL128rrP : RREI<0xB986, - (outs GR128:$dst), (ins GR128:$src1, GR64:$src2), - "mlgr\t{$dst, $src2}", - []>; - -def MUL32ri16 : 
RII<0xA7C, - (outs GR32:$dst), (ins GR32:$src1, s16imm:$src2), - "mhi\t{$dst, $src2}", - [(set GR32:$dst, (mul GR32:$src1, i32immSExt16:$src2))]>; -def MUL64ri16 : RII<0xA7D, - (outs GR64:$dst), (ins GR64:$src1, s16imm64:$src2), - "mghi\t{$dst, $src2}", - [(set GR64:$dst, (mul GR64:$src1, immSExt16:$src2))]>; - -let AddedComplexity = 2 in { -def MUL32ri : RILI<0xC21, - (outs GR32:$dst), (ins GR32:$src1, s32imm:$src2), - "msfi\t{$dst, $src2}", - [(set GR32:$dst, (mul GR32:$src1, imm:$src2))]>, - Requires<[IsZ10]>; -def MUL64ri32 : RILI<0xC20, - (outs GR64:$dst), (ins GR64:$src1, s32imm64:$src2), - "msgfi\t{$dst, $src2}", - [(set GR64:$dst, (mul GR64:$src1, i64immSExt32:$src2))]>, - Requires<[IsZ10]>; -} - -def MUL32rm : RXI<0x71, - (outs GR32:$dst), (ins GR32:$src1, rriaddr12:$src2), - "ms\t{$dst, $src2}", - [(set GR32:$dst, (mul GR32:$src1, (load rriaddr12:$src2)))]>; -def MUL32rmy : RXYI<0xE351, - (outs GR32:$dst), (ins GR32:$src1, rriaddr:$src2), - "msy\t{$dst, $src2}", - [(set GR32:$dst, (mul GR32:$src1, (load rriaddr:$src2)))]>; -def MUL64rm : RXYI<0xE30C, - (outs GR64:$dst), (ins GR64:$src1, rriaddr:$src2), - "msg\t{$dst, $src2}", - [(set GR64:$dst, (mul GR64:$src1, (load rriaddr:$src2)))]>; - -def MULSX64rr32 : RREI<0xB91C, - (outs GR64:$dst), (ins GR64:$src1, GR32:$src2), - "msgfr\t{$dst, $src2}", - [(set GR64:$dst, (mul GR64:$src1, (sext GR32:$src2)))]>; - -def SDIVREM32r : RREI<0xB91D, - (outs GR128:$dst), (ins GR128:$src1, GR32:$src2), - "dsgfr\t{$dst, $src2}", - []>; -def SDIVREM64r : RREI<0xB90D, - (outs GR128:$dst), (ins GR128:$src1, GR64:$src2), - "dsgr\t{$dst, $src2}", - []>; - -def UDIVREM32r : RREI<0xB997, - (outs GR64P:$dst), (ins GR64P:$src1, GR32:$src2), - "dlr\t{$dst, $src2}", - []>; -def UDIVREM64r : RREI<0xB987, - (outs GR128:$dst), (ins GR128:$src1, GR64:$src2), - "dlgr\t{$dst, $src2}", - []>; -let mayLoad = 1 in { -def SDIVREM32m : RXYI<0xE31D, - (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2), - "dsgf\t{$dst, $src2}", - []>; -def 
SDIVREM64m : RXYI<0xE30D, - (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2), - "dsg\t{$dst, $src2}", - []>; - -def UDIVREM32m : RXYI<0xE397, (outs GR64P:$dst), (ins GR64P:$src1, rriaddr:$src2), - "dl\t{$dst, $src2}", - []>; -def UDIVREM64m : RXYI<0xE387, (outs GR128:$dst), (ins GR128:$src1, rriaddr:$src2), - "dlg\t{$dst, $src2}", - []>; -} // mayLoad -} // Constraints = "$src1 = $dst" - -//===----------------------------------------------------------------------===// -// Shifts - -let Constraints = "$src = $dst" in -def SRL32rri : RSI<0x88, - (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), - "srl\t{$src, $amt}", - [(set GR32:$dst, (srl GR32:$src, riaddr32:$amt))]>; -def SRL64rri : RSYI<0xEB0C, - (outs GR64:$dst), (ins GR64:$src, riaddr:$amt), - "srlg\t{$dst, $src, $amt}", - [(set GR64:$dst, (srl GR64:$src, riaddr:$amt))]>; - -let Constraints = "$src = $dst" in -def SHL32rri : RSI<0x89, - (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), - "sll\t{$src, $amt}", - [(set GR32:$dst, (shl GR32:$src, riaddr32:$amt))]>; -def SHL64rri : RSYI<0xEB0D, - (outs GR64:$dst), (ins GR64:$src, riaddr:$amt), - "sllg\t{$dst, $src, $amt}", - [(set GR64:$dst, (shl GR64:$src, riaddr:$amt))]>; - -let Defs = [PSW] in { -let Constraints = "$src = $dst" in -def SRA32rri : RSI<0x8A, - (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), - "sra\t{$src, $amt}", - [(set GR32:$dst, (sra GR32:$src, riaddr32:$amt)), - (implicit PSW)]>; - -def SRA64rri : RSYI<0xEB0A, - (outs GR64:$dst), (ins GR64:$src, riaddr:$amt), - "srag\t{$dst, $src, $amt}", - [(set GR64:$dst, (sra GR64:$src, riaddr:$amt)), - (implicit PSW)]>; -} // Defs = [PSW] - -def ROTL32rri : RSYI<0xEB1D, - (outs GR32:$dst), (ins GR32:$src, riaddr32:$amt), - "rll\t{$dst, $src, $amt}", - [(set GR32:$dst, (rotl GR32:$src, riaddr32:$amt))]>; -def ROTL64rri : RSYI<0xEB1C, - (outs GR64:$dst), (ins GR64:$src, riaddr:$amt), - "rllg\t{$dst, $src, $amt}", - [(set GR64:$dst, (rotl GR64:$src, riaddr:$amt))]>; - 
-//===----------------------------------------------------------------------===// -// Test instructions (like AND but do not produce any result) - -// Integer comparisons -let Defs = [PSW] in { -def CMP32rr : RRI<0x19, - (outs), (ins GR32:$src1, GR32:$src2), - "cr\t$src1, $src2", - [(set PSW, (SystemZcmp GR32:$src1, GR32:$src2))]>; -def CMP64rr : RREI<0xB920, - (outs), (ins GR64:$src1, GR64:$src2), - "cgr\t$src1, $src2", - [(set PSW, (SystemZcmp GR64:$src1, GR64:$src2))]>; - -def CMP32ri : RILI<0xC2D, - (outs), (ins GR32:$src1, s32imm:$src2), - "cfi\t$src1, $src2", - [(set PSW, (SystemZcmp GR32:$src1, imm:$src2))]>; -def CMP64ri32 : RILI<0xC2C, - (outs), (ins GR64:$src1, s32imm64:$src2), - "cgfi\t$src1, $src2", - [(set PSW, (SystemZcmp GR64:$src1, i64immSExt32:$src2))]>; - -def CMP32rm : RXI<0x59, - (outs), (ins GR32:$src1, rriaddr12:$src2), - "c\t$src1, $src2", - [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr12:$src2)))]>; -def CMP32rmy : RXYI<0xE359, - (outs), (ins GR32:$src1, rriaddr:$src2), - "cy\t$src1, $src2", - [(set PSW, (SystemZcmp GR32:$src1, (load rriaddr:$src2)))]>; -def CMP64rm : RXYI<0xE320, - (outs), (ins GR64:$src1, rriaddr:$src2), - "cg\t$src1, $src2", - [(set PSW, (SystemZcmp GR64:$src1, (load rriaddr:$src2)))]>; - -def UCMP32rr : RRI<0x15, - (outs), (ins GR32:$src1, GR32:$src2), - "clr\t$src1, $src2", - [(set PSW, (SystemZucmp GR32:$src1, GR32:$src2))]>; -def UCMP64rr : RREI<0xB921, - (outs), (ins GR64:$src1, GR64:$src2), - "clgr\t$src1, $src2", - [(set PSW, (SystemZucmp GR64:$src1, GR64:$src2))]>; - -def UCMP32ri : RILI<0xC2F, - (outs), (ins GR32:$src1, i32imm:$src2), - "clfi\t$src1, $src2", - [(set PSW, (SystemZucmp GR32:$src1, imm:$src2))]>; -def UCMP64ri32 : RILI<0xC2E, - (outs), (ins GR64:$src1, i64i32imm:$src2), - "clgfi\t$src1, $src2", - [(set PSW,(SystemZucmp GR64:$src1, i64immZExt32:$src2))]>; - -def UCMP32rm : RXI<0x55, - (outs), (ins GR32:$src1, rriaddr12:$src2), - "cl\t$src1, $src2", - [(set PSW, (SystemZucmp GR32:$src1, - (load 
rriaddr12:$src2)))]>; -def UCMP32rmy : RXYI<0xE355, - (outs), (ins GR32:$src1, rriaddr:$src2), - "cly\t$src1, $src2", - [(set PSW, (SystemZucmp GR32:$src1, - (load rriaddr:$src2)))]>; -def UCMP64rm : RXYI<0xE351, - (outs), (ins GR64:$src1, rriaddr:$src2), - "clg\t$src1, $src2", - [(set PSW, (SystemZucmp GR64:$src1, - (load rriaddr:$src2)))]>; - -def CMPSX64rr32 : RREI<0xB930, - (outs), (ins GR64:$src1, GR32:$src2), - "cgfr\t$src1, $src2", - [(set PSW, (SystemZucmp GR64:$src1, - (sext GR32:$src2)))]>; -def UCMPZX64rr32 : RREI<0xB931, - (outs), (ins GR64:$src1, GR32:$src2), - "clgfr\t$src1, $src2", - [(set PSW, (SystemZucmp GR64:$src1, - (zext GR32:$src2)))]>; - -def CMPSX64rm32 : RXYI<0xE330, - (outs), (ins GR64:$src1, rriaddr:$src2), - "cgf\t$src1, $src2", - [(set PSW, (SystemZucmp GR64:$src1, - (sextloadi64i32 rriaddr:$src2)))]>; -def UCMPZX64rm32 : RXYI<0xE331, - (outs), (ins GR64:$src1, rriaddr:$src2), - "clgf\t$src1, $src2", - [(set PSW, (SystemZucmp GR64:$src1, - (zextloadi64i32 rriaddr:$src2)))]>; - -// FIXME: Add other crazy ucmp forms - -} // Defs = [PSW] - -//===----------------------------------------------------------------------===// -// Other crazy stuff -let Defs = [PSW] in { -def FLOGR64 : RREI<0xB983, - (outs GR128:$dst), (ins GR64:$src), - "flogr\t{$dst, $src}", - []>; -} // Defs = [PSW] - -//===----------------------------------------------------------------------===// -// Non-Instruction Patterns. 
-//===----------------------------------------------------------------------===// - -// ConstPools, JumpTables -def : Pat<(SystemZpcrelwrapper tjumptable:$src), (LA64rm tjumptable:$src)>; -def : Pat<(SystemZpcrelwrapper tconstpool:$src), (LA64rm tconstpool:$src)>; - -// anyext -def : Pat<(i64 (anyext GR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_32bit)>; - -// calls -def : Pat<(SystemZcall (i64 tglobaladdr:$dst)), (CALLi tglobaladdr:$dst)>; -def : Pat<(SystemZcall (i64 texternalsym:$dst)), (CALLi texternalsym:$dst)>; - -//===----------------------------------------------------------------------===// -// Peepholes. -//===----------------------------------------------------------------------===// - -// FIXME: use add/sub tricks with 32678/-32768 - -// Arbitrary immediate support. -def : Pat<(i32 imm:$src), - (EXTRACT_SUBREG (MOV64ri32 (GetI64FromI32 (i32 imm:$src))), - subreg_32bit)>; - -// Implement in terms of LLIHF/OILF. -def : Pat<(i64 imm:$imm), - (OR64rilo32 (MOV64rihi32 (HI32 imm:$imm)), (LO32 imm:$imm))>; - -// trunc patterns -def : Pat<(i32 (trunc GR64:$src)), - (EXTRACT_SUBREG GR64:$src, subreg_32bit)>; - -// sext_inreg patterns -def : Pat<(sext_inreg GR64:$src, i32), - (MOVSX64rr32 (EXTRACT_SUBREG GR64:$src, subreg_32bit))>; - -// extload patterns -def : Pat<(extloadi32i8 rriaddr:$src), (MOVZX32rm8 rriaddr:$src)>; -def : Pat<(extloadi32i16 rriaddr:$src), (MOVZX32rm16 rriaddr:$src)>; -def : Pat<(extloadi64i8 rriaddr:$src), (MOVZX64rm8 rriaddr:$src)>; -def : Pat<(extloadi64i16 rriaddr:$src), (MOVZX64rm16 rriaddr:$src)>; -def : Pat<(extloadi64i32 rriaddr:$src), (MOVZX64rm32 rriaddr:$src)>; - -// muls -def : Pat<(mulhs GR32:$src1, GR32:$src2), - (EXTRACT_SUBREG (MUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - GR32:$src1, subreg_odd32), - GR32:$src2), - subreg_32bit)>; - -def : Pat<(mulhu GR32:$src1, GR32:$src2), - (EXTRACT_SUBREG (UMUL64rrP (INSERT_SUBREG (v2i32 (IMPLICIT_DEF)), - GR32:$src1, subreg_odd32), - GR32:$src2), - 
subreg_32bit)>; -def : Pat<(mulhu GR64:$src1, GR64:$src2), - (EXTRACT_SUBREG (UMUL128rrP (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), - GR64:$src1, subreg_odd), - GR64:$src2), - subreg_even)>; - -def : Pat<(ctlz GR64:$src), - (EXTRACT_SUBREG (FLOGR64 GR64:$src), subreg_even)>; diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h deleted file mode 100644 index fd6e330..0000000 --- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h +++ /dev/null @@ -1,51 +0,0 @@ -//==- SystemZMachineFuctionInfo.h - SystemZ machine function info -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares SystemZ-specific per-machine-function information. -// -//===----------------------------------------------------------------------===// - -#ifndef SYSTEMZMACHINEFUNCTIONINFO_H -#define SYSTEMZMACHINEFUNCTIONINFO_H - -#include "llvm/CodeGen/MachineFunction.h" - -namespace llvm { - -/// SystemZMachineFunctionInfo - This class is derived from MachineFunction and -/// contains private SystemZ target-specific information for each MachineFunction. -class SystemZMachineFunctionInfo : public MachineFunctionInfo { - /// CalleeSavedFrameSize - Size of the callee-saved register portion of the - /// stack frame in bytes. - unsigned CalleeSavedFrameSize; - - /// LowReg - Low register of range of callee-saved registers to store. - unsigned LowReg; - - /// HighReg - High register of range of callee-saved registers to store. 
- unsigned HighReg; -public: - SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {} - - explicit SystemZMachineFunctionInfo(MachineFunction &MF) - : CalleeSavedFrameSize(0) {} - - unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; } - void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; } - - unsigned getLowReg() const { return LowReg; } - void setLowReg(unsigned Reg) { LowReg = Reg; } - - unsigned getHighReg() const { return HighReg; } - void setHighReg(unsigned Reg) { HighReg = Reg; } -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/SystemZ/SystemZOperands.td b/lib/Target/SystemZ/SystemZOperands.td deleted file mode 100644 index 8b835cc..0000000 --- a/lib/Target/SystemZ/SystemZOperands.td +++ /dev/null @@ -1,325 +0,0 @@ -//=====- SystemZOperands.td - SystemZ Operands defs ---------*- tblgen-*-=====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the various SystemZ instruction operands. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Instruction Pattern Stuff. -//===----------------------------------------------------------------------===// - -// SystemZ specific condition code. These correspond to CondCode in -// SystemZ.h. They must be kept in synch. 
-def SYSTEMZ_COND_O : PatLeaf<(i8 0)>; -def SYSTEMZ_COND_H : PatLeaf<(i8 1)>; -def SYSTEMZ_COND_NLE : PatLeaf<(i8 2)>; -def SYSTEMZ_COND_L : PatLeaf<(i8 3)>; -def SYSTEMZ_COND_NHE : PatLeaf<(i8 4)>; -def SYSTEMZ_COND_LH : PatLeaf<(i8 5)>; -def SYSTEMZ_COND_NE : PatLeaf<(i8 6)>; -def SYSTEMZ_COND_E : PatLeaf<(i8 7)>; -def SYSTEMZ_COND_NLH : PatLeaf<(i8 8)>; -def SYSTEMZ_COND_HE : PatLeaf<(i8 9)>; -def SYSTEMZ_COND_NL : PatLeaf<(i8 10)>; -def SYSTEMZ_COND_LE : PatLeaf<(i8 11)>; -def SYSTEMZ_COND_NH : PatLeaf<(i8 12)>; -def SYSTEMZ_COND_NO : PatLeaf<(i8 13)>; - -def LO8 : SDNodeXFormgetZExtValue() & 0x00000000000000FFULL); -}]>; - -def LL16 : SDNodeXFormgetZExtValue() & 0x000000000000FFFFULL); -}]>; - -def LH16 : SDNodeXFormgetZExtValue() & 0x00000000FFFF0000ULL) >> 16); -}]>; - -def HL16 : SDNodeXFormgetZExtValue() & 0x0000FFFF00000000ULL) >> 32); -}]>; - -def HH16 : SDNodeXFormgetZExtValue() & 0xFFFF000000000000ULL) >> 48); -}]>; - -def LO32 : SDNodeXFormgetZExtValue() & 0x00000000FFFFFFFFULL); -}]>; - -def HI32 : SDNodeXFormgetZExtValue() >> 32); -}]>; - -def GetI64FromI32 : SDNodeXFormgetTargetConstant(N->getSExtValue(), MVT::i64); -}]>; - -def i32ll16 : PatLeaf<(i32 imm), [{ - // i32ll16 predicate - true if the 32-bit immediate has only rightmost 16 - // bits set. - return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue()); -}], LL16>; - -def i32lh16 : PatLeaf<(i32 imm), [{ - // i32lh16 predicate - true if the 32-bit immediate has only bits 16-31 set. - return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue()); -}], LH16>; - -def i32ll16c : PatLeaf<(i32 imm), [{ - // i32ll16c predicate - true if the 32-bit immediate has all bits 16-31 set. - return ((N->getZExtValue() | 0x00000000FFFF0000ULL) == N->getZExtValue()); -}], LL16>; - -def i32lh16c : PatLeaf<(i32 imm), [{ - // i32lh16c predicate - true if the 32-bit immediate has all rightmost 16 - // bits set. 
- return ((N->getZExtValue() | 0x000000000000FFFFULL) == N->getZExtValue()); -}], LH16>; - -def i64ll16 : PatLeaf<(i64 imm), [{ - // i64ll16 predicate - true if the 64-bit immediate has only rightmost 16 - // bits set. - return ((N->getZExtValue() & 0x000000000000FFFFULL) == N->getZExtValue()); -}], LL16>; - -def i64lh16 : PatLeaf<(i64 imm), [{ - // i64lh16 predicate - true if the 64-bit immediate has only bits 16-31 set. - return ((N->getZExtValue() & 0x00000000FFFF0000ULL) == N->getZExtValue()); -}], LH16>; - -def i64hl16 : PatLeaf<(i64 imm), [{ - // i64hl16 predicate - true if the 64-bit immediate has only bits 32-47 set. - return ((N->getZExtValue() & 0x0000FFFF00000000ULL) == N->getZExtValue()); -}], HL16>; - -def i64hh16 : PatLeaf<(i64 imm), [{ - // i64hh16 predicate - true if the 64-bit immediate has only bits 48-63 set. - return ((N->getZExtValue() & 0xFFFF000000000000ULL) == N->getZExtValue()); -}], HH16>; - -def i64ll16c : PatLeaf<(i64 imm), [{ - // i64ll16c predicate - true if the 64-bit immediate has only rightmost 16 - // bits set. - return ((N->getZExtValue() | 0xFFFFFFFFFFFF0000ULL) == N->getZExtValue()); -}], LL16>; - -def i64lh16c : PatLeaf<(i64 imm), [{ - // i64lh16c predicate - true if the 64-bit immediate has only bits 16-31 set. - return ((N->getZExtValue() | 0xFFFFFFFF0000FFFFULL) == N->getZExtValue()); -}], LH16>; - -def i64hl16c : PatLeaf<(i64 imm), [{ - // i64hl16c predicate - true if the 64-bit immediate has only bits 32-47 set. - return ((N->getZExtValue() | 0xFFFF0000FFFFFFFFULL) == N->getZExtValue()); -}], HL16>; - -def i64hh16c : PatLeaf<(i64 imm), [{ - // i64hh16c predicate - true if the 64-bit immediate has only bits 48-63 set. - return ((N->getZExtValue() | 0x0000FFFFFFFFFFFFULL) == N->getZExtValue()); -}], HH16>; - -def immSExt16 : PatLeaf<(imm), [{ - // immSExt16 predicate - true if the immediate fits in a 16-bit sign extended - // field. 
- if (N->getValueType(0) == MVT::i64) { - uint64_t val = N->getZExtValue(); - return ((int64_t)val == (int16_t)val); - } else if (N->getValueType(0) == MVT::i32) { - uint32_t val = N->getZExtValue(); - return ((int32_t)val == (int16_t)val); - } - - return false; -}], LL16>; - -def immSExt32 : PatLeaf<(i64 imm), [{ - // immSExt32 predicate - true if the immediate fits in a 32-bit sign extended - // field. - uint64_t val = N->getZExtValue(); - return ((int64_t)val == (int32_t)val); -}], LO32>; - -def i64lo32 : PatLeaf<(i64 imm), [{ - // i64lo32 predicate - true if the 64-bit immediate has only rightmost 32 - // bits set. - return ((N->getZExtValue() & 0x00000000FFFFFFFFULL) == N->getZExtValue()); -}], LO32>; - -def i64hi32 : PatLeaf<(i64 imm), [{ - // i64hi32 predicate - true if the 64-bit immediate has only bits 32-63 set. - return ((N->getZExtValue() & 0xFFFFFFFF00000000ULL) == N->getZExtValue()); -}], HI32>; - -def i64lo32c : PatLeaf<(i64 imm), [{ - // i64lo32 predicate - true if the 64-bit immediate has only rightmost 32 - // bits set. - return ((N->getZExtValue() | 0xFFFFFFFF00000000ULL) == N->getZExtValue()); -}], LO32>; - -def i64hi32c : PatLeaf<(i64 imm), [{ - // i64hi32 predicate - true if the 64-bit immediate has only bits 32-63 set. - return ((N->getZExtValue() | 0x00000000FFFFFFFFULL) == N->getZExtValue()); -}], HI32>; - -def i32immSExt8 : PatLeaf<(i32 imm), [{ - // i32immSExt8 predicate - True if the 32-bit immediate fits in a 8-bit - // sign extended field. - return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue(); -}], LO8>; - -def i32immSExt16 : PatLeaf<(i32 imm), [{ - // i32immSExt16 predicate - True if the 32-bit immediate fits in a 16-bit - // sign extended field. - return (int32_t)N->getZExtValue() == (int16_t)N->getZExtValue(); -}], LL16>; - -def i64immSExt32 : PatLeaf<(i64 imm), [{ - // i64immSExt32 predicate - True if the 64-bit immediate fits in a 32-bit - // sign extended field. 
- return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue(); -}], LO32>; - -def i64immZExt32 : PatLeaf<(i64 imm), [{ - // i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit - // zero extended field. - return (uint64_t)N->getZExtValue() == (uint32_t)N->getZExtValue(); -}], LO32>; - -// extloads -def extloadi32i8 : PatFrag<(ops node:$ptr), (i32 (extloadi8 node:$ptr))>; -def extloadi32i16 : PatFrag<(ops node:$ptr), (i32 (extloadi16 node:$ptr))>; -def extloadi64i8 : PatFrag<(ops node:$ptr), (i64 (extloadi8 node:$ptr))>; -def extloadi64i16 : PatFrag<(ops node:$ptr), (i64 (extloadi16 node:$ptr))>; -def extloadi64i32 : PatFrag<(ops node:$ptr), (i64 (extloadi32 node:$ptr))>; - -def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>; -def sextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (sextloadi16 node:$ptr))>; -def sextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (sextloadi8 node:$ptr))>; -def sextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (sextloadi16 node:$ptr))>; -def sextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (sextloadi32 node:$ptr))>; - -def zextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (zextloadi8 node:$ptr))>; -def zextloadi32i16 : PatFrag<(ops node:$ptr), (i32 (zextloadi16 node:$ptr))>; -def zextloadi64i8 : PatFrag<(ops node:$ptr), (i64 (zextloadi8 node:$ptr))>; -def zextloadi64i16 : PatFrag<(ops node:$ptr), (i64 (zextloadi16 node:$ptr))>; -def zextloadi64i32 : PatFrag<(ops node:$ptr), (i64 (zextloadi32 node:$ptr))>; - -// A couple of more descriptive operand definitions. -// 32-bits but only 8 bits are significant. -def i32i8imm : Operand; -// 32-bits but only 16 bits are significant. -def i32i16imm : Operand; -// 64-bits but only 32 bits are significant. -def i64i32imm : Operand; -// Branch targets have OtherVT type. 
-def brtarget : Operand; - -// Unsigned i12 -def u12imm : Operand { - let PrintMethod = "printU12ImmOperand"; -} -def u12imm64 : Operand { - let PrintMethod = "printU12ImmOperand"; -} - -// Signed i16 -def s16imm : Operand { - let PrintMethod = "printS16ImmOperand"; -} -def s16imm64 : Operand { - let PrintMethod = "printS16ImmOperand"; -} -// Unsigned i16 -def u16imm : Operand { - let PrintMethod = "printU16ImmOperand"; -} -def u16imm64 : Operand { - let PrintMethod = "printU16ImmOperand"; -} - -// Signed i20 -def s20imm : Operand { - let PrintMethod = "printS20ImmOperand"; -} -def s20imm64 : Operand { - let PrintMethod = "printS20ImmOperand"; -} -// Signed i32 -def s32imm : Operand { - let PrintMethod = "printS32ImmOperand"; -} -def s32imm64 : Operand { - let PrintMethod = "printS32ImmOperand"; -} -// Unsigned i32 -def u32imm : Operand { - let PrintMethod = "printU32ImmOperand"; -} -def u32imm64 : Operand { - let PrintMethod = "printU32ImmOperand"; -} - -def imm_pcrel : Operand { - let PrintMethod = "printPCRelImmOperand"; -} - -//===----------------------------------------------------------------------===// -// SystemZ Operand Definitions. 
-//===----------------------------------------------------------------------===// - -// Address operands - -// riaddr := reg + imm -def riaddr32 : Operand, - ComplexPattern { - let PrintMethod = "printRIAddrOperand"; - let MIOperandInfo = (ops ADDR64:$base, u12imm:$disp); -} - -def riaddr12 : Operand, - ComplexPattern { - let PrintMethod = "printRIAddrOperand"; - let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp); -} - -def riaddr : Operand, - ComplexPattern { - let PrintMethod = "printRIAddrOperand"; - let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp); -} - -//===----------------------------------------------------------------------===// - -// rriaddr := reg + reg + imm -def rriaddr12 : Operand, - ComplexPattern { - let PrintMethod = "printRRIAddrOperand"; - let MIOperandInfo = (ops ADDR64:$base, u12imm64:$disp, ADDR64:$index); -} -def rriaddr : Operand, - ComplexPattern { - let PrintMethod = "printRRIAddrOperand"; - let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index); -} -def laaddr : Operand, - ComplexPattern { - let PrintMethod = "printRRIAddrOperand"; - let MIOperandInfo = (ops ADDR64:$base, s20imm64:$disp, ADDR64:$index); -} diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp deleted file mode 100644 index b1050d4..0000000 --- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp +++ /dev/null @@ -1,143 +0,0 @@ -//===- SystemZRegisterInfo.cpp - SystemZ Register Information -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the SystemZ implementation of the TargetRegisterInfo class. 
-// -//===----------------------------------------------------------------------===// - -#include "SystemZ.h" -#include "SystemZInstrInfo.h" -#include "SystemZMachineFunctionInfo.h" -#include "SystemZRegisterInfo.h" -#include "SystemZSubtarget.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/BitVector.h" - -#define GET_REGINFO_TARGET_DESC -#include "SystemZGenRegisterInfo.inc" - -using namespace llvm; - -SystemZRegisterInfo::SystemZRegisterInfo(SystemZTargetMachine &tm, - const SystemZInstrInfo &tii) - : SystemZGenRegisterInfo(0), TM(tm), TII(tii) { -} - -const unsigned* -SystemZRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - static const unsigned CalleeSavedRegs[] = { - SystemZ::R6D, SystemZ::R7D, SystemZ::R8D, SystemZ::R9D, - SystemZ::R10D, SystemZ::R11D, SystemZ::R12D, SystemZ::R13D, - SystemZ::R14D, SystemZ::R15D, - SystemZ::F8L, SystemZ::F9L, SystemZ::F10L, SystemZ::F11L, - SystemZ::F12L, SystemZ::F13L, SystemZ::F14L, SystemZ::F15L, - 0 - }; - - return CalleeSavedRegs; -} - -BitVector SystemZRegisterInfo::getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(getNumRegs()); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (TFI->hasFP(MF)) { - // R11D is the frame pointer. Reserve all aliases. 
- Reserved.set(SystemZ::R11D); - Reserved.set(SystemZ::R11W); - Reserved.set(SystemZ::R10P); - Reserved.set(SystemZ::R10Q); - } - - Reserved.set(SystemZ::R14D); - Reserved.set(SystemZ::R15D); - Reserved.set(SystemZ::R14W); - Reserved.set(SystemZ::R15W); - Reserved.set(SystemZ::R14P); - Reserved.set(SystemZ::R14Q); - return Reserved; -} - -const TargetRegisterClass* -SystemZRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, - unsigned Idx) const { - switch(Idx) { - // Exact sub-classes don't exist for the other sub-register indexes. - default: return 0; - case SystemZ::subreg_32bit: - if (B == SystemZ::ADDR32RegisterClass) - return A->getSize() == 8 ? SystemZ::ADDR64RegisterClass : 0; - return A; - } -} - -void SystemZRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - MBB.erase(I); -} - -void -SystemZRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { - assert(SPAdj == 0 && "Unxpected"); - - unsigned i = 0; - MachineInstr &MI = *II; - MachineFunction &MF = *MI.getParent()->getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); - - unsigned BasePtr = (TFI->hasFP(MF) ? SystemZ::R11D : SystemZ::R15D); - - // This must be part of a rri or ri operand memory reference. Replace the - // FrameIndex with base register with BasePtr. Add an offset to the - // displacement field. - MI.getOperand(i).ChangeToRegister(BasePtr, false); - - // Offset is a either 12-bit unsigned or 20-bit signed integer. - // FIXME: handle "too long" displacements. 
- int Offset = - TFI->getFrameIndexOffset(MF, FrameIndex) + MI.getOperand(i+1).getImm(); - - // Check whether displacement is too long to fit into 12 bit zext field. - MI.setDesc(TII.getMemoryInstr(MI.getOpcode(), Offset)); - - MI.getOperand(i+1).ChangeToImmediate(Offset); -} - -unsigned -SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - assert(0 && "What is the frame register"); - return 0; -} - -unsigned SystemZRegisterInfo::getEHExceptionRegister() const { - assert(0 && "What is the exception register"); - return 0; -} - -unsigned SystemZRegisterInfo::getEHHandlerRegister() const { - assert(0 && "What is the exception handler register"); - return 0; -} diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h deleted file mode 100644 index 03935b2..0000000 --- a/lib/Target/SystemZ/SystemZRegisterInfo.h +++ /dev/null @@ -1,60 +0,0 @@ -//===-- SystemZRegisterInfo.h - SystemZ Register Information ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the SystemZ implementation of the TargetRegisterInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef SystemZREGISTERINFO_H -#define SystemZREGISTERINFO_H - -#include "llvm/Target/TargetRegisterInfo.h" - -#define GET_REGINFO_HEADER -#include "SystemZGenRegisterInfo.inc" - -namespace llvm { - -class SystemZSubtarget; -class SystemZInstrInfo; -class Type; - -struct SystemZRegisterInfo : public SystemZGenRegisterInfo { - SystemZTargetMachine &TM; - const SystemZInstrInfo &TII; - - SystemZRegisterInfo(SystemZTargetMachine &tm, const SystemZInstrInfo &tii); - - /// Code Generation virtual methods... 
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - - BitVector getReservedRegs(const MachineFunction &MF) const; - - const TargetRegisterClass* - getMatchingSuperRegClass(const TargetRegisterClass *A, - const TargetRegisterClass *B, unsigned Idx) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; - - // Debug information queries. - unsigned getFrameRegister(const MachineFunction &MF) const; - - // Exception handling queries. - unsigned getEHExceptionRegister() const; - unsigned getEHHandlerRegister() const; -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.td b/lib/Target/SystemZ/SystemZRegisterInfo.td deleted file mode 100644 index a24cbcf..0000000 --- a/lib/Target/SystemZ/SystemZRegisterInfo.td +++ /dev/null @@ -1,205 +0,0 @@ -//===- SystemZRegisterInfo.td - The PowerPC Register File ------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -class SystemZReg : Register { - let Namespace = "SystemZ"; -} - -class SystemZRegWithSubregs subregs> - : RegisterWithSubRegs { - let Namespace = "SystemZ"; -} - -// We identify all our registers with a 4-bit ID, for consistency's sake. 
- -// GPR32 - Lower 32 bits of one of the 16 64-bit general-purpose registers -class GPR32 num, string n> : SystemZReg { - field bits<4> Num = num; -} - -// GPR64 - One of the 16 64-bit general-purpose registers -class GPR64 num, string n, list subregs, - list aliases = []> - : SystemZRegWithSubregs { - field bits<4> Num = num; - let Aliases = aliases; -} - -// GPR128 - 8 even-odd register pairs -class GPR128 num, string n, list subregs, - list aliases = []> - : SystemZRegWithSubregs { - field bits<4> Num = num; - let Aliases = aliases; -} - -// FPRS - Lower 32 bits of one of the 16 64-bit floating-point registers -class FPRS num, string n> : SystemZReg { - field bits<4> Num = num; -} - -// FPRL - One of the 16 64-bit floating-point registers -class FPRL num, string n, list subregs> - : SystemZRegWithSubregs { - field bits<4> Num = num; -} - -let Namespace = "SystemZ" in { -def subreg_32bit : SubRegIndex; -def subreg_odd32 : SubRegIndex; -def subreg_even : SubRegIndex; -def subreg_odd : SubRegIndex; -} - -// General-purpose registers -def R0W : GPR32< 0, "r0">; -def R1W : GPR32< 1, "r1">; -def R2W : GPR32< 2, "r2">; -def R3W : GPR32< 3, "r3">; -def R4W : GPR32< 4, "r4">; -def R5W : GPR32< 5, "r5">; -def R6W : GPR32< 6, "r6">; -def R7W : GPR32< 7, "r7">; -def R8W : GPR32< 8, "r8">; -def R9W : GPR32< 9, "r9">; -def R10W : GPR32<10, "r10">; -def R11W : GPR32<11, "r11">; -def R12W : GPR32<12, "r12">; -def R13W : GPR32<13, "r13">; -def R14W : GPR32<14, "r14">; -def R15W : GPR32<15, "r15">; - -let SubRegIndices = [subreg_32bit] in { -def R0D : GPR64< 0, "r0", [R0W]>, DwarfRegNum<[0]>; -def R1D : GPR64< 1, "r1", [R1W]>, DwarfRegNum<[1]>; -def R2D : GPR64< 2, "r2", [R2W]>, DwarfRegNum<[2]>; -def R3D : GPR64< 3, "r3", [R3W]>, DwarfRegNum<[3]>; -def R4D : GPR64< 4, "r4", [R4W]>, DwarfRegNum<[4]>; -def R5D : GPR64< 5, "r5", [R5W]>, DwarfRegNum<[5]>; -def R6D : GPR64< 6, "r6", [R6W]>, DwarfRegNum<[6]>; -def R7D : GPR64< 7, "r7", [R7W]>, DwarfRegNum<[7]>; -def R8D : GPR64< 8, 
"r8", [R8W]>, DwarfRegNum<[8]>; -def R9D : GPR64< 9, "r9", [R9W]>, DwarfRegNum<[9]>; -def R10D : GPR64<10, "r10", [R10W]>, DwarfRegNum<[10]>; -def R11D : GPR64<11, "r11", [R11W]>, DwarfRegNum<[11]>; -def R12D : GPR64<12, "r12", [R12W]>, DwarfRegNum<[12]>; -def R13D : GPR64<13, "r13", [R13W]>, DwarfRegNum<[13]>; -def R14D : GPR64<14, "r14", [R14W]>, DwarfRegNum<[14]>; -def R15D : GPR64<15, "r15", [R15W]>, DwarfRegNum<[15]>; -} - -// Register pairs -let SubRegIndices = [subreg_32bit, subreg_odd32] in { -def R0P : GPR64< 0, "r0", [R0W, R1W], [R0D, R1D]>; -def R2P : GPR64< 2, "r2", [R2W, R3W], [R2D, R3D]>; -def R4P : GPR64< 4, "r4", [R4W, R5W], [R4D, R5D]>; -def R6P : GPR64< 6, "r6", [R6W, R7W], [R6D, R7D]>; -def R8P : GPR64< 8, "r8", [R8W, R9W], [R8D, R9D]>; -def R10P : GPR64<10, "r10", [R10W, R11W], [R10D, R11D]>; -def R12P : GPR64<12, "r12", [R12W, R13W], [R12D, R13D]>; -def R14P : GPR64<14, "r14", [R14W, R15W], [R14D, R15D]>; -} - -let SubRegIndices = [subreg_even, subreg_odd], - CompositeIndices = [(subreg_odd32 subreg_odd, subreg_32bit)] in { -def R0Q : GPR128< 0, "r0", [R0D, R1D], [R0P]>; -def R2Q : GPR128< 2, "r2", [R2D, R3D], [R2P]>; -def R4Q : GPR128< 4, "r4", [R4D, R5D], [R4P]>; -def R6Q : GPR128< 6, "r6", [R6D, R7D], [R6P]>; -def R8Q : GPR128< 8, "r8", [R8D, R9D], [R8P]>; -def R10Q : GPR128<10, "r10", [R10D, R11D], [R10P]>; -def R12Q : GPR128<12, "r12", [R12D, R13D], [R12P]>; -def R14Q : GPR128<14, "r14", [R14D, R15D], [R14P]>; -} - -// Floating-point registers -def F0S : FPRS< 0, "f0">, DwarfRegNum<[16]>; -def F1S : FPRS< 1, "f1">, DwarfRegNum<[17]>; -def F2S : FPRS< 2, "f2">, DwarfRegNum<[18]>; -def F3S : FPRS< 3, "f3">, DwarfRegNum<[19]>; -def F4S : FPRS< 4, "f4">, DwarfRegNum<[20]>; -def F5S : FPRS< 5, "f5">, DwarfRegNum<[21]>; -def F6S : FPRS< 6, "f6">, DwarfRegNum<[22]>; -def F7S : FPRS< 7, "f7">, DwarfRegNum<[23]>; -def F8S : FPRS< 8, "f8">, DwarfRegNum<[24]>; -def F9S : FPRS< 9, "f9">, DwarfRegNum<[25]>; -def F10S : FPRS<10, "f10">, 
DwarfRegNum<[26]>; -def F11S : FPRS<11, "f11">, DwarfRegNum<[27]>; -def F12S : FPRS<12, "f12">, DwarfRegNum<[28]>; -def F13S : FPRS<13, "f13">, DwarfRegNum<[29]>; -def F14S : FPRS<14, "f14">, DwarfRegNum<[30]>; -def F15S : FPRS<15, "f15">, DwarfRegNum<[31]>; - -let SubRegIndices = [subreg_32bit] in { -def F0L : FPRL< 0, "f0", [F0S]>; -def F1L : FPRL< 1, "f1", [F1S]>; -def F2L : FPRL< 2, "f2", [F2S]>; -def F3L : FPRL< 3, "f3", [F3S]>; -def F4L : FPRL< 4, "f4", [F4S]>; -def F5L : FPRL< 5, "f5", [F5S]>; -def F6L : FPRL< 6, "f6", [F6S]>; -def F7L : FPRL< 7, "f7", [F7S]>; -def F8L : FPRL< 8, "f8", [F8S]>; -def F9L : FPRL< 9, "f9", [F9S]>; -def F10L : FPRL<10, "f10", [F10S]>; -def F11L : FPRL<11, "f11", [F11S]>; -def F12L : FPRL<12, "f12", [F12S]>; -def F13L : FPRL<13, "f13", [F13S]>; -def F14L : FPRL<14, "f14", [F14S]>; -def F15L : FPRL<15, "f15", [F15S]>; -} - -// Status register -def PSW : SystemZReg<"psw">; - -/// Register classes. -/// Allocate the callee-saved R6-R12 backwards. That way they can be saved -/// together with R14 and R15 in one prolog instruction. -def GR32 : RegisterClass<"SystemZ", [i32], 32, (add (sequence "R%uW", 0, 5), - (sequence "R%uW", 15, 6))>; - -/// Registers used to generate address. Everything except R0. 
-def ADDR32 : RegisterClass<"SystemZ", [i32], 32, (sub GR32, R0W)>; - -def GR64 : RegisterClass<"SystemZ", [i64], 64, (add (sequence "R%uD", 0, 5), - (sequence "R%uD", 15, 6))> { - let SubRegClasses = [(GR32 subreg_32bit)]; -} - -def ADDR64 : RegisterClass<"SystemZ", [i64], 64, (sub GR64, R0D)> { - let SubRegClasses = [(ADDR32 subreg_32bit)]; -} - -// Even-odd register pairs -def GR64P : RegisterClass<"SystemZ", [v2i32], 64, (add R0P, R2P, R4P, - R12P, R10P, R8P, R6P, - R14P)> { - let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32)]; -} - -def GR128 : RegisterClass<"SystemZ", [v2i64], 128, (add R0Q, R2Q, R4Q, - R12Q, R10Q, R8Q, R6Q, - R14Q)> { - let SubRegClasses = [(GR32 subreg_32bit, subreg_odd32), - (GR64 subreg_even, subreg_odd)]; -} - -def FP32 : RegisterClass<"SystemZ", [f32], 32, (sequence "F%uS", 0, 15)>; - -def FP64 : RegisterClass<"SystemZ", [f64], 64, (sequence "F%uL", 0, 15)> { - let SubRegClasses = [(FP32 subreg_32bit)]; -} - -// Status flags registers. -def CCR : RegisterClass<"SystemZ", [i64], 64, (add PSW)> { - let CopyCost = -1; // Don't allow copying of status registers. -} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp b/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp deleted file mode 100644 index 3eabcd2..0000000 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- SystemZSelectionDAGInfo.cpp - SystemZ SelectionDAG Info -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SystemZSelectionDAGInfo class. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "systemz-selectiondag-info" -#include "SystemZTargetMachine.h" -using namespace llvm; - -SystemZSelectionDAGInfo::SystemZSelectionDAGInfo(const SystemZTargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} - -SystemZSelectionDAGInfo::~SystemZSelectionDAGInfo() { -} diff --git a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h b/lib/Target/SystemZ/SystemZSelectionDAGInfo.h deleted file mode 100644 index 1450401..0000000 --- a/lib/Target/SystemZ/SystemZSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- SystemZSelectionDAGInfo.h - SystemZ SelectionDAG Info ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the SystemZ subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef SYSTEMZSELECTIONDAGINFO_H -#define SYSTEMZSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class SystemZTargetMachine; - -class SystemZSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit SystemZSelectionDAGInfo(const SystemZTargetMachine &TM); - ~SystemZSelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/SystemZ/SystemZSubtarget.cpp b/lib/Target/SystemZ/SystemZSubtarget.cpp deleted file mode 100644 index 0845510..0000000 --- a/lib/Target/SystemZ/SystemZSubtarget.cpp +++ /dev/null @@ -1,54 +0,0 @@ -//===- SystemZSubtarget.cpp - SystemZ Subtarget Information -------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file implements the SystemZ specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#include "SystemZSubtarget.h" -#include "SystemZ.h" -#include "llvm/GlobalValue.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_SUBTARGETINFO_TARGET_DESC -#define GET_SUBTARGETINFO_CTOR -#include "SystemZGenSubtargetInfo.inc" - -using namespace llvm; - -SystemZSubtarget::SystemZSubtarget(const std::string &TT, - const std::string &CPU, - const std::string &FS): - SystemZGenSubtargetInfo(TT, CPU, FS), HasZ10Insts(false) { - std::string CPUName = CPU; - if (CPUName.empty()) - CPUName = "z9"; - - // Parse features string. - ParseSubtargetFeatures(CPUName, FS); -} - -/// True if accessing the GV requires an extra load. -bool SystemZSubtarget::GVRequiresExtraLoad(const GlobalValue* GV, - const TargetMachine& TM, - bool isDirectCall) const { - if (TM.getRelocationModel() == Reloc::PIC_) { - // Extra load is needed for all externally visible. - if (isDirectCall) - return false; - - if (GV->hasLocalLinkage() || GV->hasHiddenVisibility()) - return false; - - return true; - } - - return false; -} diff --git a/lib/Target/SystemZ/SystemZSubtarget.h b/lib/Target/SystemZ/SystemZSubtarget.h deleted file mode 100644 index 55cfd80..0000000 --- a/lib/Target/SystemZ/SystemZSubtarget.h +++ /dev/null @@ -1,48 +0,0 @@ -//==-- SystemZSubtarget.h - Define Subtarget for the SystemZ ---*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the SystemZ specific subclass of TargetSubtargetInfo. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_SystemZ_SUBTARGET_H -#define LLVM_TARGET_SystemZ_SUBTARGET_H - -#include "llvm/Target/TargetSubtargetInfo.h" -#include - -#define GET_SUBTARGETINFO_HEADER -#include "SystemZGenSubtargetInfo.inc" - -namespace llvm { -class GlobalValue; -class StringRef; -class TargetMachine; - -class SystemZSubtarget : public SystemZGenSubtargetInfo { - bool HasZ10Insts; -public: - /// This constructor initializes the data members to match that - /// of the specified triple. - /// - SystemZSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); - - /// ParseSubtargetFeatures - Parses features string setting specified - /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - - bool isZ10() const { return HasZ10Insts; } - - bool GVRequiresExtraLoad(const GlobalValue* GV, const TargetMachine& TM, - bool isDirectCall) const; -}; -} // End llvm namespace - -#endif // LLVM_TARGET_SystemZ_SUBTARGET_H diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp deleted file mode 100644 index e390f06..0000000 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ /dev/null @@ -1,40 +0,0 @@ -//===-- SystemZTargetMachine.cpp - Define TargetMachine for SystemZ -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#include "SystemZTargetMachine.h" -#include "SystemZ.h" -#include "llvm/PassManager.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -extern "C" void LLVMInitializeSystemZTarget() { - // Register the target. 
- RegisterTargetMachine X(TheSystemZTarget); -} - -/// SystemZTargetMachine ctor - Create an ILP64 architecture model -/// -SystemZTargetMachine::SystemZTargetMachine(const Target &T, - StringRef TT, StringRef CPU, - StringRef FS, Reloc::Model RM, - CodeModel::Model CM) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), - Subtarget(TT, CPU, FS), - DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32" - "-f64:64:64-f128:128:128-a0:16:16-n32:64"), - InstrInfo(*this), TLInfo(*this), TSInfo(*this), - FrameLowering(Subtarget) { -} - -bool SystemZTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - // Install an instruction selector. - PM.add(createSystemZISelDag(*this, OptLevel)); - return false; -} diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h deleted file mode 100644 index 43dce4b..0000000 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ /dev/null @@ -1,68 +0,0 @@ -//==- SystemZTargetMachine.h - Define TargetMachine for SystemZ ---*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the SystemZ specific subclass of TargetMachine. 
-// -//===----------------------------------------------------------------------===// - - -#ifndef LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H -#define LLVM_TARGET_SYSTEMZ_TARGETMACHINE_H - -#include "SystemZInstrInfo.h" -#include "SystemZISelLowering.h" -#include "SystemZFrameLowering.h" -#include "SystemZSelectionDAGInfo.h" -#include "SystemZRegisterInfo.h" -#include "SystemZSubtarget.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - -/// SystemZTargetMachine -/// -class SystemZTargetMachine : public LLVMTargetMachine { - SystemZSubtarget Subtarget; - const TargetData DataLayout; // Calculates type size & alignment - SystemZInstrInfo InstrInfo; - SystemZTargetLowering TLInfo; - SystemZSelectionDAGInfo TSInfo; - SystemZFrameLowering FrameLowering; -public: - SystemZTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); - - virtual const TargetFrameLowering *getFrameLowering() const { - return &FrameLowering; - } - virtual const SystemZInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetData *getTargetData() const { return &DataLayout;} - virtual const SystemZSubtarget *getSubtargetImpl() const { return &Subtarget; } - - virtual const SystemZRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } - - virtual const SystemZTargetLowering *getTargetLowering() const { - return &TLInfo; - } - - virtual const SystemZSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); -}; // SystemZTargetMachine. 
- -} // end namespace llvm - -#endif // LLVM_TARGET_SystemZ_TARGETMACHINE_H diff --git a/lib/Target/SystemZ/TargetInfo/CMakeLists.txt b/lib/Target/SystemZ/TargetInfo/CMakeLists.txt deleted file mode 100644 index 3180708..0000000 --- a/lib/Target/SystemZ/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMSystemZInfo - SystemZTargetInfo.cpp - ) - -add_llvm_library_dependencies(LLVMSystemZInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - -add_dependencies(LLVMSystemZInfo SystemZCommonTableGen) diff --git a/lib/Target/SystemZ/TargetInfo/Makefile b/lib/Target/SystemZ/TargetInfo/Makefile deleted file mode 100644 index 0be80eb..0000000 --- a/lib/Target/SystemZ/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/SystemZ/TargetInfo/Makefile --------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMSystemZInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp b/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp deleted file mode 100644 index da99282..0000000 --- a/lib/Target/SystemZ/TargetInfo/SystemZTargetInfo.cpp +++ /dev/null @@ -1,19 +0,0 @@ -//===-- SystemZTargetInfo.cpp - SystemZ Target Implementation -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "SystemZ.h" -#include "llvm/Module.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -Target llvm::TheSystemZTarget; - -extern "C" void LLVMInitializeSystemZTargetInfo() { - RegisterTarget X(TheSystemZTarget, "systemz", "SystemZ"); -} -- cgit v1.1 From 3e6157de576e349d33a9b08d103405b3a8fb9159 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 25 Oct 2011 00:05:42 +0000 Subject: Remove the Blackfin backend. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142880 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Triple.cpp | 7 - lib/Target/Blackfin/Blackfin.h | 31 - lib/Target/Blackfin/Blackfin.td | 202 ----- lib/Target/Blackfin/BlackfinAsmPrinter.cpp | 156 ---- lib/Target/Blackfin/BlackfinCallingConv.td | 30 - lib/Target/Blackfin/BlackfinFrameLowering.cpp | 130 ---- lib/Target/Blackfin/BlackfinFrameLowering.h | 47 -- lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 180 ----- lib/Target/Blackfin/BlackfinISelLowering.cpp | 645 --------------- lib/Target/Blackfin/BlackfinISelLowering.h | 83 -- lib/Target/Blackfin/BlackfinInstrFormats.td | 34 - lib/Target/Blackfin/BlackfinInstrInfo.cpp | 256 ------ lib/Target/Blackfin/BlackfinInstrInfo.h | 81 -- lib/Target/Blackfin/BlackfinInstrInfo.td | 862 --------------------- lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp | 104 --- lib/Target/Blackfin/BlackfinIntrinsicInfo.h | 32 - lib/Target/Blackfin/BlackfinIntrinsics.td | 34 - lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 344 -------- lib/Target/Blackfin/BlackfinRegisterInfo.h | 77 -- lib/Target/Blackfin/BlackfinRegisterInfo.td | 277 ------- lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp | 24 - lib/Target/Blackfin/BlackfinSelectionDAGInfo.h | 31 - lib/Target/Blackfin/BlackfinSubtarget.cpp | 44 -- lib/Target/Blackfin/BlackfinSubtarget.h | 49 -- lib/Target/Blackfin/BlackfinTargetMachine.cpp | 43 - lib/Target/Blackfin/BlackfinTargetMachine.h | 68 -- 
lib/Target/Blackfin/CMakeLists.txt | 38 - .../Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp | 22 - .../Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h | 29 - .../Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp | 81 -- .../Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h | 38 - lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt | 11 - lib/Target/Blackfin/MCTargetDesc/Makefile | 16 - lib/Target/Blackfin/Makefile | 23 - lib/Target/Blackfin/README.txt | 244 ------ .../Blackfin/TargetInfo/BlackfinTargetInfo.cpp | 21 - lib/Target/Blackfin/TargetInfo/CMakeLists.txt | 13 - lib/Target/Blackfin/TargetInfo/Makefile | 15 - 38 files changed, 4422 deletions(-) delete mode 100644 lib/Target/Blackfin/Blackfin.h delete mode 100644 lib/Target/Blackfin/Blackfin.td delete mode 100644 lib/Target/Blackfin/BlackfinAsmPrinter.cpp delete mode 100644 lib/Target/Blackfin/BlackfinCallingConv.td delete mode 100644 lib/Target/Blackfin/BlackfinFrameLowering.cpp delete mode 100644 lib/Target/Blackfin/BlackfinFrameLowering.h delete mode 100644 lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp delete mode 100644 lib/Target/Blackfin/BlackfinISelLowering.cpp delete mode 100644 lib/Target/Blackfin/BlackfinISelLowering.h delete mode 100644 lib/Target/Blackfin/BlackfinInstrFormats.td delete mode 100644 lib/Target/Blackfin/BlackfinInstrInfo.cpp delete mode 100644 lib/Target/Blackfin/BlackfinInstrInfo.h delete mode 100644 lib/Target/Blackfin/BlackfinInstrInfo.td delete mode 100644 lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp delete mode 100644 lib/Target/Blackfin/BlackfinIntrinsicInfo.h delete mode 100644 lib/Target/Blackfin/BlackfinIntrinsics.td delete mode 100644 lib/Target/Blackfin/BlackfinRegisterInfo.cpp delete mode 100644 lib/Target/Blackfin/BlackfinRegisterInfo.h delete mode 100644 lib/Target/Blackfin/BlackfinRegisterInfo.td delete mode 100644 lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp delete mode 100644 lib/Target/Blackfin/BlackfinSelectionDAGInfo.h delete mode 100644 lib/Target/Blackfin/BlackfinSubtarget.cpp 
delete mode 100644 lib/Target/Blackfin/BlackfinSubtarget.h delete mode 100644 lib/Target/Blackfin/BlackfinTargetMachine.cpp delete mode 100644 lib/Target/Blackfin/BlackfinTargetMachine.h delete mode 100644 lib/Target/Blackfin/CMakeLists.txt delete mode 100644 lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp delete mode 100644 lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h delete mode 100644 lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp delete mode 100644 lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h delete mode 100644 lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt delete mode 100644 lib/Target/Blackfin/MCTargetDesc/Makefile delete mode 100644 lib/Target/Blackfin/Makefile delete mode 100644 lib/Target/Blackfin/README.txt delete mode 100644 lib/Target/Blackfin/TargetInfo/BlackfinTargetInfo.cpp delete mode 100644 lib/Target/Blackfin/TargetInfo/CMakeLists.txt delete mode 100644 lib/Target/Blackfin/TargetInfo/Makefile (limited to 'lib') diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 2554d64..6e252a5 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -20,7 +20,6 @@ const char *Triple::getArchTypeName(ArchType Kind) { case alpha: return "alpha"; case arm: return "arm"; - case bfin: return "bfin"; case cellspu: return "cellspu"; case mips: return "mips"; case mipsel: return "mipsel"; @@ -56,8 +55,6 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case arm: case thumb: return "arm"; - case bfin: return "bfin"; - case cellspu: return "spu"; case ppc64: @@ -138,8 +135,6 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { return alpha; if (Name == "arm") return arm; - if (Name == "bfin") - return bfin; if (Name == "cellspu") return cellspu; if (Name == "mips") @@ -278,8 +273,6 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) { return x86; else if (ArchName == "amd64" || ArchName == "x86_64") return x86_64; - else if (ArchName == "bfin") - return bfin; else if (ArchName == 
"powerpc") return ppc; else if ((ArchName == "powerpc64") || (ArchName == "ppu")) diff --git a/lib/Target/Blackfin/Blackfin.h b/lib/Target/Blackfin/Blackfin.h deleted file mode 100644 index a00ff4c..0000000 --- a/lib/Target/Blackfin/Blackfin.h +++ /dev/null @@ -1,31 +0,0 @@ -//=== Blackfin.h - Top-level interface for Blackfin backend -----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// Blackfin back-end. -// -//===----------------------------------------------------------------------===// - -#ifndef TARGET_BLACKFIN_H -#define TARGET_BLACKFIN_H - -#include "MCTargetDesc/BlackfinMCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - - class FunctionPass; - class BlackfinTargetMachine; - - FunctionPass *createBlackfinISelDag(BlackfinTargetMachine &TM, - CodeGenOpt::Level OptLevel); - -} // end namespace llvm - -#endif diff --git a/lib/Target/Blackfin/Blackfin.td b/lib/Target/Blackfin/Blackfin.td deleted file mode 100644 index cd90962..0000000 --- a/lib/Target/Blackfin/Blackfin.td +++ /dev/null @@ -1,202 +0,0 @@ -//===- Blackfin.td - Describe the Blackfin Target Machine --*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Target-independent interfaces which we are implementing -//===----------------------------------------------------------------------===// - -include "llvm/Target/Target.td" - -//===----------------------------------------------------------------------===// -// Blackfin Subtarget features. -//===----------------------------------------------------------------------===// - -def FeatureSDRAM : SubtargetFeature<"sdram", "sdram", "true", - "Build for SDRAM">; - -def FeatureICPLB : SubtargetFeature<"icplb", "icplb", "true", - "Assume instruction cache lookaside buffers are enabled at runtime">; - -//===----------------------------------------------------------------------===// -// Bugs in the silicon becomes workarounds in the compiler. -// See http://www.analog.com/ for the full list of IC anomalies. 
-//===----------------------------------------------------------------------===// - -def WA_MI_SHIFT : SubtargetFeature<"mi-shift-anomaly","wa_mi_shift", "true", - "Work around 05000074 - " - "Multi-Issue Instruction with dsp32shiftimm and P-reg Store">; - -def WA_CSYNC : SubtargetFeature<"csync-anomaly","wa_csync", "true", - "Work around 05000244 - " - "If I-Cache Is On, CSYNC/SSYNC/IDLE Around Change of Control">; - -def WA_SPECLD : SubtargetFeature<"specld-anomaly","wa_specld", "true", - "Work around 05000245 - " - "Access in the Shadow of a Conditional Branch">; - -def WA_HWLOOP : SubtargetFeature<"hwloop-anomaly","wa_hwloop", "true", - "Work around 05000257 - " - "Interrupt/Exception During Short Hardware Loop">; - -def WA_MMR_STALL : SubtargetFeature<"mmr-stall-anomaly","wa_mmr_stall", "true", - "Work around 05000283 - " - "System MMR Write Is Stalled Indefinitely when Killed">; - -def WA_LCREGS : SubtargetFeature<"lcregs-anomaly","wa_lcregs", "true", - "Work around 05000312 - " - "SSYNC, CSYNC, or Loads to LT, LB and LC Registers Are Interrupted">; - -def WA_KILLED_MMR : SubtargetFeature<"killed-mmr-anomaly", - "wa_killed_mmr", "true", - "Work around 05000315 - " - "Killed System MMR Write Completes Erroneously on Next System MMR Access">; - -def WA_RETS : SubtargetFeature<"rets-anomaly", "wa_rets", "true", - "Work around 05000371 - " - "Possible RETS Register Corruption when Subroutine Is under 5 Cycles">; - -def WA_IND_CALL : SubtargetFeature<"ind-call-anomaly", "wa_ind_call", "true", - "Work around 05000426 - " - "Speculative Fetches of Indirect-Pointer Instructions">; - -//===----------------------------------------------------------------------===// -// Register File, Calling Conv, Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "BlackfinRegisterInfo.td" -include "BlackfinCallingConv.td" -include "BlackfinIntrinsics.td" -include "BlackfinInstrInfo.td" - -def BlackfinInstrInfo : 
InstrInfo {} - -//===----------------------------------------------------------------------===// -// Blackfin processors supported. -//===----------------------------------------------------------------------===// - -class Proc Features> - : Processor; - -def : Proc<"generic", "", []>; - -multiclass Core Features> { - def : Proc; - def : Proc; - def : Proc; -} - -multiclass CoreEdinburgh - : Core { - def : Proc; - def : Proc; - def : Proc; - def : Proc; -} -multiclass CoreBraemar - : Core { - def : Proc; - def : Proc; -} -multiclass CoreStirling - : Core { - def : Proc; - def : Proc; - def : Proc; -} -multiclass CoreMoab - : Core { - def : Proc; - def : Proc; - def : Proc; - def : Proc; -} -multiclass CoreTeton - : Core { - def : Proc; - def : Proc; -} -multiclass CoreKookaburra - : Core { - def : Proc; - def : Proc; - def : Proc; -} -multiclass CoreMockingbird - : Core { - def : Proc; - def : Proc; -} -multiclass CoreBrodie - : Core { - def : Proc; - def : Proc; -} - -defm BF512 : CoreBrodie<"bf512">; -defm BF514 : CoreBrodie<"bf514">; -defm BF516 : CoreBrodie<"bf516">; -defm BF518 : CoreBrodie<"bf518">; -defm BF522 : CoreMockingbird<"bf522">; -defm BF523 : CoreKookaburra<"bf523">; -defm BF524 : CoreMockingbird<"bf524">; -defm BF525 : CoreKookaburra<"bf525">; -defm BF526 : CoreMockingbird<"bf526">; -defm BF527 : CoreKookaburra<"bf527">; -defm BF531 : CoreEdinburgh<"bf531">; -defm BF532 : CoreEdinburgh<"bf532">; -defm BF533 : CoreEdinburgh<"bf533">; -defm BF534 : CoreBraemar<"bf534">; -defm BF536 : CoreBraemar<"bf536">; -defm BF537 : CoreBraemar<"bf537">; -defm BF538 : CoreStirling<"bf538">; -defm BF539 : CoreStirling<"bf539">; -defm BF542 : CoreMoab<"bf542">; -defm BF544 : CoreMoab<"bf544">; -defm BF548 : CoreMoab<"bf548">; -defm BF549 : CoreMoab<"bf549">; -defm BF561 : CoreTeton<"bf561">; - -//===----------------------------------------------------------------------===// -// Declare the target which we are implementing 
-//===----------------------------------------------------------------------===// - -def Blackfin : Target { - // Pull in Instruction Info: - let InstructionSet = BlackfinInstrInfo; -} diff --git a/lib/Target/Blackfin/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/BlackfinAsmPrinter.cpp deleted file mode 100644 index ed9844e..0000000 --- a/lib/Target/Blackfin/BlackfinAsmPrinter.cpp +++ /dev/null @@ -1,156 +0,0 @@ -//===-- BlackfinAsmPrinter.cpp - Blackfin LLVM assembly writer ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to GAS-format BLACKFIN assembly language. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "Blackfin.h" -#include "BlackfinInstrInfo.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - class BlackfinAsmPrinter : public AsmPrinter { - public: - BlackfinAsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer) {} - - virtual const char *getPassName() const { - return 
"Blackfin Assembly Printer"; - } - - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printMemoryOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - void printInstruction(const MachineInstr *MI, raw_ostream &O);// autogen'd. - static const char *getRegisterName(unsigned RegNo); - - void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - } - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - }; -} // end of anonymous namespace - -#include "BlackfinGenAsmWriter.inc" - -extern "C" void LLVMInitializeBlackfinAsmPrinter() { - RegisterAsmPrinter X(TheBlackfinTarget); -} - -void BlackfinAsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(opNum); - switch (MO.getType()) { - case MachineOperand::MO_Register: - assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && - "Virtual registers should be already mapped!"); - O << getRegisterName(MO.getReg()); - break; - - case MachineOperand::MO_Immediate: - O << MO.getImm(); - break; - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - case MachineOperand::MO_GlobalAddress: - O << *Mang->getSymbol(MO.getGlobal()); - printOffset(MO.getOffset(), O); - break; - case MachineOperand::MO_ExternalSymbol: - O << *GetExternalSymbolSymbol(MO.getSymbolName()); - break; - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" - << MO.getIndex(); - break; - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - break; - default: - 
llvm_unreachable(""); - break; - } -} - -void BlackfinAsmPrinter::printMemoryOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - printOperand(MI, opNum, O); - - if (MI->getOperand(opNum+1).isImm() && MI->getOperand(opNum+1).getImm() == 0) - return; - - O << " + "; - printOperand(MI, opNum+1, O); -} - -/// PrintAsmOperand - Print out an operand for an inline asm expression. -/// -bool BlackfinAsmPrinter::PrintAsmOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) { - if (ExtraCode[1] != 0) return true; // Unknown modifier. - - switch (ExtraCode[0]) { - default: return true; // Unknown modifier. - case 'r': - break; - } - } - - printOperand(MI, OpNo, O); - - return false; -} - -bool BlackfinAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier - - O << '['; - printOperand(MI, OpNo, O); - O << ']'; - - return false; -} diff --git a/lib/Target/Blackfin/BlackfinCallingConv.td b/lib/Target/Blackfin/BlackfinCallingConv.td deleted file mode 100644 index 0abc84c..0000000 --- a/lib/Target/Blackfin/BlackfinCallingConv.td +++ /dev/null @@ -1,30 +0,0 @@ -//===--- BlackfinCallingConv.td - Calling Conventions ------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This describes the calling conventions for the Blackfin architectures. -// -//===----------------------------------------------------------------------===// - -// Blackfin C Calling convention. 
-def CC_Blackfin : CallingConv<[ - CCIfType<[i16], CCPromoteToType>, - CCIfSRet>, - CCAssignToReg<[R0, R1, R2]>, - CCAssignToStack<4, 4> -]>; - -//===----------------------------------------------------------------------===// -// Return Value Calling Conventions -//===----------------------------------------------------------------------===// - -// Blackfin C return-value convention. -def RetCC_Blackfin : CallingConv<[ - CCIfType<[i16], CCPromoteToType>, - CCAssignToReg<[R0, R1]> -]>; diff --git a/lib/Target/Blackfin/BlackfinFrameLowering.cpp b/lib/Target/Blackfin/BlackfinFrameLowering.cpp deleted file mode 100644 index 0b0984d..0000000 --- a/lib/Target/Blackfin/BlackfinFrameLowering.cpp +++ /dev/null @@ -1,130 +0,0 @@ -//====- BlackfinFrameLowering.cpp - Blackfin Frame Information --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Blackfin implementation of TargetFrameLowering class. -// -//===----------------------------------------------------------------------===// - -#include "BlackfinFrameLowering.h" -#include "BlackfinInstrInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetOptions.h" - -using namespace llvm; - - -// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. 
-bool BlackfinFrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return DisableFramePointerElim(MF) || - MFI->adjustsStack() || MFI->hasVarSizedObjects(); -} - -// Always reserve a call frame. We dont have enough registers to adjust SP. -bool BlackfinFrameLowering:: -hasReservedCallFrame(const MachineFunction &MF) const { - return true; -} - -// Emit a prologue that sets up a stack frame. -// On function entry, R0-R2 and P0 may hold arguments. -// R3, P1, and P2 may be used as scratch registers -void BlackfinFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const BlackfinRegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - const BlackfinInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - - DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); - - int FrameSize = MFI->getStackSize(); - if (FrameSize%4) { - FrameSize = (FrameSize+3) & ~3; - MFI->setStackSize(FrameSize); - } - - if (!hasFP(MF)) { - assert(!MFI->adjustsStack() && - "FP elimination on a non-leaf function is not supported"); - RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, -FrameSize); - return; - } - - // emit a LINK instruction - if (FrameSize <= 0x3ffff) { - BuildMI(MBB, MBBI, dl, TII.get(BF::LINK)).addImm(FrameSize); - return; - } - - // Frame is too big, do a manual LINK: - // [--SP] = RETS; - // [--SP] = FP; - // FP = SP; - // P1 = -FrameSize; - // SP = SP + P1; - BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH)) - .addReg(BF::RETS, RegState::Kill); - BuildMI(MBB, MBBI, dl, TII.get(BF::PUSH)) - .addReg(BF::FP, RegState::Kill); - BuildMI(MBB, MBBI, dl, TII.get(BF::MOVE), BF::FP) - .addReg(BF::SP); - RegInfo->loadConstant(MBB, MBBI, dl, BF::P1, -FrameSize); - BuildMI(MBB, MBBI, dl, TII.get(BF::ADDpp), BF::SP) - .addReg(BF::SP, 
RegState::Kill) - .addReg(BF::P1, RegState::Kill); - -} - -void BlackfinFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - const BlackfinRegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - const BlackfinInstrInfo &TII = - *static_cast(MF.getTarget().getInstrInfo()); - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - DebugLoc dl = MBBI->getDebugLoc(); - - int FrameSize = MFI->getStackSize(); - assert(FrameSize%4 == 0 && "Misaligned frame size"); - - if (!hasFP(MF)) { - assert(!MFI->adjustsStack() && - "FP elimination on a non-leaf function is not supported"); - RegInfo->adjustRegister(MBB, MBBI, dl, BF::SP, BF::P1, FrameSize); - return; - } - - // emit an UNLINK instruction - BuildMI(MBB, MBBI, dl, TII.get(BF::UNLINK)); -} - -void BlackfinFrameLowering:: -processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const { - MachineFrameInfo *MFI = MF.getFrameInfo(); - const BlackfinRegisterInfo *RegInfo = - static_cast(MF.getTarget().getRegisterInfo()); - const TargetRegisterClass *RC = BF::DPRegisterClass; - - if (RegInfo->requiresRegisterScavenging(MF)) { - // Reserve a slot close to SP or frame pointer. - RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), - RC->getAlignment(), - false)); - } -} diff --git a/lib/Target/Blackfin/BlackfinFrameLowering.h b/lib/Target/Blackfin/BlackfinFrameLowering.h deleted file mode 100644 index 169aa8e..0000000 --- a/lib/Target/Blackfin/BlackfinFrameLowering.h +++ /dev/null @@ -1,47 +0,0 @@ -//=- BlackfinFrameLowering.h - Define frame lowering for Blackfin -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// - -#ifndef BLACKFIN_FRAMEINFO_H -#define BLACKFIN_FRAMEINFO_H - -#include "Blackfin.h" -#include "BlackfinSubtarget.h" -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { - class BlackfinSubtarget; - -class BlackfinFrameLowering : public TargetFrameLowering { -protected: - const BlackfinSubtarget &STI; - -public: - explicit BlackfinFrameLowering(const BlackfinSubtarget &sti) - : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 4, 0), STI(sti) { - } - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - bool hasFP(const MachineFunction &MF) const; - bool hasReservedCallFrame(const MachineFunction &MF) const; - - void processFunctionBeforeCalleeSavedScan(MachineFunction &MF, - RegScavenger *RS) const; -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp deleted file mode 100644 index 215ca43..0000000 --- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp +++ /dev/null @@ -1,180 +0,0 @@ -//===- BlackfinISelDAGToDAG.cpp - A dag to dag inst selector for Blackfin -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines an instruction selector for the Blackfin target. 
-// -//===----------------------------------------------------------------------===// - -#include "Blackfin.h" -#include "BlackfinTargetMachine.h" -#include "BlackfinRegisterInfo.h" -#include "llvm/Intrinsics.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Instruction Selector Implementation -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -/// BlackfinDAGToDAGISel - Blackfin specific code to select blackfin machine -/// instructions for SelectionDAG operations. -namespace { - class BlackfinDAGToDAGISel : public SelectionDAGISel { - /// Subtarget - Keep a pointer to the Blackfin Subtarget around so that we - /// can make the right decision when generating code for different targets. - //const BlackfinSubtarget &Subtarget; - public: - BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(TM, OptLevel) {} - - virtual void PostprocessISelDAG(); - - virtual const char *getPassName() const { - return "Blackfin DAG->DAG Pattern Instruction Selection"; - } - - // Include the pieces autogenerated from the target description. -#include "BlackfinGenDAGISel.inc" - - private: - SDNode *Select(SDNode *N); - bool SelectADDRspii(SDValue Addr, SDValue &Base, SDValue &Offset); - - // Walk the DAG after instruction selection, fixing register class issues. 
- void FixRegisterClasses(SelectionDAG &DAG); - - const BlackfinInstrInfo &getInstrInfo() { - return *static_cast(TM).getInstrInfo(); - } - const BlackfinRegisterInfo *getRegisterInfo() { - return static_cast(TM).getRegisterInfo(); - } - }; -} // end anonymous namespace - -FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM, - CodeGenOpt::Level OptLevel) { - return new BlackfinDAGToDAGISel(TM, OptLevel); -} - -void BlackfinDAGToDAGISel::PostprocessISelDAG() { - FixRegisterClasses(*CurDAG); -} - -SDNode *BlackfinDAGToDAGISel::Select(SDNode *N) { - if (N->isMachineOpcode()) - return NULL; // Already selected. - - switch (N->getOpcode()) { - default: break; - case ISD::FrameIndex: { - // Selects to ADDpp FI, 0 which in turn will become ADDimm7 SP, imm or ADDpp - // SP, Px - int FI = cast(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, MVT::i32); - return CurDAG->SelectNodeTo(N, BF::ADDpp, MVT::i32, TFI, - CurDAG->getTargetConstant(0, MVT::i32)); - } - } - - return SelectCode(N); -} - -bool BlackfinDAGToDAGISel::SelectADDRspii(SDValue Addr, - SDValue &Base, - SDValue &Offset) { - FrameIndexSDNode *FIN = 0; - if ((FIN = dyn_cast(Addr))) { - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(0, MVT::i32); - return true; - } - if (Addr.getOpcode() == ISD::ADD) { - ConstantSDNode *CN = 0; - if ((FIN = dyn_cast(Addr.getOperand(0))) && - (CN = dyn_cast(Addr.getOperand(1))) && - (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) { - // Constant positive word offset from frame index - Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32); - Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32); - return true; - } - } - return false; -} - -static inline bool isCC(const TargetRegisterClass *RC) { - return BF::AnyCCRegClass.hasSubClassEq(RC); -} - -static inline bool isDCC(const TargetRegisterClass *RC) { - return BF::DRegClass.hasSubClassEq(RC) || isCC(RC); -} - -static 
void UpdateNodeOperand(SelectionDAG &DAG, - SDNode *N, - unsigned Num, - SDValue Val) { - SmallVector ops(N->op_begin(), N->op_end()); - ops[Num] = Val; - SDNode *New = DAG.UpdateNodeOperands(N, ops.data(), ops.size()); - DAG.ReplaceAllUsesWith(N, New); -} - -// After instruction selection, insert COPY_TO_REGCLASS nodes to help in -// choosing the proper register classes. -void BlackfinDAGToDAGISel::FixRegisterClasses(SelectionDAG &DAG) { - const BlackfinInstrInfo &TII = getInstrInfo(); - const BlackfinRegisterInfo *TRI = getRegisterInfo(); - DAG.AssignTopologicalOrder(); - HandleSDNode Dummy(DAG.getRoot()); - - for (SelectionDAG::allnodes_iterator NI = DAG.allnodes_begin(); - NI != DAG.allnodes_end(); ++NI) { - if (NI->use_empty() || !NI->isMachineOpcode()) - continue; - const MCInstrDesc &DefMCID = TII.get(NI->getMachineOpcode()); - for (SDNode::use_iterator UI = NI->use_begin(); !UI.atEnd(); ++UI) { - if (!UI->isMachineOpcode()) - continue; - - if (UI.getUse().getResNo() >= DefMCID.getNumDefs()) - continue; - const TargetRegisterClass *DefRC = - TII.getRegClass(DefMCID, UI.getUse().getResNo(), TRI); - - const MCInstrDesc &UseMCID = TII.get(UI->getMachineOpcode()); - if (UseMCID.getNumDefs()+UI.getOperandNo() >= UseMCID.getNumOperands()) - continue; - const TargetRegisterClass *UseRC = - TII.getRegClass(UseMCID, UseMCID.getNumDefs()+UI.getOperandNo(), TRI); - if (!DefRC || !UseRC) - continue; - // We cannot copy CC <-> !(CC/D) - if ((isCC(DefRC) && !isDCC(UseRC)) || (isCC(UseRC) && !isDCC(DefRC))) { - SDNode *Copy = - DAG.getMachineNode(TargetOpcode::COPY_TO_REGCLASS, - NI->getDebugLoc(), - MVT::i32, - UI.getUse().get(), - DAG.getTargetConstant(BF::DRegClassID, MVT::i32)); - UpdateNodeOperand(DAG, *UI, UI.getOperandNo(), SDValue(Copy, 0)); - } - } - } - DAG.setRoot(Dummy.getValue()); -} - diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp deleted file mode 100644 index 7d4c45f..0000000 --- 
a/lib/Target/Blackfin/BlackfinISelLowering.cpp +++ /dev/null @@ -1,645 +0,0 @@ -//===- BlackfinISelLowering.cpp - Blackfin DAG Lowering Implementation ----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the interfaces that Blackfin uses to lower LLVM code -// into a selection DAG. -// -//===----------------------------------------------------------------------===// - -#include "BlackfinISelLowering.h" -#include "BlackfinTargetMachine.h" -#include "llvm/Function.h" -#include "llvm/Type.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/ADT/VectorExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -using namespace llvm; - -//===----------------------------------------------------------------------===// -// Calling Convention Implementation -//===----------------------------------------------------------------------===// - -#include "BlackfinGenCallingConv.inc" - -//===----------------------------------------------------------------------===// -// TargetLowering Implementation -//===----------------------------------------------------------------------===// - -BlackfinTargetLowering::BlackfinTargetLowering(TargetMachine &TM) - : TargetLowering(TM, new TargetLoweringObjectFileELF()) { - setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? 
- setStackPointerRegisterToSaveRestore(BF::SP); - setIntDivIsCheap(false); - - // Set up the legal register classes. - addRegisterClass(MVT::i32, BF::DRegisterClass); - addRegisterClass(MVT::i16, BF::D16RegisterClass); - - computeRegisterProperties(); - - // Blackfin doesn't have i1 loads or stores - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - - setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); - setOperationAction(ISD::JumpTable, MVT::i32, Custom); - - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::Other, Expand); - - // i16 registers don't do much - setOperationAction(ISD::AND, MVT::i16, Promote); - setOperationAction(ISD::OR, MVT::i16, Promote); - setOperationAction(ISD::XOR, MVT::i16, Promote); - setOperationAction(ISD::CTPOP, MVT::i16, Promote); - // The expansion of CTLZ/CTTZ uses AND/OR, so we might as well promote - // immediately. 
- setOperationAction(ISD::CTLZ, MVT::i16, Promote); - setOperationAction(ISD::CTTZ, MVT::i16, Promote); - setOperationAction(ISD::SETCC, MVT::i16, Promote); - - // Blackfin has no division - setOperationAction(ISD::SDIV, MVT::i16, Expand); - setOperationAction(ISD::SDIV, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i16, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::SREM, MVT::i16, Expand); - setOperationAction(ISD::SREM, MVT::i32, Expand); - setOperationAction(ISD::UDIV, MVT::i16, Expand); - setOperationAction(ISD::UDIV, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i16, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UREM, MVT::i16, Expand); - setOperationAction(ISD::UREM, MVT::i32, Expand); - - setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand); - setOperationAction(ISD::MULHU, MVT::i32, Expand); - setOperationAction(ISD::MULHS, MVT::i32, Expand); - - // No carry-in operations. - setOperationAction(ISD::ADDE, MVT::i32, Custom); - setOperationAction(ISD::SUBE, MVT::i32, Custom); - - // Blackfin has no intrinsics for these particular operations. - setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); - setOperationAction(ISD::BSWAP, MVT::i32, Expand); - - setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand); - setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - - // i32 has native CTPOP, but not CTLZ/CTTZ - setOperationAction(ISD::CTLZ, MVT::i32, Expand); - setOperationAction(ISD::CTTZ, MVT::i32, Expand); - - // READCYCLECOUNTER needs special type legalization. 
- setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); - - setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); - - // Use the default implementation. - setOperationAction(ISD::VACOPY, MVT::Other, Expand); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - - setMinFunctionAlignment(2); -} - -const char *BlackfinTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: return 0; - case BFISD::CALL: return "BFISD::CALL"; - case BFISD::RET_FLAG: return "BFISD::RET_FLAG"; - case BFISD::Wrapper: return "BFISD::Wrapper"; - } -} - -EVT BlackfinTargetLowering::getSetCCResultType(EVT VT) const { - // SETCC always sets the CC register. Technically that is an i1 register, but - // that type is not legal, so we treat it as an i32 register. - return MVT::i32; -} - -SDValue BlackfinTargetLowering::LowerGlobalAddress(SDValue Op, - SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); - const GlobalValue *GV = cast(Op)->getGlobal(); - - Op = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); - return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op); -} - -SDValue BlackfinTargetLowering::LowerJumpTable(SDValue Op, - SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); - int JTI = cast(Op)->getIndex(); - - Op = DAG.getTargetJumpTable(JTI, MVT::i32); - return DAG.getNode(BFISD::Wrapper, DL, MVT::i32, Op); -} - -SDValue -BlackfinTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl - &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) - const { - - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - - SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack 
space - CCInfo.AnalyzeFormalArguments(Ins, CC_Blackfin); - - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - - if (VA.isRegLoc()) { - EVT RegVT = VA.getLocVT(); - TargetRegisterClass *RC = VA.getLocReg() == BF::P0 ? - BF::PRegisterClass : BF::DRegisterClass; - assert(RC->contains(VA.getLocReg()) && "Unexpected regclass in CCState"); - assert(RC->hasType(RegVT) && "Unexpected regclass in CCState"); - - unsigned Reg = MF.getRegInfo().createVirtualRegister(RC); - MF.getRegInfo().addLiveIn(VA.getLocReg(), Reg); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); - - // If this is an 8 or 16-bit value, it is really passed promoted to 32 - // bits. Insert an assert[sz]ext to capture this, then truncate to the - // right size. - if (VA.getLocInfo() == CCValAssign::SExt) - ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, - DAG.getValueType(VA.getValVT())); - else if (VA.getLocInfo() == CCValAssign::ZExt) - ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, - DAG.getValueType(VA.getValVT())); - - if (VA.getLocInfo() != CCValAssign::Full) - ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); - - InVals.push_back(ArgValue); - } else { - assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc"); - unsigned ObjSize = VA.getLocVT().getStoreSize(); - int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true); - SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, - MachinePointerInfo(), - false, false, 0)); - } - } - - return Chain; -} - -SDValue -BlackfinTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { - - // CCValAssign - represent the assignment of the return value to locations. - SmallVector RVLocs; - - // CCState - Info about the registers and stack slot. 
- CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - DAG.getTarget(), RVLocs, *DAG.getContext()); - - // Analize return values. - CCInfo.AnalyzeReturn(Outs, RetCC_Blackfin); - - // If this is the first return lowered for this function, add the regs to the - // liveout set for the function. - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) { - for (unsigned i = 0; i != RVLocs.size(); ++i) - DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg()); - } - - SDValue Flag; - - // Copy the result values into the output registers. - for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign &VA = RVLocs[i]; - assert(VA.isRegLoc() && "Can only return in registers!"); - SDValue Opi = OutVals[i]; - - // Expand to i32 if necessary - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::SExt: - Opi = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Opi); - break; - case CCValAssign::ZExt: - Opi = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Opi); - break; - case CCValAssign::AExt: - Opi = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Opi); - break; - } - Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Opi, SDValue()); - // Guarantee that all emitted copies are stuck together with flags. - Flag = Chain.getValue(1); - } - - if (Flag.getNode()) { - return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain, Flag); - } else { - return DAG.getNode(BFISD::RET_FLAG, dl, MVT::Other, Chain); - } -} - -SDValue -BlackfinTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - // Blackfin target does not yet support tail call optimization. - isTailCall = false; - - // Analyze operands of the call, assigning locations to each operand. 
- SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - DAG.getTarget(), ArgLocs, *DAG.getContext()); - CCInfo.AllocateStack(12, 4); // ABI requires 12 bytes stack space - CCInfo.AnalyzeCallOperands(Outs, CC_Blackfin); - - // Get the size of the outgoing arguments stack space requirement. - unsigned ArgsSize = CCInfo.getNextStackOffset(); - - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(ArgsSize, true)); - SmallVector, 8> RegsToPass; - SmallVector MemOpChains; - - // Walk the register/memloc assignments, inserting copies/loads. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - SDValue Arg = OutVals[i]; - - // Promote the value if needed. - switch (VA.getLocInfo()) { - default: llvm_unreachable("Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::SExt: - Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); - break; - case CCValAssign::ZExt: - Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); - break; - case CCValAssign::AExt: - Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); - break; - } - - // Arguments that can be passed on register must be kept at - // RegsToPass vector - if (VA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { - assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc"); - int Offset = VA.getLocMemOffset(); - assert(Offset%4 == 0 && "Unaligned LocMemOffset"); - assert(VA.getLocVT()==MVT::i32 && "Illegal CCValAssign type"); - SDValue SPN = DAG.getCopyFromReg(Chain, dl, BF::SP, MVT::i32); - SDValue OffsetN = DAG.getIntPtrConstant(Offset); - OffsetN = DAG.getNode(ISD::ADD, dl, MVT::i32, SPN, OffsetN); - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, OffsetN, - MachinePointerInfo(),false, false, 0)); - } - } - - // Transform all store nodes into one single node because - // all store nodes are independent of each other. 
- if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); - - // Build a sequence of copy-to-reg nodes chained together with token - // chain and flag operands which copy the outgoing args into registers. - // The InFlag in necessary since all emitted instructions must be - // stuck together. - SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // If the callee is a GlobalAddress node (quite common, every direct call is) - // turn it into a TargetGlobalAddress node so that legalize doesn't hack it. - // Likewise ExternalSymbol -> TargetExternalSymbol. - if (GlobalAddressSDNode *G = dyn_cast(Callee)) - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, MVT::i32); - else if (ExternalSymbolSDNode *E = dyn_cast(Callee)) - Callee = DAG.getTargetExternalSymbol(E->getSymbol(), MVT::i32); - - std::vector NodeTys; - NodeTys.push_back(MVT::Other); // Returns a chain - NodeTys.push_back(MVT::Glue); // Returns a flag for retval copy to use. - SDValue Ops[] = { Chain, Callee, InFlag }; - Chain = DAG.getNode(BFISD::CALL, dl, NodeTys, Ops, - InFlag.getNode() ? 3 : 2); - InFlag = Chain.getValue(1); - - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, true), - DAG.getIntPtrConstant(0, true), InFlag); - InFlag = Chain.getValue(1); - - // Assign locations to each value returned by this call. - SmallVector RVLocs; - CCState RVInfo(CallConv, isVarArg, DAG.getMachineFunction(), - DAG.getTarget(), RVLocs, *DAG.getContext()); - - RVInfo.AnalyzeCallResult(Ins, RetCC_Blackfin); - - // Copy all of the result registers out of their specified physreg. 
- for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign &RV = RVLocs[i]; - unsigned Reg = RV.getLocReg(); - - Chain = DAG.getCopyFromReg(Chain, dl, Reg, - RVLocs[i].getLocVT(), InFlag); - SDValue Val = Chain.getValue(0); - InFlag = Chain.getValue(2); - Chain = Chain.getValue(1); - - // Callee is responsible for extending any i16 return values. - switch (RV.getLocInfo()) { - case CCValAssign::SExt: - Val = DAG.getNode(ISD::AssertSext, dl, RV.getLocVT(), Val, - DAG.getValueType(RV.getValVT())); - break; - case CCValAssign::ZExt: - Val = DAG.getNode(ISD::AssertZext, dl, RV.getLocVT(), Val, - DAG.getValueType(RV.getValVT())); - break; - default: - break; - } - - // Truncate to valtype - if (RV.getLocInfo() != CCValAssign::Full) - Val = DAG.getNode(ISD::TRUNCATE, dl, RV.getValVT(), Val); - InVals.push_back(Val); - } - - return Chain; -} - -// Expansion of ADDE / SUBE. This is a bit involved since blackfin doesn't have -// add-with-carry instructions. -SDValue BlackfinTargetLowering::LowerADDE(SDValue Op, SelectionDAG &DAG) const { - // Operands: lhs, rhs, carry-in (AC0 flag) - // Results: sum, carry-out (AC0 flag) - DebugLoc dl = Op.getDebugLoc(); - - unsigned Opcode = Op.getOpcode()==ISD::ADDE ? BF::ADD : BF::SUB; - - // zext incoming carry flag in AC0 to 32 bits - SDNode* CarryIn = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32, - /* flag= */ Op.getOperand(2)); - CarryIn = DAG.getMachineNode(BF::MOVECC_zext, dl, MVT::i32, - SDValue(CarryIn, 0)); - - // Add operands, produce sum and carry flag - SDNode *Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue, - Op.getOperand(0), Op.getOperand(1)); - - // Store intermediate carry from Sum - SDNode* Carry1 = DAG.getMachineNode(BF::MOVE_cc_ac0, dl, MVT::i32, - /* flag= */ SDValue(Sum, 1)); - - // Add incoming carry, again producing an output flag - Sum = DAG.getMachineNode(Opcode, dl, MVT::i32, MVT::Glue, - SDValue(Sum, 0), SDValue(CarryIn, 0)); - - // Update AC0 with the intermediate carry, producing a flag. 
- SDNode *CarryOut = DAG.getMachineNode(BF::OR_ac0_cc, dl, MVT::Glue, - SDValue(Carry1, 0)); - - // Compose (i32, flag) pair - SDValue ops[2] = { SDValue(Sum, 0), SDValue(CarryOut, 0) }; - return DAG.getMergeValues(ops, 2, dl); -} - -SDValue BlackfinTargetLowering::LowerOperation(SDValue Op, - SelectionDAG &DAG) const { - switch (Op.getOpcode()) { - default: - Op.getNode()->dump(); - llvm_unreachable("Should not custom lower this!"); - case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); - case ISD::GlobalTLSAddress: - llvm_unreachable("TLS not implemented for Blackfin."); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); - // Frame & Return address. Currently unimplemented - case ISD::FRAMEADDR: return SDValue(); - case ISD::RETURNADDR: return SDValue(); - case ISD::ADDE: - case ISD::SUBE: return LowerADDE(Op, DAG); - } -} - -void -BlackfinTargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl &Results, - SelectionDAG &DAG) const { - DebugLoc dl = N->getDebugLoc(); - switch (N->getOpcode()) { - default: - llvm_unreachable("Do not know how to custom type legalize this operation!"); - return; - case ISD::READCYCLECOUNTER: { - // The low part of the cycle counter is in CYCLES, the high part in - // CYCLES2. Reading CYCLES will latch the value of CYCLES2, so we must read - // CYCLES2 last. - SDValue TheChain = N->getOperand(0); - SDValue lo = DAG.getCopyFromReg(TheChain, dl, BF::CYCLES, MVT::i32); - SDValue hi = DAG.getCopyFromReg(lo.getValue(1), dl, BF::CYCLES2, MVT::i32); - // Use a buildpair to merge the two 32-bit values into a 64-bit one. - Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, lo, hi)); - // Outgoing chain. If we were to use the chain from lo instead, it would be - // possible to entirely eliminate the CYCLES2 read in (i32 (trunc - // readcyclecounter)). Unfortunately this could possibly delay the CYCLES2 - // read beyond the next CYCLES read, leading to invalid results. 
- Results.push_back(hi.getValue(1)); - return; - } - } -} - -//===----------------------------------------------------------------------===// -// Blackfin Inline Assembly Support -//===----------------------------------------------------------------------===// - -/// getConstraintType - Given a constraint letter, return the type of -/// constraint it is for this target. -BlackfinTargetLowering::ConstraintType -BlackfinTargetLowering::getConstraintType(const std::string &Constraint) const { - if (Constraint.size() != 1) - return TargetLowering::getConstraintType(Constraint); - - switch (Constraint[0]) { - // Standard constraints - case 'r': - return C_RegisterClass; - - // Blackfin-specific constraints - case 'a': - case 'd': - case 'z': - case 'D': - case 'W': - case 'e': - case 'b': - case 'v': - case 'f': - case 'c': - case 't': - case 'u': - case 'k': - case 'x': - case 'y': - case 'w': - return C_RegisterClass; - case 'A': - case 'B': - case 'C': - case 'Z': - case 'Y': - return C_Register; - } - - // Not implemented: q0-q7, qA. Use {R2} etc instead - - return TargetLowering::getConstraintType(Constraint); -} - -/// Examine constraint type and operand type and determine a weight value. -/// This object must already have been set up with the operand type -/// and the current alternative constraint selected. -TargetLowering::ConstraintWeight -BlackfinTargetLowering::getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const { - ConstraintWeight weight = CW_Invalid; - Value *CallOperandVal = info.CallOperandVal; - // If we don't have a value, we can't do a match, - // but allow it at the lowest weight. - if (CallOperandVal == NULL) - return CW_Default; - // Look at the constraint type. 
- switch (*constraint) { - default: - weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); - break; - - // Blackfin-specific constraints - case 'a': - case 'd': - case 'z': - case 'D': - case 'W': - case 'e': - case 'b': - case 'v': - case 'f': - case 'c': - case 't': - case 'u': - case 'k': - case 'x': - case 'y': - case 'w': - return CW_Register; - case 'A': - case 'B': - case 'C': - case 'Z': - case 'Y': - return CW_SpecificReg; - } - return weight; -} - -/// getRegForInlineAsmConstraint - Return register no and class for a C_Register -/// constraint. -std::pair BlackfinTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const { - typedef std::pair Pair; - using namespace BF; - - if (Constraint.size() != 1) - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); - - switch (Constraint[0]) { - // Standard constraints - case 'r': - return Pair(0U, VT == MVT::i16 ? D16RegisterClass : DPRegisterClass); - - // Blackfin-specific constraints - case 'a': return Pair(0U, PRegisterClass); - case 'd': return Pair(0U, DRegisterClass); - case 'e': return Pair(0U, AccuRegisterClass); - case 'A': return Pair(A0, AccuRegisterClass); - case 'B': return Pair(A1, AccuRegisterClass); - case 'b': return Pair(0U, IRegisterClass); - case 'v': return Pair(0U, BRegisterClass); - case 'f': return Pair(0U, MRegisterClass); - case 'C': return Pair(CC, JustCCRegisterClass); - case 'x': return Pair(0U, GRRegisterClass); - case 'w': return Pair(0U, ALLRegisterClass); - case 'Z': return Pair(P3, PRegisterClass); - case 'Y': return Pair(P1, PRegisterClass); - case 'z': return Pair(0U, zConsRegisterClass); - case 'D': return Pair(0U, DConsRegisterClass); - case 'W': return Pair(0U, WConsRegisterClass); - case 'c': return Pair(0U, cConsRegisterClass); - case 't': return Pair(0U, tConsRegisterClass); - case 'u': return Pair(0U, uConsRegisterClass); - case 'k': return Pair(0U, kConsRegisterClass); - case 'y': return Pair(0U, 
yConsRegisterClass); - } - - // Not implemented: q0-q7, qA. Use {R2} etc instead. - - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); -} - -bool BlackfinTargetLowering:: -isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { - // The Blackfin target isn't yet aware of offsets. - return false; -} diff --git a/lib/Target/Blackfin/BlackfinISelLowering.h b/lib/Target/Blackfin/BlackfinISelLowering.h deleted file mode 100644 index 90908ba..0000000 --- a/lib/Target/Blackfin/BlackfinISelLowering.h +++ /dev/null @@ -1,83 +0,0 @@ -//===- BlackfinISelLowering.h - Blackfin DAG Lowering Interface -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that Blackfin uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef BLACKFIN_ISELLOWERING_H -#define BLACKFIN_ISELLOWERING_H - -#include "llvm/Target/TargetLowering.h" -#include "Blackfin.h" - -namespace llvm { - - namespace BFISD { - enum { - FIRST_NUMBER = ISD::BUILTIN_OP_END, - CALL, // A call instruction. - RET_FLAG, // Return with a flag operand. - Wrapper // Address wrapper - }; - } - - class BlackfinTargetLowering : public TargetLowering { - public: - BlackfinTargetLowering(TargetMachine &TM); - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i16; } - virtual EVT getSetCCResultType(EVT VT) const; - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - virtual void ReplaceNodeResults(SDNode *N, - SmallVectorImpl &Results, - SelectionDAG &DAG) const; - - ConstraintType getConstraintType(const std::string &Constraint) const; - - /// Examine constraint string and operand type and determine a weight value. 
- /// The operand object must already have been set up with the operand type. - ConstraintWeight getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const; - - std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; - const char *getTargetNodeName(unsigned Opcode) const; - - private: - SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerADDE(SDValue Op, SelectionDAG &DAG) const; - - virtual SDValue - LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - virtual SDValue - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; - }; -} // end namespace llvm - -#endif // BLACKFIN_ISELLOWERING_H diff --git a/lib/Target/Blackfin/BlackfinInstrFormats.td b/lib/Target/Blackfin/BlackfinInstrFormats.td deleted file mode 100644 index d8e6e25..0000000 --- a/lib/Target/Blackfin/BlackfinInstrFormats.td +++ /dev/null @@ -1,34 +0,0 @@ -//===--- BlackfinInstrFormats.td ---------------------------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Instruction format superclass -//===----------------------------------------------------------------------===// - -class InstBfin pattern> - : Instruction { - field bits<32> Inst; - - let Namespace = "BF"; - - dag OutOperandList = outs; - dag InOperandList = ins; - let AsmString = asmstr; - let Pattern = pattern; -} - -// Single-word (16-bit) instructions -class F1 pattern> - : InstBfin { -} - -// Double-word (32-bit) instructions -class F2 pattern> - : InstBfin { -} diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.cpp b/lib/Target/Blackfin/BlackfinInstrInfo.cpp deleted file mode 100644 index c06a919..0000000 --- a/lib/Target/Blackfin/BlackfinInstrInfo.cpp +++ /dev/null @@ -1,256 +0,0 @@ -//===- BlackfinInstrInfo.cpp - Blackfin Instruction Information -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Blackfin implementation of the TargetInstrInfo class. 
-// -//===----------------------------------------------------------------------===// - -#include "BlackfinInstrInfo.h" -#include "BlackfinSubtarget.h" -#include "Blackfin.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_CTOR -#include "BlackfinGenInstrInfo.inc" - -using namespace llvm; - -BlackfinInstrInfo::BlackfinInstrInfo(BlackfinSubtarget &ST) - : BlackfinGenInstrInfo(BF::ADJCALLSTACKDOWN, BF::ADJCALLSTACKUP), - RI(ST, *this), - Subtarget(ST) {} - -/// isLoadFromStackSlot - If the specified machine instruction is a direct -/// load from a stack slot, return the virtual or physical register number of -/// the destination along with the FrameIndex of the loaded stack slot. If -/// not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than loading from the stack slot. -unsigned BlackfinInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case BF::LOAD32fi: - case BF::LOAD16fi: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; -} - -/// isStoreToStackSlot - If the specified machine instruction is a direct -/// store to a stack slot, return the virtual or physical register number of -/// the source reg along with the FrameIndex of the loaded stack slot. If -/// not, return 0. This predicate must return 0 if the instruction has -/// any side effects other than storing to the stack slot. 
-unsigned BlackfinInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - default: break; - case BF::STORE32fi: - case BF::STORE16fi: - if (MI->getOperand(1).isFI() && - MI->getOperand(2).isImm() && - MI->getOperand(2).getImm() == 0) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; -} - -unsigned BlackfinInstrInfo:: -InsertBranch(MachineBasicBlock &MBB, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const { - // Shouldn't be a fall through. - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 1 || Cond.size() == 0) && - "Branch conditions have one component!"); - - if (Cond.empty()) { - // Unconditional branch? - assert(!FBB && "Unconditional branch with multiple successors!"); - BuildMI(&MBB, DL, get(BF::JUMPa)).addMBB(TBB); - return 1; - } - - // Conditional branch. - llvm_unreachable("Implement conditional branches!"); -} - -void BlackfinInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - if (BF::ALLRegClass.contains(DestReg, SrcReg)) { - BuildMI(MBB, I, DL, get(BF::MOVE), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - - if (BF::D16RegClass.contains(DestReg, SrcReg)) { - BuildMI(MBB, I, DL, get(BF::SLL16i), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)) - .addImm(0); - return; - } - - if (BF::DRegClass.contains(DestReg)) { - if (SrcReg == BF::NCC) { - BuildMI(MBB, I, DL, get(BF::MOVENCC_z), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - BuildMI(MBB, I, DL, get(BF::BITTGL), DestReg).addReg(DestReg).addImm(0); - return; - } - if (SrcReg == BF::CC) { - BuildMI(MBB, I, DL, get(BF::MOVECC_zext), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - } - - if (BF::DRegClass.contains(SrcReg)) { - if 
(DestReg == BF::NCC) { - BuildMI(MBB, I, DL, get(BF::SETEQri_not), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)).addImm(0); - return; - } - if (DestReg == BF::CC) { - BuildMI(MBB, I, DL, get(BF::MOVECC_nz), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - } - - - if (DestReg == BF::NCC && SrcReg == BF::CC) { - BuildMI(MBB, I, DL, get(BF::MOVE_ncccc), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - - if (DestReg == BF::CC && SrcReg == BF::NCC) { - BuildMI(MBB, I, DL, get(BF::MOVE_ccncc), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - return; - } - - llvm_unreachable("Bad reg-to-reg copy"); -} - -static bool inClass(const TargetRegisterClass &Test, - unsigned Reg, - const TargetRegisterClass *RC) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return Test.contains(Reg); - else - return Test.hasSubClassEq(RC); -} - -void -BlackfinInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned SrcReg, - bool isKill, - int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL = I != MBB.end() ? 
I->getDebugLoc() : DebugLoc(); - - if (inClass(BF::DPRegClass, SrcReg, RC)) { - BuildMI(MBB, I, DL, get(BF::STORE32fi)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI) - .addImm(0); - return; - } - - if (inClass(BF::D16RegClass, SrcReg, RC)) { - BuildMI(MBB, I, DL, get(BF::STORE16fi)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI) - .addImm(0); - return; - } - - if (inClass(BF::AnyCCRegClass, SrcReg, RC)) { - BuildMI(MBB, I, DL, get(BF::STORE8fi)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI) - .addImm(0); - return; - } - - llvm_unreachable((std::string("Cannot store regclass to stack slot: ")+ - RC->getName()).c_str()); -} - -void BlackfinInstrInfo:: -storeRegToAddr(MachineFunction &MF, - unsigned SrcReg, - bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const { - llvm_unreachable("storeRegToAddr not implemented"); -} - -void -BlackfinInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, - int FI, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - DebugLoc DL = I != MBB.end() ? 
I->getDebugLoc() : DebugLoc(); - if (inClass(BF::DPRegClass, DestReg, RC)) { - BuildMI(MBB, I, DL, get(BF::LOAD32fi), DestReg) - .addFrameIndex(FI) - .addImm(0); - return; - } - - if (inClass(BF::D16RegClass, DestReg, RC)) { - BuildMI(MBB, I, DL, get(BF::LOAD16fi), DestReg) - .addFrameIndex(FI) - .addImm(0); - return; - } - - if (inClass(BF::AnyCCRegClass, DestReg, RC)) { - BuildMI(MBB, I, DL, get(BF::LOAD8fi), DestReg) - .addFrameIndex(FI) - .addImm(0); - return; - } - - llvm_unreachable("Cannot load regclass from stack slot"); -} - -void BlackfinInstrInfo:: -loadRegFromAddr(MachineFunction &MF, - unsigned DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const { - llvm_unreachable("loadRegFromAddr not implemented"); -} diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.h b/lib/Target/Blackfin/BlackfinInstrInfo.h deleted file mode 100644 index d22ddf0..0000000 --- a/lib/Target/Blackfin/BlackfinInstrInfo.h +++ /dev/null @@ -1,81 +0,0 @@ -//===- BlackfinInstrInfo.h - Blackfin Instruction Information ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Blackfin implementation of the TargetInstrInfo class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef BLACKFININSTRUCTIONINFO_H -#define BLACKFININSTRUCTIONINFO_H - -#include "llvm/Target/TargetInstrInfo.h" -#include "BlackfinRegisterInfo.h" - -#define GET_INSTRINFO_HEADER -#include "BlackfinGenInstrInfo.inc" - -namespace llvm { - - class BlackfinInstrInfo : public BlackfinGenInstrInfo { - const BlackfinRegisterInfo RI; - const BlackfinSubtarget& Subtarget; - public: - explicit BlackfinInstrInfo(BlackfinSubtarget &ST); - - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - virtual const BlackfinRegisterInfo &getRegisterInfo() const { return RI; } - - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - virtual unsigned - InsertBranch(MachineBasicBlock &MBB, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const; - - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, - int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void storeRegToAddr(MachineFunction &MF, - unsigned SrcReg, bool isKill, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromAddr(MachineFunction &MF, unsigned 
DestReg, - SmallVectorImpl &Addr, - const TargetRegisterClass *RC, - SmallVectorImpl &NewMIs) const; - }; - -} // end namespace llvm - -#endif diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td deleted file mode 100644 index 5b59d77..0000000 --- a/lib/Target/Blackfin/BlackfinInstrInfo.td +++ /dev/null @@ -1,862 +0,0 @@ -//===- BlackfinInstrInfo.td - Target Description for Blackfin Target ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the Blackfin instructions in TableGen format. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Instruction format superclass -//===----------------------------------------------------------------------===// - -include "BlackfinInstrFormats.td" - -// These are target-independent nodes, but have target-specific formats. 
-def SDT_BfinCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i32> ]>; -def SDT_BfinCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i32>, - SDTCisVT<1, i32> ]>; - -def BfinCallseqStart : SDNode<"ISD::CALLSEQ_START", SDT_BfinCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; -def BfinCallseqEnd : SDNode<"ISD::CALLSEQ_END", SDT_BfinCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - -def SDT_BfinCall : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; -def BfinCall : SDNode<"BFISD::CALL", SDT_BfinCall, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, - SDNPVariadic]>; - -def BfinRet: SDNode<"BFISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; - -def BfinWrapper: SDNode<"BFISD::Wrapper", SDTIntUnaryOp>; - -//===----------------------------------------------------------------------===// -// Transformations -//===----------------------------------------------------------------------===// - -def trailingZeros_xform : SDNodeXFormgetTargetConstant(N->getAPIntValue().countTrailingZeros(), - MVT::i32); -}]>; - -def trailingOnes_xform : SDNodeXFormgetTargetConstant(N->getAPIntValue().countTrailingOnes(), - MVT::i32); -}]>; - -def LO16 : SDNodeXFormgetTargetConstant((unsigned short)N->getZExtValue(), MVT::i16); -}]>; - -def HI16 : SDNodeXFormgetTargetConstant((unsigned)N->getZExtValue() >> 16, MVT::i16); -}]>; - -//===----------------------------------------------------------------------===// -// Immediates -//===----------------------------------------------------------------------===// - -def imm3 : PatLeaf<(imm), [{return isInt<3>(N->getSExtValue());}]>; -def uimm3 : PatLeaf<(imm), [{return isUInt<3>(N->getZExtValue());}]>; -def uimm4 : PatLeaf<(imm), [{return isUInt<4>(N->getZExtValue());}]>; -def uimm5 : PatLeaf<(imm), [{return isUInt<5>(N->getZExtValue());}]>; - -def uimm5m2 : PatLeaf<(imm), [{ - uint64_t value = N->getZExtValue(); - return value % 2 == 0 && isUInt<5>(value); -}]>; - -def uimm6m4 : PatLeaf<(imm), [{ - uint64_t value = N->getZExtValue(); - return value % 4 == 0 && 
isUInt<6>(value); -}]>; - -def imm7 : PatLeaf<(imm), [{return isInt<7>(N->getSExtValue());}]>; -def imm16 : PatLeaf<(imm), [{return isInt<16>(N->getSExtValue());}]>; -def uimm16 : PatLeaf<(imm), [{return isUInt<16>(N->getZExtValue());}]>; - -def ximm16 : PatLeaf<(imm), [{ - int64_t value = N->getSExtValue(); - return value < (1<<16) && value >= -(1<<15); -}]>; - -def imm17m2 : PatLeaf<(imm), [{ - int64_t value = N->getSExtValue(); - return value % 2 == 0 && isInt<17>(value); -}]>; - -def imm18m4 : PatLeaf<(imm), [{ - int64_t value = N->getSExtValue(); - return value % 4 == 0 && isInt<18>(value); -}]>; - -// 32-bit bitmask transformed to a bit number -def uimm5mask : Operand, PatLeaf<(imm), [{ - return isPowerOf2_32(N->getZExtValue()); -}], trailingZeros_xform>; - -// 32-bit inverse bitmask transformed to a bit number -def uimm5imask : Operand, PatLeaf<(imm), [{ - return isPowerOf2_32(~N->getZExtValue()); -}], trailingOnes_xform>; - -//===----------------------------------------------------------------------===// -// Operands -//===----------------------------------------------------------------------===// - -def calltarget : Operand; - -def brtarget : Operand; - -// Addressing modes -def ADDRspii : ComplexPattern; - -// Address operands -def MEMii : Operand { - let PrintMethod = "printMemoryOperand"; - let MIOperandInfo = (ops i32imm, i32imm); -} - -//===----------------------------------------------------------------------===// -// Instructions -//===----------------------------------------------------------------------===// - -// Pseudo instructions. 
-class Pseudo pattern> - : InstBfin; - -let Defs = [SP], Uses = [SP] in { -def ADJCALLSTACKDOWN : Pseudo<(outs), (ins i32imm:$amt), - "${:comment}ADJCALLSTACKDOWN $amt", - [(BfinCallseqStart timm:$amt)]>; -def ADJCALLSTACKUP : Pseudo<(outs), (ins i32imm:$amt1, i32imm:$amt2), - "${:comment}ADJCALLSTACKUP $amt1 $amt2", - [(BfinCallseqEnd timm:$amt1, timm:$amt2)]>; -} - -//===----------------------------------------------------------------------===// -// Table C-9. Program Flow Control Instructions -//===----------------------------------------------------------------------===// - -let isBranch = 1, isTerminator = 1 in { - -let isIndirectBranch = 1 in -def JUMPp : F1<(outs), (ins P:$target), - "JUMP ($target);", - [(brind P:$target)]>; - -// TODO JUMP (PC-P) - -// NOTE: assembler chooses between JUMP.S and JUMP.L -def JUMPa : F1<(outs), (ins brtarget:$target), - "jump $target;", - [(br bb:$target)]>; - -def JUMPcc : F1<(outs), (ins AnyCC:$cc, brtarget:$target), - "if $cc jump $target;", - [(brcond AnyCC:$cc, bb:$target)]>; -} - -let isCall = 1, - Defs = [R0, R1, R2, R3, P0, P1, P2, LB0, LB1, LC0, LC1, RETS, ASTAT] in { -def CALLa: F1<(outs), (ins calltarget:$func, variable_ops), - "call $func;", []>; -def CALLp: F1<(outs), (ins P:$func, variable_ops), - "call ($func);", [(BfinCall P:$func)]>; -} - -let isReturn = 1, - isTerminator = 1, - isBarrier = 1, - Uses = [RETS] in -def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>; - -//===----------------------------------------------------------------------===// -// Table C-10. 
Load / Store Instructions -//===----------------------------------------------------------------------===// - -// Immediate constant loads - -// sext immediate, i32 D/P regs -def LOADimm7: F1<(outs DP:$dst), (ins i32imm:$src), - "$dst = $src (x);", - [(set DP:$dst, imm7:$src)]>; - -// zext immediate, i32 reg groups 0-3 -def LOADuimm16: F2<(outs GR:$dst), (ins i32imm:$src), - "$dst = $src (z);", - [(set GR:$dst, uimm16:$src)]>; - -// sext immediate, i32 reg groups 0-3 -def LOADimm16: F2<(outs GR:$dst), (ins i32imm:$src), - "$dst = $src (x);", - [(set GR:$dst, imm16:$src)]>; - -// Pseudo-instruction for loading a general 32-bit constant. -def LOAD32imm: Pseudo<(outs GR:$dst), (ins i32imm:$src), - "$dst.h = ($src >> 16); $dst.l = ($src & 0xffff);", - [(set GR:$dst, imm:$src)]>; - -def LOAD32sym: Pseudo<(outs GR:$dst), (ins i32imm:$src), - "$dst.h = $src; $dst.l = $src;", []>; - - -// 16-bit immediate, i16 reg groups 0-3 -def LOAD16i: F2<(outs GR16:$dst), (ins i16imm:$src), - "$dst = $src;", []>; - -def : Pat<(BfinWrapper (i32 tglobaladdr:$addr)), - (LOAD32sym tglobaladdr:$addr)>; - -def : Pat<(BfinWrapper (i32 tjumptable:$addr)), - (LOAD32sym tjumptable:$addr)>; - -// We cannot copy from GR16 to D16, and codegen wants to insert copies if we -// emit GR16 instructions. As a hack, we use this fake instruction instead. 
-def LOAD16i_d16: F2<(outs D16:$dst), (ins i16imm:$src), - "$dst = $src;", - [(set D16:$dst, ximm16:$src)]>; - -// Memory loads with patterns - -def LOAD32p: F1<(outs DP:$dst), (ins P:$ptr), - "$dst = [$ptr];", - [(set DP:$dst, (load P:$ptr))]>; - -// Pseudo-instruction for loading a stack slot -def LOAD32fi: Pseudo<(outs DP:$dst), (ins MEMii:$mem), - "${:comment}FI $dst = [$mem];", - [(set DP:$dst, (load ADDRspii:$mem))]>; - -// Note: Expands to multiple insns -def LOAD16fi: Pseudo<(outs D16:$dst), (ins MEMii:$mem), - "${:comment}FI $dst = [$mem];", - [(set D16:$dst, (load ADDRspii:$mem))]>; - -// Pseudo-instruction for loading a stack slot, used for AnyCC regs. -// Replaced with Load D + CC=D -def LOAD8fi: Pseudo<(outs AnyCC:$dst), (ins MEMii:$mem), - "${:comment}FI $dst = B[$mem];", - [(set AnyCC:$dst, (load ADDRspii:$mem))]>; - -def LOAD32p_uimm6m4: F1<(outs DP:$dst), (ins P:$ptr, i32imm:$off), - "$dst = [$ptr + $off];", - [(set DP:$dst, (load (add P:$ptr, uimm6m4:$off)))]>; - -def LOAD32p_imm18m4: F2<(outs DP:$dst), (ins P:$ptr, i32imm:$off), - "$dst = [$ptr + $off];", - [(set DP:$dst, (load (add P:$ptr, imm18m4:$off)))]>; - -def LOAD32p_16z: F1<(outs D:$dst), (ins P:$ptr), - "$dst = W[$ptr] (z);", - [(set D:$dst, (zextloadi16 P:$ptr))]>; - -def : Pat<(i32 (extloadi16 P:$ptr)),(LOAD32p_16z P:$ptr)>; - -def LOAD32p_uimm5m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), - "$dst = w[$ptr + $off] (z);", - [(set D:$dst, (zextloadi16 (add P:$ptr, - uimm5m2:$off)))]>; - -def : Pat<(i32 (extloadi16 (add P:$ptr, uimm5m2:$off))), - (LOAD32p_uimm5m2_16z P:$ptr, imm:$off)>; - -def LOAD32p_imm17m2_16z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), - "$dst = w[$ptr + $off] (z);", - [(set D:$dst, - (zextloadi16 (add P:$ptr, imm17m2:$off)))]>; - -def : Pat<(i32 (extloadi16 (add P:$ptr, imm17m2:$off))), - (LOAD32p_imm17m2_16z P:$ptr, imm:$off)>; - -def LOAD32p_16s: F1<(outs D:$dst), (ins P:$ptr), - "$dst = w[$ptr] (x);", - [(set D:$dst, (sextloadi16 P:$ptr))]>; - -def 
LOAD32p_uimm5m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), - "$dst = w[$ptr + $off] (x);", - [(set D:$dst, - (sextloadi16 (add P:$ptr, uimm5m2:$off)))]>; - -def LOAD32p_imm17m2_16s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), - "$dst = w[$ptr + $off] (x);", - [(set D:$dst, - (sextloadi16 (add P:$ptr, imm17m2:$off)))]>; - -def LOAD16pi: F1<(outs D16:$dst), (ins PI:$ptr), - "$dst = w[$ptr];", - [(set D16:$dst, (load PI:$ptr))]>; - -def LOAD32p_8z: F1<(outs D:$dst), (ins P:$ptr), - "$dst = B[$ptr] (z);", - [(set D:$dst, (zextloadi8 P:$ptr))]>; - -def : Pat<(i32 (extloadi8 P:$ptr)), (LOAD32p_8z P:$ptr)>; -def : Pat<(i16 (extloadi8 P:$ptr)), - (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>; -def : Pat<(i16 (zextloadi8 P:$ptr)), - (EXTRACT_SUBREG (LOAD32p_8z P:$ptr), lo16)>; - -def LOAD32p_imm16_8z: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), - "$dst = b[$ptr + $off] (z);", - [(set D:$dst, (zextloadi8 (add P:$ptr, imm16:$off)))]>; - -def : Pat<(i32 (extloadi8 (add P:$ptr, imm16:$off))), - (LOAD32p_imm16_8z P:$ptr, imm:$off)>; -def : Pat<(i16 (extloadi8 (add P:$ptr, imm16:$off))), - (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off), - lo16)>; -def : Pat<(i16 (zextloadi8 (add P:$ptr, imm16:$off))), - (EXTRACT_SUBREG (LOAD32p_imm16_8z P:$ptr, imm:$off), - lo16)>; - -def LOAD32p_8s: F1<(outs D:$dst), (ins P:$ptr), - "$dst = b[$ptr] (x);", - [(set D:$dst, (sextloadi8 P:$ptr))]>; - -def : Pat<(i16 (sextloadi8 P:$ptr)), - (EXTRACT_SUBREG (LOAD32p_8s P:$ptr), lo16)>; - -def LOAD32p_imm16_8s: F1<(outs D:$dst), (ins P:$ptr, i32imm:$off), - "$dst = b[$ptr + $off] (x);", - [(set D:$dst, (sextloadi8 (add P:$ptr, imm16:$off)))]>; - -def : Pat<(i16 (sextloadi8 (add P:$ptr, imm16:$off))), - (EXTRACT_SUBREG (LOAD32p_imm16_8s P:$ptr, imm:$off), - lo16)>; -// Memory loads without patterns - -let mayLoad = 1 in { - -multiclass LOAD_incdec { - def _inc : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr), - !strconcat(!subst("M", mem, "$dst = M[$ptr++]"), suf), []>; - def _dec : 
F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr), - !strconcat(!subst("M", mem, "$dst = M[$ptr--]"), suf), []>; -} -multiclass LOAD_incdecpost - : LOAD_incdec { - def _post : F1<(outs drc:$dst, prc:$ptr_wb), (ins prc:$ptr, prc:$off), - !strconcat(!subst("M", mem, "$dst = M[$ptr++$off]"), suf), []>; -} - -defm LOAD32p: LOAD_incdec; -defm LOAD32i: LOAD_incdec; -defm LOAD8z32p: LOAD_incdec; -defm LOAD8s32p: LOAD_incdec; -defm LOADhi: LOAD_incdec; -defm LOAD16z32p: LOAD_incdecpost; -defm LOAD16s32p: LOAD_incdecpost; - -def LOAD32p_post: F1<(outs D:$dst, P:$ptr_wb), (ins P:$ptr, P:$off), - "$dst = [$ptr ++ $off];", []>; - -// Note: $fp MUST be FP -def LOAD32fp_nimm7m4: F1<(outs DP:$dst), (ins P:$fp, i32imm:$off), - "$dst = [$fp - $off];", []>; - -def LOAD32i: F1<(outs D:$dst), (ins I:$ptr), - "$dst = [$ptr];", []>; -def LOAD32i_post: F1<(outs D:$dst, I:$ptr_wb), (ins I:$ptr, M:$off), - "$dst = [$ptr ++ $off];", []>; - - - -def LOADhp_post: F1<(outs D16:$dst, P:$ptr_wb), (ins P:$ptr, P:$off), - "$dst = w[$ptr ++ $off];", []>; - - -} - -// Memory stores with patterns -def STORE32p: F1<(outs), (ins DP:$val, P:$ptr), - "[$ptr] = $val;", - [(store DP:$val, P:$ptr)]>; - -// Pseudo-instructions for storing to a stack slot -def STORE32fi: Pseudo<(outs), (ins DP:$val, MEMii:$mem), - "${:comment}FI [$mem] = $val;", - [(store DP:$val, ADDRspii:$mem)]>; - -// Note: This stack-storing pseudo-instruction is expanded to multiple insns -def STORE16fi: Pseudo<(outs), (ins D16:$val, MEMii:$mem), - "${:comment}FI [$mem] = $val;", - [(store D16:$val, ADDRspii:$mem)]>; - -// Pseudo-instructions for storing AnyCC register to a stack slot. 
-// Replaced with D=CC + STORE byte -def STORE8fi: Pseudo<(outs), (ins AnyCC:$val, MEMii:$mem), - "${:comment}FI b[$mem] = $val;", - [(store AnyCC:$val, ADDRspii:$mem)]>; - -def STORE32p_uimm6m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off), - "[$ptr + $off] = $val;", - [(store DP:$val, (add P:$ptr, uimm6m4:$off))]>; - -def STORE32p_imm18m4: F1<(outs), (ins DP:$val, P:$ptr, i32imm:$off), - "[$ptr + $off] = $val;", - [(store DP:$val, (add P:$ptr, imm18m4:$off))]>; - -def STORE16pi: F1<(outs), (ins D16:$val, PI:$ptr), - "w[$ptr] = $val;", - [(store D16:$val, PI:$ptr)]>; - -def STORE8p: F1<(outs), (ins D:$val, P:$ptr), - "b[$ptr] = $val;", - [(truncstorei8 D:$val, P:$ptr)]>; - -def STORE8p_imm16: F1<(outs), (ins D:$val, P:$ptr, i32imm:$off), - "b[$ptr + $off] = $val;", - [(truncstorei8 D:$val, (add P:$ptr, imm16:$off))]>; - -let Constraints = "$ptr = $ptr_wb" in { - -multiclass STORE_incdec { - def _inc : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr), - !strconcat(pre, "[$ptr++] = $val;"), - [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr, off))]>; - def _dec : F1<(outs prc:$ptr_wb), (ins drc:$val, prc:$ptr), - !strconcat(pre, "[$ptr--] = $val;"), - [(set prc:$ptr_wb, (post_store drc:$val, prc:$ptr, - (ineg off)))]>; -} - -defm STORE32p: STORE_incdec; -defm STORE16i: STORE_incdec; -defm STORE8p: STORE_incdec; - -def STORE32p_post: F1<(outs P:$ptr_wb), (ins D:$val, P:$ptr, P:$off), - "[$ptr ++ $off] = $val;", - [(set P:$ptr_wb, (post_store D:$val, P:$ptr, P:$off))]>; - -def STORE16p_post: F1<(outs P:$ptr_wb), (ins D16:$val, P:$ptr, P:$off), - "w[$ptr ++ $off] = $val;", - [(set P:$ptr_wb, (post_store D16:$val, P:$ptr, P:$off))]>; -} - -// Memory stores without patterns - -let mayStore = 1 in { - -// Note: only works for $fp == FP -def STORE32fp_nimm7m4: F1<(outs), (ins DP:$val, P:$fp, i32imm:$off), - "[$fp - $off] = $val;", []>; - -def STORE32i: F1<(outs), (ins D:$val, I:$ptr), - "[$ptr] = $val;", []>; - -def STORE32i_inc: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr), 
- "[$ptr++] = $val;", []>; - -def STORE32i_dec: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr), - "[$ptr--] = $val;", []>; - -def STORE32i_post: F1<(outs I:$ptr_wb), (ins D:$val, I:$ptr, M:$off), - "[$ptr ++ $off] = $val;", []>; -} - -def : Pat<(truncstorei16 D:$val, PI:$ptr), - (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)), - lo16), PI:$ptr)>; - -def : Pat<(truncstorei16 (srl D:$val, (i16 16)), PI:$ptr), - (STORE16pi (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$val, D)), - hi16), PI:$ptr)>; - -def : Pat<(truncstorei8 D16L:$val, P:$ptr), - (STORE8p (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - (i16 (COPY_TO_REGCLASS D16L:$val, D16L)), - lo16), - P:$ptr)>; - -//===----------------------------------------------------------------------===// -// Table C-11. Move Instructions. -//===----------------------------------------------------------------------===// - -def MOVE: F1<(outs ALL:$dst), (ins ALL:$src), - "$dst = $src;", - []>; - -let Constraints = "$src1 = $dst" in -def MOVEcc: F1<(outs DP:$dst), (ins DP:$src1, DP:$src2, AnyCC:$cc), - "if $cc $dst = $src2;", - [(set DP:$dst, (select AnyCC:$cc, DP:$src2, DP:$src1))]>; - -let Defs = [AZ, AN, AC0, V] in { -def MOVEzext: F1<(outs D:$dst), (ins D16L:$src), - "$dst = $src (z);", - [(set D:$dst, (zext D16L:$src))]>; - -def MOVEsext: F1<(outs D:$dst), (ins D16L:$src), - "$dst = $src (x);", - [(set D:$dst, (sext D16L:$src))]>; - -def MOVEzext8: F1<(outs D:$dst), (ins D:$src), - "$dst = $src.b (z);", - [(set D:$dst, (and D:$src, 0xff))]>; - -def MOVEsext8: F1<(outs D:$dst), (ins D:$src), - "$dst = $src.b (x);", - [(set D:$dst, (sext_inreg D:$src, i8))]>; - -} - -def : Pat<(sext_inreg D16L:$src, i8), - (EXTRACT_SUBREG (MOVEsext8 - (INSERT_SUBREG (i32 (IMPLICIT_DEF)), - D16L:$src, - lo16)), - lo16)>; - -def : Pat<(sext_inreg D:$src, i16), - (MOVEsext (EXTRACT_SUBREG D:$src, lo16))>; - -def : Pat<(and D:$src, 0xffff), - (MOVEzext (EXTRACT_SUBREG D:$src, lo16))>; - -def : Pat<(i32 (anyext D16L:$src)), - (INSERT_SUBREG (i32 
(IMPLICIT_DEF)), - (i16 (COPY_TO_REGCLASS D16L:$src, D16L)), - lo16)>; - -// TODO Dreg = Dreg_byte (X/Z) - -// TODO Accumulator moves - -//===----------------------------------------------------------------------===// -// Table C-12. Stack Control Instructions -//===----------------------------------------------------------------------===// - -let Uses = [SP], Defs = [SP] in { -def PUSH: F1<(outs), (ins ALL:$src), - "[--sp] = $src;", []> { let mayStore = 1; } - -// NOTE: POP does not work for DP regs, use LOAD instead -def POP: F1<(outs ALL:$dst), (ins), - "$dst = [sp++];", []> { let mayLoad = 1; } -} - -// TODO: push/pop multiple - -def LINK: F2<(outs), (ins i32imm:$amount), - "link $amount;", []>; - -def UNLINK: F2<(outs), (ins), - "unlink;", []>; - -//===----------------------------------------------------------------------===// -// Table C-13. Control Code Bit Management Instructions -//===----------------------------------------------------------------------===// - -multiclass SETCC { - def dd : F1<(outs JustCC:$cc), (ins D:$a, D:$b), - !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf), - [(set JustCC:$cc, (opnode D:$a, D:$b))]>; - - def ri : F1<(outs JustCC:$cc), (ins DP:$a, i32imm:$b), - !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf), - [(set JustCC:$cc, (opnode DP:$a, imm3:$b))]>; - - def pp : F1<(outs JustCC:$cc), (ins P:$a, P:$b), - !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf), - []>; - - def ri_not : F1<(outs NotCC:$cc), (ins DP:$a, i32imm:$b), - !strconcat(!subst("XX", cond, "cc = $a XX $b"), suf), - [(set NotCC:$cc, (invnode DP:$a, imm3:$b))]>; -} - -defm SETEQ : SETCC; -defm SETLT : SETCC; -defm SETLE : SETCC; -defm SETULE : SETCC; - -def SETNEdd : F1<(outs NotCC:$cc), (ins D:$a, D:$b), - "cc = $a == $b;", - [(set NotCC:$cc, (setne D:$a, D:$b))]>; - -def : Pat<(setgt D:$a, D:$b), (SETLTdd D:$b, D:$a)>; -def : Pat<(setge D:$a, D:$b), (SETLEdd D:$b, D:$a)>; -def : Pat<(setugt D:$a, D:$b), (SETULTdd D:$b, D:$a)>; -def : Pat<(setuge D:$a, 
D:$b), (SETULEdd D:$b, D:$a)>; - -// TODO: compare pointer for P-P comparisons -// TODO: compare accumulator - -let Defs = [AC0] in -def OR_ac0_cc : F1<(outs), (ins JustCC:$cc), - "ac0 \\|= cc;", []>; - -let Uses = [AC0] in -def MOVE_cc_ac0 : F1<(outs JustCC:$cc), (ins), - "cc = ac0;", []>; - -def MOVE_ccncc : F1<(outs JustCC:$cc), (ins NotCC:$sb), - "cc = !cc;", []>; - -def MOVE_ncccc : F1<(outs NotCC:$cc), (ins JustCC:$sb), - "cc = !cc;", []>; - -def MOVECC_zext : F1<(outs D:$dst), (ins JustCC:$cc), - "$dst = $cc;", []>; - -def MOVENCC_z : F1<(outs D:$dst), (ins NotCC:$cc), - "$dst = cc;", []>; - -def MOVECC_nz : F1<(outs AnyCC:$cc), (ins D:$src), - "cc = $src;", - [(set AnyCC:$cc, (setne D:$src, 0))]>; - -//===----------------------------------------------------------------------===// -// Table C-14. Logical Operations Instructions -//===----------------------------------------------------------------------===// - -def AND: F1<(outs D:$dst), (ins D:$src1, D:$src2), - "$dst = $src1 & $src2;", - [(set D:$dst, (and D:$src1, D:$src2))]>; - -def NOT: F1<(outs D:$dst), (ins D:$src), - "$dst = ~$src;", - [(set D:$dst, (not D:$src))]>; - -def OR: F1<(outs D:$dst), (ins D:$src1, D:$src2), - "$dst = $src1 \\| $src2;", - [(set D:$dst, (or D:$src1, D:$src2))]>; - -def XOR: F1<(outs D:$dst), (ins D:$src1, D:$src2), - "$dst = $src1 ^ $src2;", - [(set D:$dst, (xor D:$src1, D:$src2))]>; - -// missing: BXOR, BXORSHIFT - -//===----------------------------------------------------------------------===// -// Table C-15. 
Bit Operations Instructions -//===----------------------------------------------------------------------===// - -let Constraints = "$src1 = $dst" in { -def BITCLR: F1<(outs D:$dst), (ins D:$src1, uimm5imask:$src2), - "bitclr($dst, $src2);", - [(set D:$dst, (and D:$src1, uimm5imask:$src2))]>; - -def BITSET: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2), - "bitset($dst, $src2);", - [(set D:$dst, (or D:$src1, uimm5mask:$src2))]>; - -def BITTGL: F1<(outs D:$dst), (ins D:$src1, uimm5mask:$src2), - "bittgl($dst, $src2);", - [(set D:$dst, (xor D:$src1, uimm5mask:$src2))]>; -} - -def BITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2), - "cc = bittst($src1, $src2);", - [(set JustCC:$cc, (setne (and D:$src1, uimm5mask:$src2), - (i32 0)))]>; - -def NBITTST: F1<(outs JustCC:$cc), (ins D:$src1, uimm5mask:$src2), - "cc = !bittst($src1, $src2);", - [(set JustCC:$cc, (seteq (and D:$src1, uimm5mask:$src2), - (i32 0)))]>; - -// TODO: DEPOSIT, EXTRACT, BITMUX - -def ONES: F2<(outs D16L:$dst), (ins D:$src), - "$dst = ones $src;", - [(set D16L:$dst, (trunc (ctpop D:$src)))]>; - -def : Pat<(ctpop D:$src), (MOVEzext (ONES D:$src))>; - -//===----------------------------------------------------------------------===// -// Table C-16. Shift / Rotate Instructions -//===----------------------------------------------------------------------===// - -multiclass SHIFT32 { - def i : F1<(outs D:$dst), (ins D:$src, i16imm:$amount), - !subst("XX", ops, "$dst XX= $amount;"), - [(set D:$dst, (opnode D:$src, (i16 uimm5:$amount)))]>; - def r : F1<(outs D:$dst), (ins D:$src, D:$amount), - !subst("XX", ops, "$dst XX= $amount;"), - [(set D:$dst, (opnode D:$src, D:$amount))]>; -} - -let Defs = [AZ, AN, V, VS], - Constraints = "$src = $dst" in { -defm SRA : SHIFT32>>">; -defm SRL : SHIFT32>">; -defm SLL : SHIFT32; -} - -// TODO: automatic switching between 2-addr and 3-addr (?) 
- -let Defs = [AZ, AN, V, VS] in { -def SLLr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount), - "$dst = lshift $src by $amount;", - [(set D:$dst, (shl D:$src, D16L:$amount))]>; - -// Arithmetic left-shift = saturing overflow. -def SLAr16: F2<(outs D:$dst), (ins D:$src, D16L:$amount), - "$dst = ashift $src by $amount;", - [(set D:$dst, (sra D:$src, (ineg D16L:$amount)))]>; - -def SRA16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount), - "$dst = $src >>> $amount;", - [(set D16:$dst, (sra D16:$src, (i16 uimm4:$amount)))]>; - -def SRL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount), - "$dst = $src >> $amount;", - [(set D16:$dst, (srl D16:$src, (i16 uimm4:$amount)))]>; - -// Arithmetic left-shift = saturing overflow. -def SLA16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount), - "$dst = ashift $src BY $amount;", - [(set D16:$dst, (srl D16:$src, (ineg D16L:$amount)))]>; - -def SLL16i: F1<(outs D16:$dst), (ins D16:$src, i16imm:$amount), - "$dst = $src << $amount;", - [(set D16:$dst, (shl D16:$src, (i16 uimm4:$amount)))]>; - -def SLL16r: F1<(outs D16:$dst), (ins D16:$src, D16L:$amount), - "$dst = lshift $src by $amount;", - [(set D16:$dst, (shl D16:$src, D16L:$amount))]>; - -} - -//===----------------------------------------------------------------------===// -// Table C-17. 
Arithmetic Operations Instructions -//===----------------------------------------------------------------------===// - -// TODO: ABS - -let Defs = [AZ, AN, AC0, V, VS] in { - -def ADD: F1<(outs D:$dst), (ins D:$src1, D:$src2), - "$dst = $src1 + $src2;", - [(set D:$dst, (add D:$src1, D:$src2))]>; - -def ADD16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2), - "$dst = $src1 + $src2;", - [(set D16:$dst, (add D16:$src1, D16:$src2))]>; - -let Constraints = "$src1 = $dst" in -def ADDimm7: F1<(outs D:$dst), (ins D:$src1, i32imm:$src2), - "$dst += $src2;", - [(set D:$dst, (add D:$src1, imm7:$src2))]>; - -def SUB: F1<(outs D:$dst), (ins D:$src1, D:$src2), - "$dst = $src1 - $src2;", - [(set D:$dst, (sub D:$src1, D:$src2))]>; - -def SUB16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2), - "$dst = $src1 - $src2;", - [(set D16:$dst, (sub D16:$src1, D16:$src2))]>; - -} - -def : Pat<(addc D:$src1, D:$src2), (ADD D:$src1, D:$src2)>; -def : Pat<(subc D:$src1, D:$src2), (SUB D:$src1, D:$src2)>; - -let Defs = [AZ, AN, V, VS] in -def NEG: F1<(outs D:$dst), (ins D:$src), - "$dst = -$src;", - [(set D:$dst, (ineg D:$src))]>; - -// No pattern, it would confuse isel to have two i32 = i32+i32 patterns -def ADDpp: F1<(outs P:$dst), (ins P:$src1, P:$src2), - "$dst = $src1 + $src2;", []>; - -let Constraints = "$src1 = $dst" in -def ADDpp_imm7: F1<(outs P:$dst), (ins P:$src1, i32imm:$src2), - "$dst += $src2;", []>; - -let Defs = [AZ, AN, V] in -def ADD_RND20: F2<(outs D16:$dst), (ins D:$src1, D:$src2), - "$dst = $src1 + $src2 (rnd20);", []>; - -let Defs = [V, VS] in { -def MUL16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2), - "$dst = $src1 * $src2 (is);", - [(set D16:$dst, (mul D16:$src1, D16:$src2))]>; - -def MULHS16: F2<(outs D16:$dst), (ins D16:$src1, D16:$src2), - "$dst = $src1 * $src2 (ih);", - [(set D16:$dst, (mulhs D16:$src1, D16:$src2))]>; - -def MULhh32s: F2<(outs D:$dst), (ins D16:$src1, D16:$src2), - "$dst = $src1 * $src2 (is);", - [(set D:$dst, (mul (sext D16:$src1), (sext 
D16:$src2)))]>; - -def MULhh32u: F2<(outs D:$dst), (ins D16:$src1, D16:$src2), - "$dst = $src1 * $src2 (is);", - [(set D:$dst, (mul (zext D16:$src1), (zext D16:$src2)))]>; -} - - -let Constraints = "$src1 = $dst" in -def MUL32: F1<(outs D:$dst), (ins D:$src1, D:$src2), - "$dst *= $src2;", - [(set D:$dst, (mul D:$src1, D:$src2))]>; - -//===----------------------------------------------------------------------===// -// Table C-18. External Exent Management Instructions -//===----------------------------------------------------------------------===// - -def IDLE : F1<(outs), (ins), "idle;", [(int_bfin_idle)]>; -def CSYNC : F1<(outs), (ins), "csync;", [(int_bfin_csync)]>; -def SSYNC : F1<(outs), (ins), "ssync;", [(int_bfin_ssync)]>; -def EMUEXCPT : F1<(outs), (ins), "emuexcpt;", []>; -def CLI : F1<(outs D:$mask), (ins), "cli $mask;", []>; -def STI : F1<(outs), (ins D:$mask), "sti $mask;", []>; -def RAISE : F1<(outs), (ins i32imm:$itr), "raise $itr;", []>; -def EXCPT : F1<(outs), (ins i32imm:$exc), "excpt $exc;", []>; -def NOP : F1<(outs), (ins), "nop;", []>; -def MNOP : F2<(outs), (ins), "mnop;", []>; -def ABORT : F1<(outs), (ins), "abort;", []>; - -//===----------------------------------------------------------------------===// -// Table C-19. Cache Control Instructions -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Table C-20. 
Video Pixel Operations Instructions -//===----------------------------------------------------------------------===// - -def ALIGN8 : F2<(outs D:$dst), (ins D:$src1, D:$src2), - "$dst = align8($src1, $src2);", - [(set D:$dst, (or (shl D:$src1, (i32 24)), - (srl D:$src2, (i32 8))))]>; - -def ALIGN16 : F2<(outs D:$dst), (ins D:$src1, D:$src2), - "$dst = align16($src1, $src2);", - [(set D:$dst, (or (shl D:$src1, (i32 16)), - (srl D:$src2, (i32 16))))]>; - -def ALIGN24 : F2<(outs D:$dst), (ins D:$src1, D:$src2), - "$dst = align16($src1, $src2);", - [(set D:$dst, (or (shl D:$src1, (i32 8)), - (srl D:$src2, (i32 24))))]>; - -def DISALGNEXCPT : F2<(outs), (ins), "disalignexcpt;", []>; - -// TODO: BYTEOP3P, BYTEOP16P, BYTEOP1P, BYTEOP2P, BYTEOP16M, SAA, -// BYTEPACK, BYTEUNPACK - -// Table C-21. Vector Operations Instructions - -// Patterns -def : Pat<(BfinCall (i32 tglobaladdr:$dst)), - (CALLa tglobaladdr:$dst)>; -def : Pat<(BfinCall (i32 texternalsym:$dst)), - (CALLa texternalsym:$dst)>; -def : Pat<(i16 (trunc D:$src)), - (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS D:$src, D)), lo16)>; diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp deleted file mode 100644 index 9120e15..0000000 --- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp +++ /dev/null @@ -1,104 +0,0 @@ -//===- BlackfinIntrinsicInfo.cpp - Intrinsic Information --------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Blackfin implementation of TargetIntrinsicInfo. 
-// -//===----------------------------------------------------------------------===// - -#include "BlackfinIntrinsicInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/Support/raw_ostream.h" -#include - -using namespace llvm; - -namespace bfinIntrinsic { - - enum ID { - last_non_bfin_intrinsic = Intrinsic::num_intrinsics-1, -#define GET_INTRINSIC_ENUM_VALUES -#include "BlackfinGenIntrinsics.inc" -#undef GET_INTRINSIC_ENUM_VALUES - , num_bfin_intrinsics - }; - -} - -std::string BlackfinIntrinsicInfo::getName(unsigned IntrID, Type **Tys, - unsigned numTys) const { - static const char *const names[] = { -#define GET_INTRINSIC_NAME_TABLE -#include "BlackfinGenIntrinsics.inc" -#undef GET_INTRINSIC_NAME_TABLE - }; - - assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded"); - if (IntrID < Intrinsic::num_intrinsics) - return 0; - assert(IntrID < bfinIntrinsic::num_bfin_intrinsics && "Invalid intrinsic ID"); - - std::string Result(names[IntrID - Intrinsic::num_intrinsics]); - return Result; -} - -unsigned -BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const { - if (Len < 5 || Name[4] != '.' || Name[0] != 'l' || Name[1] != 'l' - || Name[2] != 'v' || Name[3] != 'm') - return 0; // All intrinsics start with 'llvm.' - -#define GET_FUNCTION_RECOGNIZER -#include "BlackfinGenIntrinsics.inc" -#undef GET_FUNCTION_RECOGNIZER - return 0; -} - -bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const { - // Overload Table - const bool OTable[] = { -#define GET_INTRINSIC_OVERLOAD_TABLE -#include "BlackfinGenIntrinsics.inc" -#undef GET_INTRINSIC_OVERLOAD_TABLE - }; - if (IntrID == 0) - return false; - else - return OTable[IntrID - Intrinsic::num_intrinsics]; -} - -/// This defines the "getAttributes(ID id)" method. 
-#define GET_INTRINSIC_ATTRIBUTES -#include "BlackfinGenIntrinsics.inc" -#undef GET_INTRINSIC_ATTRIBUTES - -static FunctionType *getType(LLVMContext &Context, unsigned id) { - Type *ResultTy = NULL; - SmallVector ArgTys; - bool IsVarArg = false; - -#define GET_INTRINSIC_GENERATOR -#include "BlackfinGenIntrinsics.inc" -#undef GET_INTRINSIC_GENERATOR - - return FunctionType::get(ResultTy, ArgTys, IsVarArg); -} - -Function *BlackfinIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID, - Type **Tys, - unsigned numTy) const { - assert(!isOverloaded(IntrID) && "Blackfin intrinsics are not overloaded"); - AttrListPtr AList = getAttributes((bfinIntrinsic::ID) IntrID); - return cast(M->getOrInsertFunction(getName(IntrID), - getType(M->getContext(), IntrID), - AList)); -} diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.h b/lib/Target/Blackfin/BlackfinIntrinsicInfo.h deleted file mode 100644 index f05db5a..0000000 --- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.h +++ /dev/null @@ -1,32 +0,0 @@ -//===- BlackfinIntrinsicInfo.h - Blackfin Intrinsic Information -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Blackfin implementation of TargetIntrinsicInfo. 
-// -//===----------------------------------------------------------------------===// -#ifndef BLACKFININTRINSICS_H -#define BLACKFININTRINSICS_H - -#include "llvm/Target/TargetIntrinsicInfo.h" - -namespace llvm { - - class BlackfinIntrinsicInfo : public TargetIntrinsicInfo { - public: - std::string getName(unsigned IntrID, Type **Tys = 0, - unsigned numTys = 0) const; - unsigned lookupName(const char *Name, unsigned Len) const; - bool isOverloaded(unsigned IID) const; - Function *getDeclaration(Module *M, unsigned ID, Type **Tys = 0, - unsigned numTys = 0) const; - }; - -} - -#endif diff --git a/lib/Target/Blackfin/BlackfinIntrinsics.td b/lib/Target/Blackfin/BlackfinIntrinsics.td deleted file mode 100644 index ce21b08..0000000 --- a/lib/Target/Blackfin/BlackfinIntrinsics.td +++ /dev/null @@ -1,34 +0,0 @@ -//===- BlackfinIntrinsics.td - Defines Blackfin intrinsics -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines all of the blackfin-specific intrinsics. -// -//===----------------------------------------------------------------------===// - -let TargetPrefix = "bfin", isTarget = 1 in { - -//===----------------------------------------------------------------------===// -// Core synchronisation etc. -// -// These intrinsics have sideeffects. Each represent a single instruction, but -// workarounds are sometimes required depending on the cpu. 
- -// Execute csync instruction with workarounds -def int_bfin_csync : GCCBuiltin<"__builtin_bfin_csync">, - Intrinsic<[]>; - -// Execute ssync instruction with workarounds -def int_bfin_ssync : GCCBuiltin<"__builtin_bfin_ssync">, - Intrinsic<[]>; - -// Execute idle instruction with workarounds -def int_bfin_idle : GCCBuiltin<"__builtin_bfin_idle">, - Intrinsic<[]>; - -} diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp deleted file mode 100644 index 0d415c5..0000000 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp +++ /dev/null @@ -1,344 +0,0 @@ -//===- BlackfinRegisterInfo.cpp - Blackfin Register Information -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Blackfin implementation of the TargetRegisterInfo -// class. 
-// -//===----------------------------------------------------------------------===// - -#include "Blackfin.h" -#include "BlackfinRegisterInfo.h" -#include "BlackfinSubtarget.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Type.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" - -#define GET_REGINFO_TARGET_DESC -#include "BlackfinGenRegisterInfo.inc" - -using namespace llvm; - -BlackfinRegisterInfo::BlackfinRegisterInfo(BlackfinSubtarget &st, - const TargetInstrInfo &tii) - : BlackfinGenRegisterInfo(BF::RETS), Subtarget(st), TII(tii) {} - -const unsigned* -BlackfinRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - using namespace BF; - static const unsigned CalleeSavedRegs[] = { - FP, - R4, R5, R6, R7, - P3, P4, P5, - 0 }; - return CalleeSavedRegs; -} - -BitVector -BlackfinRegisterInfo::getReservedRegs(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - using namespace BF; - BitVector Reserved(getNumRegs()); - Reserved.set(AZ); - Reserved.set(AN); - Reserved.set(AQ); - Reserved.set(AC0); - Reserved.set(AC1); - Reserved.set(AV0); - Reserved.set(AV0S); - Reserved.set(AV1); - Reserved.set(AV1S); - Reserved.set(V); - Reserved.set(VS); - Reserved.set(CYCLES).set(CYCLES2); - Reserved.set(L0); - Reserved.set(L1); - Reserved.set(L2); - Reserved.set(L3); - Reserved.set(SP); - Reserved.set(RETS); - if (TFI->hasFP(MF)) - Reserved.set(FP); - return Reserved; -} - -bool BlackfinRegisterInfo:: -requiresRegisterScavenging(const MachineFunction &MF) const { - return true; -} - -// Emit instructions to 
add delta to D/P register. ScratchReg must be of the -// same class as Reg (P). -void BlackfinRegisterInfo::adjustRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, - unsigned Reg, - unsigned ScratchReg, - int delta) const { - if (!delta) - return; - if (isInt<7>(delta)) { - BuildMI(MBB, I, DL, TII.get(BF::ADDpp_imm7), Reg) - .addReg(Reg) // No kill on two-addr operand - .addImm(delta); - return; - } - - // We must load delta into ScratchReg and add that. - loadConstant(MBB, I, DL, ScratchReg, delta); - if (BF::PRegClass.contains(Reg)) { - assert(BF::PRegClass.contains(ScratchReg) && - "ScratchReg must be a P register"); - BuildMI(MBB, I, DL, TII.get(BF::ADDpp), Reg) - .addReg(Reg, RegState::Kill) - .addReg(ScratchReg, RegState::Kill); - } else { - assert(BF::DRegClass.contains(Reg) && "Reg must be a D or P register"); - assert(BF::DRegClass.contains(ScratchReg) && - "ScratchReg must be a D register"); - BuildMI(MBB, I, DL, TII.get(BF::ADD), Reg) - .addReg(Reg, RegState::Kill) - .addReg(ScratchReg, RegState::Kill); - } -} - -// Emit instructions to load a constant into D/P register -void BlackfinRegisterInfo::loadConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, - unsigned Reg, - int value) const { - if (isInt<7>(value)) { - BuildMI(MBB, I, DL, TII.get(BF::LOADimm7), Reg).addImm(value); - return; - } - - if (isUInt<16>(value)) { - BuildMI(MBB, I, DL, TII.get(BF::LOADuimm16), Reg).addImm(value); - return; - } - - if (isInt<16>(value)) { - BuildMI(MBB, I, DL, TII.get(BF::LOADimm16), Reg).addImm(value); - return; - } - - // We must split into halves - BuildMI(MBB, I, DL, - TII.get(BF::LOAD16i), getSubReg(Reg, BF::hi16)) - .addImm((value >> 16) & 0xffff) - .addReg(Reg, RegState::ImplicitDefine); - BuildMI(MBB, I, DL, - TII.get(BF::LOAD16i), getSubReg(Reg, BF::lo16)) - .addImm(value & 0xffff) - .addReg(Reg, RegState::ImplicitKill) - .addReg(Reg, RegState::ImplicitDefine); -} - -void BlackfinRegisterInfo:: 
-eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (!TFI->hasReservedCallFrame(MF)) { - int64_t Amount = I->getOperand(0).getImm(); - if (Amount != 0) { - assert(Amount%4 == 0 && "Unaligned call frame size"); - if (I->getOpcode() == BF::ADJCALLSTACKDOWN) { - adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, -Amount); - } else { - assert(I->getOpcode() == BF::ADJCALLSTACKUP && - "Unknown call frame pseudo instruction"); - adjustRegister(MBB, I, I->getDebugLoc(), BF::SP, BF::P1, Amount); - } - } - } - MBB.erase(I); -} - -/// findScratchRegister - Find a 'free' register. Try for a call-clobbered -/// register first and then a spilled callee-saved register if that fails. -static unsigned findScratchRegister(MachineBasicBlock::iterator II, - RegScavenger *RS, - const TargetRegisterClass *RC, - int SPAdj) { - assert(RS && "Register scavenging must be on"); - unsigned Reg = RS->FindUnusedReg(RC); - if (Reg == 0) - Reg = RS->scavengeRegister(RC, II, SPAdj); - return Reg; -} - -void -BlackfinRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { - MachineInstr &MI = *II; - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - DebugLoc DL = MI.getDebugLoc(); - - unsigned FIPos; - for (FIPos=0; !MI.getOperand(FIPos).isFI(); ++FIPos) { - assert(FIPos < MI.getNumOperands() && - "Instr doesn't have FrameIndex operand!"); - } - int FrameIndex = MI.getOperand(FIPos).getIndex(); - assert(FIPos+1 < MI.getNumOperands() && MI.getOperand(FIPos+1).isImm()); - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) - + MI.getOperand(FIPos+1).getImm(); - unsigned BaseReg = BF::FP; - if (TFI->hasFP(MF)) { - assert(SPAdj==0 && "Unexpected SP adjust in function with frame pointer"); - } 
else { - BaseReg = BF::SP; - Offset += MF.getFrameInfo()->getStackSize() + SPAdj; - } - - bool isStore = false; - - switch (MI.getOpcode()) { - case BF::STORE32fi: - isStore = true; - case BF::LOAD32fi: { - assert(Offset%4 == 0 && "Unaligned i32 stack access"); - assert(FIPos==1 && "Bad frame index operand"); - MI.getOperand(FIPos).ChangeToRegister(BaseReg, false); - MI.getOperand(FIPos+1).setImm(Offset); - if (isUInt<6>(Offset)) { - MI.setDesc(TII.get(isStore - ? BF::STORE32p_uimm6m4 - : BF::LOAD32p_uimm6m4)); - return; - } - if (BaseReg == BF::FP && isUInt<7>(-Offset)) { - MI.setDesc(TII.get(isStore - ? BF::STORE32fp_nimm7m4 - : BF::LOAD32fp_nimm7m4)); - MI.getOperand(FIPos+1).setImm(-Offset); - return; - } - if (isInt<18>(Offset)) { - MI.setDesc(TII.get(isStore - ? BF::STORE32p_imm18m4 - : BF::LOAD32p_imm18m4)); - return; - } - // Use RegScavenger to calculate proper offset... - MI.dump(); - llvm_unreachable("Stack frame offset too big"); - break; - } - case BF::ADDpp: { - assert(MI.getOperand(0).isReg() && "ADD instruction needs a register"); - unsigned DestReg = MI.getOperand(0).getReg(); - // We need to produce a stack offset in a P register. We emit: - // P0 = offset; - // P0 = BR + P0; - assert(FIPos==1 && "Bad frame index operand"); - loadConstant(MBB, II, DL, DestReg, Offset); - MI.getOperand(1).ChangeToRegister(DestReg, false, false, true); - MI.getOperand(2).ChangeToRegister(BaseReg, false); - break; - } - case BF::STORE16fi: - isStore = true; - case BF::LOAD16fi: { - assert(Offset%2 == 0 && "Unaligned i16 stack access"); - assert(FIPos==1 && "Bad frame index operand"); - // We need a P register to use as an address - unsigned ScratchReg = findScratchRegister(II, RS, &BF::PRegClass, SPAdj); - assert(ScratchReg && "Could not scavenge register"); - loadConstant(MBB, II, DL, ScratchReg, Offset); - BuildMI(MBB, II, DL, TII.get(BF::ADDpp), ScratchReg) - .addReg(ScratchReg, RegState::Kill) - .addReg(BaseReg); - MI.setDesc(TII.get(isStore ? 
BF::STORE16pi : BF::LOAD16pi)); - MI.getOperand(1).ChangeToRegister(ScratchReg, false, false, true); - MI.RemoveOperand(2); - break; - } - case BF::STORE8fi: { - // This is an AnyCC spill, we need a scratch register. - assert(FIPos==1 && "Bad frame index operand"); - MachineOperand SpillReg = MI.getOperand(0); - unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj); - assert(ScratchReg && "Could not scavenge register"); - if (SpillReg.getReg()==BF::NCC) { - BuildMI(MBB, II, DL, TII.get(BF::MOVENCC_z), ScratchReg) - .addOperand(SpillReg); - BuildMI(MBB, II, DL, TII.get(BF::BITTGL), ScratchReg) - .addReg(ScratchReg).addImm(0); - } else { - BuildMI(MBB, II, DL, TII.get(BF::MOVECC_zext), ScratchReg) - .addOperand(SpillReg); - } - // STORE D - MI.setDesc(TII.get(BF::STORE8p_imm16)); - MI.getOperand(0).ChangeToRegister(ScratchReg, false, false, true); - MI.getOperand(FIPos).ChangeToRegister(BaseReg, false); - MI.getOperand(FIPos+1).setImm(Offset); - break; - } - case BF::LOAD8fi: { - // This is an restore, we need a scratch register. 
- assert(FIPos==1 && "Bad frame index operand"); - MachineOperand SpillReg = MI.getOperand(0); - unsigned ScratchReg = findScratchRegister(II, RS, &BF::DRegClass, SPAdj); - assert(ScratchReg && "Could not scavenge register"); - MI.setDesc(TII.get(BF::LOAD32p_imm16_8z)); - MI.getOperand(0).ChangeToRegister(ScratchReg, true); - MI.getOperand(FIPos).ChangeToRegister(BaseReg, false); - MI.getOperand(FIPos+1).setImm(Offset); - ++II; - if (SpillReg.getReg()==BF::CC) { - // CC = D - BuildMI(MBB, II, DL, TII.get(BF::MOVECC_nz), BF::CC) - .addReg(ScratchReg, RegState::Kill); - } else { - // Restore NCC (CC = D==0) - BuildMI(MBB, II, DL, TII.get(BF::SETEQri_not), BF::NCC) - .addReg(ScratchReg, RegState::Kill) - .addImm(0); - } - break; - } - default: - llvm_unreachable("Cannot eliminate frame index"); - break; - } -} - -unsigned -BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - return TFI->hasFP(MF) ? BF::FP : BF::SP; -} - -unsigned BlackfinRegisterInfo::getEHExceptionRegister() const { - llvm_unreachable("What is the exception register"); - return 0; -} - -unsigned BlackfinRegisterInfo::getEHHandlerRegister() const { - llvm_unreachable("What is the exception handler register"); - return 0; -} diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h deleted file mode 100644 index 6ac22af..0000000 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.h +++ /dev/null @@ -1,77 +0,0 @@ -//===- BlackfinRegisterInfo.h - Blackfin Register Information ..-*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Blackfin implementation of the TargetRegisterInfo -// class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef BLACKFINREGISTERINFO_H -#define BLACKFINREGISTERINFO_H - -#include "llvm/Target/TargetRegisterInfo.h" - -#define GET_REGINFO_HEADER -#include "BlackfinGenRegisterInfo.inc" - -namespace llvm { - - class BlackfinSubtarget; - class TargetInstrInfo; - class Type; - - struct BlackfinRegisterInfo : public BlackfinGenRegisterInfo { - BlackfinSubtarget &Subtarget; - const TargetInstrInfo &TII; - - BlackfinRegisterInfo(BlackfinSubtarget &st, const TargetInstrInfo &tii); - - /// Code Generation virtual methods... - const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - - BitVector getReservedRegs(const MachineFunction &MF) const; - - // getSubReg implemented by tablegen - - const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const { - return &BF::PRegClass; - } - - bool requiresRegisterScavenging(const MachineFunction &MF) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; - - unsigned getFrameRegister(const MachineFunction &MF) const; - - // Exception handling queries. 
- unsigned getEHExceptionRegister() const; - unsigned getEHHandlerRegister() const; - - // Utility functions - void adjustRegister(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, - unsigned Reg, - unsigned ScratchReg, - int delta) const; - void loadConstant(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - DebugLoc DL, - unsigned Reg, - int value) const; - }; - -} // end namespace llvm - -#endif diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.td b/lib/Target/Blackfin/BlackfinRegisterInfo.td deleted file mode 100644 index 1c42205..0000000 --- a/lib/Target/Blackfin/BlackfinRegisterInfo.td +++ /dev/null @@ -1,277 +0,0 @@ -//===- BlackfinRegisterInfo.td - Blackfin Register defs ----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Declarations that describe the Blackfin register file -//===----------------------------------------------------------------------===// - -// Subregs are: -// 1: .L -// 2: .H -// 3: .W (32 low bits of 40-bit accu) -let Namespace = "BF" in { -def lo16 : SubRegIndex; -def hi16 : SubRegIndex; -def lo32 : SubRegIndex; -def hi32 : SubRegIndex; -} - -// Registers are identified with 3-bit group and 3-bit ID numbers. 
-class BlackfinReg : Register { - field bits<3> Group; - field bits<3> Num; - let Namespace = "BF"; -} - -// Rc - 1-bit registers -class Rc bitno, string n> : BlackfinReg { - field bits<5> BitNum = bitno; -} - -// Rs - 16-bit integer registers -class Rs group, bits<3> num, bits<1> hi, string n> : BlackfinReg { - let Group = group; - let Num = num; - field bits<1> High = hi; -} - -// Ri - 32-bit integer registers with subregs -class Ri group, bits<3> num, string n> : BlackfinReg { - let Group = group; - let Num = num; -} - -// Ra 40-bit accumulator registers -class Ra num, string n, list subs> : BlackfinReg { - let SubRegs = subs; - let SubRegIndices = [hi32, lo32]; - let Group = 4; - let Num = num; -} - -// Two halves of 32-bit register -multiclass Rss group, bits<3> num, string n> { - def H : Rs; - def L : Rs; -} - -// Rii - 32-bit integer registers with subregs -class Rii group, bits<3> num, string n, list subs> - : BlackfinReg { - let SubRegs = subs; - let SubRegIndices = [hi16, lo16]; - let Group = group; - let Num = num; -} - -// Status bits are all part of ASTAT -def AZ : Rc<0, "az">; -def AN : Rc<1, "an">; -def CC : Rc<5, "cc">, DwarfRegNum<[34]>; -def NCC : Rc<5, "!cc"> { let Aliases = [CC]; } -def AQ : Rc<6, "aq">; -def AC0 : Rc<12, "ac0">; -def AC1 : Rc<13, "ac1">; -def AV0 : Rc<16, "av0">; -def AV0S : Rc<17, "av0s">; -def AV1 : Rc<18, "av1">; -def AV1S : Rc<19, "av1s">; -def V : Rc<24, "v">; -def VS : Rc<25, "vs">; -// Skipped non-status bits: AC0_COPY, V_COPY, RND_MOD - -// Group 0: Integer registers -defm R0 : Rss<0, 0, "r0">; -def R0 : Rii<0, 0, "r0", [R0H, R0L]>, DwarfRegNum<[0]>; -defm R1 : Rss<0, 1, "r1">; -def R1 : Rii<0, 1, "r1", [R1H, R1L]>, DwarfRegNum<[1]>; -defm R2 : Rss<0, 2, "r2">; -def R2 : Rii<0, 2, "r2", [R2H, R2L]>, DwarfRegNum<[2]>; -defm R3 : Rss<0, 3, "r3">; -def R3 : Rii<0, 3, "r3", [R3H, R3L]>, DwarfRegNum<[3]>; -defm R4 : Rss<0, 4, "r4">; -def R4 : Rii<0, 4, "r4", [R4H, R4L]>, DwarfRegNum<[4]>; -defm R5 : Rss<0, 5, "r5">; -def R5 
: Rii<0, 5, "r5", [R5H, R5L]>, DwarfRegNum<[5]>; -defm R6 : Rss<0, 6, "r6">; -def R6 : Rii<0, 6, "r6", [R6H, R6L]>, DwarfRegNum<[6]>; -defm R7 : Rss<0, 7, "r7">; -def R7 : Rii<0, 7, "r7", [R7H, R7L]>, DwarfRegNum<[7]>; - -// Group 1: Pointer registers -defm P0 : Rss<1, 0, "p0">; -def P0 : Rii<1, 0, "p0", [P0H, P0L]>, DwarfRegNum<[8]>; -defm P1 : Rss<1, 1, "p1">; -def P1 : Rii<1, 1, "p1", [P1H, P1L]>, DwarfRegNum<[9]>; -defm P2 : Rss<1, 2, "p2">; -def P2 : Rii<1, 2, "p2", [P2H, P2L]>, DwarfRegNum<[10]>; -defm P3 : Rss<1, 3, "p3">; -def P3 : Rii<1, 3, "p3", [P3H, P3L]>, DwarfRegNum<[11]>; -defm P4 : Rss<1, 4, "p4">; -def P4 : Rii<1, 4, "p4", [P4H, P4L]>, DwarfRegNum<[12]>; -defm P5 : Rss<1, 5, "p5">; -def P5 : Rii<1, 5, "p5", [P5H, P5L]>, DwarfRegNum<[13]>; -defm SP : Rss<1, 6, "sp">; -def SP : Rii<1, 6, "sp", [SPH, SPL]>, DwarfRegNum<[14]>; -defm FP : Rss<1, 7, "fp">; -def FP : Rii<1, 7, "fp", [FPH, FPL]>, DwarfRegNum<[15]>; - -// Group 2: Index registers -defm I0 : Rss<2, 0, "i0">; -def I0 : Rii<2, 0, "i0", [I0H, I0L]>, DwarfRegNum<[16]>; -defm I1 : Rss<2, 1, "i1">; -def I1 : Rii<2, 1, "i1", [I1H, I1L]>, DwarfRegNum<[17]>; -defm I2 : Rss<2, 2, "i2">; -def I2 : Rii<2, 2, "i2", [I2H, I2L]>, DwarfRegNum<[18]>; -defm I3 : Rss<2, 3, "i3">; -def I3 : Rii<2, 3, "i3", [I3H, I3L]>, DwarfRegNum<[19]>; -defm M0 : Rss<2, 4, "m0">; -def M0 : Rii<2, 4, "m0", [M0H, M0L]>, DwarfRegNum<[20]>; -defm M1 : Rss<2, 5, "m1">; -def M1 : Rii<2, 5, "m1", [M1H, M1L]>, DwarfRegNum<[21]>; -defm M2 : Rss<2, 6, "m2">; -def M2 : Rii<2, 6, "m2", [M2H, M2L]>, DwarfRegNum<[22]>; -defm M3 : Rss<2, 7, "m3">; -def M3 : Rii<2, 7, "m3", [M3H, M3L]>, DwarfRegNum<[23]>; - -// Group 3: Cyclic indexing registers -defm B0 : Rss<3, 0, "b0">; -def B0 : Rii<3, 0, "b0", [B0H, B0L]>, DwarfRegNum<[24]>; -defm B1 : Rss<3, 1, "b1">; -def B1 : Rii<3, 1, "b1", [B1H, B1L]>, DwarfRegNum<[25]>; -defm B2 : Rss<3, 2, "b2">; -def B2 : Rii<3, 2, "b2", [B2H, B2L]>, DwarfRegNum<[26]>; -defm B3 : Rss<3, 3, "b3">; -def B3 : 
Rii<3, 3, "b3", [B3H, B3L]>, DwarfRegNum<[27]>; -defm L0 : Rss<3, 4, "l0">; -def L0 : Rii<3, 4, "l0", [L0H, L0L]>, DwarfRegNum<[28]>; -defm L1 : Rss<3, 5, "l1">; -def L1 : Rii<3, 5, "l1", [L1H, L1L]>, DwarfRegNum<[29]>; -defm L2 : Rss<3, 6, "l2">; -def L2 : Rii<3, 6, "l2", [L2H, L2L]>, DwarfRegNum<[30]>; -defm L3 : Rss<3, 7, "l3">; -def L3 : Rii<3, 7, "l3", [L3H, L3L]>, DwarfRegNum<[31]>; - -// Accumulators -def A0X : Ri <4, 0, "a0.x">; -defm A0 : Rss<4, 1, "a0">; -def A0W : Rii<4, 1, "a0.w", [A0H, A0L]>, DwarfRegNum<[32]>; -def A0 : Ra <0, "a0", [A0X, A0W]>; - -def A1X : Ri <4, 2, "a1.x">; -defm A1 : Rss<4, 3, "a1">; -def A1W : Rii<4, 3, "a1.w", [A1H, A1L]>, DwarfRegNum<[33]>; -def A1 : Ra <2, "a1", [A1X, A1W]>; - -def RETS : Ri<4, 7, "rets">, DwarfRegNum<[35]>; -def RETI : Ri<7, 3, "reti">, DwarfRegNum<[36]>; -def RETX : Ri<7, 4, "retx">, DwarfRegNum<[37]>; -def RETN : Ri<7, 5, "retn">, DwarfRegNum<[38]>; -def RETE : Ri<7, 6, "rete">, DwarfRegNum<[39]>; - -def ASTAT : Ri<4, 6, "astat">, DwarfRegNum<[40]> { - let Aliases = [AZ, AN, CC, NCC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS]; -} - -def SEQSTAT : Ri<7, 1, "seqstat">, DwarfRegNum<[41]>; -def USP : Ri<7, 0, "usp">, DwarfRegNum<[42]>; -def EMUDAT : Ri<7, 7, "emudat">, DwarfRegNum<[43]>; -def SYSCFG : Ri<7, 2, "syscfg">; -def CYCLES : Ri<6, 6, "cycles">; -def CYCLES2 : Ri<6, 7, "cycles2">; - -// Hardware loops -def LT0 : Ri<6, 1, "lt0">, DwarfRegNum<[44]>; -def LT1 : Ri<6, 4, "lt1">, DwarfRegNum<[45]>; -def LC0 : Ri<6, 0, "lc0">, DwarfRegNum<[46]>; -def LC1 : Ri<6, 3, "lc1">, DwarfRegNum<[47]>; -def LB0 : Ri<6, 2, "lb0">, DwarfRegNum<[48]>; -def LB1 : Ri<6, 5, "lb1">, DwarfRegNum<[49]>; - -// Register classes. 
-def D16L : RegisterClass<"BF", [i16], 16, (sequence "R%uL", 0, 7)>; - -def D16H : RegisterClass<"BF", [i16], 16, (sequence "R%uH", 0, 7)>; - -def D16 : RegisterClass<"BF", [i16], 16, (add D16L, D16H)>; - -def P16L : RegisterClass<"BF", [i16], 16, - (add (sequence "P%uL", 0, 5), SPL, FPL)>; - -def P16H : RegisterClass<"BF", [i16], 16, - (add (sequence "P%uH", 0, 5), SPH, FPH)>; - -def P16 : RegisterClass<"BF", [i16], 16, (add P16L, P16H)>; - -def DP16 : RegisterClass<"BF", [i16], 16, (add D16, P16)>; - -def DP16L : RegisterClass<"BF", [i16], 16, (add D16L, P16L)>; - -def DP16H : RegisterClass<"BF", [i16], 16, (add D16H, P16H)>; - -def GR16 : RegisterClass<"BF", [i16], 16, - (add DP16, - I0H, I0L, I1H, I1L, I2H, I2L, I3H, I3L, - M0H, M0L, M1H, M1L, M2H, M2L, M3H, M3L, - B0H, B0L, B1H, B1L, B2H, B2L, B3H, B3L, - L0H, L0L, L1H, L1L, L2H, L2L, L3H, L3L)>; - -def D : RegisterClass<"BF", [i32], 32, (sequence "R%u", 0, 7)> { - let SubRegClasses = [(D16L lo16), (D16H hi16)]; -} - -def P : RegisterClass<"BF", [i32], 32, (add (sequence "P%u", 0, 5), FP, SP)> { - let SubRegClasses = [(P16L lo16), (P16H hi16)]; -} - -def DP : RegisterClass<"BF", [i32], 32, (add D, P)> { - let SubRegClasses = [(DP16L lo16), (DP16H hi16)]; -} - -def I : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3)>; -def M : RegisterClass<"BF", [i32], 32, (add M0, M1, M2, M3)>; -def B : RegisterClass<"BF", [i32], 32, (add B0, B1, B2, B3)>; -def L : RegisterClass<"BF", [i32], 32, (add L0, L1, L2, L3)>; - -def GR : RegisterClass<"BF", [i32], 32, (add DP, I, M, B, L)>; - -def ALL : RegisterClass<"BF", [i32], 32, - (add GR, - A0X, A0W, A1X, A1W, ASTAT, RETS, - LC0, LT0, LB0, LC1, LT1, LB1, CYCLES, CYCLES2, - USP, SEQSTAT, SYSCFG, RETI, RETX, RETN, RETE, EMUDAT)>; - -def PI : RegisterClass<"BF", [i32], 32, (add P, I)>; - -// We are going to pretend that CC and !CC are 32-bit registers, even though -// they only can hold 1 bit. 
-let CopyCost = -1, Size = 8 in { -def JustCC : RegisterClass<"BF", [i32], 8, (add CC)>; -def NotCC : RegisterClass<"BF", [i32], 8, (add NCC)>; -def AnyCC : RegisterClass<"BF", [i32], 8, (add CC, NCC)>; -def StatBit : RegisterClass<"BF", [i1], 8, - (add AZ, AN, CC, AQ, AC0, AC1, AV0, AV0S, AV1, AV1S, V, VS)>; -} - -// Should be i40, but that isn't defined. It is not a legal type yet anyway. -def Accu : RegisterClass<"BF", [i64], 64, (add A0, A1)>; - -// Register classes to match inline asm constraints. -def zCons : RegisterClass<"BF", [i32], 32, (add P0, P1, P2)>; -def DCons : RegisterClass<"BF", [i32], 32, (add R0, R2, R4, R6)>; -def WCons : RegisterClass<"BF", [i32], 32, (add R1, R3, R5, R7)>; -def cCons : RegisterClass<"BF", [i32], 32, (add I0, I1, I2, I3, - B0, B1, B2, B3, - L0, L1, L2, L3)>; -def tCons : RegisterClass<"BF", [i32], 32, (add LT0, LT1)>; -def uCons : RegisterClass<"BF", [i32], 32, (add LB0, LB1)>; -def kCons : RegisterClass<"BF", [i32], 32, (add LC0, LC1)>; -def yCons : RegisterClass<"BF", [i32], 32, (add RETS, RETN, RETI, RETX, - RETE, ASTAT, SEQSTAT, - USP)>; diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp deleted file mode 100644 index a21f696..0000000 --- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.cpp +++ /dev/null @@ -1,24 +0,0 @@ -//===-- BlackfinSelectionDAGInfo.cpp - Blackfin SelectionDAG Info ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the BlackfinSelectionDAGInfo class. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "blackfin-selectiondag-info" -#include "BlackfinTargetMachine.h" -using namespace llvm; - -BlackfinSelectionDAGInfo::BlackfinSelectionDAGInfo( - const BlackfinTargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} - -BlackfinSelectionDAGInfo::~BlackfinSelectionDAGInfo() { -} diff --git a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h b/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h deleted file mode 100644 index f1ce348..0000000 --- a/lib/Target/Blackfin/BlackfinSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- BlackfinSelectionDAGInfo.h - Blackfin SelectionDAG Info -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the Blackfin subclass for TargetSelectionDAGInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef BLACKFINSELECTIONDAGINFO_H -#define BLACKFINSELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class BlackfinTargetMachine; - -class BlackfinSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit BlackfinSelectionDAGInfo(const BlackfinTargetMachine &TM); - ~BlackfinSelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/Blackfin/BlackfinSubtarget.cpp b/lib/Target/Blackfin/BlackfinSubtarget.cpp deleted file mode 100644 index 0bdce09..0000000 --- a/lib/Target/Blackfin/BlackfinSubtarget.cpp +++ /dev/null @@ -1,44 +0,0 @@ -//===- BlackfinSubtarget.cpp - BLACKFIN Subtarget Information -------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file implements the blackfin specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#include "BlackfinSubtarget.h" -#include "Blackfin.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_SUBTARGETINFO_TARGET_DESC -#define GET_SUBTARGETINFO_CTOR -#include "BlackfinGenSubtargetInfo.inc" - -using namespace llvm; - -BlackfinSubtarget::BlackfinSubtarget(const std::string &TT, - const std::string &CPU, - const std::string &FS) - : BlackfinGenSubtargetInfo(TT, CPU, FS), sdram(false), - icplb(false), - wa_mi_shift(false), - wa_csync(false), - wa_specld(false), - wa_mmr_stall(false), - wa_lcregs(false), - wa_hwloop(false), - wa_ind_call(false), - wa_killed_mmr(false), - wa_rets(false) -{ - std::string CPUName = CPU; - if (CPUName.empty()) - CPUName = "generic"; - // Parse features string. - ParseSubtargetFeatures(CPUName, FS); -} diff --git a/lib/Target/Blackfin/BlackfinSubtarget.h b/lib/Target/Blackfin/BlackfinSubtarget.h deleted file mode 100644 index 1a01a81..0000000 --- a/lib/Target/Blackfin/BlackfinSubtarget.h +++ /dev/null @@ -1,49 +0,0 @@ -//===- BlackfinSubtarget.h - Define Subtarget for the Blackfin -*- C++ -*-====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the BLACKFIN specific subclass of TargetSubtargetInfo. 
-// -//===----------------------------------------------------------------------===// - -#ifndef BLACKFIN_SUBTARGET_H -#define BLACKFIN_SUBTARGET_H - -#include "llvm/Target/TargetSubtargetInfo.h" -#include - -#define GET_SUBTARGETINFO_HEADER -#include "BlackfinGenSubtargetInfo.inc" - -namespace llvm { -class StringRef; - - class BlackfinSubtarget : public BlackfinGenSubtargetInfo { - bool sdram; - bool icplb; - bool wa_mi_shift; - bool wa_csync; - bool wa_specld; - bool wa_mmr_stall; - bool wa_lcregs; - bool wa_hwloop; - bool wa_ind_call; - bool wa_killed_mmr; - bool wa_rets; - public: - BlackfinSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); - - /// ParseSubtargetFeatures - Parses features string setting specified - /// subtarget options. Definition of function is auto generated by tblgen. - void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - }; - -} // end namespace llvm - -#endif diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp deleted file mode 100644 index a4ae46b..0000000 --- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp +++ /dev/null @@ -1,43 +0,0 @@ -//===-- BlackfinTargetMachine.cpp - Define TargetMachine for Blackfin -----===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#include "BlackfinTargetMachine.h" -#include "Blackfin.h" -#include "llvm/PassManager.h" -#include "llvm/Support/TargetRegistry.h" - -using namespace llvm; - -extern "C" void LLVMInitializeBlackfinTarget() { - RegisterTargetMachine X(TheBlackfinTarget); -} - -BlackfinTargetMachine::BlackfinTargetMachine(const Target &T, - StringRef TT, - StringRef CPU, - StringRef FS, - Reloc::Model RM, - CodeModel::Model CM) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), - DataLayout("e-p:32:32-i64:32-f64:32-n32"), - Subtarget(TT, CPU, FS), - TLInfo(*this), - TSInfo(*this), - InstrInfo(Subtarget), - FrameLowering(Subtarget) { -} - -bool BlackfinTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - PM.add(createBlackfinISelDag(*this, OptLevel)); - return false; -} diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.h b/lib/Target/Blackfin/BlackfinTargetMachine.h deleted file mode 100644 index c85337f..0000000 --- a/lib/Target/Blackfin/BlackfinTargetMachine.h +++ /dev/null @@ -1,68 +0,0 @@ -//===-- BlackfinTargetMachine.h - TargetMachine for Blackfin ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the Blackfin specific subclass of TargetMachine. 
-// -//===----------------------------------------------------------------------===// - -#ifndef BLACKFINTARGETMACHINE_H -#define BLACKFINTARGETMACHINE_H - -#include "BlackfinInstrInfo.h" -#include "BlackfinIntrinsicInfo.h" -#include "BlackfinISelLowering.h" -#include "BlackfinFrameLowering.h" -#include "BlackfinSubtarget.h" -#include "BlackfinSelectionDAGInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { - - class BlackfinTargetMachine : public LLVMTargetMachine { - const TargetData DataLayout; - BlackfinSubtarget Subtarget; - BlackfinTargetLowering TLInfo; - BlackfinSelectionDAGInfo TSInfo; - BlackfinInstrInfo InstrInfo; - BlackfinFrameLowering FrameLowering; - BlackfinIntrinsicInfo IntrinsicInfo; - public: - BlackfinTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); - - virtual const BlackfinInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetFrameLowering *getFrameLowering() const { - return &FrameLowering; - } - virtual const BlackfinSubtarget *getSubtargetImpl() const { - return &Subtarget; - } - virtual const BlackfinRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } - virtual const BlackfinTargetLowering* getTargetLowering() const { - return &TLInfo; - } - virtual const BlackfinSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - virtual const TargetData *getTargetData() const { return &DataLayout; } - virtual bool addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel); - const TargetIntrinsicInfo *getIntrinsicInfo() const { - return &IntrinsicInfo; - } - }; - -} // end namespace llvm - -#endif diff --git a/lib/Target/Blackfin/CMakeLists.txt b/lib/Target/Blackfin/CMakeLists.txt deleted file mode 100644 index 94d05fb..0000000 --- a/lib/Target/Blackfin/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ 
-set(LLVM_TARGET_DEFINITIONS Blackfin.td) - -llvm_tablegen(BlackfinGenRegisterInfo.inc -gen-register-info) -llvm_tablegen(BlackfinGenInstrInfo.inc -gen-instr-info) -llvm_tablegen(BlackfinGenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(BlackfinGenDAGISel.inc -gen-dag-isel) -llvm_tablegen(BlackfinGenSubtargetInfo.inc -gen-subtarget) -llvm_tablegen(BlackfinGenCallingConv.inc -gen-callingconv) -llvm_tablegen(BlackfinGenIntrinsics.inc -gen-tgt-intrinsic) -add_public_tablegen_target(BlackfinCommonTableGen) - -add_llvm_target(BlackfinCodeGen - BlackfinAsmPrinter.cpp - BlackfinInstrInfo.cpp - BlackfinIntrinsicInfo.cpp - BlackfinISelDAGToDAG.cpp - BlackfinISelLowering.cpp - BlackfinFrameLowering.cpp - BlackfinRegisterInfo.cpp - BlackfinSubtarget.cpp - BlackfinTargetMachine.cpp - BlackfinSelectionDAGInfo.cpp - ) - -add_llvm_library_dependencies(LLVMBlackfinCodeGen - LLVMAsmPrinter - LLVMBlackfinDesc - LLVMBlackfinInfo - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - -add_subdirectory(TargetInfo) -add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp deleted file mode 100644 index 5b9d4a2..0000000 --- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.cpp +++ /dev/null @@ -1,22 +0,0 @@ -//===-- BlackfinMCAsmInfo.cpp - Blackfin asm properties -------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the BlackfinMCAsmInfo properties. 
-// -//===----------------------------------------------------------------------===// - -#include "BlackfinMCAsmInfo.h" - -using namespace llvm; - -BlackfinMCAsmInfo::BlackfinMCAsmInfo(const Target &T, StringRef TT) { - GlobalPrefix = "_"; - CommentString = "//"; - HasSetDirective = false; -} diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h deleted file mode 100644 index c372aa2..0000000 --- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCAsmInfo.h +++ /dev/null @@ -1,29 +0,0 @@ -//===-- BlackfinMCAsmInfo.h - Blackfin asm properties ---------*- C++ -*--====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the BlackfinMCAsmInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef BLACKFINTARGETASMINFO_H -#define BLACKFINTARGETASMINFO_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCAsmInfo.h" - -namespace llvm { - class Target; - - struct BlackfinMCAsmInfo : public MCAsmInfo { - explicit BlackfinMCAsmInfo(const Target &T, StringRef TT); - }; - -} // namespace llvm - -#endif diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp deleted file mode 100644 index 272e3c2..0000000 --- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.cpp +++ /dev/null @@ -1,81 +0,0 @@ -//===-- BlackfinMCTargetDesc.cpp - Blackfin Target Descriptions -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file provides Blackfin specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "BlackfinMCTargetDesc.h" -#include "BlackfinMCAsmInfo.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_MC_DESC -#include "BlackfinGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "BlackfinGenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "BlackfinGenRegisterInfo.inc" - -using namespace llvm; - - -static MCInstrInfo *createBlackfinMCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitBlackfinMCInstrInfo(X); - return X; -} - -static MCRegisterInfo *createBlackfinMCRegisterInfo(StringRef TT) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitBlackfinMCRegisterInfo(X, BF::RETS); - return X; -} - -static MCSubtargetInfo *createBlackfinMCSubtargetInfo(StringRef TT, - StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitBlackfinMCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCCodeGenInfo *createBlackfinMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(RM, CM); - return X; -} - -// Force static initialization. -extern "C" void LLVMInitializeBlackfinTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfo X(TheBlackfinTarget); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheBlackfinTarget, - createBlackfinMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheBlackfinTarget, - createBlackfinMCInstrInfo); - - // Register the MC register info. 
- TargetRegistry::RegisterMCRegInfo(TheBlackfinTarget, - createBlackfinMCRegisterInfo); - - // Register the MC subtarget info. - TargetRegistry::RegisterMCSubtargetInfo(TheBlackfinTarget, - createBlackfinMCSubtargetInfo); -} diff --git a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h b/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h deleted file mode 100644 index 5bffe94..0000000 --- a/lib/Target/Blackfin/MCTargetDesc/BlackfinMCTargetDesc.h +++ /dev/null @@ -1,38 +0,0 @@ -//===-- BlackfinMCTargetDesc.h - Blackfin Target Descriptions ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides Blackfin specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#ifndef BLACKFINMCTARGETDESC_H -#define BLACKFINMCTARGETDESC_H - -namespace llvm { -class MCSubtargetInfo; -class Target; -class StringRef; - -extern Target TheBlackfinTarget; - -} // End llvm namespace - -// Defines symbolic names for Blackfin registers. This defines a mapping from -// register name to register number. -#define GET_REGINFO_ENUM -#include "BlackfinGenRegisterInfo.inc" - -// Defines symbolic names for the Blackfin instructions. 
-#define GET_INSTRINFO_ENUM -#include "BlackfinGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "BlackfinGenSubtargetInfo.inc" - -#endif diff --git a/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt b/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index 73315d8..0000000 --- a/lib/Target/Blackfin/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -add_llvm_library(LLVMBlackfinDesc - BlackfinMCTargetDesc.cpp - BlackfinMCAsmInfo.cpp - ) - -add_llvm_library_dependencies(LLVMBlackfinDesc - LLVMBlackfinInfo - LLVMMC - ) - -add_dependencies(LLVMBlackfinDesc BlackfinCommonTableGen) diff --git a/lib/Target/Blackfin/MCTargetDesc/Makefile b/lib/Target/Blackfin/MCTargetDesc/Makefile deleted file mode 100644 index 6b26101..0000000 --- a/lib/Target/Blackfin/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Blackfin/TargetDesc/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMBlackfinDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/Blackfin/Makefile b/lib/Target/Blackfin/Makefile deleted file mode 100644 index 756ac6b..0000000 --- a/lib/Target/Blackfin/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -##===- lib/Target/Blackfin/Makefile ------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. 
-LIBRARYNAME = LLVMBlackfinCodeGen -TARGET = Blackfin - -# Make sure that tblgen is run, first thing. -BUILT_SOURCES = BlackfinGenRegisterInfo.inc BlackfinGenInstrInfo.inc \ - BlackfinGenAsmWriter.inc \ - BlackfinGenDAGISel.inc BlackfinGenSubtargetInfo.inc \ - BlackfinGenCallingConv.inc BlackfinGenIntrinsics.inc - -DIRS = TargetInfo MCTargetDesc - -include $(LEVEL)/Makefile.common - diff --git a/lib/Target/Blackfin/README.txt b/lib/Target/Blackfin/README.txt deleted file mode 100644 index b4c8227..0000000 --- a/lib/Target/Blackfin/README.txt +++ /dev/null @@ -1,244 +0,0 @@ -//===-- README.txt - Notes for Blackfin Target ------------------*- org -*-===// - -* Condition codes -** DONE Problem with asymmetric SETCC operations -The instruction - - CC = R0 < 2 - -is not symmetric - there is no R0 > 2 instruction. On the other hand, IF CC -JUMP can take both CC and !CC as a condition. We cannot pattern-match (brcond -(not cc), target), the DAG optimizer removes that kind of thing. - -This is handled by creating a pseudo-register NCC that aliases CC. Register -classes JustCC and NotCC are used to control the inversion of CC. - -** DONE CC as an i32 register -The AnyCC register class pretends to hold i32 values. It can only represent the -values 0 and 1, but we can copy to and from the D class. This hack makes it -possible to represent the setcc instruction without having i1 as a legal type. - -In most cases, the CC register is set by a "CC = .." or BITTST instruction, and -then used in a conditional branch or move. The code generator thinks it is -moving 32 bits, but the value stays in CC. In other cases, the result of a -comparison is actually used as am i32 number, and CC will be copied to a D -register. - -* Stack frames -** TODO Use Push/Pop instructions -We should use the push/pop instructions when saving callee-saved -registers. The are smaller, and we may even use push multiple instructions. 
- -** TODO requiresRegisterScavenging -We need more intelligence in determining when the scavenger is needed. We -should keep track of: -- Spilling D16 registers -- Spilling AnyCC registers - -* Assembler -** TODO Implement PrintGlobalVariable -** TODO Remove LOAD32sym -It's a hack combining two instructions by concatenation. - -* Inline Assembly - -These are the GCC constraints from bfin/constraints.md: - -| Code | Register class | LLVM | -|-------+-------------------------------------------+------| -| a | P | C | -| d | D | C | -| z | Call clobbered P (P0, P1, P2) | X | -| D | EvenD | X | -| W | OddD | X | -| e | Accu | C | -| A | A0 | S | -| B | A1 | S | -| b | I | C | -| v | B | C | -| f | M | C | -| c | Circular I, B, L | X | -| C | JustCC | S | -| t | LoopTop | X | -| u | LoopBottom | X | -| k | LoopCount | X | -| x | GR | C | -| y | RET*, ASTAT, SEQSTAT, USP | X | -| w | ALL | C | -| Z | The FD-PIC GOT pointer (P3) | S | -| Y | The FD-PIC function pointer register (P1) | S | -| q0-q7 | R0-R7 individually | | -| qA | P0 | | -|-------+-------------------------------------------+------| -| Code | Constant | | -|-------+-------------------------------------------+------| -| J | 1< X(TheBlackfinTarget, "bfin", - "Analog Devices Blackfin [experimental]"); -} diff --git a/lib/Target/Blackfin/TargetInfo/CMakeLists.txt b/lib/Target/Blackfin/TargetInfo/CMakeLists.txt deleted file mode 100644 index 771f092..0000000 --- a/lib/Target/Blackfin/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. 
) - -add_llvm_library(LLVMBlackfinInfo - BlackfinTargetInfo.cpp - ) - -add_llvm_library_dependencies(LLVMBlackfinInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - -add_dependencies(LLVMBlackfinInfo BlackfinCommonTableGen) diff --git a/lib/Target/Blackfin/TargetInfo/Makefile b/lib/Target/Blackfin/TargetInfo/Makefile deleted file mode 100644 index c49cfbe..0000000 --- a/lib/Target/Blackfin/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Blackfin/TargetInfo/Makefile -------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMBlackfinInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common -- cgit v1.1 From 399cdca4d201f7232126c3a0643669971ede780a Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 25 Oct 2011 00:14:01 +0000 Subject: ARM assembly parsing and encoding for VLD1 with writeback. Four entry register lists. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142882 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 3 ++- lib/Target/ARM/ARMInstrNEON.td | 35 ++++++++++++++++--------- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 12 ++++++--- 3 files changed, 33 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 23fae3e..7a7267a 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -2440,7 +2440,8 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, case ARM::VLD4d8_UPD: case ARM::VLD4d16_UPD: case ARM::VLD4d32_UPD: - case ARM::VLD1d64Q_UPD: + case ARM::VLD1d64Qwb_fixed: + case ARM::VLD1d64Qwb_register: case ARM::VLD4q8_UPD: case ARM::VLD4q16_UPD: case ARM::VLD4q32_UPD: diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 9d0350b..75418aa 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -424,13 +424,24 @@ class VLD1D4 op7_4, string Dt> let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVLDInstruction"; } -class VLD1D4WB op7_4, string Dt> - : NLdSt<0, 0b10, 0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm), IIC_VLD1x4u, "vld1", Dt, - "$Vd, $Rn$Rm", "$Rn.addr = $wb", - []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; +multiclass VLD1D4WB op7_4, string Dt> { + def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), + (ins addrmode6:$Rn), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbFixed"; + } + def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, + "vld1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVLDInstruction"; + let AsmMatchConverter = "cvtVLDwbRegister"; + } } def VLD1d8Q : VLD1D4<{0,0,?,?}, "8">; @@ -438,12 +449,12 @@ def VLD1d16Q : VLD1D4<{0,1,?,?}, "16">; def VLD1d32Q : VLD1D4<{1,0,?,?}, "32">; def VLD1d64Q : VLD1D4<{1,1,?,?}, "64">; -def VLD1d8Q_UPD : VLD1D4WB<{0,0,?,?}, "8">; -def VLD1d16Q_UPD : VLD1D4WB<{0,1,?,?}, "16">; -def VLD1d32Q_UPD : VLD1D4WB<{1,0,?,?}, "32">; -def VLD1d64Q_UPD : VLD1D4WB<{1,1,?,?}, "64">; +defm VLD1d8Qwb : VLD1D4WB<{0,0,?,?}, "8">; +defm VLD1d16Qwb : VLD1D4WB<{0,1,?,?}, "16">; +defm VLD1d32Qwb : VLD1D4WB<{1,0,?,?}, "32">; +defm VLD1d64Qwb : VLD1D4WB<{1,1,?,?}, "64">; -def VLD1d64QPseudo : VLDQQPseudo; +def VLD1d64QPseudo : VLDQQPseudo; // VLD2 : Vector Load (multiple 2-element structures) class VLD2D op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy> diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index ddc5c99..5174134 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2078,10 +2078,14 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VLD1d32Twb_register: case ARM::VLD1d64Twb_fixed: case ARM::VLD1d64Twb_register: - case ARM::VLD1d8Q_UPD: - case ARM::VLD1d16Q_UPD: - case ARM::VLD1d32Q_UPD: - case ARM::VLD1d64Q_UPD: + case ARM::VLD1d8Qwb_fixed: + case ARM::VLD1d8Qwb_register: + case ARM::VLD1d16Qwb_fixed: + case ARM::VLD1d16Qwb_register: + case ARM::VLD1d32Qwb_fixed: + case ARM::VLD1d32Qwb_register: + case ARM::VLD1d64Qwb_fixed: + case ARM::VLD1d64Qwb_register: case ARM::VLD2d8_UPD: case 
ARM::VLD2d16_UPD: case ARM::VLD2d32_UPD: -- cgit v1.1 From 86ed4089e367956ea7134dd7e99334a084fec6df Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 25 Oct 2011 00:54:05 +0000 Subject: An MBB which branches to an EH landing pad shouldn't be considered for tail merging. In SjLj EH, the jump to the landing pad is not done explicitly through a branch statement. The EH landing pad is added as a successor to the throwing BB. Because of that however, the branch folding pass could mistakenly think that it could merge the throwing BB with another BB. This isn't safe to do. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142891 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BranchFolding.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 75288b0..2c39f64 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -913,7 +913,8 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // reinsert conditional branch only, for now TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); } - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); + if (!PBB->getLandingPadSuccessor()) + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB),*P)); } } // If this is a large problem, avoid visiting the same basic blocks -- cgit v1.1 From fd747b24a3fb08805336f2070b9316ded6b57810 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 25 Oct 2011 00:55:35 +0000 Subject: Remove unused forward decl. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142892 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.h | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 4bd5167..b98ace2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -31,7 +31,6 @@ namespace llvm { class CompileUnit; -class DbgConcreteScope; class DbgVariable; class MachineFrameInfo; class MachineModuleInfo; -- cgit v1.1 From 000b9bdd3396ca322413c4e419d41cd3a6235e74 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Tue, 25 Oct 2011 07:05:26 +0000 Subject: Remove dead enum value. There is no DIESectionOffset. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142912 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DIE.h | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 3bd9ffd..12448af 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -200,7 +200,6 @@ namespace llvm { isInteger, isString, isLabel, - isSectionOffset, isDelta, isEntry, isBlock -- cgit v1.1 From 526c80bae4d9902fea9efd787c2f212bf111e85e Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Tue, 25 Oct 2011 09:26:43 +0000 Subject: Speculatively revert commits 142790 and 142843 to see if it fixes the dragonegg and llvm-gcc self-host buildbots. Original commit messages: - Reapply r142781 with fix. Original message: Enhance SCEV's brute force loop analysis to handle multiple PHI nodes in the loop header when computing the trip count. 
With this, we now constant evaluate: struct ListNode { const struct ListNode *next; int i; }; static const struct ListNode node1 = {0, 1}; static const struct ListNode node2 = {&node1, 2}; static const struct ListNode node3 = {&node2, 3}; int test() { int sum = 0; for (const struct ListNode *n = &node3; n != 0; n = n->next) sum += n->i; return sum; } - Now that we look at all the header PHIs, we need to consider all the header PHIs when deciding that the loop has stopped evolving. Fixes miscompile in the gcc torture testsuite! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142916 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 72 +++++++++++++++------------------------- 1 file changed, 26 insertions(+), 46 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index f65cf34..1e4bf19 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4844,12 +4844,12 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // EvaluateExpression adds non-phi values to the CurrentIterVals map. DenseMap NextIterVals; Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + if (NextPHI == CurrentIterVals[PN]) + return RetVal = NextPHI; // Stopped evolving! if (NextPHI == 0) return 0; // Couldn't evaluate! NextIterVals[PN] = NextPHI; - bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; - // Also evaluate the other PHI nodes. However, we don't get to stop if we // cease to be able to evaluate one of them or if they stop evolving, // because that doesn't necessarily prevent us from computing PN. @@ -4858,19 +4858,11 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, PHINode *PHI = dyn_cast(I->first); if (!PHI || PHI == PN || PHI->getParent() != Header) continue; Constant *&NextPHI = NextIterVals[PHI]; - if (!NextPHI) { // Not already computed. 
- Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); - } - if (NextPHI != I->second) - StoppedEvolving = false; - } - - // If all entries in CurrentIterVals == NextIterVals then we can stop - // iterating, the loop can't continue to change. - if (StoppedEvolving) - return RetVal = CurrentIterVals[PN]; + if (NextPHI) continue; // Already computed! + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + } CurrentIterVals.swap(NextIterVals); } } @@ -4890,33 +4882,29 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // That's the only form we support here. if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); - DenseMap CurrentIterVals; - BasicBlock *Header = L->getHeader(); - assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); - // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - PHINode *PHI = 0; - for (BasicBlock::iterator I = Header->begin(); - (PHI = dyn_cast(I)); ++I) { - Constant *StartCST = - dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) continue; - CurrentIterVals[PHI] = StartCST; - } - if (!CurrentIterVals.count(PN)) - return getCouldNotCompute(); + Constant *StartCST = + dyn_cast(PN->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) return getCouldNotCompute(); // Must be a constant. + + Value *BEValue = PN->getIncomingValue(SecondIsBackedge); + if (getConstantEvolvingPHI(BEValue, L) != PN && + !isa(BEValue)) + return getCouldNotCompute(); // Not derived from same PHI. // Okay, we find a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of // "ExitWhen". 
- - unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. - for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ + unsigned IterationNum = 0; + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. + for (Constant *PHIVal = StartCST; + IterationNum != MaxIterations; ++IterationNum) { + DenseMap PHIValMap; + PHIValMap[PN] = PHIVal; ConstantInt *CondVal = - dyn_cast_or_null(EvaluateExpression(Cond, L, - CurrentIterVals, TD)); + dyn_cast_or_null(EvaluateExpression(Cond, L, PHIValMap, TD)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -4926,19 +4914,11 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, return getConstant(Type::getInt32Ty(getContext()), IterationNum); } - // Update all the PHI nodes for the next iteration. - DenseMap NextIterVals; - for (DenseMap::const_iterator - I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ - PHINode *PHI = dyn_cast(I->first); - if (!PHI || PHI->getParent() != Header) continue; - Constant *&NextPHI = NextIterVals[PHI]; - if (NextPHI) continue; // Already computed! - - Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); - } - CurrentIterVals.swap(NextIterVals); + // Compute the value of the PHI node for the next iteration. + Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD); + if (NextPHI == 0 || NextPHI == PHIVal) + return getCouldNotCompute();// Couldn't evaluate or not making progress... + PHIVal = NextPHI; } // Too many iterations were needed to evaluate. -- cgit v1.1 From 45baf6bb85ee0d61e18213979a4efbd2f16eb338 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Tue, 25 Oct 2011 09:47:41 +0000 Subject: Fix the API usage in loop probability heuristics. It was incorrectly classifying many edges as exiting which were in fact not. These mainly formed edges into sub-loops. 
It was also not correctly classifying all returning edges out of loops as leaving the loop. With this patch, most of the loop heuristics are more rational. Several serious regressions on loop-intensive benchmarks like perlbench's loop tests when built with -enable-block-placement are fixed by these updated heuristics. Unfortunately they in turn uncover some other regressions. There are still several improvements that should be made to loop heuristics including trip-count, and early back-edge management. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142917 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/BranchProbabilityInfo.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 0396f99..258fe54 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -216,8 +216,6 @@ bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) { // Calculate Edge Weights using "Loop Branch Heuristics". Predict backedges // as taken, exiting edges as not-taken. bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { - uint32_t numSuccs = BB->getTerminator()->getNumSuccessors(); - Loop *L = LI->getLoopFor(BB); if (!L) return false; @@ -226,17 +224,13 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { SmallPtrSet ExitingEdges; SmallPtrSet InEdges; // Edges from header to the loop.
- bool isHeader = BB == L->getHeader(); - for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I) { - BasicBlock *Succ = *I; - Loop *SuccL = LI->getLoopFor(Succ); - if (SuccL != L) - ExitingEdges.insert(Succ); - else if (Succ == L->getHeader()) - BackEdges.insert(Succ); - else if (isHeader) - InEdges.insert(Succ); + if (!L->contains(*I)) + ExitingEdges.insert(*I); + else if (L->getHeader() == *I) + BackEdges.insert(*I); + else + InEdges.insert(*I); } if (uint32_t numBackEdges = BackEdges.size()) { @@ -263,9 +257,8 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) { } } - uint32_t numExitingEdges = ExitingEdges.size(); - if (uint32_t numNonExitingEdges = numSuccs - numExitingEdges) { - uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numNonExitingEdges; + if (uint32_t numExitingEdges = ExitingEdges.size()) { + uint32_t exitWeight = LBH_NONTAKEN_WEIGHT / numExitingEdges; if (exitWeight < MIN_WEIGHT) exitWeight = MIN_WEIGHT; -- cgit v1.1 From f8a9eb1fa6b676468d0231d1a0ee5247b664dabb Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Tue, 25 Oct 2011 12:28:52 +0000 Subject: Restore commits 142790 and 142843 - they weren't breaking the build bots. Original commit messages: - Reapply r142781 with fix. Original message: Enhance SCEV's brute force loop analysis to handle multiple PHI nodes in the loop header when computing the trip count. With this, we now constant evaluate: struct ListNode { const struct ListNode *next; int i; }; static const struct ListNode node1 = {0, 1}; static const struct ListNode node2 = {&node1, 2}; static const struct ListNode node3 = {&node2, 3}; int test() { int sum = 0; for (const struct ListNode *n = &node3; n != 0; n = n->next) sum += n->i; return sum; } - Now that we look at all the header PHIs, we need to consider all the header PHIs when deciding that the loop has stopped evolving. Fixes miscompile in the gcc torture testsuite! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142919 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 72 +++++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 1e4bf19..f65cf34 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4844,12 +4844,12 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // EvaluateExpression adds non-phi values to the CurrentIterVals map. DenseMap NextIterVals; Constant *NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); - if (NextPHI == CurrentIterVals[PN]) - return RetVal = NextPHI; // Stopped evolving! if (NextPHI == 0) return 0; // Couldn't evaluate! NextIterVals[PN] = NextPHI; + bool StoppedEvolving = NextPHI == CurrentIterVals[PN]; + // Also evaluate the other PHI nodes. However, we don't get to stop if we // cease to be able to evaluate one of them or if they stop evolving, // because that doesn't necessarily prevent us from computing PN. @@ -4858,11 +4858,19 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, PHINode *PHI = dyn_cast(I->first); if (!PHI || PHI == PN || PHI->getParent() != Header) continue; Constant *&NextPHI = NextIterVals[PHI]; - if (NextPHI) continue; // Already computed! - - Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); - NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + if (!NextPHI) { // Not already computed. + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + } + if (NextPHI != I->second) + StoppedEvolving = false; } + + // If all entries in CurrentIterVals == NextIterVals then we can stop + // iterating, the loop can't continue to change. 
+ if (StoppedEvolving) + return RetVal = CurrentIterVals[PN]; + CurrentIterVals.swap(NextIterVals); } } @@ -4882,29 +4890,33 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // That's the only form we support here. if (PN->getNumIncomingValues() != 2) return getCouldNotCompute(); + DenseMap CurrentIterVals; + BasicBlock *Header = L->getHeader(); + assert(PN->getParent() == Header && "Can't evaluate PHI not in loop header!"); + // One entry must be a constant (coming in from outside of the loop), and the // second must be derived from the same PHI. bool SecondIsBackedge = L->contains(PN->getIncomingBlock(1)); - Constant *StartCST = - dyn_cast(PN->getIncomingValue(!SecondIsBackedge)); - if (StartCST == 0) return getCouldNotCompute(); // Must be a constant. - - Value *BEValue = PN->getIncomingValue(SecondIsBackedge); - if (getConstantEvolvingPHI(BEValue, L) != PN && - !isa(BEValue)) - return getCouldNotCompute(); // Not derived from same PHI. + PHINode *PHI = 0; + for (BasicBlock::iterator I = Header->begin(); + (PHI = dyn_cast(I)); ++I) { + Constant *StartCST = + dyn_cast(PHI->getIncomingValue(!SecondIsBackedge)); + if (StartCST == 0) continue; + CurrentIterVals[PHI] = StartCST; + } + if (!CurrentIterVals.count(PN)) + return getCouldNotCompute(); // Okay, we find a PHI node that defines the trip count of this loop. Execute // the loop symbolically to determine when the condition gets a value of // "ExitWhen". - unsigned IterationNum = 0; - unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. - for (Constant *PHIVal = StartCST; - IterationNum != MaxIterations; ++IterationNum) { - DenseMap PHIValMap; - PHIValMap[PN] = PHIVal; + + unsigned MaxIterations = MaxBruteForceIterations; // Limit analysis. 
+ for (unsigned IterationNum = 0; IterationNum != MaxIterations;++IterationNum){ ConstantInt *CondVal = - dyn_cast_or_null(EvaluateExpression(Cond, L, PHIValMap, TD)); + dyn_cast_or_null(EvaluateExpression(Cond, L, + CurrentIterVals, TD)); // Couldn't symbolically evaluate. if (!CondVal) return getCouldNotCompute(); @@ -4914,11 +4926,19 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, return getConstant(Type::getInt32Ty(getContext()), IterationNum); } - // Compute the value of the PHI node for the next iteration. - Constant *NextPHI = EvaluateExpression(BEValue, L, PHIValMap, TD); - if (NextPHI == 0 || NextPHI == PHIVal) - return getCouldNotCompute();// Couldn't evaluate or not making progress... - PHIVal = NextPHI; + // Update all the PHI nodes for the next iteration. + DenseMap NextIterVals; + for (DenseMap::const_iterator + I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ + PHINode *PHI = dyn_cast(I->first); + if (!PHI || PHI->getParent() != Header) continue; + Constant *&NextPHI = NextIterVals[PHI]; + if (NextPHI) continue; // Already computed! + + Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); + NextPHI = EvaluateExpression(BEValue, L, CurrentIterVals, TD); + } + CurrentIterVals.swap(NextIterVals); } // Too many iterations were needed to evaluate. -- cgit v1.1 From 51583ce4b6938a44a246df65977e29e0509a7f5b Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Tue, 25 Oct 2011 12:30:22 +0000 Subject: Revert commit 142891. Takumi bisected the tablegen miscompiles down to this commit. Original commit message: An MBB which branches to an EH landing pad shouldn't be considered for tail merging. In SjLj EH, the jump to the landing pad is not done explicitly through a branch statement. The EH landing pad is added as a successor to the throwing BB. Because of that however, the branch folding pass could mistakenly think that it could merge the throwing BB with another BB. This isn't safe to do. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142920 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BranchFolding.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 2c39f64..75288b0 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -913,8 +913,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // reinsert conditional branch only, for now TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); } - if (!PBB->getLandingPadSuccessor()) - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB),*P)); + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } } // If this is a large problem, avoid visiting the same basic blocks -- cgit v1.1 From 02dc51806e4c83437fb9c0f0507aef7111076937 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Tue, 25 Oct 2011 17:30:47 +0000 Subject: Corrects previously incorrect $sp change in MipsCompilationCallback. The address for $sp, and addresses for sdc1/ldc1 must be 8-byte aligned Patch by Petar Jovanovic. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142930 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsJITInfo.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp index 28c2b48..e3f6a75 100644 --- a/lib/Target/Mips/MipsJITInfo.cpp +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -57,11 +57,11 @@ void MipsCompilationCallback(); ".globl " ASMPREFIX "MipsCompilationCallback\n" ASMPREFIX "MipsCompilationCallback:\n" ".ent " ASMPREFIX "MipsCompilationCallback\n" - ".frame $29, 32, $31\n" + ".frame $sp, 32, $ra\n" ".set noreorder\n" ".cpload $t9\n" - "addiu $sp, $sp, -60\n" + "addiu $sp, $sp, -64\n" ".cprestore 16\n" // Save argument registers a0, a1, a2, a3, f12, f14 since they may contain @@ -76,8 +76,8 @@ void MipsCompilationCallback(); "sw $a3, 32($sp)\n" "sw $ra, 36($sp)\n" "sw $t8, 40($sp)\n" - "sdc1 $f12, 44($sp)\n" - "sdc1 $f14, 52($sp)\n" + "sdc1 $f12, 48($sp)\n" + "sdc1 $f14, 56($sp)\n" // t8 points at the end of function stub. Pass the beginning of the stub // to the MipsCompilationCallbackC. @@ -92,9 +92,9 @@ void MipsCompilationCallback(); "lw $a3, 32($sp)\n" "lw $ra, 36($sp)\n" "lw $t8, 40($sp)\n" - "ldc1 $f12, 44($sp)\n" - "ldc1 $f14, 52($sp)\n" - "addiu $sp, $sp, 60\n" + "ldc1 $f12, 48($sp)\n" + "ldc1 $f14, 56($sp)\n" + "addiu $sp, $sp, 64\n" // Jump to the (newly modified) stub to invoke the real function. "addiu $t8, $t8, -16\n" -- cgit v1.1 From 3507d24547ce668c9a50c72b6748c0a303e295c1 Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Tue, 25 Oct 2011 18:13:20 +0000 Subject: This is the first of several patches for Mips direct object generation. This first patch is for expression variable kinds. Patch by Jack Carter! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142934 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCELFStreamer.cpp | 4 ++++ lib/MC/MCExpr.cpp | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'lib') diff --git a/lib/MC/MCELFStreamer.cpp b/lib/MC/MCELFStreamer.cpp index 9ada08e..0b366da 100644 --- a/lib/MC/MCELFStreamer.cpp +++ b/lib/MC/MCELFStreamer.cpp @@ -308,6 +308,10 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { case MCSymbolRefExpr::VK_ARM_TLSGD: case MCSymbolRefExpr::VK_ARM_TPOFF: case MCSymbolRefExpr::VK_ARM_GOTTPOFF: + case MCSymbolRefExpr::VK_Mips_TLSGD: + case MCSymbolRefExpr::VK_Mips_GOTTPREL: + case MCSymbolRefExpr::VK_Mips_TPREL_HI: + case MCSymbolRefExpr::VK_Mips_TPREL_LO: break; } MCSymbolData &SD = getAssembler().getOrCreateSymbolData(symRef.getSymbol()); diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp index da297fb..4af27ab 100644 --- a/lib/MC/MCExpr.cpp +++ b/lib/MC/MCExpr.cpp @@ -26,6 +26,38 @@ STATISTIC(MCExprEvaluate, "Number of MCExpr evaluations"); } } +static bool printMipsSymbolRef(const MCSymbolRefExpr &SRE, + const MCSymbol &Sym, raw_ostream &OS) { + MCSymbolRefExpr::VariantKind Kind= SRE.getKind(); + + switch (Kind) { + default: + return false; + case MCSymbolRefExpr::VK_Mips_None: break; + case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break; + case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break; + case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break; + case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break; + case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break; + case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break; + case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break; + case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; + case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; + case MCSymbolRefExpr::VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break; + case 
MCSymbolRefExpr::VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break; + case MCSymbolRefExpr::VK_Mips_GOT_DISP: OS << "%got_disp("; break; + case MCSymbolRefExpr::VK_Mips_GOT_PAGE: OS << "%got_page("; break; + case MCSymbolRefExpr::VK_Mips_GOT_OFST: OS << "%got_ofst("; break; + } + + OS << Sym; + + if (Kind != MCSymbolRefExpr::VK_Mips_None) + OS << ')'; + + return true; +} + void MCExpr::print(raw_ostream &OS) const { switch (getKind()) { case MCExpr::Target: @@ -41,6 +73,9 @@ void MCExpr::print(raw_ostream &OS) const { // absolute names. bool UseParens = Sym.getName()[0] == '$'; + if (printMipsSymbolRef(SRE, Sym, OS)) + return; + if (SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_HA16 || SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_LO16) { OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); -- cgit v1.1 From 013d756310a92fd358a9aab2a63cce52c107f777 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Tue, 25 Oct 2011 18:48:41 +0000 Subject: Teach the MachO relocation pretty-printer to interpret ARM half-relocations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142938 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 146 +++++++++++++++++++++++++++++------------ 1 file changed, 104 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 0950020..96db34f 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -793,98 +793,160 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, InMemoryStruct RE; getRelocation(Rel, RE); - std::string addend; - raw_string_ostream addend_fmt(addend); - bool isPCRel = (RE->Word1 >> 25) & 1; unsigned Type = (RE->Word1 >> 28) & 0xF; + std::string fmtbuf; + raw_string_ostream fmt(fmtbuf); + // Determine any addends that should be displayed with the relocation. // These require decoding the relocation type, which is triple-specific. 
unsigned Arch = getArch(); // X86_64 has entirely custom relocation types. if (Arch == Triple::x86_64) { + StringRef Name; + if (error_code ec = getRelocationTargetName(RE->Word1, Name)) + report_fatal_error(ec.message()); + switch (Type) { case 5: { // X86_64_RELOC_SUBTRACTOR - RelocationRef NextReloc; - if (error_code ec = getRelocationNext(Rel, NextReloc)) - report_fatal_error(ec.message()); - - uint32_t SucessorType; - if (error_code ec = NextReloc.getType(SucessorType)) - report_fatal_error(ec.message()); + InMemoryStruct RENext; + DataRefImpl RelNext = Rel; + RelNext.d.a++; + getRelocation(RelNext, RENext); // X86_64_SUBTRACTOR must be followed by a relocation of type // X86_64_RELOC_UNSIGNED. - unsigned RType = (SucessorType >> 28) & 0xF; + unsigned RType = (RENext->Word1 >> 28) & 0xF; if (RType != 0) report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " "X86_64_RELOC_SUBTRACTOR."); - StringRef Name; - if (error_code ec = getRelocationTargetName(SucessorType, Name)) + StringRef SucName; + if (error_code ec = getRelocationTargetName(RENext->Word1, SucName)) report_fatal_error(ec.message()); - addend_fmt << "-" << Name; + fmt << Name << "-" << SucName; + if (isPCRel) fmt << "-PC"; } case 6: // X86_64_RELOC_SIGNED1 - addend_fmt << "-1"; + fmt << Name << "-1"; break; case 7: // X86_64_RELOC_SIGNED2 - addend_fmt << "-2"; + fmt << Name << "-2"; break; case 8: // X86_64_RELOC_SIGNED4 - addend_fmt << "-4"; + fmt << Name << "-4"; + break; + default: + fmt << Name; break; } - } - // X86 and ARM share some relocation types in common. - if (Arch == Triple::x86 || Arch == Triple::arm) { + } else if (Arch == Triple::x86 || Arch == Triple::arm) { + // Generic relocation types... 
switch (Type) { case 1: // GENERIC_RELOC_PAIR - prints no info return object_error::success; case 2: // GENERIC_RELOC_SECTDIFF case 4: { // GENERIC_RELOC_LOCAL_SECTDIFF - RelocationRef NextReloc; - if (error_code ec = getRelocationNext(Rel, NextReloc)) - report_fatal_error(ec.message()); - - uint32_t SucessorType; - if (error_code ec = NextReloc.getType(SucessorType)) - report_fatal_error(ec.message()); + InMemoryStruct RENext; + DataRefImpl RelNext = Rel; + RelNext.d.a++; + getRelocation(RelNext, RENext); // X86 sect diff's must be followed by a relocation of type // GENERIC_RELOC_PAIR. - unsigned RType = (SucessorType >> 28) & 0xF; + unsigned RType = (RENext->Word1 >> 28) & 0xF; if (RType != 1) report_fatal_error("Expected GENERIC_RELOC_PAIR after " "GENERIC_RELOC_SECTDIFF or " "GENERIC_RELOC_LOCAL_SECTDIFF."); + StringRef SucName; + if (error_code ec = getRelocationTargetName(RENext->Word1, SucName)) + report_fatal_error(ec.message()); + StringRef Name; - if (error_code ec = getRelocationTargetName(SucessorType, Name)) + if (error_code ec = getRelocationTargetName(RE->Word1, Name)) report_fatal_error(ec.message()); - addend_fmt << "-" << Name; + fmt << Name << "-" << SucName; + break; + } + } + if (Arch == Triple::x86 && Type != 1) { + // All X86 relocations that need special printing were already + // handled in the generic code. + StringRef Name; + if (error_code ec = getRelocationTargetName(RE->Word1, Name)) + report_fatal_error(ec.message()); + fmt << Name; + } else { // ARM-specific relocations + switch (Type) { + case 8: // ARM_RELOC_HALF + case 9: { // ARM_RELOC_HALF_SECTDIFF + StringRef Name; + if (error_code ec = getRelocationTargetName(RE->Word1, Name)) + report_fatal_error(ec.message()); + + // Half relocations steal a bit from the length field to encode + // whether this is an upper16 or a lower16 relocation. 
+ bool isUpper = (RE->Word1 >> 25) & 1; + if (isUpper) + fmt << ":upper16:(" << Name; + else + fmt << ":lower16:(" << Name; + + InMemoryStruct RENext; + DataRefImpl RelNext = Rel; + RelNext.d.a++; + getRelocation(RelNext, RENext); + + // ARM half relocs must be followed by a relocation of type + // ARM_RELOC_PAIR. + unsigned RType = (RENext->Word1 >> 28) & 0xF; + if (RType != 1) + report_fatal_error("Expected ARM_RELOC_PAIR after " + "GENERIC_RELOC_HALF"); + + // A constant addend for the relocation is stored in the address + // field of the follow-on relocation. If this is a lower16 relocation + // we need to shift it left by 16 before using it. + int32_t Addend = RENext->Word0; + if (!isUpper) Addend <<= 16; + + // ARM_RELOC_HALF_SECTDIFF encodes the second section in the + // symbol/section pointer of the follow-on relocation. + StringRef SucName; + if (Type == 9) { // ARM_RELOC_HALF_SECTDIFF + if (error_code ec = getRelocationTargetName(RENext->Word1, SucName)) + report_fatal_error(ec.message()); + } + + if (SucName.size()) fmt << "-" << SucName; + if (Addend > 0) fmt << "+" << Addend; + else if (Addend < 0) fmt << Addend; + fmt << ")"; + break; + } + default: { + StringRef Name; + if (error_code ec = getRelocationTargetName(RE->Word1, Name)) + report_fatal_error(ec.message()); + fmt << Name; + } } } + } else { + StringRef Name; + if (error_code ec = getRelocationTargetName(RE->Word1, Name)) + report_fatal_error(ec.message()); + fmt << Name; } - addend_fmt.flush(); - - std::string fmtbuf; - raw_string_ostream fmt(fmtbuf); - - StringRef Name; - if (error_code ec = getRelocationTargetName(RE->Word1, Name)) - report_fatal_error(ec.message()); - - fmt << Name << addend; - if (isPCRel) fmt << "-P"; - fmt.flush(); Result.append(fmtbuf.begin(), fmtbuf.end()); return object_error::success; -- cgit v1.1 From ca30f75703fa4f032b256bba445608c79e2bd82c Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 25 Oct 2011 20:30:48 +0000 Subject: Make assert() message more 
informative. PR11217. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142956 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LLVMTargetMachine.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 90501f0..268584c 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -121,7 +121,8 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, // we'll crash later. // Provide the user with a useful error message about what's wrong. assert(AsmInfo && "MCAsmInfo not initialized." - "Make sure you include the correct TargetSelect.h!"); + "Make sure you include the correct TargetSelect.h" + "and that InitializeAllTargetMCs() is being invoked!"); } bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, -- cgit v1.1 From 9312613137caf4f1e7b4747964b8daf3de79c190 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 25 Oct 2011 20:34:22 +0000 Subject: Remove a couple redundant checks. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142959 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 10daa31..91ce2a1 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7528,8 +7528,6 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, // Check to see if we can perform the "gzip trick", transforming // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && - N0.getValueType().isInteger() && - N2.getValueType().isInteger() && (N1C->isNullValue() || // (a < 0) ? b : 0 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? 
a : 0 EVT XType = N0.getValueType(); -- cgit v1.1 From 0685e94895f26f96aa1032696e3150dd00aad1f3 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Tue, 25 Oct 2011 20:35:53 +0000 Subject: Add support for the notion of "hidden" relocations. On MachO, these are relocation entries that are used as additional information for other, real relocations, rather than being relocations themselves. I'm not familiar enough with ELF or COFF to know if they should have any relocations marked hidden. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142961 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 96db34f..a4d7cb5 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -952,6 +952,38 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, return object_error::success; } +error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel, + bool &Result) const { + InMemoryStruct RE; + getRelocation(Rel, RE); + + unsigned Type = (RE->Word1 >> 28) & 0xF; + unsigned Arch = getArch(); + + Result = false; + + // On arches that use the generic relocations, GENERIC_RELOC_PAIR + // is always hidden. + if (Arch == Triple::x86 || Arch == Triple::arm) { + if (Type == 1) Result = true; + } else if (Arch == Triple::x86_64) { + // On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows + // an X864_64_RELOC_SUBTRACTOR. 
+ if (Type == 0 && Rel.d.a > 0) { + DataRefImpl RelPrev = Rel; + RelPrev.d.a--; + InMemoryStruct REPrev; + getRelocation(RelPrev, REPrev); + + unsigned PrevType = (REPrev->Word1 >> 28) & 0xF; + + if (PrevType == 5) Result = true; + } + } + + return object_error::success; +} + /*===-- Miscellaneous -----------------------------------------------------===*/ uint8_t MachOObjectFile::getBytesInAddress() const { -- cgit v1.1 From c8aa726ec22a362bf9fe0d81fdcf68a48bfc999f Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Tue, 25 Oct 2011 20:44:00 +0000 Subject: Remove extraneous printing of "-PC". git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142970 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index a4d7cb5..9abdc8b 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -828,7 +828,6 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, report_fatal_error(ec.message()); fmt << Name << "-" << SucName; - if (isPCRel) fmt << "-PC"; } case 6: // X86_64_RELOC_SIGNED1 fmt << Name << "-1"; -- cgit v1.1 From aaf98ead114bcd51cd2479badfe28b8d0e4895c2 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Tue, 25 Oct 2011 22:30:42 +0000 Subject: Object/Archive: Add BSD style long file name support and skip internal members. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142981 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Archive.cpp | 55 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index e2eaff5..348b753 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -32,7 +32,11 @@ struct ArchiveMemberHeader { ///! Get the name without looking up long names. 
StringRef getName() const { - char EndCond = Name[0] == '/' ? ' ' : '/'; + char EndCond; + if (Name[0] == '/' || Name[0] == '#') + EndCond = ' '; + else + EndCond = '/'; StringRef::size_type end = StringRef(Name, sizeof(Name)).find(EndCond); if (end == StringRef::npos) end = sizeof(Name); @@ -53,6 +57,21 @@ const ArchiveMemberHeader *ToHeader(const char *base) { } } +static bool isInternalMember(const ArchiveMemberHeader &amh) { + const char *internals[] = { + "/", + "//", + "#_LLVM_SYM_TAB_#" + }; + + StringRef name = amh.getName(); + for (std::size_t i = 0; i < sizeof(internals) / sizeof(*internals); ++i) { + if (name == internals[i]) + return true; + } + return false; +} + Archive::Child Archive::Child::getNext() const { size_t SpaceToSkip = sizeof(ArchiveMemberHeader) + ToHeader(Data.data())->getSize(); @@ -101,6 +120,11 @@ error_code Archive::Child::getName(StringRef &Result) const { return object_error::parse_failed; Result = addr; return object_error::success; + } else if (name.startswith("#1/")) { + APInt name_size; + name.substr(3).getAsInteger(10, name_size); + Result = Data.substr(0, name_size.getZExtValue()); + return object_error::success; } // It's a simple name. if (name[name.size() - 1] == '/') @@ -111,14 +135,27 @@ error_code Archive::Child::getName(StringRef &Result) const { } uint64_t Archive::Child::getSize() const { - return ToHeader(Data.data())->getSize(); + uint64_t size = ToHeader(Data.data())->getSize(); + // Don't include attached name. 
+ StringRef name = ToHeader(Data.data())->getName(); + if (name.startswith("#1/")) { + APInt name_size; + name.substr(3).getAsInteger(10, name_size); + size -= name_size.getZExtValue(); + } + return size; } MemoryBuffer *Archive::Child::getBuffer() const { StringRef name; if (getName(name)) return NULL; - return MemoryBuffer::getMemBuffer(Data.substr(sizeof(ArchiveMemberHeader), - getSize()), + int size = sizeof(ArchiveMemberHeader); + if (name.startswith("#1/")) { + APInt name_size; + name.substr(3).getAsInteger(10, name_size); + size += name_size.getZExtValue(); + } + return MemoryBuffer::getMemBuffer(Data.substr(size, getSize()), name, false); } @@ -144,7 +181,7 @@ Archive::Archive(MemoryBuffer *source, error_code &ec) } // Get the string table. It's the 3rd member. - child_iterator StrTable = begin_children(); + child_iterator StrTable = begin_children(false); child_iterator e = end_children(); for (int i = 0; StrTable != e && i < 2; ++StrTable, ++i) {} @@ -156,11 +193,15 @@ Archive::Archive(MemoryBuffer *source, error_code &ec) ec = object_error::success; } -Archive::child_iterator Archive::begin_children() const { +Archive::child_iterator Archive::begin_children(bool skip_internal) const { const char *Loc = Data->getBufferStart() + Magic.size(); size_t Size = sizeof(ArchiveMemberHeader) + ToHeader(Loc)->getSize(); - return Child(this, StringRef(Loc, Size)); + Child c(this, StringRef(Loc, Size)); + // Skip internals at the beginning of an archive. + if (skip_internal && isInternalMember(*ToHeader(Loc))) + return c.getNext(); + return c; } Archive::child_iterator Archive::end_children() const { -- cgit v1.1 From fe944e8cd03701b699c07590b4ec5203928b2b11 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Tue, 25 Oct 2011 22:31:11 +0000 Subject: Object/Archive: Cleanup anon namespace. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142983 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Archive.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index 348b753..f1c4b2d 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -18,9 +18,9 @@ using namespace llvm; using namespace object; -namespace { -const StringRef Magic = "!\n"; +static const StringRef Magic = "!\n"; +namespace { struct ArchiveMemberHeader { char Name[16]; char LastModified[12]; @@ -51,11 +51,12 @@ struct ArchiveMemberHeader { return ret.getZExtValue(); } }; +} -const ArchiveMemberHeader *ToHeader(const char *base) { +static const ArchiveMemberHeader *ToHeader(const char *base) { return reinterpret_cast(base); } -} + static bool isInternalMember(const ArchiveMemberHeader &amh) { const char *internals[] = { @@ -207,7 +208,3 @@ Archive::child_iterator Archive::begin_children(bool skip_internal) const { Archive::child_iterator Archive::end_children() const { return Child(this, StringRef(0, 0)); } - -namespace llvm { - -} // end namespace llvm -- cgit v1.1 From cf62b371a970d109fa373e2fc2f5a024cdadcf42 Mon Sep 17 00:00:00 2001 From: Mon P Wang Date: Wed, 26 Oct 2011 00:34:48 +0000 Subject: The bitcode reader can create a shuffle with a place holder mask which it will fix up later. For this special case, allow such a mask to be considered valid. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142992 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/VMCore/Instructions.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp index b3a7205..c8dcdc8 100644 --- a/lib/VMCore/Instructions.cpp +++ b/lib/VMCore/Instructions.cpp @@ -1576,10 +1576,17 @@ bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2, return false; } } - } - else if (!isa(Mask) && !isa(Mask)) + } else if (!isa(Mask) && !isa(Mask)) { + // The bitcode reader can create a place holder for a forward reference + // used as the shuffle mask. When this occurs, the shuffle mask will + // fall into this case and fail. To avoid this error, do this bit of + // ugliness to allow such a mask pass. + if (const ConstantExpr* CE = dyn_cast(Mask)) { + if (CE->getOpcode() == Instruction::UserOp1) + return true; + } return false; - + } return true; } -- cgit v1.1 From a823e3d42c48ceeb2f5011f7402031bf02fa0fe1 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 26 Oct 2011 01:10:25 +0000 Subject: Reapply r142920 with fix: An MBB which branches to an EH landing pad shouldn't be considered for tail merging. In SjLj EH, the jump to the landing pad is not done explicitly through a branch statement. The EH landing pad is added as a successor to the throwing BB. Because of that however, the branch folding pass could mistakenly think that it could merge the throwing BB with another BB. This isn't safe to do. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143001 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/BranchFolding.cpp | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 75288b0..5dec368 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -870,6 +870,9 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { // Visit each predecessor only once. if (!UniquePreds.insert(PBB)) continue; + // Skip blocks which may jump to a landing pad. Can't tail merge these. + if (PBB->getLandingPadSuccessor()) + continue; MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector Cond; if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) { -- cgit v1.1 From f31151f34ec37dd49bdf998e9e352d572f4a8e06 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 26 Oct 2011 01:17:44 +0000 Subject: Revert part of r142530. The patch potentially hurts performance especially on Darwin platforms where -Os means optimize for size without hurting performance. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143002 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 3f913de..fa5f0be 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2106,8 +2106,9 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, // If we have T2 ops, we can materialize the address directly via movt/movw // pair. This is always cheaper in terms of performance, but uses at least 2 // extra bytes. 
+ MachineFunction &MF = DAG.getMachineFunction(); if (Subtarget->useMovt() && - !DAG.getMachineFunction().getFunction()->hasFnAttr(Attribute::OptimizeForSize)) { + !MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. @@ -2132,8 +2133,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, ARMFunctionInfo *AFI = MF.getInfo(); // FIXME: Enable this for static codegen when tool issues are fixed. - if (Subtarget->useMovt() && RelocM != Reloc::Static && - !DAG.getMachineFunction().getFunction()->hasFnAttr(Attribute::OptimizeForSize)) { + if (Subtarget->useMovt() && RelocM != Reloc::Static) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. -- cgit v1.1 From 73b5bb38650a1a1441fcf210c79f188d08990946 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 26 Oct 2011 01:26:57 +0000 Subject: Disable LICM speculation in high register pressure situation again now that Devang has fixed other issues. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143003 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineLICM.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 964e971..e756ded 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -45,7 +45,7 @@ using namespace llvm; static cl::opt AvoidSpeculation("avoid-speculation", cl::desc("MachineLICM should avoid speculation"), - cl::init(false), cl::Hidden); + cl::init(true), cl::Hidden); STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops"); -- cgit v1.1 From 794439183a29102c784484720c3d0b1960b297e6 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 26 Oct 2011 01:47:48 +0000 Subject: Don't use floating point to do an integer's job. 
This code makes different decisions when compiled into x87 instructions because of different rounding behavior. That caused phase 2/3 miscompares on 32-bit Linux when the phase 1 compiler was built with gcc (using x87), and the phase 2 compiler was built with clang (using SSE). This fixes PR11200. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143006 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 24bd2cd..4822da3 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -2034,14 +2034,17 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, return false; APInt Range = ComputeRange(First, Last); - double Density = TSize.roundToDouble() / Range.roundToDouble(); - if (Density < 0.4) + // The density is TSize / Range. Require at least 40%. + // It should not be possible for IntTSize to saturate for sane code, but make + // sure we handle Range saturation correctly. + uint64_t IntRange = Range.getLimitedValue(UINT64_MAX/10); + uint64_t IntTSize = TSize.getLimitedValue(UINT64_MAX/10); + if (IntTSize * 10 < IntRange * 4) return false; DEBUG(dbgs() << "Lowering jump table\n" << "First entry: " << First << ". Last entry: " << Last << '\n' - << "Range: " << Range - << ". Size: " << TSize << ". Density: " << Density << "\n\n"); + << "Range: " << Range << ". Size: " << TSize << ".\n\n"); // Get the MachineFunction which holds the current MBB. This is used when // inserting any additional MBBs necessary to represent the switch. -- cgit v1.1 From b64541fc7285c7305c6df7a83e4514a89bdaaa7b Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 26 Oct 2011 03:47:16 +0000 Subject: Remove unused variable. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143011 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 9abdc8b..e3a7499 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -793,7 +793,6 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, InMemoryStruct RE; getRelocation(Rel, RE); - bool isPCRel = (RE->Word1 >> 25) & 1; unsigned Type = (RE->Word1 >> 28) & 0xF; std::string fmtbuf; -- cgit v1.1 From de39d86f26c5fa244a11d79988f26f52accabaf0 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 26 Oct 2011 07:16:18 +0000 Subject: Use a worklist to prevent the iterator from becoming invalidated because of the 'removeSuccessor' call. Noticed in a Release+Asserts+Check buildbot. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143018 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index fa5f0be..d7aca30 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -5996,9 +5996,10 @@ EmitSjLjDispatchBlock(MachineInstr *MI, MachineBasicBlock *MBB) const { // Remove the landing pad successor from the invoke block and replace it // with the new dispatch block. 
- for (MachineBasicBlock::succ_iterator - SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SMBB = *SI; + SmallVector Successors(BB->succ_begin(), + BB->succ_end()); + while (!Successors.empty()) { + MachineBasicBlock *SMBB = Successors.pop_back_val(); if (SMBB->isLandingPad()) { BB->removeSuccessor(SMBB); MBBLPads.push_back(SMBB); -- cgit v1.1 From 015cca6c08a5da57b78796d82ab2679e8f5dabe1 Mon Sep 17 00:00:00 2001 From: James Molloy Date: Wed, 26 Oct 2011 08:53:19 +0000 Subject: Revert r142530 at least temporarily while a discussion is had on llvm-commits regarding exactly how much optsize should optimize for size over performance. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143023 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index d7aca30..472bf4c 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -2104,11 +2104,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, } // If we have T2 ops, we can materialize the address directly via movt/movw - // pair. This is always cheaper in terms of performance, but uses at least 2 - // extra bytes. - MachineFunction &MF = DAG.getMachineFunction(); - if (Subtarget->useMovt() && - !MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize)) { + // pair. This is always cheaper. + if (Subtarget->useMovt()) { ++NumMovwMovt; // FIXME: Once remat is capable of dealing with instructions with register // operands, expand this into two nodes. 
-- cgit v1.1 From a921a468542a804ccebb680935175798ac48868b Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Wed, 26 Oct 2011 14:11:18 +0000 Subject: Simplify SplitVecRes_UnaryOp by removing all the code that is trying to legalize the operand types when only the result type is required to be legalized - the type legalization machinery will get round to the operands later if they need legalizing. There can be a point to legalizing operands in parallel with the result: when this saves compile time or results in better code. There was only one case in which this was true: when the operand is also split, so keep the logic for that bit. As a result of this change, additional operand legalization methods may need to be introduced to handle nodes where the result and operand types can differ, like SIGN_EXTEND, but the testsuite doesn't contain any tests where this is the case. In any case, it seems better to require such methods (and die with an assert if they don't exist) than to quietly produce wrong code if we forgot to special case the node in SplitVecRes_UnaryOp. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143026 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 48 +++--------------------- 1 file changed, 5 insertions(+), 43 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 8916e08..5f8931d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -773,56 +773,18 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, DebugLoc dl = N->getDebugLoc(); GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - // Split the input. + // If the input also splits, handle it directly for a compile time speedup. + // Otherwise split it by hand. 
EVT InVT = N->getOperand(0).getValueType(); - switch (getTypeAction(InVT)) { - default: llvm_unreachable("Unexpected type action!"); - case TargetLowering::TypeLegal: { + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + GetSplitVector(N->getOperand(0), Lo, Hi); + } else { EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), LoVT.getVectorNumElements()); Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(0)); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case TargetLowering::TypePromoteInteger: { - SDValue InOp; - if (N->getOpcode() == ISD::SIGN_EXTEND || - N->getOpcode() == ISD::SINT_TO_FP) { - InOp = SExtPromotedInteger(N->getOperand(0)); - } else if ( - N->getOpcode() == ISD::ZERO_EXTEND || - N->getOpcode() == ISD::UINT_TO_FP) { - InOp = ZExtPromotedInteger(N->getOperand(0)); - } else { - InOp = GetPromotedInteger(N->getOperand(0)); - } - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), - InOp.getValueType().getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } - case TargetLowering::TypeSplitVector: - GetSplitVector(N->getOperand(0), Lo, Hi); - break; - case TargetLowering::TypeWidenVector: { - // If the result needs to be split and the input needs to be widened, - // the two types must have different lengths. Use the widened result - // and extract from it to do the split. 
- SDValue InOp = GetWidenedVector(N->getOperand(0)); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - break; - } } if (N->getOpcode() == ISD::FP_ROUND) { -- cgit v1.1 From e8ec225e776c6b73ffc7d28f21677ce53b040df5 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Wed, 26 Oct 2011 15:31:51 +0000 Subject: My super-optimizer noticed that we weren't folding this expression to true: (x *nsw x) sgt 0, where x = (y | 1). This occurs in 464.h264ref. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143028 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ValueTracking.cpp | 50 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index e80ee65..f2740a6 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -201,9 +201,36 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1); ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + bool isKnownNegative = false; + bool isKnownNonNegative = false; + // If the multiplication is known not to overflow, compute the sign bit. 
+ if (Mask.isNegative() && cast(I)->hasNoSignedWrap()) { + Value *Op1 = I->getOperand(1), *Op2 = I->getOperand(0); + if (Op1 == Op2) { + // The product of a number with itself is non-negative. + isKnownNonNegative = true; + } else { + bool isKnownNonNegative1 = KnownZero.isNegative(); + bool isKnownNonNegative2 = KnownZero2.isNegative(); + bool isKnownNegative1 = KnownOne.isNegative(); + bool isKnownNegative2 = KnownOne2.isNegative(); + // The product of two numbers with the same sign is non-negative. + isKnownNonNegative = (isKnownNegative1 && isKnownNegative2) || + (isKnownNonNegative1 && isKnownNonNegative2); + // The product of a negative number and a non-negative number is either + // negative or zero. + isKnownNegative = (isKnownNegative1 && isKnownNonNegative2 && + isKnownNonZero(Op2, TD, Depth)) || + (isKnownNegative2 && isKnownNonNegative1 && + isKnownNonZero(Op1, TD, Depth)); + assert(!(isKnownNegative && isKnownNonNegative) && + "Sign bit both zero and one?"); + } + } + // If low bits are zero in either operand, output low known-0 bits. // Also compute a conserative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the @@ -220,6 +247,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | APInt::getHighBitsSet(BitWidth, LeadZ); KnownZero &= Mask; + + if (isKnownNonNegative) + KnownZero.setBit(BitWidth - 1); + else if (isKnownNegative) + KnownOne.setBit(BitWidth - 1); + return; } case Instruction::UDiv: { @@ -767,7 +800,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { } // The remaining tests are all recursive, so bail out if we hit the limit. 
- if (Depth++ == MaxDepth) + if (Depth++ >= MaxDepth) return false; unsigned BitWidth = getBitWidth(V->getType(), TD); @@ -851,6 +884,15 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth)) return true; } + // X * Y. + else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) { + BinaryOperator *BO = cast(V); + // If X and Y are non-zero then so is X * Y as long as the multiplication + // does not overflow. + if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && + isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth)) + return true; + } // (C ? X : Y) != 0 if X != 0 and Y != 0. else if (SelectInst *SI = dyn_cast(V)) { if (isKnownNonZero(SI->getTrueValue(), TD, Depth) && -- cgit v1.1 From 929e27cc3c67d4c6355c7b56b2983722d28d2624 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 26 Oct 2011 17:05:20 +0000 Subject: Improve pretty printing of GOT relocations in MachO on x86_64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143031 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index e3a7499..d973f43 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -807,8 +807,15 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, StringRef Name; if (error_code ec = getRelocationTargetName(RE->Word1, Name)) report_fatal_error(ec.message()); + bool isPCRel = ((RE->Word1 >> 24) & 1); switch (Type) { + case 3: // X86_64_RELOC_GOT_LOAD + case 4: { // X86_64_RELOC_GOT + fmt << Name << "@GOT"; + if (isPCRel) fmt << "PCREL"; + break; + } case 5: { // X86_64_RELOC_SUBTRACTOR InMemoryStruct RENext; DataRefImpl RelNext = Rel; -- cgit v1.1 From 9472b8d220fc746498429f430cb4c4dbfbd38f2a Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 26 Oct 2011 17:08:49 +0000 Subject: Expand 
relocation type field to 64 bits. MachO scattered relocations require 33 bits of type info. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143032 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/COFFObjectFile.cpp | 2 +- lib/Object/ELFObjectFile.cpp | 4 ++-- lib/Object/MachOObjectFile.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index a048cf5..f19836b 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -623,7 +623,7 @@ error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel, return object_error::success; } error_code COFFObjectFile::getRelocationType(DataRefImpl Rel, - uint32_t &Res) const { + uint64_t &Res) const { const coff_relocation* R = toRel(Rel); Res = R->Type; return object_error::success; diff --git a/lib/Object/ELFObjectFile.cpp b/lib/Object/ELFObjectFile.cpp index 97ba916..d1a43e7 100644 --- a/lib/Object/ELFObjectFile.cpp +++ b/lib/Object/ELFObjectFile.cpp @@ -358,7 +358,7 @@ protected: virtual error_code getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const; virtual error_code getRelocationType(DataRefImpl Rel, - uint32_t &Res) const; + uint64_t &Res) const; virtual error_code getRelocationTypeName(DataRefImpl Rel, SmallVectorImpl &Result) const; virtual error_code getRelocationAdditionalInfo(DataRefImpl Rel, @@ -923,7 +923,7 @@ error_code ELFObjectFile template error_code ELFObjectFile ::getRelocationType(DataRefImpl Rel, - uint32_t &Result) const { + uint64_t &Result) const { const Elf_Shdr *sec = getSection(Rel.w.b); switch (sec->sh_type) { default : diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index d973f43..9ed5445 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -634,7 +634,7 @@ error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, return object_error::success; } error_code 
MachOObjectFile::getRelocationType(DataRefImpl Rel, - uint32_t &Res) const { + uint64_t &Res) const { InMemoryStruct RE; getRelocation(Rel, RE); Res = RE->Word1; -- cgit v1.1 From f8261e7d07a8994e08cdba9f0e336e645daf104c Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 26 Oct 2011 17:10:22 +0000 Subject: Include the full 64 bits of relocation data in the type info for MachO relocations, so that we can recognize scattered relocations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143033 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 9ed5445..82b9231 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -637,7 +637,9 @@ error_code MachOObjectFile::getRelocationType(DataRefImpl Rel, uint64_t &Res) const { InMemoryStruct RE; getRelocation(Rel, RE); - Res = RE->Word1; + Res = RE->Word0; + Res <<= 32; + Res |= RE->Word1; return object_error::success; } error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, -- cgit v1.1 From 9c5edc00c41c29be5b088710a4a7ae8507179b64 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 26 Oct 2011 17:28:15 +0000 Subject: Thumb2 remove redundant ".w" suffix from t2MVNCCi pattern. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143034 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index a65a75f..3facc64 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -2887,7 +2887,7 @@ def t2MOVCCi32imm : PseudoInst<(outs rGPR:$dst), let isMoveImm = 1 in def t2MVNCCi : T2OneRegImm<(outs rGPR:$Rd), (ins rGPR:$false, t2_so_imm:$imm), - IIC_iCMOVi, "mvn", ".w\t$Rd, $imm", + IIC_iCMOVi, "mvn", "\t$Rd, $imm", [/*(set rGPR:$Rd,(ARMcmov rGPR:$false,t2_so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, RegConstraint<"$false = $Rd"> { -- cgit v1.1 From ef22f78c68d3f096b3c8aac2752ca8e3e25106a6 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 26 Oct 2011 17:28:49 +0000 Subject: The order of the two symbol listings in a Macho x86_64 subtractor relocation is reversed from what seems intuitive to me. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143035 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 82b9231..97eb52d 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -835,7 +835,9 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, if (error_code ec = getRelocationTargetName(RENext->Word1, SucName)) report_fatal_error(ec.message()); - fmt << Name << "-" << SucName; + // The X86_64_RELOC_UNSIGNED contains the minuend symbol, + // X86_64_SUBTRACTOR contains to the subtrahend. 
+ fmt << SucName << "-" << Name; } case 6: // X86_64_RELOC_SIGNED1 fmt << Name << "-1"; -- cgit v1.1 From 5a86c5b4575d8ac8ca1d0bfa068e1ffcf20ee7ea Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 26 Oct 2011 18:44:32 +0000 Subject: On an ELF system, ".debug_str" is mergeable and contains null terminated strings composed of one byte characters. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143044 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCObjectFileInfo.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index df8b99d..397e9c3 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -354,8 +354,9 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); DwarfStrSection = - Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); + Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS, + ELF::SHF_MERGE | ELF::SHF_STRINGS, + SectionKind::getMergeable1ByteCString()); DwarfLocSection = Ctx->getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); -- cgit v1.1 From 1832f4d94eb292d63824eaa043118ed6cc61389b Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 26 Oct 2011 20:42:54 +0000 Subject: Add support for scattered relocations to the MachO relocation pretty printer. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143051 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 230 ++++++++++++++++++++++++++--------------- 1 file changed, 144 insertions(+), 86 deletions(-) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 97eb52d..06d62fa 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -15,8 +15,8 @@ #include "llvm/ADT/Triple.h" #include "llvm/Object/MachO.h" #include "llvm/Object/MachOFormat.h" +#include "llvm/Support/Format.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/raw_ostream.h" #include #include @@ -609,7 +609,17 @@ error_code MachOObjectFile::getRelocationAddress(DataRefImpl Rel, } InMemoryStruct RE; getRelocation(Rel, RE); - Res = reinterpret_cast(sectAddress + RE->Word0); + + unsigned Arch = getArch(); + bool isScattered = (Arch != Triple::x86_64) && + (RE->Word0 & macho::RF_Scattered); + uint64_t RelAddr = 0; + if (isScattered) + RelAddr = RE->Word0 & 0xFFFFFF; + else + RelAddr = RE->Word0; + + Res = reinterpret_cast(sectAddress + RelAddr); return object_error::success; } error_code MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, @@ -648,9 +658,17 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, StringRef res; InMemoryStruct RE; getRelocation(Rel, RE); - unsigned r_type = (RE->Word1 >> 28) & 0xF; unsigned Arch = getArch(); + bool isScattered = (Arch != Triple::x86_64) && + (RE->Word0 & macho::RF_Scattered); + + unsigned r_type; + if (isScattered) + r_type = (RE->Word0 >> 24) & 0xF; + else + r_type = (RE->Word1 >> 28) & 0xF; + switch (Arch) { case Triple::x86: { const char* Table[] = { @@ -771,23 +789,56 @@ void advanceTo(T &it, size_t Val) { report_fatal_error(ec.message()); } -error_code -MachOObjectFile::getRelocationTargetName(uint32_t Idx, StringRef &S) const { - bool isExtern = (Idx >> 27) & 1; - uint32_t Val = Idx & 0xFFFFFF; - error_code ec; +void 
MachOObjectFile::printRelocationTargetName( + InMemoryStruct& RE, + raw_string_ostream &fmt) const { + unsigned Arch = getArch(); + bool isScattered = (Arch != Triple::x86_64) && + (RE->Word0 & macho::RF_Scattered); + + // Target of a scattered relocation is an address. In the interest of + // generating pretty output, scan through the symbol table looking for a + // symbol that aligns with that address. If we find one, print it. + // Otherwise, we just print the hex address of the target. + if (isScattered) { + uint32_t Val = RE->Word1; + + error_code ec; + for (symbol_iterator SI = begin_symbols(), SE = end_symbols(); SI != SE; + SI.increment(ec)) { + if (ec) report_fatal_error(ec.message()); + + uint64_t Addr; + StringRef Name; + + if ((ec = SI->getAddress(Addr))) + report_fatal_error(ec.message()); + if (Addr != Val) continue; + if ((ec = SI->getName(Name))) + report_fatal_error(ec.message()); + fmt << Name; + return; + } + + fmt << format("0x%x", Val); + return; + } + + StringRef S; + bool isExtern = (RE->Word1 >> 27) & 1; + uint32_t Val = RE->Word1 & 0xFFFFFF; if (isExtern) { symbol_iterator SI = begin_symbols(); advanceTo(SI, Val); - ec = SI->getName(S); + SI->getName(S); } else { section_iterator SI = begin_sections(); advanceTo(SI, Val); - ec = SI->getName(S); + SI->getName(S); } - return ec; + fmt << S; } error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, @@ -795,30 +846,35 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, InMemoryStruct RE; getRelocation(Rel, RE); - unsigned Type = (RE->Word1 >> 28) & 0xF; + unsigned Arch = getArch(); + bool isScattered = (Arch != Triple::x86_64) && + (RE->Word0 & macho::RF_Scattered); std::string fmtbuf; raw_string_ostream fmt(fmtbuf); + unsigned Type; + if (isScattered) + Type = (RE->Word0 >> 24) & 0xF; + else + Type = (RE->Word1 >> 28) & 0xF; + // Determine any addends that should be displayed with the relocation. 
// These require decoding the relocation type, which is triple-specific. - unsigned Arch = getArch(); // X86_64 has entirely custom relocation types. if (Arch == Triple::x86_64) { - StringRef Name; - if (error_code ec = getRelocationTargetName(RE->Word1, Name)) - report_fatal_error(ec.message()); bool isPCRel = ((RE->Word1 >> 24) & 1); switch (Type) { - case 3: // X86_64_RELOC_GOT_LOAD - case 4: { // X86_64_RELOC_GOT - fmt << Name << "@GOT"; + case macho::RIT_X86_64_GOTLoad: // X86_64_RELOC_GOT_LOAD + case macho::RIT_X86_64_GOT: { // X86_64_RELOC_GOT + printRelocationTargetName(RE, fmt); + fmt << "@GOT"; if (isPCRel) fmt << "PCREL"; break; } - case 5: { // X86_64_RELOC_SUBTRACTOR + case macho::RIT_X86_64_Subtractor: { // X86_64_RELOC_SUBTRACTOR InMemoryStruct RENext; DataRefImpl RelNext = Rel; RelNext.d.a++; @@ -826,40 +882,42 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, // X86_64_SUBTRACTOR must be followed by a relocation of type // X86_64_RELOC_UNSIGNED. + // NOTE: Scattered relocations don't exist on x86_64. unsigned RType = (RENext->Word1 >> 28) & 0xF; if (RType != 0) report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " "X86_64_RELOC_SUBTRACTOR."); - StringRef SucName; - if (error_code ec = getRelocationTargetName(RENext->Word1, SucName)) - report_fatal_error(ec.message()); - // The X86_64_RELOC_UNSIGNED contains the minuend symbol, // X86_64_SUBTRACTOR contains to the subtrahend. 
- fmt << SucName << "-" << Name; + printRelocationTargetName(RENext, fmt); + fmt << "-"; + printRelocationTargetName(RE, fmt); } - case 6: // X86_64_RELOC_SIGNED1 - fmt << Name << "-1"; + case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1 + printRelocationTargetName(RE, fmt); + fmt << "-1"; break; - case 7: // X86_64_RELOC_SIGNED2 - fmt << Name << "-2"; + case macho::RIT_X86_64_Signed2: // X86_64_RELOC_SIGNED2 + printRelocationTargetName(RE, fmt); + fmt << "-2"; break; - case 8: // X86_64_RELOC_SIGNED4 - fmt << Name << "-4"; + case macho::RIT_X86_64_Signed4: // X86_64_RELOC_SIGNED4 + printRelocationTargetName(RE, fmt); + fmt << "-4"; break; default: - fmt << Name; + printRelocationTargetName(RE, fmt); break; } // X86 and ARM share some relocation types in common. } else if (Arch == Triple::x86 || Arch == Triple::arm) { // Generic relocation types... switch (Type) { - case 1: // GENERIC_RELOC_PAIR - prints no info + case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info return object_error::success; - case 2: // GENERIC_RELOC_SECTDIFF - case 4: { // GENERIC_RELOC_LOCAL_SECTDIFF + case macho::RIT_Difference: // GENERIC_RELOC_SECTDIFF + case macho::RIT_Generic_LocalDifference: { // GENERIC_RELOC_LOCAL_SECTDIFF InMemoryStruct RENext; DataRefImpl RelNext = Rel; RelNext.d.a++; @@ -867,47 +925,46 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, // X86 sect diff's must be followed by a relocation of type // GENERIC_RELOC_PAIR. 
- unsigned RType = (RENext->Word1 >> 28) & 0xF; + bool isNextScattered = (Arch != Triple::x86_64) && + (RENext->Word0 & macho::RF_Scattered); + unsigned RType; + if (isNextScattered) + RType = (RENext->Word0 >> 24) & 0xF; + else + RType = (RENext->Word1 >> 28) & 0xF; if (RType != 1) report_fatal_error("Expected GENERIC_RELOC_PAIR after " "GENERIC_RELOC_SECTDIFF or " "GENERIC_RELOC_LOCAL_SECTDIFF."); - StringRef SucName; - if (error_code ec = getRelocationTargetName(RENext->Word1, SucName)) - report_fatal_error(ec.message()); - - StringRef Name; - if (error_code ec = getRelocationTargetName(RE->Word1, Name)) - report_fatal_error(ec.message()); - - fmt << Name << "-" << SucName; + printRelocationTargetName(RE, fmt); + fmt << "-"; + printRelocationTargetName(RENext, fmt); break; } } - if (Arch == Triple::x86 && Type != 1) { + if (Arch == Triple::x86) { // All X86 relocations that need special printing were already // handled in the generic code. - StringRef Name; - if (error_code ec = getRelocationTargetName(RE->Word1, Name)) - report_fatal_error(ec.message()); - fmt << Name; + printRelocationTargetName(RE, fmt); } else { // ARM-specific relocations switch (Type) { - case 8: // ARM_RELOC_HALF - case 9: { // ARM_RELOC_HALF_SECTDIFF - StringRef Name; - if (error_code ec = getRelocationTargetName(RE->Word1, Name)) - report_fatal_error(ec.message()); - + case macho::RIT_ARM_Half: // ARM_RELOC_HALF + case macho::RIT_ARM_HalfDifference: { // ARM_RELOC_HALF_SECTDIFF // Half relocations steal a bit from the length field to encode // whether this is an upper16 or a lower16 relocation. 
- bool isUpper = (RE->Word1 >> 25) & 1; + bool isUpper; + if (isScattered) + isUpper = (RE->Word0 >> 28) & 1; + else + isUpper = (RE->Word1 >> 25) & 1; + if (isUpper) - fmt << ":upper16:(" << Name; + fmt << ":upper16:("; else - fmt << ":lower16:(" << Name; + fmt << ":lower16:("; + printRelocationTargetName(RE, fmt); InMemoryStruct RENext; DataRefImpl RelNext = Rel; @@ -916,45 +973,40 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, // ARM half relocs must be followed by a relocation of type // ARM_RELOC_PAIR. - unsigned RType = (RENext->Word1 >> 28) & 0xF; + bool isNextScattered = (Arch != Triple::x86_64) && + (RENext->Word0 & macho::RF_Scattered); + unsigned RType; + if (isNextScattered) + RType = (RENext->Word0 >> 24) & 0xF; + else + RType = (RENext->Word1 >> 28) & 0xF; + if (RType != 1) report_fatal_error("Expected ARM_RELOC_PAIR after " "GENERIC_RELOC_HALF"); - // A constant addend for the relocation is stored in the address - // field of the follow-on relocation. If this is a lower16 relocation - // we need to shift it left by 16 before using it. - int32_t Addend = RENext->Word0; - if (!isUpper) Addend <<= 16; + // NOTE: The half of the target virtual address is stashed in the + // address field of the secondary relocation, but we can't reverse + // engineer the constant offset from it without decoding the movw/movt + // instruction to find the other half in its immediate field. // ARM_RELOC_HALF_SECTDIFF encodes the second section in the // symbol/section pointer of the follow-on relocation. 
- StringRef SucName; - if (Type == 9) { // ARM_RELOC_HALF_SECTDIFF - if (error_code ec = getRelocationTargetName(RENext->Word1, SucName)) - report_fatal_error(ec.message()); + if (Type == macho::RIT_ARM_HalfDifference) { + fmt << "-"; + printRelocationTargetName(RENext, fmt); } - if (SucName.size()) fmt << "-" << SucName; - if (Addend > 0) fmt << "+" << Addend; - else if (Addend < 0) fmt << Addend; fmt << ")"; break; } default: { - StringRef Name; - if (error_code ec = getRelocationTargetName(RE->Word1, Name)) - report_fatal_error(ec.message()); - fmt << Name; + printRelocationTargetName(RE, fmt); } } } - } else { - StringRef Name; - if (error_code ec = getRelocationTargetName(RE->Word1, Name)) - report_fatal_error(ec.message()); - fmt << Name; - } + } else + printRelocationTargetName(RE, fmt); fmt.flush(); Result.append(fmtbuf.begin(), fmtbuf.end()); @@ -966,19 +1018,25 @@ error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel, InMemoryStruct RE; getRelocation(Rel, RE); - unsigned Type = (RE->Word1 >> 28) & 0xF; unsigned Arch = getArch(); + bool isScattered = (Arch != Triple::x86_64) && + (RE->Word0 & macho::RF_Scattered); + unsigned Type; + if (isScattered) + Type = (RE->Word0 >> 24) & 0xF; + else + Type = (RE->Word1 >> 28) & 0xF; Result = false; // On arches that use the generic relocations, GENERIC_RELOC_PAIR // is always hidden. if (Arch == Triple::x86 || Arch == Triple::arm) { - if (Type == 1) Result = true; + if (Type == macho::RIT_Pair) Result = true; } else if (Arch == Triple::x86_64) { // On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows // an X864_64_RELOC_SUBTRACTOR. 
- if (Type == 0 && Rel.d.a > 0) { + if (Type == macho::RIT_X86_64_Unsigned && Rel.d.a > 0) { DataRefImpl RelPrev = Rel; RelPrev.d.a--; InMemoryStruct REPrev; @@ -986,7 +1044,7 @@ error_code MachOObjectFile::getRelocationHidden(DataRefImpl Rel, unsigned PrevType = (REPrev->Word1 >> 28) & 0xF; - if (PrevType == 5) Result = true; + if (PrevType == macho::RIT_X86_64_Subtractor) Result = true; } } -- cgit v1.1 From dd3149d57977d0632cfaf24290dd93416fb2a0ef Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Wed, 26 Oct 2011 20:55:21 +0000 Subject: The maximum power of 2 dividing a power of 2 is itself. This occurs in 403.gcc and was spotted by my super-optimizer. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143054 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 9 +++++++++ lib/Analysis/ValueTracking.cpp | 37 ++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 131cc97..d9e3400 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1197,6 +1197,15 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const TargetData *TD, (A == Op0 || B == Op0)) return Op0; + // A & (-A) = A if A is a power of two or zero. + if (match(Op0, m_Neg(m_Specific(Op1))) || + match(Op1, m_Neg(m_Specific(Op0)))) { + if (isPowerOfTwo(Op0, TD, /*OrZero*/true)) + return Op0; + if (isPowerOfTwo(Op1, TD, /*OrZero*/true)) + return Op1; + } + // Try some generic simplifications for associative operations. 
if (Value *V = SimplifyAssociativeBinOp(Instruction::And, Op0, Op1, TD, DT, MaxRecurse)) diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index f2740a6..9ea2703 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -745,10 +745,15 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, /// bit set when defined. For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. -bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) { - if (ConstantInt *CI = dyn_cast(V)) - return CI->getValue().isPowerOf2(); - // TODO: Handle vector constants. +bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, bool OrZero, + unsigned Depth) { + if (Constant *C = dyn_cast(V)) { + if (C->isNullValue()) + return OrZero; + if (ConstantInt *CI = dyn_cast(C)) + return CI->getValue().isPowerOf2(); + // TODO: Handle vector constants. + } // 1 << X is clearly a power of two if the one is not shifted off the end. If // it is shifted off the end then the result is undefined. @@ -765,11 +770,23 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) { return false; if (ZExtInst *ZI = dyn_cast(V)) - return isPowerOfTwo(ZI->getOperand(0), TD, Depth); + return isPowerOfTwo(ZI->getOperand(0), TD, OrZero, Depth); if (SelectInst *SI = dyn_cast(V)) - return isPowerOfTwo(SI->getTrueValue(), TD, Depth) && - isPowerOfTwo(SI->getFalseValue(), TD, Depth); + return isPowerOfTwo(SI->getTrueValue(), TD, OrZero, Depth) && + isPowerOfTwo(SI->getFalseValue(), TD, OrZero, Depth); + + Value *X = 0, *Y = 0; + if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { + // A power of two and'd with anything is a power of two or zero. + if (isPowerOfTwo(X, TD, /*OrZero*/true, Depth) || + isPowerOfTwo(Y, TD, /*OrZero*/true, Depth)) + return true; + // X & (-X) is always a power of two or zero. 
+ if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) + return true; + return false; + } // An exact divide or right shift can only shift off zero bits, so the result // is a power of two only if the first operand is a power of two and not @@ -778,7 +795,7 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, unsigned Depth) { match(V, m_UDiv(m_Value(), m_Value()))) { PossiblyExactOperator *PEO = cast(V); if (PEO->isExact()) - return isPowerOfTwo(PEO->getOperand(0), TD, Depth); + return isPowerOfTwo(PEO->getOperand(0), TD, OrZero, Depth); } return false; @@ -879,9 +896,9 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { } // The sum of a non-negative number and a power of two is not zero. - if (XKnownNonNegative && isPowerOfTwo(Y, TD, Depth)) + if (XKnownNonNegative && isPowerOfTwo(Y, TD, /*OrZero*/false, Depth)) return true; - if (YKnownNonNegative && isPowerOfTwo(X, TD, Depth)) + if (YKnownNonNegative && isPowerOfTwo(X, TD, /*OrZero*/false, Depth)) return true; } // X * Y. -- cgit v1.1 From 75757f9fd13bffc51a6aa30eefe706c4f84d0913 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 26 Oct 2011 20:56:52 +0000 Subject: Make sure short memsets on ARM lower to stores, even when optimizing for size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143055 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 472bf4c..31e522d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -752,6 +752,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) //// temporary - rewrite interface to use type maxStoresPerMemcpy = maxStoresPerMemcpyOptSize = 1; + maxStoresPerMemset = 16; + maxStoresPerMemsetOptSize = Subtarget->isTargetDarwin() ? 
8 : 4; // On ARM arguments smaller than 4 bytes are extended, so all arguments // are at least 4 bytes aligned. -- cgit v1.1 From e840e88239cf92a065cbf5f5b9c7d18bc139c0e1 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 26 Oct 2011 21:12:27 +0000 Subject: This commit introduces two fake instructions MORESTACK_RET and MORESTACK_RET_RESTORE_R10; which are lowered to a RET and a RET followed by a MOV respectively. Having a fake instruction prevents the verifier from seeing a MachineBasicBlock end with a non-terminator (MOV). It also prevents the rather eccentric case of a MachineBasicBlock ending with RET but having successors nevertheless. Patch by Sanjoy Das. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143062 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 26 +++++--------------------- lib/Target/X86/X86InstrCompiler.td | 18 ++++++++++++++++++ lib/Target/X86/X86MCInstLower.cpp | 16 ++++++++++++++++ 3 files changed, 39 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index d54f4ae..ece90cb 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1336,26 +1336,16 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { // The MOV R10, RAX needs to be in a different block, since the RET we emit in // allocMBB needs to be last (terminating) instruction. 
- MachineBasicBlock *restoreR10MBB = NULL; - if (IsNested) - restoreR10MBB = MF.CreateMachineBasicBlock(); for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(), e = prologueMBB.livein_end(); i != e; i++) { allocMBB->addLiveIn(*i); checkMBB->addLiveIn(*i); - - if (IsNested) - restoreR10MBB->addLiveIn(*i); } - if (IsNested) { + if (IsNested) allocMBB->addLiveIn(X86::R10); - restoreR10MBB->addLiveIn(X86::RAX); - } - if (IsNested) - MF.push_front(restoreR10MBB); MF.push_front(allocMBB); MF.push_front(checkMBB); @@ -1425,18 +1415,12 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const { if (!Is64Bit) BuildMI(allocMBB, DL, TII.get(X86::ADD32ri), X86::ESP).addReg(X86::ESP) .addImm(8); - BuildMI(allocMBB, DL, TII.get(X86::RET)); - if (IsNested) - BuildMI(restoreR10MBB, DL, TII.get(X86::MOV64rr), X86::R10) - .addReg(X86::RAX); + BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10)); + else + BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET)); - if (IsNested) { - allocMBB->addSuccessor(restoreR10MBB); - restoreR10MBB->addSuccessor(&prologueMBB); - } else { - allocMBB->addSuccessor(&prologueMBB); - } + allocMBB->addSuccessor(&prologueMBB); checkMBB->addSuccessor(allocMBB); checkMBB->addSuccessor(&prologueMBB); diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index da28690..5f9bf16 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -150,6 +150,24 @@ def EH_RETURN64 : I<0xC3, RawFrm, (outs), (ins GR64:$addr), } //===----------------------------------------------------------------------===// +// Pseudo instructions used by segmented stacks. +// + +// This is lowered into a RET instruction by MCInstLower. We need +// this so that we don't have to have a MachineBasicBlock which ends +// with a RET and also has successors. 
+let isPseudo = 1 in { +def MORESTACK_RET: I<0, Pseudo, (outs), (ins), + "", []>; + +// This instruction is lowered to a RET followed by a MOV. The two +// instructions are not generated on a higher level since then the +// verifier sees a MachineBasicBlock ending with a non-terminator. +def MORESTACK_RET_RESTORE_R10 : I<0, Pseudo, (outs), (ins), + "", []>; +} + +//===----------------------------------------------------------------------===// // Alias Instructions //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 50bc14d..328cf67 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -527,6 +527,22 @@ ReSimplify: case X86::XOR16ri: SimplifyShortImmForm(OutMI, X86::XOR16i16); break; case X86::XOR32ri: SimplifyShortImmForm(OutMI, X86::XOR32i32); break; case X86::XOR64ri32: SimplifyShortImmForm(OutMI, X86::XOR64i32); break; + + case X86::MORESTACK_RET: + OutMI.setOpcode(X86::RET); + break; + + case X86::MORESTACK_RET_RESTORE_R10: { + MCInst retInst; + + OutMI.setOpcode(X86::MOV64rr); + OutMI.addOperand(MCOperand::CreateReg(X86::R10)); + OutMI.addOperand(MCOperand::CreateReg(X86::RAX)); + + retInst.setOpcode(X86::RET); + AsmPrinter.OutStreamer.EmitInstruction(retInst); + break; + } } } -- cgit v1.1 From 758a519a22b469ce8e2b8d0bf7a72813e87710d4 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 26 Oct 2011 21:14:08 +0000 Subject: ARM parse parenthesized expressions for label references. Partial fix for rdar://10348687. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143063 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index a278f6f..99f619c 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -3691,6 +3691,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl &Operands, // Fall though for the Identifier case that is not a register or a // special name. } + case AsmToken::LParen: // parenthesized expressions like (_strcmp-4) case AsmToken::Integer: // things like 1f and 2b as a branch targets case AsmToken::Dot: { // . as a branch target // This was not a register so parse other operands that start with an -- cgit v1.1 From 66bf7430f5586188944e45514abbb04fe3383107 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Wed, 26 Oct 2011 21:16:41 +0000 Subject: Fixes an issue reported by -verify-machineinstrs. Patch by Sanjoy Das. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143064 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 9 +++++---- lib/Target/X86/X86InstrCompiler.td | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 7dec7c4..ce2ef92 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11784,6 +11784,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass), bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass), tmpSPVReg = MRI.createVirtualRegister(AddrRegClass), + SPLimitVReg = MRI.createVirtualRegister(AddrRegClass), sizeVReg = MI->getOperand(1).getReg(), physSPReg = Is64Bit ? 
X86::RSP : X86::ESP; @@ -11801,19 +11802,19 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB, // Add code to the main basic block to check if the stack limit has been hit, // and if so, jump to mallocMBB otherwise to bumpMBB. BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg); - BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), tmpSPVReg) + BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg) .addReg(tmpSPVReg).addReg(sizeVReg); BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr)) .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg) - .addReg(tmpSPVReg); + .addReg(SPLimitVReg); BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB); // bumpMBB simply decreases the stack pointer, since we know the current // stacklet has enough space. BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg) - .addReg(tmpSPVReg); + .addReg(SPLimitVReg); BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg) - .addReg(tmpSPVReg); + .addReg(SPLimitVReg); BuildMI(bumpMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB); // Calls into a routine in libgcc to allocate more space from the heap. diff --git a/lib/Target/X86/X86InstrCompiler.td b/lib/Target/X86/X86InstrCompiler.td index 5f9bf16..e0cf669 100644 --- a/lib/Target/X86/X86InstrCompiler.td +++ b/lib/Target/X86/X86InstrCompiler.td @@ -112,14 +112,14 @@ let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in // allocated by bumping the stack pointer. Otherwise memory is allocated from // the heap. 
-let Defs = [EAX, ESP, EFLAGS], Uses = [ESP, EAX] in +let Defs = [EAX, ESP, EFLAGS], Uses = [ESP] in def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size), "# variable sized alloca for segmented stacks", [(set GR32:$dst, (X86SegAlloca GR32:$size))]>, Requires<[In32BitMode]>; -let Defs = [RAX, RSP, EFLAGS], Uses = [RSP, RAX] in +let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size), "# variable sized alloca for segmented stacks", [(set GR64:$dst, -- cgit v1.1 From a581328ceb4c9db165d79a4dabd6b28db799d70f Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 26 Oct 2011 22:22:01 +0000 Subject: Thumb2 ldr pc-relative encoding fixes. We were parsing label references to the i12 encoding, which isn't right. They need to go to the pci variant instead. More of rdar://10348687 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143068 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 17 ++++++++++++++++- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 6 ------ 2 files changed, 16 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 3facc64..cc137a8 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -1460,7 +1460,7 @@ def t2STRD_POST : T2Ii8s4post<0, 1, 0, (outs GPR:$wb), "$addr.base = $wb", []>; // T2Ipl (Preload Data/Instruction) signals the memory system of possible future -// data/instruction access. These are for disassembly only. +// data/instruction access. // instr_write is inverted for Thumb mode: (prefetch 3) -> (preload 0), // (prefetch 1) -> (preload 2), (prefetch 2) -> (preload 1). multiclass T2Ipl write, bits<1> instr, string opc> { @@ -1518,6 +1518,10 @@ multiclass T2Ipl write, bits<1> instr, string opc> { let DecoderMethod = "DecodeT2LoadShift"; } + // FIXME: We should have a separate 'pci' variant here. 
As-is we represent + // it via the i12 variant, which it's related to, but that means we can + // represent negative immediates, which aren't legal for anything except + // the 'pci' case (Rn == 15). } defm t2PLD : T2Ipl<0, 0, "pld">, Requires<[IsThumb2]>; @@ -3906,6 +3910,17 @@ def : t2InstAlias<"ldrsb${p} $Rt, $addr", def : t2InstAlias<"ldrsh${p} $Rt, $addr", (t2LDRSHs rGPR:$Rt, t2addrmode_so_reg:$addr, pred:$p)>; +def : t2InstAlias<"ldr${p} $Rt, $addr", + (t2LDRpci GPR:$Rt, t2ldrlabel:$addr, pred:$p)>; +def : t2InstAlias<"ldrb${p} $Rt, $addr", + (t2LDRBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; +def : t2InstAlias<"ldrh${p} $Rt, $addr", + (t2LDRHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; +def : t2InstAlias<"ldrsb${p} $Rt, $addr", + (t2LDRSBpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; +def : t2InstAlias<"ldrsh${p} $Rt, $addr", + (t2LDRSHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; + // Alias for MVN without the ".w" optional width specifier. def : t2InstAlias<"mvn${s}${p} $Rd, $Rm", (t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 99f619c..8803b68 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -865,12 +865,6 @@ public: return Val > -256 && Val < 0; } bool isMemUImm12Offset() const { - // If we have an immediate that's not a constant, treat it as a label - // reference needing a fixup. If it is a constant, it's something else - // and we reject it. - if (Kind == k_Immediate && !isa(getImm())) - return true; - if (!isMemory() || Memory.OffsetRegNum != 0 || Memory.Alignment != 0) return false; // Immediate offset in range [0, 4095]. -- cgit v1.1 From 684457d179fcfdf8c1f8ee9f66cc40665ecc648c Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 26 Oct 2011 22:44:41 +0000 Subject: Trailing whitespace. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143071 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCAssembler.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp index 06c8aec..29adbcb 100644 --- a/lib/MC/MCAssembler.cpp +++ b/lib/MC/MCAssembler.cpp @@ -118,7 +118,7 @@ uint64_t MCAsmLayout::getSymbolOffset(const MCSymbolData *SD) const { if (Target.getSymB() && Target.getSymB()->getSymbol().isUndefined()) report_fatal_error("unable to evaluate offset to undefined symbol '" + Target.getSymB()->getSymbol().getName() + "'"); - + uint64_t Offset = Target.getConstant(); if (Target.getSymA()) Offset += getSymbolOffset(&Assembler.getSymbolData( @@ -295,7 +295,7 @@ bool MCAssembler::EvaluateFixup(const MCAsmLayout &Layout, if (IsPCRel) { uint32_t Offset = Layout.getFragmentOffset(DF) + Fixup.getOffset(); - + // A number of ARM fixups in Thumb mode require that the effective PC // address be determined as the 32-bit aligned version of the actual offset. if (ShouldAlignPC) Offset &= ~0x3; -- cgit v1.1 From 530f7cefd3082e8aaa74b7d65636f30d0312b6ec Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 26 Oct 2011 22:47:55 +0000 Subject: Factor out an EmitCmp function that can be used by both SelectCmp and SelectBranch. No functional change intended. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143072 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 42 ++++++++++++++++++++++++------------------ 1 file changed, 24 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 73fd2e2..4e0ad36 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -173,6 +173,7 @@ class ARMFastISel : public FastISel { private: bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); + bool ARMEmitCmp(Type *Ty, const Value *Src1Value, const Value *Src2Value); bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr); bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); bool ARMComputeAddress(const Value *Obj, Address &Addr); @@ -1214,54 +1215,58 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { return true; } -bool ARMFastISel::SelectCmp(const Instruction *I) { - const CmpInst *CI = cast(I); - +bool ARMFastISel::ARMEmitCmp(Type *Ty, const Value *Src1Value, + const Value *Src2Value) { MVT VT; - Type *Ty = CI->getOperand(0)->getType(); if (!isTypeLegal(Ty, VT)) return false; - bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); - if (isFloat && !Subtarget->hasVFP2()) + if ((Ty->isFloatTy() || Ty->isDoubleTy()) && !Subtarget->hasVFP2()) return false; unsigned CmpOpc; - unsigned CondReg; switch (VT.SimpleTy) { default: return false; // TODO: Verify compares. case MVT::f32: CmpOpc = ARM::VCMPES; - CondReg = ARM::FPSCR; break; case MVT::f64: CmpOpc = ARM::VCMPED; - CondReg = ARM::FPSCR; break; case MVT::i32: CmpOpc = isThumb ? 
ARM::t2CMPrr : ARM::CMPrr; - CondReg = ARM::CPSR; break; } + unsigned Src1 = getRegForValue(Src1Value); + if (Src1 == 0) return false; + + unsigned Src2 = getRegForValue(Src2Value); + if (Src2 == 0) return false; + + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) + .addReg(Src1).addReg(Src2)); + return true; +} + +bool ARMFastISel::SelectCmp(const Instruction *I) { + const CmpInst *CI = cast(I); + // Get the compare predicate. ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); // We may not handle every CC for now. if (ARMPred == ARMCC::AL) return false; - unsigned Arg1 = getRegForValue(CI->getOperand(0)); - if (Arg1 == 0) return false; - - unsigned Arg2 = getRegForValue(CI->getOperand(1)); - if (Arg2 == 0) return false; - - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(Arg1).addReg(Arg2)); + // Emit the compare. + Type *Ty = CI->getOperand(0)->getType(); + if (!ARMEmitCmp(Ty, CI->getOperand(0), CI->getOperand(1))) + return false; // For floating point we need to move the result to a comparison register // that we can then use for branches. + bool isFloat = Ty->isFloatTy() || Ty->isDoubleTy(); if (isFloat) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::FMSTAT))); @@ -1275,6 +1280,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); unsigned ZeroReg = TargetMaterializeConstant(Zero); + unsigned CondReg = isFloat ? ARM::FPSCR : ARM::CPSR; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) .addReg(ZeroReg).addImm(1) .addImm(ARMPred).addReg(CondReg); -- cgit v1.1 From 746cb670c3cd4f79b288d56d8e9f195685a5381a Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 26 Oct 2011 22:55:33 +0000 Subject: Reflow lines, fix comments for doxygen style, fix whitespace. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143074 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 35 +++++++++++++---------------- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 26 +++++++++------------ 2 files changed, 27 insertions(+), 34 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 88b7524..9cabe55 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -98,7 +98,6 @@ void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, Die->addValue(Attribute, Form, createDIEEntry(Entry)); } - /// addBlock - Add block data. /// void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, @@ -135,8 +134,7 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { unsigned Line = G.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), - G.getDirectory()); + unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), G.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -155,7 +153,8 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { unsigned Line = SP.getLineNumber(); if (!SP.getContext().Verify()) return; - unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), SP.getDirectory()); + unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), + SP.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -171,7 +170,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { unsigned Line = Ty.getLineNumber(); if (Line == 0 || !Ty.getContext().Verify()) return; - unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), Ty.getDirectory()); + unsigned FileID = 
DD->GetOrCreateSourceID(Ty.getFilename(), + Ty.getDirectory()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -458,7 +458,7 @@ static bool isTypeSigned(DIType Ty, int *SizeInBits) { /// addConstantValue - Add constant value entry in variable DIE. bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty) { - assert (MO.isImm() && "Invalid machine operand!"); + assert(MO.isImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); int SizeInBits = -1; bool SignedConstant = isTypeSigned(Ty, &SizeInBits); @@ -479,7 +479,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, /// addConstantFPValue - Add constant value entry in variable DIE. bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isFPImm() && "Invalid machine operand!"); + assert(MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); @@ -556,8 +556,8 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { Buffer.addChild(getOrCreateTemplateValueParameterDIE( DITemplateValueParameter(Element))); } - } + /// addToContextOwner - Add Die into the list of its context owner's children. 
void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { if (Context.isType()) { @@ -669,7 +669,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { } Buffer.setTag(dwarf::DW_TAG_base_type); - addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, + addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, BTy.getEncoding()); uint64_t Size = BTy.getSizeInBits() >> 3; @@ -840,7 +840,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) - { + { // Add size if non-zero (derived types might be zero-sized.) if (Size) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -932,9 +932,8 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + getRealLinkageName(LinkageName)); // If this DIE is going to refer declaration info using AT_specification // then there is no need to add other attributes. @@ -943,8 +942,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - SP.getName()); + addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); addSourceLine(SPDie, SP); @@ -1051,13 +1049,12 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { // Add name. 
addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); + GV.getDisplayName()); StringRef LinkageName = GV.getLinkageName(); bool isGlobalVariable = GV.getGlobal() != NULL; if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - dwarf::DW_FORM_string, - getRealLinkageName(LinkageName)); + addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + dwarf::DW_FORM_string, getRealLinkageName(LinkageName)); // Add type. DIType GTy = GV.getType(); addType(VariableDIE, GTy); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 86b30b7..7ce9a06 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -203,7 +203,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, if (SP.isDefinition() && !SP.getContext().isCompileUnit() && !SP.getContext().isFile() && !isSubprogramContext(SP.getContext())) { - SPCU-> addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); + SPCU->addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); // Add arguments. DICompositeType SPTy = SP.getType(); @@ -248,7 +248,6 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; @@ -294,10 +293,9 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, /// of the function. 
DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - const SmallVector &Ranges = Scope->getRanges(); - assert (Ranges.empty() == false - && "LexicalScope does not have instruction markers!"); + assert(Ranges.empty() == false && + "LexicalScope does not have instruction markers!"); if (!Scope->getScopeNode()) return NULL; @@ -314,7 +312,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); if (StartLabel == 0 || EndLabel == 0) { - assert (0 && "Unexpected Start and End labels for a inlined scope!"); + assert(0 && "Unexpected Start and End labels for a inlined scope!"); return 0; } assert(StartLabel->isDefined() && @@ -358,8 +356,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, I = InlineInfo.find(InlinedSP); if (I == InlineInfo.end()) { - InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, - ScopeDIE)); + InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE)); InlinedSPNodes.push_back(InlinedSP); } else I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); @@ -376,7 +373,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (!Scope || !Scope->getScopeNode()) return NULL; - SmallVector Children; + SmallVector Children; // Collect arguments for current function. if (LScopes.isCurrentFunctionScope(Scope)) @@ -435,7 +432,6 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { /// source file names. If none currently exists, create a new id and insert it /// in the SourceIds map. This can update DirectoryNames and SourceFileNames /// maps as well. - unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, StringRef DirName) { // If FE did not provide a file name, then assume stdin. @@ -673,7 +669,7 @@ void DwarfDebug::endModule() { // Construct subprogram DIE and add variables DIEs. 
CompileUnit *SPCU = CUMap.lookup(TheCU); - assert (SPCU && "Unable to find Compile Unit!"); + assert(SPCU && "Unable to find Compile Unit!"); constructSubprogramDIE(SPCU, SP); DIE *ScopeDIE = SPCU->getDIE(SP); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { @@ -834,7 +830,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, /// isDbgValueInDefinedReg - Return true if debug value, encoded by /// DBG_VALUE instruction, is in a defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { - assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); + assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; @@ -864,7 +860,7 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, if (MI->getOperand(0).isCImm()) return DotDebugLocEntry(FLabel, SLabel, MI->getOperand(0).getCImm()); - assert (0 && "Unexpected 3 operand DBG_VALUE instruction!"); + assert(0 && "Unexpected 3 operand DBG_VALUE instruction!"); return DotDebugLocEntry(); } @@ -1132,7 +1128,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { const MachineInstr *MI = II; if (MI->isDebugValue()) { - assert (MI->getNumOperands() > 1 && "Invalid machine instruction!"); + assert(MI->getNumOperands() > 1 && "Invalid machine instruction!"); // Keep track of user variables. const MDNode *Var = @@ -1300,7 +1296,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); - assert (TheCU && "Unable to find compile unit!"); + assert(TheCU && "Unable to find compile unit!"); // Construct abstract scopes. 
ArrayRef AList = LScopes.getAbstractScopesList(); -- cgit v1.1 From 75698f346fc44bdd8803b5dda4071d4b5872d82b Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 26 Oct 2011 23:17:28 +0000 Subject: Use EmitCmp in SelectBranch. No functional change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143076 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 39 ++++++--------------------------------- 1 file changed, 6 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4e0ad36..3f32d58 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1100,30 +1100,8 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // If we can, avoid recomputing the compare - redoing it could lead to wonky // behavior. - // TODO: Factor this out. if (const CmpInst *CI = dyn_cast(BI->getCondition())) { - MVT SourceVT; - Type *Ty = CI->getOperand(0)->getType(); - if (CI->hasOneUse() && (CI->getParent() == I->getParent()) - && isTypeLegal(Ty, SourceVT)) { - bool isFloat = (Ty->isDoubleTy() || Ty->isFloatTy()); - if (isFloat && !Subtarget->hasVFP2()) - return false; - - unsigned CmpOpc; - switch (SourceVT.SimpleTy) { - default: return false; - // TODO: Verify compares. - case MVT::f32: - CmpOpc = ARM::VCMPES; - break; - case MVT::f64: - CmpOpc = ARM::VCMPED; - break; - case MVT::i32: - CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; - break; - } + if (CI->hasOneUse() && (CI->getParent() == I->getParent())) { // Get the compare predicate. // Try to take advantage of fallthrough opportunities. @@ -1138,19 +1116,14 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // We may not handle every CC for now. 
if (ARMPred == ARMCC::AL) return false; - unsigned Arg1 = getRegForValue(CI->getOperand(0)); - if (Arg1 == 0) return false; - - unsigned Arg2 = getRegForValue(CI->getOperand(1)); - if (Arg2 == 0) return false; - - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CmpOpc)) - .addReg(Arg1).addReg(Arg2)); + // Emit the compare. + Type *Ty = CI->getOperand(0)->getType(); + if (!ARMEmitCmp(Ty, CI->getOperand(0), CI->getOperand(1))) + return false; // For floating point we need to move the result to a comparison register // that we can then use for branches. - if (isFloat) + if (Ty->isFloatTy() || Ty->isDoubleTy()) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::FMSTAT))); -- cgit v1.1 From ade620065d1ad591e0f3d39d40cc241f49cf0a99 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 26 Oct 2011 23:25:44 +0000 Subject: Factor a little more code into EmitCmp, which should have been done in the first place. No functional change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143078 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 39 ++++++++++++++++----------------------- 1 file changed, 16 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 3f32d58..72e69a0 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -173,7 +173,7 @@ class ARMFastISel : public FastISel { private: bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); - bool ARMEmitCmp(Type *Ty, const Value *Src1Value, const Value *Src2Value); + bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value); bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr); bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); bool ARMComputeAddress(const Value *Obj, Address &Addr); @@ -1117,16 +1117,9 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { if (ARMPred == ARMCC::AL) 
return false; // Emit the compare. - Type *Ty = CI->getOperand(0)->getType(); - if (!ARMEmitCmp(Ty, CI->getOperand(0), CI->getOperand(1))) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1))) return false; - // For floating point we need to move the result to a comparison register - // that we can then use for branches. - if (Ty->isFloatTy() || Ty->isDoubleTy()) - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(ARM::FMSTAT))); - unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); @@ -1188,13 +1181,14 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { return true; } -bool ARMFastISel::ARMEmitCmp(Type *Ty, const Value *Src1Value, - const Value *Src2Value) { +bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value) { MVT VT; + Type *Ty = Src1Value->getType(); if (!isTypeLegal(Ty, VT)) return false; - if ((Ty->isFloatTy() || Ty->isDoubleTy()) && !Subtarget->hasVFP2()) + bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); + if (isFloat && !Subtarget->hasVFP2()) return false; unsigned CmpOpc; @@ -1220,11 +1214,18 @@ bool ARMFastISel::ARMEmitCmp(Type *Ty, const Value *Src1Value, AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) .addReg(Src1).addReg(Src2)); + + // For floating point we need to move the result to a comparison register + // that we can then use for branches. + if (Ty->isFloatTy() || Ty->isDoubleTy()) + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(ARM::FMSTAT))); return true; } bool ARMFastISel::SelectCmp(const Instruction *I) { const CmpInst *CI = cast(I); + Type *Ty = CI->getOperand(0)->getType(); // Get the compare predicate. ARMCC::CondCodes ARMPred = getComparePred(CI->getPredicate()); @@ -1233,26 +1234,18 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the compare. 
- Type *Ty = CI->getOperand(0)->getType(); - if (!ARMEmitCmp(Ty, CI->getOperand(0), CI->getOperand(1))) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1))) return false; - // For floating point we need to move the result to a comparison register - // that we can then use for branches. - bool isFloat = Ty->isFloatTy() || Ty->isDoubleTy(); - if (isFloat) - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(ARM::FMSTAT))); - // Now set a register based on the comparison. Explicitly set the predicates // here. unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi; TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass; unsigned DestReg = createResultReg(RC); - Constant *Zero - = ConstantInt::get(Type::getInt32Ty(*Context), 0); + Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); unsigned ZeroReg = TargetMaterializeConstant(Zero); + bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); unsigned CondReg = isFloat ? ARM::FPSCR : ARM::CPSR; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), DestReg) .addReg(ZeroReg).addImm(1) -- cgit v1.1 From 8ff2664f2f3f4f5dbd847f94352ffc8b4e1b85e3 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 26 Oct 2011 23:34:37 +0000 Subject: Add a TODO comment. FastISel works by parsing each basic block from the bottom up. Thus, improving the support for compares is goodness because it increases the number of terminator instructions we can handle. This creates many more opportunities for target specific fast-isel. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143079 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 72e69a0..a67c867 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1193,6 +1193,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value) { unsigned CmpOpc; switch (VT.SimpleTy) { + // TODO: Add support for non-legal types (i.e., i1, i8, i16). default: return false; // TODO: Verify compares. case MVT::f32: -- cgit v1.1 From 15701f8969fcb36899a75ca2df6fdcbc52141106 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 26 Oct 2011 23:50:43 +0000 Subject: Rename NonScalarIntSafe to something more appropriate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143080 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 ++++---- lib/Target/PowerPC/PPCISelLowering.cpp | 4 ++-- lib/Target/PowerPC/PPCISelLowering.h | 4 ++-- lib/Target/X86/X86ISelLowering.cpp | 6 +++--- lib/Target/X86/X86ISelLowering.h | 4 ++-- 5 files changed, 13 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index df94e0f..07d2db6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -3345,7 +3345,7 @@ static bool isMemSrcFromString(SDValue Src, std::string &Str) { static bool FindOptimalMemOpLowering(std::vector &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, + bool IsZeroVal, bool MemcpyStrSrc, SelectionDAG &DAG, const TargetLowering &TLI) { @@ -3359,7 +3359,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does // not need to be loaded. 
EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - NonScalarIntSafe, MemcpyStrSrc, + IsZeroVal, MemcpyStrSrc, DAG.getMachineFunction()); if (VT == MVT::Other) { @@ -3606,11 +3606,11 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; - bool NonScalarIntSafe = + bool IsZeroVal = isa(Src) && cast(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, - NonScalarIntSafe, false, DAG, TLI)) + IsZeroVal, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index d6b8a9e..6502eb1 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -5774,7 +5774,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If -/// 'NonScalarIntSafe' is true, that means it's safe to return a +/// 'IsZeroVal' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -5782,7 +5782,7 @@ PPCTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { /// target-independent logic. 
EVT PPCTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, + bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const { if (this->PPCSubTarget.isPPC64()) { diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 430e45e..942f5ee 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -353,7 +353,7 @@ namespace llvm { /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If - /// 'NonScalarIntSafe' is true, that means it's safe to return a + /// 'IsZeroVal' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -361,7 +361,7 @@ namespace llvm { /// target-independent logic. virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, bool MemcpyStrSrc, + bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const; private: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index ce2ef92..2ec0814 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1232,7 +1232,7 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If -/// 'NonScalarIntSafe' is true, that means it's safe to return a +/// 'IsZeroVal' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 
'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -1241,14 +1241,14 @@ unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty) const { EVT X86TargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, + bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const { // FIXME: This turns off use of xmm stores for memset/memcpy on targets like // linux. This is because the stack realignment code can't handle certain // cases like PR2962. This should be removed when PR2962 is fixed. const Function *F = MF.getFunction(); - if (NonScalarIntSafe && + if (IsZeroVal && !F->hasFnAttr(Attribute::NoImplicitFloat)) { if (Size >= 16 && (Subtarget->isUnalignedMemAccessFast() || diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index 0903b9f..3b7a14d 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -533,7 +533,7 @@ namespace llvm { /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it /// means there isn't a need to check it against alignment requirement, /// probably because the source does not need to be loaded. If - /// 'NonScalarIntSafe' is true, that means it's safe to return a + /// 'IsZeroVal' is true, that means it's safe to return a /// non-scalar-integer type, e.g. empty string source, constant, or loaded /// from memory. 'MemcpyStrSrc' indicates whether the memcpy source is /// constant so it does not need to be loaded. @@ -541,7 +541,7 @@ namespace llvm { /// target-independent logic. 
virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, bool MemcpyStrSrc, + bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const; /// allowsUnalignedMemoryAccesses - Returns true if the target allows -- cgit v1.1 From 6d64b3adab682aea9c0b4dd665acc5e863ac6d21 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 27 Oct 2011 00:21:16 +0000 Subject: A branch predicated on a constant can just FastEmit an unconditional branch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143086 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a67c867..6aff834 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1151,6 +1151,12 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { FuncInfo.MBB->addSuccessor(TBB); return true; } + } else if (const ConstantInt *CI = + dyn_cast(BI->getCondition())) { + uint64_t Imm = CI->getZExtValue(); + MachineBasicBlock *Target = (Imm == 0) ? FBB : TBB; + FastEmitBranch(Target, DL); + return true; } unsigned CmpReg = getRegForValue(BI->getCondition()); -- cgit v1.1 From 8ecde6cbf87cfa3184cec5a4eedb70b06dbb4a85 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 27 Oct 2011 01:33:51 +0000 Subject: It is not safe to sink an alloca into a stacksave/stackrestore pair, so don't do that. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143093 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/IndVarSimplify.cpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 75fa011..874a135 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1680,11 +1680,12 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) { if (isa(I)) continue; - // Don't sink static AllocaInsts out of the entry block, which would - // turn them into dynamic allocas! - if (AllocaInst *AI = dyn_cast(I)) - if (AI->isStaticAlloca()) - continue; + // Don't sink alloca: we never want to sink static alloca's out of the + // entry block, and correctly sinking dynamic alloca's requires + // checks for stacksave/stackrestore intrinsics. + // FIXME: Refactor this check somehow? + if (isa(I)) + continue; // Determine if there is a use in or before the loop (direct or // otherwise). -- cgit v1.1 From fd58cd75630b1a2ace727d6caaee8c9308ba0240 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 27 Oct 2011 02:06:39 +0000 Subject: Don't crash on 128-bit sdiv by constant. Found by inspection. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143095 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 91ce2a1..b1afbf2 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1756,7 +1756,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { if (N0C && N1C && !N1C->isNullValue()) return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); // fold (sdiv X, 1) -> X - if (N1C && N1C->getSExtValue() == 1LL) + if (N1C && N1C->getAPIntValue() == 1LL) return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) @@ -1771,16 +1771,14 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { } // fold (sdiv X, pow2) -> simple ops after legalize if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap() && - (isPowerOf2_64(N1C->getSExtValue()) || - isPowerOf2_64(-N1C->getSExtValue()))) { + (N1C->getAPIntValue().isPowerOf2() || + (-N1C->getAPIntValue()).isPowerOf2())) { // If dividing by powers of two is cheap, then don't perform the following // fold. if (TLI.isPow2DivCheap()) return SDValue(); - int64_t pow2 = N1C->getSExtValue(); - int64_t abs2 = pow2 > 0 ? pow2 : -pow2; - unsigned lg2 = Log2_64(abs2); + unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, @@ -1800,7 +1798,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // If we're dividing by a positive value, we're done. Otherwise, we must // negate the result. - if (pow2 > 0) + if (N1C->getAPIntValue().isNonNegative()) return SRA; AddToWorkList(SRA.getNode()); @@ -1810,8 +1808,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { // if integer divide is expensive and we satisfy the requirements, emit an // alternate sequence. 
- if (N1C && (N1C->getSExtValue() < -1 || N1C->getSExtValue() > 1) && - !TLI.isIntDivCheap()) { + if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap()) { SDValue Op = BuildSDIV(N); if (Op.getNode()) return Op; } -- cgit v1.1 From 390c40d96adb2eb4a778a0890c6c8743057e289e Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Thu, 27 Oct 2011 06:44:11 +0000 Subject: Teach our Dwarf emission to use the string pool. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143097 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 5 ++- lib/CodeGen/AsmPrinter/DIE.cpp | 2 + lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 61 ++++++++++++++++------------- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 3 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 15 ++++--- lib/CodeGen/AsmPrinter/DwarfDebug.h | 9 ++++- lib/MC/MCAsmInfo.cpp | 1 + lib/MC/MCAsmInfoDarwin.cpp | 1 + 8 files changed, 58 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 1999f36..0c0fdc1 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -613,6 +613,10 @@ bool AsmPrinter::needsSEHMoves() { MF->getFunction()->needsUnwindTableEntry(); } +bool AsmPrinter::needsRelocationsForDwarfStringPool() const { + return MAI->doesDwarfUseRelocationsForStringPool(); +} + void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { MCSymbol *Label = MI.getOperand(0).getMCSymbol(); @@ -2092,4 +2096,3 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) { report_fatal_error("no GCMetadataPrinter registered for GC: " + Twine(Name)); return 0; } - diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 9c1ce76..fad3d0d 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -267,6 +267,7 @@ void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { /// unsigned DIELabel::SizeOf(AsmPrinter *AP, 
unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; return AP->getTargetData().getPointerSize(); } @@ -290,6 +291,7 @@ void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const { /// unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_strp) return 4; return AP->getTargetData().getPointerSize(); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 9cabe55..a6a004d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -69,10 +69,21 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, /// addString - Add a string attribute data and value. DIEString only /// keeps string reference. -void CompileUnit::addString(DIE *Die, unsigned Attribute, unsigned Form, - StringRef String) { - DIEValue *Value = new (DIEValueAllocator) DIEString(String); - Die->addValue(Attribute, Form, Value); +void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { + if (String.size() > 3) { + MCSymbol *Symb = DD->getStringPoolEntry(String); + DIEValue *Value; + if (Asm->needsRelocationsForDwarfStringPool()) + Value = new (DIEValueAllocator) DIELabel(Symb); + else { + MCSymbol *StringPool = DD->getStringPool(); + Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); + } + Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); + } else { + DIEValue *Value = new (DIEValueAllocator) DIEString(String); + Die->addValue(Attribute, dwarf::DW_FORM_string, Value); + } } /// addLabel - Add a Dwarf label attribute data and value. @@ -479,7 +490,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, /// addConstantFPValue - Add constant value entry in variable DIE. 
bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { - assert(MO.isFPImm() && "Invalid machine operand!"); + assert (MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); @@ -660,7 +671,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { StringRef Name = BTy.getName(); // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) { Buffer.setTag(dwarf::DW_TAG_unspecified_type); @@ -694,7 +705,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Add name if not anonymous or intermediate type. if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); // Add size if non-zero (derived types might be zero-sized.) if (Size) @@ -791,8 +802,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { else if (Element.isVariable()) { DIVariable DV(Element); ElemDie = new DIE(dwarf::DW_TAG_variable); - addString(ElemDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, - DV.getName()); + addString(ElemDie, dwarf::DW_AT_name, DV.getName()); addType(ElemDie, DV.getType()); addUInt(ElemDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1); addUInt(ElemDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); @@ -836,7 +846,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add name if not anonymous or intermediate type. 
if (!Name.empty()) - addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(&Buffer, dwarf::DW_AT_name, Name); if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) @@ -868,7 +878,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName()); + addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); return ParamDIE; } @@ -883,7 +893,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); addType(ParamDIE, TPV.getType()); if (!TPV.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName()); + addString(ParamDIE, dwarf::DW_AT_name, TPV.getName()); addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, TPV.getValue()); return ParamDIE; @@ -897,7 +907,7 @@ DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { NDie = new DIE(dwarf::DW_TAG_namespace); insertDIE(NS, NDie); if (!NS.getName().empty()) - addString(NDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, NS.getName()); + addString(NDie, dwarf::DW_AT_name, NS.getName()); addSourceLine(NDie, NS); addToContextOwner(NDie, NS.getContext()); return NDie; @@ -932,7 +942,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { StringRef LinkageName = SP.getLinkageName(); if (!LinkageName.empty()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string, + addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, getRealLinkageName(LinkageName)); // If this DIE is going to refer declaration info using AT_specification @@ -942,7 +952,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Constructors and operators for anonymous aggregates do not have 
names. if (!SP.getName().empty()) - addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName()); + addString(SPDie, dwarf::DW_AT_name, SP.getName()); addSourceLine(SPDie, SP); @@ -1048,13 +1058,12 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { insertDIE(N, VariableDIE); // Add name. - addString(VariableDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, - GV.getDisplayName()); + addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); StringRef LinkageName = GV.getLinkageName(); bool isGlobalVariable = GV.getGlobal() != NULL; if (!LinkageName.empty() && isGlobalVariable) addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - dwarf::DW_FORM_string, getRealLinkageName(LinkageName)); + getRealLinkageName(LinkageName)); // Add type. DIType GTy = GV.getType(); addType(VariableDIE, GTy); @@ -1170,7 +1179,7 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) { DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(Enumerator, dwarf::DW_AT_name, Name); int64_t Value = ETy.getEnumValue(); addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); return Enumerator; @@ -1207,8 +1216,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, dwarf::DW_FORM_ref4, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, - dwarf::DW_FORM_string, Name); + addString(VariableDie, dwarf::DW_AT_name, Name); addSourceLine(VariableDie, DV->getVariable()); addType(VariableDie, DV->getType()); } @@ -1303,7 +1311,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { DIE *MemberDie = new DIE(DT.getTag()); StringRef Name = DT.getName(); if (!Name.empty()) - addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); + addString(MemberDie, dwarf::DW_AT_name, Name); 
addType(MemberDie, DT.getTypeDerivedFrom()); @@ -1377,16 +1385,13 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { // Objective-C properties. StringRef PropertyName = DT.getObjCPropertyName(); if (!PropertyName.empty()) { - addString(MemberDie, dwarf::DW_AT_APPLE_property_name, dwarf::DW_FORM_string, - PropertyName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName); StringRef GetterName = DT.getObjCPropertyGetterName(); if (!GetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, - dwarf::DW_FORM_string, GetterName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName); StringRef SetterName = DT.getObjCPropertySetterName(); if (!SetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, - dwarf::DW_FORM_string, SetterName); + addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName); unsigned PropertyAttributes = 0; if (DT.isReadOnlyObjCProperty()) PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 7859265..96d7902 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -150,8 +150,7 @@ public: /// addString - Add a string attribute data and value. /// - void addString(DIE *Die, unsigned Attribute, unsigned Form, - const StringRef Str); + void addString(DIE *Die, unsigned Attribute, const StringRef Str); /// addLabel - Add a Dwarf label attribute data and value. 
/// diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 7ce9a06..1d5e05d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -136,6 +136,10 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfDebug::~DwarfDebug() { } +MCSymbol *DwarfDebug::getStringPool() { + return Asm->GetTempSymbol("section_str"); +} + MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { std::pair &Entry = StringPool[Str]; if (Entry.first) return Entry.first; @@ -467,11 +471,10 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this); - NewCU->addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string, - DIUnit.getProducer()); + NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); - NewCU->addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); + NewCU->addString(Die, dwarf::DW_AT_name, FN); // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This // simplifies debug range entries. 
NewCU->addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); @@ -484,14 +487,13 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); if (!Dir.empty()) - NewCU->addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); + NewCU->addString(Die, dwarf::DW_AT_comp_dir, Dir); if (DIUnit.isOptimized()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) - NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string, - Flags); + NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags); if (unsigned RVer = DIUnit.getRunTimeVersion()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, @@ -1796,6 +1798,7 @@ void DwarfDebug::emitDebugStr() { // Emit the string itself. Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/); + Asm->OutStreamer.EmitZeros(1, 0); } } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index b98ace2..b280fbb 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -216,8 +216,6 @@ class DwarfDebug { StringMap > StringPool; unsigned NextStringPoolNumber; - MCSymbol *getStringPoolEntry(StringRef Str); - /// SectionMap - Provides a unique id per text section. /// UniqueVector SectionMap; @@ -504,6 +502,13 @@ public: /// createSubprogramDIE - Create new DIE using SP. DIE *createSubprogramDIE(DISubprogram SP); + + /// getStringPool - returns the entry into the start of the pool. + MCSymbol *getStringPool(); + + /// getStringPoolEntry - returns an entry into the string pool with the given + /// string text. 
+ MCSymbol *getStringPoolEntry(StringRef Str); }; } // End of namespace llvm diff --git a/lib/MC/MCAsmInfo.cpp b/lib/MC/MCAsmInfo.cpp index 95861bc..c330e74 100644 --- a/lib/MC/MCAsmInfo.cpp +++ b/lib/MC/MCAsmInfo.cpp @@ -91,6 +91,7 @@ MCAsmInfo::MCAsmInfo() { DwarfRequiresRelocationForSectionOffset = true; DwarfSectionOffsetDirective = 0; DwarfUsesLabelOffsetForRanges = true; + DwarfUsesRelocationsForStringPool = true; DwarfRegNumForCFI = false; HasMicrosoftFastStdCallMangling = false; diff --git a/lib/MC/MCAsmInfoDarwin.cpp b/lib/MC/MCAsmInfoDarwin.cpp index b20e338..537d0a3 100644 --- a/lib/MC/MCAsmInfoDarwin.cpp +++ b/lib/MC/MCAsmInfoDarwin.cpp @@ -66,4 +66,5 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() { DwarfRequiresRelocationForSectionOffset = false; DwarfUsesLabelOffsetForRanges = false; + DwarfUsesRelocationsForStringPool = false; } -- cgit v1.1 From c45fe4c1dc9fb7cc3a1d58c7b022832eeb478abb Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 27 Oct 2011 14:08:01 +0000 Subject: LLLexer: Factor hex char parsing. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143101 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/AsmParser/LLLexer.cpp | 54 ++++++++++++++--------------------------------- 1 file changed, 16 insertions(+), 38 deletions(-) (limited to 'lib') diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 91d6c6a..1b6b11a 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -55,18 +55,22 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) { return Result; } +static char parseHexChar(char C) { + if (C >= '0' && C <= '9') + return C-'0'; + if (C >= 'A' && C <= 'F') + return C-'A'+10; + if (C >= 'a' && C <= 'f') + return C-'a'+10; + return 0; +} + uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { uint64_t Result = 0; for (; Buffer != End; ++Buffer) { uint64_t OldRes = Result; Result *= 16; - char C = *Buffer; - if (C >= '0' && C <= '9') - Result += C-'0'; - else if (C >= 'A' && C <= 'F') - Result += C-'A'+10; - else if (C >= 'a' && C <= 'f') - Result += C-'a'+10; + Result += parseHexChar(*Buffer); if (Result < OldRes) { // Uh, oh, overflow detected!!! 
Error("constant bigger than 64 bits detected!"); @@ -82,24 +86,12 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End, for (int i=0; i<16; i++, Buffer++) { assert(Buffer != End); Pair[0] *= 16; - char C = *Buffer; - if (C >= '0' && C <= '9') - Pair[0] += C-'0'; - else if (C >= 'A' && C <= 'F') - Pair[0] += C-'A'+10; - else if (C >= 'a' && C <= 'f') - Pair[0] += C-'a'+10; + Pair[0] += parseHexChar(*Buffer); } Pair[1] = 0; for (int i=0; i<16 && Buffer != End; i++, Buffer++) { Pair[1] *= 16; - char C = *Buffer; - if (C >= '0' && C <= '9') - Pair[1] += C-'0'; - else if (C >= 'A' && C <= 'F') - Pair[1] += C-'A'+10; - else if (C >= 'a' && C <= 'f') - Pair[1] += C-'a'+10; + Pair[1] += parseHexChar(*Buffer); } if (Buffer != End) Error("constant bigger than 128 bits detected!"); @@ -113,24 +105,12 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, for (int i=0; i<4 && Buffer != End; i++, Buffer++) { assert(Buffer != End); Pair[1] *= 16; - char C = *Buffer; - if (C >= '0' && C <= '9') - Pair[1] += C-'0'; - else if (C >= 'A' && C <= 'F') - Pair[1] += C-'A'+10; - else if (C >= 'a' && C <= 'f') - Pair[1] += C-'a'+10; + Pair[1] += parseHexChar(*Buffer); } Pair[0] = 0; for (int i=0; i<16; i++, Buffer++) { Pair[0] *= 16; - char C = *Buffer; - if (C >= '0' && C <= '9') - Pair[0] += C-'0'; - else if (C >= 'A' && C <= 'F') - Pair[0] += C-'A'+10; - else if (C >= 'a' && C <= 'f') - Pair[0] += C-'a'+10; + Pair[0] += parseHexChar(*Buffer); } if (Buffer != End) Error("constant bigger than 128 bits detected!"); @@ -149,9 +129,7 @@ static void UnEscapeLexed(std::string &Str) { *BOut++ = '\\'; // Two \ becomes one BIn += 2; } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { - char Tmp = BIn[3]; BIn[3] = 0; // Terminate string - *BOut = (char)strtol(BIn+1, 0, 16); // Convert to number - BIn[3] = Tmp; // Restore character + *BOut = parseHexChar(BIn[1]) * 16 + parseHexChar(BIn[2]); BIn += 3; // Skip over handled chars ++BOut; } else { -- 
cgit v1.1 From 090697321b32fe010db07eb03b6a7af94d8caebd Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Thu, 27 Oct 2011 15:47:25 +0000 Subject: Revert Duncan's r143028 expression folding which appears to be the culprit behind a compile failure on 483.xalancbmk. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143102 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ValueTracking.cpp | 50 ++++-------------------------------------- 1 file changed, 4 insertions(+), 46 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 9ea2703..9a234c0 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -201,36 +201,9 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1); ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - - bool isKnownNegative = false; - bool isKnownNonNegative = false; - // If the multiplication is known not to overflow, compute the sign bit. - if (Mask.isNegative() && cast(I)->hasNoSignedWrap()) { - Value *Op1 = I->getOperand(1), *Op2 = I->getOperand(0); - if (Op1 == Op2) { - // The product of a number with itself is non-negative. - isKnownNonNegative = true; - } else { - bool isKnownNonNegative1 = KnownZero.isNegative(); - bool isKnownNonNegative2 = KnownZero2.isNegative(); - bool isKnownNegative1 = KnownOne.isNegative(); - bool isKnownNegative2 = KnownOne2.isNegative(); - // The product of two numbers with the same sign is non-negative. - isKnownNonNegative = (isKnownNegative1 && isKnownNegative2) || - (isKnownNonNegative1 && isKnownNonNegative2); - // The product of a negative number and a non-negative number is either - // negative or zero. 
- isKnownNegative = (isKnownNegative1 && isKnownNonNegative2 && - isKnownNonZero(Op2, TD, Depth)) || - (isKnownNegative2 && isKnownNonNegative1 && - isKnownNonZero(Op1, TD, Depth)); - assert(!(isKnownNegative && isKnownNonNegative) && - "Sign bit both zero and one?"); - } - } - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // If low bits are zero in either operand, output low known-0 bits. // Also compute a conserative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the @@ -247,12 +220,6 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | APInt::getHighBitsSet(BitWidth, LeadZ); KnownZero &= Mask; - - if (isKnownNonNegative) - KnownZero.setBit(BitWidth - 1); - else if (isKnownNegative) - KnownOne.setBit(BitWidth - 1); - return; } case Instruction::UDiv: { @@ -817,7 +784,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { } // The remaining tests are all recursive, so bail out if we hit the limit. - if (Depth++ >= MaxDepth) + if (Depth++ == MaxDepth) return false; unsigned BitWidth = getBitWidth(V->getType(), TD); @@ -901,15 +868,6 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { if (YKnownNonNegative && isPowerOfTwo(X, TD, /*OrZero*/false, Depth)) return true; } - // X * Y. - else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) { - BinaryOperator *BO = cast(V); - // If X and Y are non-zero then so is X * Y as long as the multiplication - // does not overflow. - if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && - isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth)) - return true; - } // (C ? X : Y) != 0 if X != 0 and Y != 0. 
else if (SelectInst *SI = dyn_cast(V)) { if (isKnownNonZero(SI->getTrueValue(), TD, Depth) && -- cgit v1.1 From bc1430a0a0b3f97b86039950e57d832cb01f1cf6 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 27 Oct 2011 16:38:50 +0000 Subject: BlockFrequency: Use a smarter overflow check. This trades one 64 bit div for one 64 bit mul and some arithmetic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143106 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/BlockFrequency.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Support/BlockFrequency.cpp b/lib/Support/BlockFrequency.cpp index a63bf83..84a993e 100644 --- a/lib/Support/BlockFrequency.cpp +++ b/lib/Support/BlockFrequency.cpp @@ -70,8 +70,13 @@ BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { assert(n <= d && "Probability must be less or equal to 1."); - // If we can overflow use 96-bit operations. - if (n > 0 && Frequency > UINT64_MAX / n) { + // Calculate Frequency * n. + uint64_t mulLo = (Frequency & UINT32_MAX) * n; + uint64_t mulHi = (Frequency >> 32) * n; + uint64_t mulRes = (mulHi << 32) + mulLo; + + // If there was overflow use 96-bit operations. + if (mulHi > UINT32_MAX || mulRes < mulLo) { // 96-bit value represented as W[1]:W[0]. uint64_t W[2]; @@ -82,8 +87,7 @@ BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { return *this; } - Frequency *= n; - Frequency /= d; + Frequency = mulRes / d; return *this; } -- cgit v1.1 From d8b0b915c5a94806596c381660c548aabef447b2 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 27 Oct 2011 17:15:47 +0000 Subject: Add relocation iterators to the libObject C API. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143107 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Object.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'lib') diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index dea1466..a404cb3 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -112,6 +112,29 @@ LLVMBool LLVMGetSectionContainsSymbol(LLVMSectionIteratorRef SI, return ret; } +// Section Relocation iterators +LLVMRelocationIteratorRef LLVMGetRelocations(LLVMSectionIteratorRef Section) { + relocation_iterator SI = (*unwrap(Section))->begin_relocations(); + return wrap(new relocation_iterator(SI)); +} + +void LLVMDisposeRelocationIterator(LLVMRelocationIteratorRef SI) { + delete unwrap(SI); +} + +LLVMBool LLVMIsRelocationIteratorAtEnd(LLVMSectionIteratorRef Section, + LLVMRelocationIteratorRef SI) { + return (*unwrap(SI) == (*unwrap(Section))->end_relocations()) ? 1 : 0; +} + +void LLVMMoveToNextRelocation(LLVMRelocationIteratorRef SI) { + error_code ec; + unwrap(SI)->increment(ec); + if (ec) report_fatal_error("LLVMMoveToNextRelocation failed: " + + ec.message()); +} + + // SymbolRef accessors const char *LLVMGetSymbolName(LLVMSymbolIteratorRef SI) { StringRef ret; -- cgit v1.1 From 036a67d670413f8116415b87457f22d256f314ae Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 27 Oct 2011 17:16:55 +0000 Subject: Thumb2 t2MVNi assembly parsing to recognize ".w" suffix. 
rdar://10348584 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143108 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index cc137a8..cef4c7b 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3921,7 +3921,9 @@ def : t2InstAlias<"ldrsb${p} $Rt, $addr", def : t2InstAlias<"ldrsh${p} $Rt, $addr", (t2LDRSHpci rGPR:$Rt, t2ldrlabel:$addr, pred:$p)>; -// Alias for MVN without the ".w" optional width specifier. +// Alias for MVN with(out) the ".w" optional width specifier. +def : t2InstAlias<"mvn${s}${p}.w $Rd, $imm", + (t2MVNi rGPR:$Rd, t2_so_imm:$imm, pred:$p, cc_out:$s)>; def : t2InstAlias<"mvn${s}${p} $Rd, $Rm", (t2MVNr rGPR:$Rd, rGPR:$Rm, pred:$p, cc_out:$s)>; def : t2InstAlias<"mvn${s}${p} $Rd, $ShiftedRm", -- cgit v1.1 From 3529c53d2f07bb283e990d1678962f1b5eba9ba4 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 27 Oct 2011 17:32:36 +0000 Subject: Expose relocation accessors through the libObject C API. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143109 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Object.cpp | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'lib') diff --git a/lib/Object/Object.cpp b/lib/Object/Object.cpp index a404cb3..719bf88 100644 --- a/lib/Object/Object.cpp +++ b/lib/Object/Object.cpp @@ -164,3 +164,48 @@ uint64_t LLVMGetSymbolSize(LLVMSymbolIteratorRef SI) { return ret; } +// RelocationRef accessors +uint64_t LLVMGetRelocationAddress(LLVMRelocationIteratorRef RI) { + uint64_t ret; + if (error_code ec = (*unwrap(RI))->getAddress(ret)) + report_fatal_error(ec.message()); + return ret; +} + +LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) { + SymbolRef ret; + if (error_code ec = (*unwrap(RI))->getSymbol(ret)) + report_fatal_error(ec.message()); + + return wrap(new symbol_iterator(ret)); +} + +uint64_t LLVMGetRelocationType(LLVMRelocationIteratorRef RI) { + uint64_t ret; + if (error_code ec = (*unwrap(RI))->getType(ret)) + report_fatal_error(ec.message()); + return ret; +} + +// NOTE: Caller takes ownership of returned string. +const char *LLVMGetRelocationTypeName(LLVMRelocationIteratorRef RI) { + SmallVector ret; + if (error_code ec = (*unwrap(RI))->getTypeName(ret)) + report_fatal_error(ec.message()); + + char *str = static_cast(malloc(ret.size())); + std::copy(ret.begin(), ret.end(), str); + return str; +} + +// NOTE: Caller takes ownership of returned string. 
+const char *LLVMGetRelocationValueString(LLVMRelocationIteratorRef RI) { + SmallVector ret; + if (error_code ec = (*unwrap(RI))->getValueString(ret)) + report_fatal_error(ec.message()); + + char *str = static_cast(malloc(ret.size())); + std::copy(ret.begin(), ret.end(), str); + return str; +} + -- cgit v1.1 From 88484c00307274568ab068909cb38ecaedd41cbf Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 27 Oct 2011 17:33:59 +0000 Subject: Thumb2 t2LDMDB[_UPD] assembly parsing to recognize .w suffix. rdar://10348844 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143110 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index cef4c7b..7d98182 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3944,6 +3944,12 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; +// LDMDB/LDMDB_UPD aliases w/ the optional .w suffix +def : t2InstAlias<"ldmdb${p}.w $Rn, $regs", + (t2LDMDB GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"ldmdb${p}.w $Rn!, $regs", + (t2LDMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + // Alias for REV/REV16/REVSH without the ".w" optional width specifier. def : t2InstAlias<"rev${p} $Rd, $Rm", (t2REV rGPR:$Rd, rGPR:$Rm, pred:$p)>; def : t2InstAlias<"rev16${p} $Rd, $Rm", (t2REV16 rGPR:$Rd, rGPR:$Rm, pred:$p)>; -- cgit v1.1 From 55c4127134d127ccd52cc2f4115af00084b28807 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Thu, 27 Oct 2011 17:40:41 +0000 Subject: Change the sysexit mnemonic (and sysexitl) to never have the REX.W prefix and not depend on In32BitMode. 
Use the sysexitq mnemonic for the version with the REX.W prefix and only allow it only In64BitMode. rdar://9738584 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143112 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.td | 1 + lib/Target/X86/X86InstrSystem.td | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index aa35cf0..2a85ba1 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1573,6 +1573,7 @@ def : MnemonicAlias<"verrw", "verr">; // System instruction aliases. def : MnemonicAlias<"iret", "iretl">; def : MnemonicAlias<"sysret", "sysretl">; +def : MnemonicAlias<"sysexit", "sysexitl">; def : MnemonicAlias<"lgdtl", "lgdt">, Requires<[In32BitMode]>; def : MnemonicAlias<"lgdtq", "lgdt">, Requires<[In64BitMode]>; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index b5651f3..be3500a 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -51,9 +51,8 @@ def SYSRETQ :RI<0x07, RawFrm, (outs), (ins), "sysretq", []>, TB, def SYSENTER : I<0x34, RawFrm, (outs), (ins), "sysenter", []>, TB; -def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB, - Requires<[In32BitMode]>; -def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexit", []>, TB, +def SYSEXIT : I<0x35, RawFrm, (outs), (ins), "sysexitl", []>, TB; +def SYSEXIT64 :RI<0x35, RawFrm, (outs), (ins), "sysexitq", []>, TB, Requires<[In64BitMode]>; def IRET16 : I<0xcf, RawFrm, (outs), (ins), "iretw", []>, OpSize; -- cgit v1.1 From 32a43cc0fc3cd42702d7859eaa58dd42f561a54d Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Thu, 27 Oct 2011 19:16:21 +0000 Subject: Reapply commit 143028 with a fix: the problem was casting a ConstantExpr Mul using BinaryOperator (which only works for instructions) when it should have been a cast to OverflowingBinaryOperator (which also works for constants). 
While there, correct a few other dubious looking uses of BinaryOperator. Thanks to Chad Rosier for the testcase. Original commit message: My super-optimizer noticed that we weren't folding this expression to true: (x *nsw x) sgt 0, where x = (y | 1). This occurs in 464.h264ref. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143125 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 5 ++-- lib/Analysis/ValueTracking.cpp | 56 +++++++++++++++++++++++++++++++----- 2 files changed, 52 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index d9e3400..31cbbba 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -758,7 +758,8 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const TargetData *TD, Value *X = 0, *Y = 0; if ((match(Op0, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op1) || // (X / Y) * Y (match(Op1, m_IDiv(m_Value(X), m_Value(Y))) && Y == Op0)) { // Y * (X / Y) - BinaryOperator *Div = cast(Y == Op1 ? Op0 : Op1); + PossiblyExactOperator *Div = + cast(Y == Op1 ? Op0 : Op1); if (Div->isExact()) return X; } @@ -842,7 +843,7 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1, Value *X = 0, *Y = 0; if (match(Op0, m_Mul(m_Value(X), m_Value(Y))) && (X == Op1 || Y == Op1)) { if (Y != Op1) std::swap(X, Y); // Ensure expression is (X * Y) / Y, Y = Op1 - BinaryOperator *Mul = cast(Op0); + OverflowingBinaryOperator *Mul = cast(Op0); // If the Mul knows it does not overflow, then we are good to go. 
if ((isSigned && Mul->hasNoSignedWrap()) || (!isSigned && Mul->hasNoUnsignedWrap())) diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 9a234c0..90757f9 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -201,9 +201,36 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1); ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + bool isKnownNegative = false; + bool isKnownNonNegative = false; + // If the multiplication is known not to overflow, compute the sign bit. + if (Mask.isNegative() && + cast(I)->hasNoSignedWrap()) { + Value *Op1 = I->getOperand(1), *Op2 = I->getOperand(0); + if (Op1 == Op2) { + // The product of a number with itself is non-negative. + isKnownNonNegative = true; + } else { + bool isKnownNonNegative1 = KnownZero.isNegative(); + bool isKnownNonNegative2 = KnownZero2.isNegative(); + bool isKnownNegative1 = KnownOne.isNegative(); + bool isKnownNegative2 = KnownOne2.isNegative(); + // The product of two numbers with the same sign is non-negative. + isKnownNonNegative = (isKnownNegative1 && isKnownNegative2) || + (isKnownNonNegative1 && isKnownNonNegative2); + // The product of a negative number and a non-negative number is either + // negative or zero. + if (!isKnownNonNegative) + isKnownNegative = (isKnownNegative1 && isKnownNonNegative2 && + isKnownNonZero(Op2, TD, Depth)) || + (isKnownNegative2 && isKnownNonNegative1 && + isKnownNonZero(Op1, TD, Depth)); + } + } + // If low bits are zero in either operand, output low known-0 bits. 
// Also compute a conserative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the @@ -220,6 +247,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | APInt::getHighBitsSet(BitWidth, LeadZ); KnownZero &= Mask; + + if (isKnownNonNegative) + KnownZero.setBit(BitWidth - 1); + else if (isKnownNegative) + KnownOne.setBit(BitWidth - 1); + return; } case Instruction::UDiv: { @@ -784,7 +817,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { } // The remaining tests are all recursive, so bail out if we hit the limit. - if (Depth++ == MaxDepth) + if (Depth++ >= MaxDepth) return false; unsigned BitWidth = getBitWidth(V->getType(), TD); @@ -802,7 +835,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { // if the lowest bit is shifted off the end. if (BitWidth && match(V, m_Shl(m_Value(X), m_Value(Y)))) { // shl nuw can't remove any non-zero bits. - BinaryOperator *BO = cast(V); + OverflowingBinaryOperator *BO = cast(V); if (BO->hasNoUnsignedWrap()) return isKnownNonZero(X, TD, Depth); @@ -816,7 +849,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { // defined if the sign bit is shifted off the end. else if (match(V, m_Shr(m_Value(X), m_Value(Y)))) { // shr exact can only shift out zero bits. - BinaryOperator *BO = cast(V); + PossiblyExactOperator *BO = cast(V); if (BO->isExact()) return isKnownNonZero(X, TD, Depth); @@ -827,7 +860,7 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { } // div exact can only produce a zero if the dividend is zero. 
else if (match(V, m_IDiv(m_Value(X), m_Value()))) { - BinaryOperator *BO = cast(V); + PossiblyExactOperator *BO = cast(V); if (BO->isExact()) return isKnownNonZero(X, TD, Depth); } @@ -868,6 +901,15 @@ bool llvm::isKnownNonZero(Value *V, const TargetData *TD, unsigned Depth) { if (YKnownNonNegative && isPowerOfTwo(X, TD, /*OrZero*/false, Depth)) return true; } + // X * Y. + else if (match(V, m_Mul(m_Value(X), m_Value(Y)))) { + OverflowingBinaryOperator *BO = cast(V); + // If X and Y are non-zero then so is X * Y as long as the multiplication + // does not overflow. + if ((BO->hasNoSignedWrap() || BO->hasNoUnsignedWrap()) && + isKnownNonZero(X, TD, Depth) && isKnownNonZero(Y, TD, Depth)) + return true; + } // (C ? X : Y) != 0 if X != 0 and Y != 0. else if (SelectInst *SI = dyn_cast(V)) { if (isKnownNonZero(SI->getTrueValue(), TD, Depth) && -- cgit v1.1 From 999f90bedf94e7d55508f0797802b75064f1de09 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Thu, 27 Oct 2011 19:19:14 +0000 Subject: Add a pinned metadata name for fpaccuracy, and document it git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143135 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/VMCore/LLVMContext.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/VMCore/LLVMContext.cpp b/lib/VMCore/LLVMContext.cpp index 3ed2c2c..e1a9b17 100644 --- a/lib/VMCore/LLVMContext.cpp +++ b/lib/VMCore/LLVMContext.cpp @@ -43,6 +43,11 @@ LLVMContext::LLVMContext() : pImpl(new LLVMContextImpl(*this)) { // Create the 'prof' metadata kind. unsigned ProfID = getMDKindID("prof"); assert(ProfID == MD_prof && "prof kind id drifted"); (void)ProfID; + + // Create the 'fpaccuracy' metadata kind. 
+ unsigned FPAccuracyID = getMDKindID("fpaccuracy"); + assert(FPAccuracyID == MD_fpaccuracy && "fpaccuracy kind id drifted"); + (void)FPAccuracyID; } LLVMContext::~LLVMContext() { delete pImpl; } -- cgit v1.1 From eb6bd339954376e4ab1fda52133ef3f94c3029b7 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 27 Oct 2011 20:46:09 +0000 Subject: Fix pretty printing of i386 local sect diff relocations, TLV relocations, and x86_64 TLV relocations in MachO. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143140 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 57 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 06d62fa..099ac2c 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -675,10 +675,11 @@ error_code MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, "GENERIC_RELOC_VANILLA", "GENERIC_RELOC_PAIR", "GENERIC_RELOC_SECTDIFF", + "GENERIC_RELOC_PB_LA_PTR", "GENERIC_RELOC_LOCAL_SECTDIFF", - "GENERIC_RELOC_PB_LA_PTR" }; + "GENERIC_RELOC_TLV" }; - if (r_type > 4) + if (r_type > 6) res = "Unknown"; else res = Table[r_type]; @@ -859,6 +860,12 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, else Type = (RE->Word1 >> 28) & 0xF; + bool isPCRel; + if (isScattered) + isPCRel = ((RE->Word0 >> 30) & 1); + else + isPCRel = ((RE->Word1 >> 24) & 1); + // Determine any addends that should be displayed with the relocation. // These require decoding the relocation type, which is triple-specific. 
@@ -894,6 +901,11 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, fmt << "-"; printRelocationTargetName(RE, fmt); } + case macho::RIT_X86_64_TLV: + printRelocationTargetName(RE, fmt); + fmt << "@TLV"; + if (isPCRel) fmt << "P"; + break; case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1 printRelocationTargetName(RE, fmt); fmt << "-1"; @@ -916,8 +928,7 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, switch (Type) { case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info return object_error::success; - case macho::RIT_Difference: // GENERIC_RELOC_SECTDIFF - case macho::RIT_Generic_LocalDifference: { // GENERIC_RELOC_LOCAL_SECTDIFF + case macho::RIT_Difference: { // GENERIC_RELOC_SECTDIFF InMemoryStruct RENext; DataRefImpl RelNext = Rel; RelNext.d.a++; @@ -934,8 +945,7 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, RType = (RENext->Word1 >> 28) & 0xF; if (RType != 1) report_fatal_error("Expected GENERIC_RELOC_PAIR after " - "GENERIC_RELOC_SECTDIFF or " - "GENERIC_RELOC_LOCAL_SECTDIFF."); + "GENERIC_RELOC_SECTDIFF."); printRelocationTargetName(RE, fmt); fmt << "-"; @@ -947,7 +957,40 @@ error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, if (Arch == Triple::x86) { // All X86 relocations that need special printing were already // handled in the generic code. - printRelocationTargetName(RE, fmt); + switch (Type) { + case macho::RIT_Generic_LocalDifference:{// GENERIC_RELOC_LOCAL_SECTDIFF + InMemoryStruct RENext; + DataRefImpl RelNext = Rel; + RelNext.d.a++; + getRelocation(RelNext, RENext); + + // X86 sect diff's must be followed by a relocation of type + // GENERIC_RELOC_PAIR. 
+ bool isNextScattered = (Arch != Triple::x86_64) && + (RENext->Word0 & macho::RF_Scattered); + unsigned RType; + if (isNextScattered) + RType = (RENext->Word0 >> 24) & 0xF; + else + RType = (RENext->Word1 >> 28) & 0xF; + if (RType != 1) + report_fatal_error("Expected GENERIC_RELOC_PAIR after " + "GENERIC_RELOC_LOCAL_SECTDIFF."); + + printRelocationTargetName(RE, fmt); + fmt << "-"; + printRelocationTargetName(RENext, fmt); + break; + } + case macho::RIT_Generic_TLV: { + printRelocationTargetName(RE, fmt); + fmt << "@TLV"; + if (isPCRel) fmt << "P"; + break; + } + default: + printRelocationTargetName(RE, fmt); + } } else { // ARM-specific relocations switch (Type) { case macho::RIT_ARM_Half: // ARM_RELOC_HALF -- cgit v1.1 From 824a70a384988aebbe3b46254a3631e81a8f0690 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 27 Oct 2011 21:21:05 +0000 Subject: Avoid partial CPSR dependency from loop backedges. rdar://10357570 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143145 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Thumb2SizeReduction.cpp | 67 ++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index 89a155c..e5fc8b4 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -146,7 +146,8 @@ namespace { /// ReduceOpcodeMap - Maps wide opcode to index of entry in ReduceTable. 
DenseMap ReduceOpcodeMap; - bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use); + bool canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use, + bool IsSelfLoop); bool VerifyPredAndCC(MachineInstr *MI, const ReduceEntry &Entry, bool is2Addr, ARMCC::CondCodes Pred, @@ -157,19 +158,21 @@ namespace { bool ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, bool LiveCPSR, - MachineInstr *CPSRDef); + MachineInstr *CPSRDef, bool IsSelfLoop); /// ReduceTo2Addr - Reduce a 32-bit instruction to a 16-bit two-address /// instruction. bool ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef); + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop); /// ReduceToNarrow - Reduce a 32-bit instruction to a 16-bit /// non-two-address instruction. bool ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef); + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop); /// ReduceMBB - Reduce width of instructions in the specified basic block. bool ReduceMBB(MachineBasicBlock &MBB); @@ -210,10 +213,17 @@ static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { /// In this case it would have been ok to narrow the mul.w to muls since there /// are indirect RAW dependency between the muls and the mul.w bool -Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use) { - if (!Def || !STI->avoidCPSRPartialUpdate()) +Thumb2SizeReduce::canAddPseudoFlagDep(MachineInstr *Def, MachineInstr *Use, + bool FirstInSelfLoop) { + // FIXME: Disable check for -Oz (aka OptimizeForSizeHarder). + if (!STI->avoidCPSRPartialUpdate()) return false; + if (!Def) + // If this BB loops back to itself, conservatively avoid narrowing the + // first instruction that does partial flag update. 
+ return FirstInSelfLoop; + SmallSet Defs; for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { const MachineOperand &MO = Def->getOperand(i); @@ -476,15 +486,16 @@ Thumb2SizeReduce::ReduceLoadStore(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef) { + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop) { unsigned Opc = MI->getOpcode(); if (Opc == ARM::t2ADDri) { // If the source register is SP, try to reduce to tADDrSPi, otherwise // it's a normal reduce. if (MI->getOperand(1).getReg() != ARM::SP) { - if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) return true; - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); } // Try to reduce to tADDrSPi. unsigned Imm = MI->getOperand(2).getImm(); @@ -535,12 +546,12 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, switch (Opc) { default: break; case ARM::t2ADDSri: { - if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) + if (ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) return true; // fallthrough } case ARM::t2ADDSrr: - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); } } break; @@ -552,13 +563,13 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, case ARM::t2UXTB: case ARM::t2UXTH: if (MI->getOperand(2).getImm() == 0) - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); break; case ARM::t2MOVi16: // Can convert only 'pure' immediate operands, not immediates obtained as // globals' addresses. 
if (MI->getOperand(1).isImm()) - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); break; case ARM::t2CMPrr: { // Try to reduce to the lo-reg only version first. Why there are two @@ -568,9 +579,9 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, // source insn opcode. So for now, we hack a local entry record to use. static const ReduceEntry NarrowEntry = { ARM::t2CMPrr,ARM::tCMPr, 0, 0, 0, 1, 1,2, 0, 0,1 }; - if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef)) + if (ReduceToNarrow(MBB, MI, NarrowEntry, LiveCPSR, CPSRDef, IsSelfLoop)) return true; - return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef); + return ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop); } } return false; @@ -579,7 +590,8 @@ Thumb2SizeReduce::ReduceSpecial(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef) { + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop) { if (ReduceLimit2Addr != -1 && ((int)Num2Addrs >= ReduceLimit2Addr)) return false; @@ -637,7 +649,7 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && - canAddPseudoFlagDep(CPSRDef, MI)) + canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop)) return false; // Add the 16-bit instruction. 
@@ -674,7 +686,8 @@ Thumb2SizeReduce::ReduceTo2Addr(MachineBasicBlock &MBB, MachineInstr *MI, bool Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, const ReduceEntry &Entry, - bool LiveCPSR, MachineInstr *CPSRDef) { + bool LiveCPSR, MachineInstr *CPSRDef, + bool IsSelfLoop) { if (ReduceLimit != -1 && ((int)NumNarrows >= ReduceLimit)) return false; @@ -727,7 +740,7 @@ Thumb2SizeReduce::ReduceToNarrow(MachineBasicBlock &MBB, MachineInstr *MI, // Avoid adding a false dependency on partial flag update by some 16-bit // instructions which has the 's' bit set. if (Entry.PartFlag && NewMCID.hasOptionalDef() && HasCC && - canAddPseudoFlagDep(CPSRDef, MI)) + canAddPseudoFlagDep(CPSRDef, MI, IsSelfLoop)) return false; // Add the 16-bit instruction. @@ -818,6 +831,9 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { bool LiveCPSR = MBB.isLiveIn(ARM::CPSR); MachineInstr *CPSRDef = 0; + // If this BB loops back to itself, conservatively avoid narrowing the + // first instruction that does partial flag update. + bool IsSelfLoop = MBB.isSuccessor(&MBB); MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MachineBasicBlock::iterator NextMII; for (; MII != E; MII = NextMII) { @@ -832,7 +848,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { const ReduceEntry &Entry = ReduceTable[OPI->second]; // Ignore "special" cases for now. if (Entry.Special) { - if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef)) { + if (ReduceSpecial(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -842,7 +858,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Try to transform to a 16-bit two-address instruction. 
if (Entry.NarrowOpc2 && - ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef)) { + ReduceTo2Addr(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -851,7 +867,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { // Try to transform to a 16-bit non-two-address instruction. if (Entry.NarrowOpc1 && - ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef)) { + ReduceToNarrow(MBB, MI, Entry, LiveCPSR, CPSRDef, IsSelfLoop)) { Modified = true; MachineBasicBlock::iterator I = prior(NextMII); MI = &*I; @@ -861,12 +877,15 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) { ProcessNext: bool DefCPSR = false; LiveCPSR = UpdateCPSRDef(*MI, LiveCPSR, DefCPSR); - if (MI->getDesc().isCall()) + if (MI->getDesc().isCall()) { // Calls don't really set CPSR. CPSRDef = 0; - else if (DefCPSR) + IsSelfLoop = false; + } else if (DefCPSR) { // This is the last CPSR defining instruction. CPSRDef = MI; + IsSelfLoop = false; + } } return Modified; -- cgit v1.1 From b28bdbf8468e728501828fd41b3f2dba8db62efa Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 27 Oct 2011 21:53:50 +0000 Subject: If we're searching for a symbol reference to pretty-print a scattered relocation address, and we don't find a symbol table entry, try section begin addresses as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143151 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObjectFile.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'lib') diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index 099ac2c..65ce5f8 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -821,6 +821,24 @@ void MachOObjectFile::printRelocationTargetName( return; } + // If we couldn't find a symbol that this relocation refers to, try + // to find a section beginning instead. 
+ for (section_iterator SI = begin_sections(), SE = end_sections(); SI != SE; + SI.increment(ec)) { + if (ec) report_fatal_error(ec.message()); + + uint64_t Addr; + StringRef Name; + + if ((ec = SI->getAddress(Addr))) + report_fatal_error(ec.message()); + if (Addr != Val) continue; + if ((ec = SI->getName(Name))) + report_fatal_error(ec.message()); + fmt << Name; + return; + } + fmt << format("0x%x", Val); return; } -- cgit v1.1 From 55dabaa73a7a0be4398fae58443f3ad8264e537e Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 27 Oct 2011 22:25:42 +0000 Subject: ARM isel for vld1, opcode selection for register stride post-index pseudos. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143158 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 8a1b618..39d4d39 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1562,6 +1562,10 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; + case ARM::VLD1q8PseudoWB_fixed: return ARM::VLD1q8PseudoWB_register; + case ARM::VLD1q16PseudoWB_fixed: return ARM::VLD1q16PseudoWB_register; + case ARM::VLD1q32PseudoWB_fixed: return ARM::VLD1q32PseudoWB_register; + case ARM::VLD1q64PseudoWB_fixed: return ARM::VLD1q64PseudoWB_register; } return Opc; // If not one we handle, return it unchanged. } -- cgit v1.1 From b0117eed84b7899c677a1da5e074fe3a2b7046dd Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 27 Oct 2011 22:39:16 +0000 Subject: Also set addrmode6 alignment when align==size. Previously, we were only setting the alignment bits on over-aligned loads and stores. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143160 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelDAGToDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 39d4d39..7c67e0a 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -923,7 +923,7 @@ bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, // The maximum alignment is equal to the memory size being referenced. unsigned LSNAlign = LSN->getAlignment(); unsigned MemSize = LSN->getMemoryVT().getSizeInBits() / 8; - if (LSNAlign > MemSize && MemSize > 1) + if (LSNAlign >= MemSize && MemSize > 1) Alignment = MemSize; } else { // All other uses of addrmode6 are for intrinsics. For now just record -- cgit v1.1 From 04b12a4cfb74ac65ea86d57bde5999ef6ab09ad4 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Thu, 27 Oct 2011 22:53:10 +0000 Subject: Add some NEON stores to the VLD decoding hook that were accidentally omitted previously. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143162 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 5174134..0e63d9c 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2136,6 +2136,10 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VLD1d16wb_fixed: case ARM::VLD1d32wb_fixed: case ARM::VLD1d64wb_fixed: + case ARM::VLD1d8Twb_fixed: + case ARM::VLD1d16Twb_fixed: + case ARM::VLD1d32Twb_fixed: + case ARM::VLD1d64Twb_fixed: case ARM::VLD1d8wb_register: case ARM::VLD1d16wb_register: case ARM::VLD1d32wb_register: -- cgit v1.1 From 33ba8b0e96acde0d8ab1ffc565a5ef4c8b6b6ac2 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 27 Oct 2011 22:56:32 +0000 Subject: Remove the Alpha backend. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143164 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Triple.cpp | 7 - lib/Target/Alpha/Alpha.h | 43 - lib/Target/Alpha/Alpha.td | 68 -- lib/Target/Alpha/AlphaAsmPrinter.cpp | 166 --- lib/Target/Alpha/AlphaBranchSelector.cpp | 66 -- lib/Target/Alpha/AlphaCallingConv.td | 38 - lib/Target/Alpha/AlphaFrameLowering.cpp | 143 --- lib/Target/Alpha/AlphaFrameLowering.h | 43 - lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 425 ------- lib/Target/Alpha/AlphaISelLowering.cpp | 962 ---------------- lib/Target/Alpha/AlphaISelLowering.h | 142 --- lib/Target/Alpha/AlphaInstrFormats.td | 268 ----- lib/Target/Alpha/AlphaInstrInfo.cpp | 382 ------- lib/Target/Alpha/AlphaInstrInfo.h | 85 -- lib/Target/Alpha/AlphaInstrInfo.td | 1159 -------------------- lib/Target/Alpha/AlphaLLRP.cpp | 158 --- lib/Target/Alpha/AlphaMachineFunctionInfo.h | 62 -- lib/Target/Alpha/AlphaRegisterInfo.cpp | 199 ---- lib/Target/Alpha/AlphaRegisterInfo.h | 56 - lib/Target/Alpha/AlphaRegisterInfo.td | 133 --- lib/Target/Alpha/AlphaRelocations.h | 31 - lib/Target/Alpha/AlphaSchedule.td | 85 -- lib/Target/Alpha/AlphaSelectionDAGInfo.cpp | 23 - lib/Target/Alpha/AlphaSelectionDAGInfo.h | 31 - lib/Target/Alpha/AlphaSubtarget.cpp | 35 - lib/Target/Alpha/AlphaSubtarget.h | 49 - lib/Target/Alpha/AlphaTargetMachine.cpp | 51 - lib/Target/Alpha/AlphaTargetMachine.h | 66 -- lib/Target/Alpha/CMakeLists.txt | 38 - lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp | 23 - lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h | 29 - .../Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp | 78 -- lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h | 40 - lib/Target/Alpha/MCTargetDesc/CMakeLists.txt | 11 - lib/Target/Alpha/MCTargetDesc/Makefile | 16 - lib/Target/Alpha/Makefile | 21 - lib/Target/Alpha/README.txt | 42 - lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp | 20 - lib/Target/Alpha/TargetInfo/CMakeLists.txt | 13 - lib/Target/Alpha/TargetInfo/Makefile | 15 - 40 files changed, 5322 
deletions(-) delete mode 100644 lib/Target/Alpha/Alpha.h delete mode 100644 lib/Target/Alpha/Alpha.td delete mode 100644 lib/Target/Alpha/AlphaAsmPrinter.cpp delete mode 100644 lib/Target/Alpha/AlphaBranchSelector.cpp delete mode 100644 lib/Target/Alpha/AlphaCallingConv.td delete mode 100644 lib/Target/Alpha/AlphaFrameLowering.cpp delete mode 100644 lib/Target/Alpha/AlphaFrameLowering.h delete mode 100644 lib/Target/Alpha/AlphaISelDAGToDAG.cpp delete mode 100644 lib/Target/Alpha/AlphaISelLowering.cpp delete mode 100644 lib/Target/Alpha/AlphaISelLowering.h delete mode 100644 lib/Target/Alpha/AlphaInstrFormats.td delete mode 100644 lib/Target/Alpha/AlphaInstrInfo.cpp delete mode 100644 lib/Target/Alpha/AlphaInstrInfo.h delete mode 100644 lib/Target/Alpha/AlphaInstrInfo.td delete mode 100644 lib/Target/Alpha/AlphaLLRP.cpp delete mode 100644 lib/Target/Alpha/AlphaMachineFunctionInfo.h delete mode 100644 lib/Target/Alpha/AlphaRegisterInfo.cpp delete mode 100644 lib/Target/Alpha/AlphaRegisterInfo.h delete mode 100644 lib/Target/Alpha/AlphaRegisterInfo.td delete mode 100644 lib/Target/Alpha/AlphaRelocations.h delete mode 100644 lib/Target/Alpha/AlphaSchedule.td delete mode 100644 lib/Target/Alpha/AlphaSelectionDAGInfo.cpp delete mode 100644 lib/Target/Alpha/AlphaSelectionDAGInfo.h delete mode 100644 lib/Target/Alpha/AlphaSubtarget.cpp delete mode 100644 lib/Target/Alpha/AlphaSubtarget.h delete mode 100644 lib/Target/Alpha/AlphaTargetMachine.cpp delete mode 100644 lib/Target/Alpha/AlphaTargetMachine.h delete mode 100644 lib/Target/Alpha/CMakeLists.txt delete mode 100644 lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp delete mode 100644 lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h delete mode 100644 lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp delete mode 100644 lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h delete mode 100644 lib/Target/Alpha/MCTargetDesc/CMakeLists.txt delete mode 100644 lib/Target/Alpha/MCTargetDesc/Makefile delete mode 100644 
lib/Target/Alpha/Makefile delete mode 100644 lib/Target/Alpha/README.txt delete mode 100644 lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp delete mode 100644 lib/Target/Alpha/TargetInfo/CMakeLists.txt delete mode 100644 lib/Target/Alpha/TargetInfo/Makefile (limited to 'lib') diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp index 6e252a5..ac4f005 100644 --- a/lib/Support/Triple.cpp +++ b/lib/Support/Triple.cpp @@ -18,7 +18,6 @@ const char *Triple::getArchTypeName(ArchType Kind) { case InvalidArch: return ""; case UnknownArch: return "unknown"; - case alpha: return "alpha"; case arm: return "arm"; case cellspu: return "cellspu"; case mips: return "mips"; @@ -50,8 +49,6 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { default: return 0; - case alpha: return "alpha"; - case arm: case thumb: return "arm"; @@ -131,8 +128,6 @@ const char *Triple::getEnvironmentTypeName(EnvironmentType Kind) { } Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { - if (Name == "alpha") - return alpha; if (Name == "arm") return arm; if (Name == "cellspu") @@ -286,8 +281,6 @@ Triple::ArchType Triple::ParseArch(StringRef ArchName) { else if (ArchName == "thumb" || ArchName.startswith("thumbv")) return thumb; - else if (ArchName.startswith("alpha")) - return alpha; else if (ArchName == "spu" || ArchName == "cellspu") return cellspu; else if (ArchName == "msp430") diff --git a/lib/Target/Alpha/Alpha.h b/lib/Target/Alpha/Alpha.h deleted file mode 100644 index 6ffaf45..0000000 --- a/lib/Target/Alpha/Alpha.h +++ /dev/null @@ -1,43 +0,0 @@ -//===-- Alpha.h - Top-level interface for Alpha representation --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file contains the entry points for global functions defined in the LLVM -// Alpha back-end. -// -//===----------------------------------------------------------------------===// - -#ifndef TARGET_ALPHA_H -#define TARGET_ALPHA_H - -#include "MCTargetDesc/AlphaMCTargetDesc.h" -#include "llvm/Target/TargetMachine.h" - -namespace llvm { - namespace Alpha { - // These describe LDAx - - static const int IMM_LOW = -32768; - static const int IMM_HIGH = 32767; - static const int IMM_MULT = 65536; - } - - class AlphaTargetMachine; - class FunctionPass; - class formatted_raw_ostream; - - FunctionPass *createAlphaISelDag(AlphaTargetMachine &TM); - FunctionPass *createAlphaPatternInstructionSelector(TargetMachine &TM); - FunctionPass *createAlphaJITCodeEmitterPass(AlphaTargetMachine &TM, - JITCodeEmitter &JCE); - FunctionPass *createAlphaLLRPPass(AlphaTargetMachine &tm); - FunctionPass *createAlphaBranchSelectionPass(); - -} // end namespace llvm; - -#endif diff --git a/lib/Target/Alpha/Alpha.td b/lib/Target/Alpha/Alpha.td deleted file mode 100644 index ae79c2e..0000000 --- a/lib/Target/Alpha/Alpha.td +++ /dev/null @@ -1,68 +0,0 @@ -//===- Alpha.td - Describe the Alpha Target Machine --------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -// Get the target-independent interfaces which we are implementing... 
-// -include "llvm/Target/Target.td" - -//Alpha is little endian - -//===----------------------------------------------------------------------===// -// Subtarget Features -//===----------------------------------------------------------------------===// - -def FeatureCIX : SubtargetFeature<"cix", "HasCT", "true", - "Enable CIX extensions">; - -//===----------------------------------------------------------------------===// -// Register File Description -//===----------------------------------------------------------------------===// - -include "AlphaRegisterInfo.td" - -//===----------------------------------------------------------------------===// -// Calling Convention Description -//===----------------------------------------------------------------------===// - -include "AlphaCallingConv.td" - -//===----------------------------------------------------------------------===// -// Schedule Description -//===----------------------------------------------------------------------===// - -include "AlphaSchedule.td" - -//===----------------------------------------------------------------------===// -// Instruction Descriptions -//===----------------------------------------------------------------------===// - -include "AlphaInstrInfo.td" - -def AlphaInstrInfo : InstrInfo; - -//===----------------------------------------------------------------------===// -// Alpha Processor Definitions -//===----------------------------------------------------------------------===// - -def : Processor<"generic", Alpha21264Itineraries, []>; -def : Processor<"ev6" , Alpha21264Itineraries, []>; -def : Processor<"ev67" , Alpha21264Itineraries, [FeatureCIX]>; - -//===----------------------------------------------------------------------===// -// The Alpha Target -//===----------------------------------------------------------------------===// - - -def Alpha : Target { - // Pull in Instruction Info: - let InstructionSet = AlphaInstrInfo; -} diff --git a/lib/Target/Alpha/AlphaAsmPrinter.cpp 
b/lib/Target/Alpha/AlphaAsmPrinter.cpp deleted file mode 100644 index 5dce06a..0000000 --- a/lib/Target/Alpha/AlphaAsmPrinter.cpp +++ /dev/null @@ -1,166 +0,0 @@ -//===-- AlphaAsmPrinter.cpp - Alpha LLVM assembly writer ------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains a printer that converts from our internal representation -// of machine-dependent LLVM code to GAS-format Alpha assembly language. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "asm-printer" -#include "Alpha.h" -#include "AlphaInstrInfo.h" -#include "AlphaTargetMachine.h" -#include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/MC/MCStreamer.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -namespace { - struct AlphaAsmPrinter : public AsmPrinter { - /// Unique incrementer for label values for referencing Global values. 
- /// - - explicit AlphaAsmPrinter(TargetMachine &tm, MCStreamer &Streamer) - : AsmPrinter(tm, Streamer) {} - - virtual const char *getPassName() const { - return "Alpha Assembly Printer"; - } - void printInstruction(const MachineInstr *MI, raw_ostream &O); - void EmitInstruction(const MachineInstr *MI) { - SmallString<128> Str; - raw_svector_ostream OS(Str); - printInstruction(MI, OS); - OutStreamer.EmitRawText(OS.str()); - } - static const char *getRegisterName(unsigned RegNo); - - void printOp(const MachineOperand &MO, raw_ostream &O); - void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O); - virtual void EmitFunctionBodyStart(); - virtual void EmitFunctionBodyEnd(); - void EmitStartOfAsmFile(Module &M); - - bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, const char *ExtraCode, - raw_ostream &O); - bool PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O); - }; -} // end of anonymous namespace - -#include "AlphaGenAsmWriter.inc" - -void AlphaAsmPrinter::printOperand(const MachineInstr *MI, int opNum, - raw_ostream &O) { - const MachineOperand &MO = MI->getOperand(opNum); - if (MO.isReg()) { - assert(TargetRegisterInfo::isPhysicalRegister(MO.getReg()) && - "Not physreg??"); - O << getRegisterName(MO.getReg()); - } else if (MO.isImm()) { - O << MO.getImm(); - assert(MO.getImm() < (1 << 30)); - } else { - printOp(MO, O); - } -} - - -void AlphaAsmPrinter::printOp(const MachineOperand &MO, raw_ostream &O) { - switch (MO.getType()) { - case MachineOperand::MO_Register: - O << getRegisterName(MO.getReg()); - return; - - case MachineOperand::MO_Immediate: - assert(0 && "printOp() does not handle immediate values"); - return; - - case MachineOperand::MO_MachineBasicBlock: - O << *MO.getMBB()->getSymbol(); - return; - - case MachineOperand::MO_ConstantPoolIndex: - O << MAI->getPrivateGlobalPrefix() << "CPI" << getFunctionNumber() << "_" - << 
MO.getIndex(); - return; - - case MachineOperand::MO_ExternalSymbol: - O << MO.getSymbolName(); - return; - - case MachineOperand::MO_GlobalAddress: - O << *Mang->getSymbol(MO.getGlobal()); - return; - - case MachineOperand::MO_JumpTableIndex: - O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber() - << '_' << MO.getIndex(); - return; - - default: - O << ""; - return; - } -} - -/// EmitFunctionBodyStart - Targets can override this to emit stuff before -/// the first basic block in the function. -void AlphaAsmPrinter::EmitFunctionBodyStart() { - OutStreamer.EmitRawText("\t.ent " + Twine(CurrentFnSym->getName())); -} - -/// EmitFunctionBodyEnd - Targets can override this to emit stuff after -/// the last basic block in the function. -void AlphaAsmPrinter::EmitFunctionBodyEnd() { - OutStreamer.EmitRawText("\t.end " + Twine(CurrentFnSym->getName())); -} - -void AlphaAsmPrinter::EmitStartOfAsmFile(Module &M) { - OutStreamer.EmitRawText(StringRef("\t.arch ev6")); - OutStreamer.EmitRawText(StringRef("\t.set noat")); -} - -/// PrintAsmOperand - Print out an operand for an inline asm expression. -/// -bool AlphaAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, - unsigned AsmVariant, - const char *ExtraCode, raw_ostream &O) { - printOperand(MI, OpNo, O); - return false; -} - -bool AlphaAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, - unsigned OpNo, unsigned AsmVariant, - const char *ExtraCode, - raw_ostream &O) { - if (ExtraCode && ExtraCode[0]) - return true; // Unknown modifier. - O << "0("; - printOperand(MI, OpNo, O); - O << ")"; - return false; -} - -// Force static initialization. 
-extern "C" void LLVMInitializeAlphaAsmPrinter() { - RegisterAsmPrinter X(TheAlphaTarget); -} diff --git a/lib/Target/Alpha/AlphaBranchSelector.cpp b/lib/Target/Alpha/AlphaBranchSelector.cpp deleted file mode 100644 index 3768117..0000000 --- a/lib/Target/Alpha/AlphaBranchSelector.cpp +++ /dev/null @@ -1,66 +0,0 @@ -//===-- AlphaBranchSelector.cpp - Convert Pseudo branchs ----------*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Replace Pseudo COND_BRANCH_* with their appropriate real branch -// Simplified version of the PPC Branch Selector -// -//===----------------------------------------------------------------------===// - -#include "Alpha.h" -#include "AlphaInstrInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/MC/MCAsmInfo.h" -using namespace llvm; - -namespace { - struct AlphaBSel : public MachineFunctionPass { - static char ID; - AlphaBSel() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &Fn); - - virtual const char *getPassName() const { - return "Alpha Branch Selection"; - } - }; - char AlphaBSel::ID = 0; -} - -/// createAlphaBranchSelectionPass - returns an instance of the Branch Selection -/// Pass -/// -FunctionPass *llvm::createAlphaBranchSelectionPass() { - return new AlphaBSel(); -} - -bool AlphaBSel::runOnMachineFunction(MachineFunction &Fn) { - - for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E; - ++MFI) { - MachineBasicBlock *MBB = MFI; - - for (MachineBasicBlock::iterator MBBI = MBB->begin(), EE = MBB->end(); - MBBI != EE; ++MBBI) { - if (MBBI->getOpcode() == Alpha::COND_BRANCH_I || - MBBI->getOpcode() == Alpha::COND_BRANCH_F) { - - // condbranch operands: - // 0. bc opcode - // 1. reg - // 2. 
target MBB - const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); - MBBI->setDesc(TII->get(MBBI->getOperand(0).getImm())); - } - } - } - - return true; -} - diff --git a/lib/Target/Alpha/AlphaCallingConv.td b/lib/Target/Alpha/AlphaCallingConv.td deleted file mode 100644 index bde8819..0000000 --- a/lib/Target/Alpha/AlphaCallingConv.td +++ /dev/null @@ -1,38 +0,0 @@ -//===- AlphaCallingConv.td - Calling Conventions for Alpha -*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// This describes the calling conventions for Alpha architecture. -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// Alpha Return Value Calling Convention -//===----------------------------------------------------------------------===// -def RetCC_Alpha : CallingConv<[ - // i64 is returned in register R0 - // R1 is an llvm extension, I don't know what gcc does - CCIfType<[i64], CCAssignToReg<[R0,R1]>>, - - // f32 / f64 are returned in F0/F1 - CCIfType<[f32, f64], CCAssignToReg<[F0, F1]>> -]>; - -//===----------------------------------------------------------------------===// -// Alpha Argument Calling Conventions -//===----------------------------------------------------------------------===// -def CC_Alpha : CallingConv<[ - // The first 6 arguments are passed in registers, whether integer or - // floating-point - CCIfType<[i64], CCAssignToRegWithShadow<[R16, R17, R18, R19, R20, R21], - [F16, F17, F18, F19, F20, F21]>>, - - CCIfType<[f32, f64], CCAssignToRegWithShadow<[F16, F17, F18, F19, F20, F21], - [R16, R17, R18, R19, R20, R21]>>, - - // Stack slots are 8 bytes in size and 8-byte aligned. 
- CCIfType<[i64, f32, f64], CCAssignToStack<8, 8>> -]>; diff --git a/lib/Target/Alpha/AlphaFrameLowering.cpp b/lib/Target/Alpha/AlphaFrameLowering.cpp deleted file mode 100644 index 690cd1d..0000000 --- a/lib/Target/Alpha/AlphaFrameLowering.cpp +++ /dev/null @@ -1,143 +0,0 @@ -//=====- AlphaFrameLowering.cpp - Alpha Frame Information ------*- C++ -*-====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Alpha implementation of TargetFrameLowering class. -// -//===----------------------------------------------------------------------===// - -#include "AlphaFrameLowering.h" -#include "AlphaInstrInfo.h" -#include "AlphaMachineFunctionInfo.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/ADT/Twine.h" - -using namespace llvm; - -static long getUpper16(long l) { - long y = l / Alpha::IMM_MULT; - if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH) - ++y; - return y; -} - -static long getLower16(long l) { - long h = getUpper16(l); - return l - h * Alpha::IMM_MULT; -} - -// hasFP - Return true if the specified function should have a dedicated frame -// pointer register. This is true if the function has variable sized allocas or -// if frame pointer elimination is disabled. 
-// -bool AlphaFrameLowering::hasFP(const MachineFunction &MF) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - return MFI->hasVarSizedObjects(); -} - -void AlphaFrameLowering::emitPrologue(MachineFunction &MF) const { - MachineBasicBlock &MBB = MF.front(); // Prolog goes in entry BB - MachineBasicBlock::iterator MBBI = MBB.begin(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - - DebugLoc dl = (MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc()); - bool FP = hasFP(MF); - - // Handle GOP offset - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAHg), Alpha::R29) - .addGlobalAddress(MF.getFunction()).addReg(Alpha::R27).addImm(++curgpdist); - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAg), Alpha::R29) - .addGlobalAddress(MF.getFunction()).addReg(Alpha::R29).addImm(curgpdist); - - BuildMI(MBB, MBBI, dl, TII.get(Alpha::ALTENT)) - .addGlobalAddress(MF.getFunction()); - - // Get the number of bytes to allocate from the FrameInfo - long NumBytes = MFI->getStackSize(); - - if (FP) - NumBytes += 8; //reserve space for the old FP - - // Do we need to allocate space on the stack? - if (NumBytes == 0) return; - - unsigned Align = getStackAlignment(); - NumBytes = (NumBytes+Align-1)/Align*Align; - - // Update frame info to pretend that this is part of the stack... 
- MFI->setStackSize(NumBytes); - - // adjust stack pointer: r30 -= numbytes - NumBytes = -NumBytes; - if (NumBytes >= Alpha::IMM_LOW) { - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes) - .addReg(Alpha::R30); - } else if (getUpper16(NumBytes) >= Alpha::IMM_LOW) { - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30) - .addImm(getUpper16(NumBytes)).addReg(Alpha::R30); - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) - .addImm(getLower16(NumBytes)).addReg(Alpha::R30); - } else { - report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); - } - - // Now if we need to, save the old FP and set the new - if (FP) { - BuildMI(MBB, MBBI, dl, TII.get(Alpha::STQ)) - .addReg(Alpha::R15).addImm(0).addReg(Alpha::R30); - // This must be the last instr in the prolog - BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R15) - .addReg(Alpha::R30).addReg(Alpha::R30); - } - -} - -void AlphaFrameLowering::emitEpilogue(MachineFunction &MF, - MachineBasicBlock &MBB) const { - const MachineFrameInfo *MFI = MF.getFrameInfo(); - MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); - const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); - - assert((MBBI->getOpcode() == Alpha::RETDAG || - MBBI->getOpcode() == Alpha::RETDAGp) - && "Can only insert epilog into returning blocks"); - DebugLoc dl = MBBI->getDebugLoc(); - - bool FP = hasFP(MF); - - // Get the number of bytes allocated from the FrameInfo... 
- long NumBytes = MFI->getStackSize(); - - //now if we need to, restore the old FP - if (FP) { - //copy the FP into the SP (discards allocas) - BuildMI(MBB, MBBI, dl, TII.get(Alpha::BISr), Alpha::R30).addReg(Alpha::R15) - .addReg(Alpha::R15); - //restore the FP - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDQ), Alpha::R15) - .addImm(0).addReg(Alpha::R15); - } - - if (NumBytes != 0) { - if (NumBytes <= Alpha::IMM_HIGH) { - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30).addImm(NumBytes) - .addReg(Alpha::R30); - } else if (getUpper16(NumBytes) <= Alpha::IMM_HIGH) { - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDAH), Alpha::R30) - .addImm(getUpper16(NumBytes)).addReg(Alpha::R30); - BuildMI(MBB, MBBI, dl, TII.get(Alpha::LDA), Alpha::R30) - .addImm(getLower16(NumBytes)).addReg(Alpha::R30); - } else { - report_fatal_error("Too big a stack frame at " + Twine(NumBytes)); - } - } -} diff --git a/lib/Target/Alpha/AlphaFrameLowering.h b/lib/Target/Alpha/AlphaFrameLowering.h deleted file mode 100644 index ebd9e1b..0000000 --- a/lib/Target/Alpha/AlphaFrameLowering.h +++ /dev/null @@ -1,43 +0,0 @@ -//==-- AlphaFrameLowering.h - Define frame lowering for Alpha --*- C++ -*---==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -// -//===----------------------------------------------------------------------===// - -#ifndef ALPHA_FRAMEINFO_H -#define ALPHA_FRAMEINFO_H - -#include "Alpha.h" -#include "AlphaSubtarget.h" -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { - class AlphaSubtarget; - -class AlphaFrameLowering : public TargetFrameLowering { - const AlphaSubtarget &STI; - // FIXME: This should end in MachineFunctionInfo, not here! 
- mutable int curgpdist; -public: - explicit AlphaFrameLowering(const AlphaSubtarget &sti) - : TargetFrameLowering(StackGrowsDown, 16, 0), STI(sti), curgpdist(0) { - } - - /// emitProlog/emitEpilog - These methods insert prolog and epilog code into - /// the function. - void emitPrologue(MachineFunction &MF) const; - void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const; - - bool hasFP(const MachineFunction &MF) const; -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp deleted file mode 100644 index f877c65..0000000 --- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp +++ /dev/null @@ -1,425 +0,0 @@ -//===-- AlphaISelDAGToDAG.cpp - Alpha pattern matching inst selector ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a pattern matching instruction selector for Alpha, -// converting from a legalized dag to a Alpha dag. 
-// -//===----------------------------------------------------------------------===// - -#include "Alpha.h" -#include "AlphaTargetMachine.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalValue.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" -#include -using namespace llvm; - -namespace { - - //===--------------------------------------------------------------------===// - /// AlphaDAGToDAGISel - Alpha specific code to select Alpha machine - /// instructions for SelectionDAG operations. - class AlphaDAGToDAGISel : public SelectionDAGISel { - static const int64_t IMM_LOW = -32768; - static const int64_t IMM_HIGH = 32767; - static const int64_t IMM_MULT = 65536; - static const int64_t IMM_FULLHIGH = IMM_HIGH + IMM_HIGH * IMM_MULT; - static const int64_t IMM_FULLLOW = IMM_LOW + IMM_LOW * IMM_MULT; - - static int64_t get_ldah16(int64_t x) { - int64_t y = x / IMM_MULT; - if (x % IMM_MULT > IMM_HIGH) - ++y; - return y; - } - - static int64_t get_lda16(int64_t x) { - return x - get_ldah16(x) * IMM_MULT; - } - - /// get_zapImm - Return a zap mask if X is a valid immediate for a zapnot - /// instruction (if not, return 0). Note that this code accepts partial - /// zap masks. For example (and LHS, 1) is a valid zap, as long we know - /// that the bits 1-7 of LHS are already zero. If LHS is non-null, we are - /// in checking mode. If LHS is null, we assume that the mask has already - /// been validated before. 
- uint64_t get_zapImm(SDValue LHS, uint64_t Constant) const { - uint64_t BitsToCheck = 0; - unsigned Result = 0; - for (unsigned i = 0; i != 8; ++i) { - if (((Constant >> 8*i) & 0xFF) == 0) { - // nothing to do. - } else { - Result |= 1 << i; - if (((Constant >> 8*i) & 0xFF) == 0xFF) { - // If the entire byte is set, zapnot the byte. - } else if (LHS.getNode() == 0) { - // Otherwise, if the mask was previously validated, we know its okay - // to zapnot this entire byte even though all the bits aren't set. - } else { - // Otherwise we don't know that the it's okay to zapnot this entire - // byte. Only do this iff we can prove that the missing bits are - // already null, so the bytezap doesn't need to really null them. - BitsToCheck |= ~Constant & (0xFFULL << 8*i); - } - } - } - - // If there are missing bits in a byte (for example, X & 0xEF00), check to - // see if the missing bits (0x1000) are already known zero if not, the zap - // isn't okay to do, as it won't clear all the required bits. - if (BitsToCheck && - !CurDAG->MaskedValueIsZero(LHS, - APInt(LHS.getValueSizeInBits(), - BitsToCheck))) - return 0; - - return Result; - } - - static uint64_t get_zapImm(uint64_t x) { - unsigned build = 0; - for(int i = 0; i != 8; ++i) { - if ((x & 0x00FF) == 0x00FF) - build |= 1 << i; - else if ((x & 0x00FF) != 0) - return 0; - x >>= 8; - } - return build; - } - - - static uint64_t getNearPower2(uint64_t x) { - if (!x) return 0; - unsigned at = CountLeadingZeros_64(x); - uint64_t complow = 1ULL << (63 - at); - uint64_t comphigh = complow << 1; - if (x - complow <= comphigh - x) - return complow; - else - return comphigh; - } - - static bool chkRemNearPower2(uint64_t x, uint64_t r, bool swap) { - uint64_t y = getNearPower2(x); - if (swap) - return (y - x) == r; - else - return (x - y) == r; - } - - public: - explicit AlphaDAGToDAGISel(AlphaTargetMachine &TM) - : SelectionDAGISel(TM) - {} - - /// getI64Imm - Return a target constant with the specified value, of type - /// i64. 
- inline SDValue getI64Imm(int64_t Imm) { - return CurDAG->getTargetConstant(Imm, MVT::i64); - } - - // Select - Convert the specified operand from a target-independent to a - // target-specific node if it hasn't already been changed. - SDNode *Select(SDNode *N); - - virtual const char *getPassName() const { - return "Alpha DAG->DAG Pattern Instruction Selection"; - } - - /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for - /// inline asm expressions. - virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op, - char ConstraintCode, - std::vector &OutOps) { - SDValue Op0; - switch (ConstraintCode) { - default: return true; - case 'm': // memory - Op0 = Op; - break; - } - - OutOps.push_back(Op0); - return false; - } - -// Include the pieces autogenerated from the target description. -#include "AlphaGenDAGISel.inc" - -private: - /// getTargetMachine - Return a reference to the TargetMachine, casted - /// to the target-specific type. - const AlphaTargetMachine &getTargetMachine() { - return static_cast(TM); - } - - /// getInstrInfo - Return a reference to the TargetInstrInfo, casted - /// to the target-specific type. - const AlphaInstrInfo *getInstrInfo() { - return getTargetMachine().getInstrInfo(); - } - - SDNode *getGlobalBaseReg(); - SDNode *getGlobalRetAddr(); - void SelectCALL(SDNode *Op); - - }; -} - -/// getGlobalBaseReg - Output the instructions required to put the -/// GOT address into a register. -/// -SDNode *AlphaDAGToDAGISel::getGlobalBaseReg() { - unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); - return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode(); -} - -/// getGlobalRetAddr - Grab the return address. 
-/// -SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() { - unsigned GlobalRetAddr = getInstrInfo()->getGlobalRetAddr(MF); - return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode(); -} - -// Select - Convert the specified operand from a target-independent to a -// target-specific node if it hasn't already been changed. -SDNode *AlphaDAGToDAGISel::Select(SDNode *N) { - if (N->isMachineOpcode()) - return NULL; // Already selected. - DebugLoc dl = N->getDebugLoc(); - - switch (N->getOpcode()) { - default: break; - case AlphaISD::CALL: - SelectCALL(N); - return NULL; - - case ISD::FrameIndex: { - int FI = cast(N)->getIndex(); - return CurDAG->SelectNodeTo(N, Alpha::LDA, MVT::i64, - CurDAG->getTargetFrameIndex(FI, MVT::i32), - getI64Imm(0)); - } - case ISD::GLOBAL_OFFSET_TABLE: - return getGlobalBaseReg(); - case AlphaISD::GlobalRetAddr: - return getGlobalRetAddr(); - - case AlphaISD::DivCall: { - SDValue Chain = CurDAG->getEntryNode(); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - SDValue N2 = N->getOperand(2); - Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R24, N1, - SDValue(0,0)); - Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R25, N2, - Chain.getValue(1)); - Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, N0, - Chain.getValue(1)); - SDNode *CNode = - CurDAG->getMachineNode(Alpha::JSRs, dl, MVT::Other, MVT::Glue, - Chain, Chain.getValue(1)); - Chain = CurDAG->getCopyFromReg(Chain, dl, Alpha::R27, MVT::i64, - SDValue(CNode, 1)); - return CurDAG->SelectNodeTo(N, Alpha::BISr, MVT::i64, Chain, Chain); - } - - case ISD::READCYCLECOUNTER: { - SDValue Chain = N->getOperand(0); - return CurDAG->getMachineNode(Alpha::RPCC, dl, MVT::i64, MVT::Other, - Chain); - } - - case ISD::Constant: { - uint64_t uval = cast(N)->getZExtValue(); - - if (uval == 0) { - SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, - Alpha::R31, MVT::i64); - ReplaceUses(SDValue(N, 0), Result); - return NULL; - } - - int64_t val = (int64_t)uval; - 
int32_t val32 = (int32_t)val; - if (val <= IMM_HIGH + IMM_HIGH * IMM_MULT && - val >= IMM_LOW + IMM_LOW * IMM_MULT) - break; //(LDAH (LDA)) - if ((uval >> 32) == 0 && //empty upper bits - val32 <= IMM_HIGH + IMM_HIGH * IMM_MULT) - // val32 >= IMM_LOW + IMM_LOW * IMM_MULT) //always true - break; //(zext (LDAH (LDA))) - //Else use the constant pool - ConstantInt *C = ConstantInt::get( - Type::getInt64Ty(*CurDAG->getContext()), uval); - SDValue CPI = CurDAG->getTargetConstantPool(C, MVT::i64); - SDNode *Tmp = CurDAG->getMachineNode(Alpha::LDAHr, dl, MVT::i64, CPI, - SDValue(getGlobalBaseReg(), 0)); - return CurDAG->SelectNodeTo(N, Alpha::LDQr, MVT::i64, MVT::Other, - CPI, SDValue(Tmp, 0), CurDAG->getEntryNode()); - } - case ISD::TargetConstantFP: - case ISD::ConstantFP: { - ConstantFPSDNode *CN = cast(N); - bool isDouble = N->getValueType(0) == MVT::f64; - EVT T = isDouble ? MVT::f64 : MVT::f32; - if (CN->getValueAPF().isPosZero()) { - return CurDAG->SelectNodeTo(N, isDouble ? Alpha::CPYST : Alpha::CPYSS, - T, CurDAG->getRegister(Alpha::F31, T), - CurDAG->getRegister(Alpha::F31, T)); - } else if (CN->getValueAPF().isNegZero()) { - return CurDAG->SelectNodeTo(N, isDouble ? 
Alpha::CPYSNT : Alpha::CPYSNS, - T, CurDAG->getRegister(Alpha::F31, T), - CurDAG->getRegister(Alpha::F31, T)); - } else { - report_fatal_error("Unhandled FP constant type"); - } - break; - } - - case ISD::SETCC: - if (N->getOperand(0).getNode()->getValueType(0).isFloatingPoint()) { - ISD::CondCode CC = cast(N->getOperand(2))->get(); - - unsigned Opc = Alpha::WTF; - bool rev = false; - bool inv = false; - switch(CC) { - default: DEBUG(N->dump(CurDAG)); llvm_unreachable("Unknown FP comparison!"); - case ISD::SETEQ: case ISD::SETOEQ: case ISD::SETUEQ: - Opc = Alpha::CMPTEQ; break; - case ISD::SETLT: case ISD::SETOLT: case ISD::SETULT: - Opc = Alpha::CMPTLT; break; - case ISD::SETLE: case ISD::SETOLE: case ISD::SETULE: - Opc = Alpha::CMPTLE; break; - case ISD::SETGT: case ISD::SETOGT: case ISD::SETUGT: - Opc = Alpha::CMPTLT; rev = true; break; - case ISD::SETGE: case ISD::SETOGE: case ISD::SETUGE: - Opc = Alpha::CMPTLE; rev = true; break; - case ISD::SETNE: case ISD::SETONE: case ISD::SETUNE: - Opc = Alpha::CMPTEQ; inv = true; break; - case ISD::SETO: - Opc = Alpha::CMPTUN; inv = true; break; - case ISD::SETUO: - Opc = Alpha::CMPTUN; break; - }; - SDValue tmp1 = N->getOperand(rev?1:0); - SDValue tmp2 = N->getOperand(rev?0:1); - SDNode *cmp = CurDAG->getMachineNode(Opc, dl, MVT::f64, tmp1, tmp2); - if (inv) - cmp = CurDAG->getMachineNode(Alpha::CMPTEQ, dl, - MVT::f64, SDValue(cmp, 0), - CurDAG->getRegister(Alpha::F31, MVT::f64)); - switch(CC) { - case ISD::SETUEQ: case ISD::SETULT: case ISD::SETULE: - case ISD::SETUNE: case ISD::SETUGT: case ISD::SETUGE: - { - SDNode* cmp2 = CurDAG->getMachineNode(Alpha::CMPTUN, dl, MVT::f64, - tmp1, tmp2); - cmp = CurDAG->getMachineNode(Alpha::ADDT, dl, MVT::f64, - SDValue(cmp2, 0), SDValue(cmp, 0)); - break; - } - default: break; - } - - SDNode* LD = CurDAG->getMachineNode(Alpha::FTOIT, dl, - MVT::i64, SDValue(cmp, 0)); - return CurDAG->getMachineNode(Alpha::CMPULT, dl, MVT::i64, - CurDAG->getRegister(Alpha::R31, MVT::i64), - 
SDValue(LD,0)); - } - break; - - case ISD::AND: { - ConstantSDNode* SC = NULL; - ConstantSDNode* MC = NULL; - if (N->getOperand(0).getOpcode() == ISD::SRL && - (MC = dyn_cast(N->getOperand(1))) && - (SC = dyn_cast(N->getOperand(0).getOperand(1)))) { - uint64_t sval = SC->getZExtValue(); - uint64_t mval = MC->getZExtValue(); - // If the result is a zap, let the autogened stuff handle it. - if (get_zapImm(N->getOperand(0), mval)) - break; - // given mask X, and shift S, we want to see if there is any zap in the - // mask if we play around with the botton S bits - uint64_t dontcare = (~0ULL) >> (64 - sval); - uint64_t mask = mval << sval; - - if (get_zapImm(mask | dontcare)) - mask = mask | dontcare; - - if (get_zapImm(mask)) { - SDValue Z = - SDValue(CurDAG->getMachineNode(Alpha::ZAPNOTi, dl, MVT::i64, - N->getOperand(0).getOperand(0), - getI64Imm(get_zapImm(mask))), 0); - return CurDAG->getMachineNode(Alpha::SRLr, dl, MVT::i64, Z, - getI64Imm(sval)); - } - } - break; - } - - } - - return SelectCode(N); -} - -void AlphaDAGToDAGISel::SelectCALL(SDNode *N) { - //TODO: add flag stuff to prevent nondeturministic breakage! 
- - SDValue Chain = N->getOperand(0); - SDValue Addr = N->getOperand(1); - SDValue InFlag = N->getOperand(N->getNumOperands() - 1); - DebugLoc dl = N->getDebugLoc(); - - if (Addr.getOpcode() == AlphaISD::GPRelLo) { - SDValue GOT = SDValue(getGlobalBaseReg(), 0); - Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R29, GOT, InFlag); - InFlag = Chain.getValue(1); - Chain = SDValue(CurDAG->getMachineNode(Alpha::BSR, dl, MVT::Other, - MVT::Glue, Addr.getOperand(0), - Chain, InFlag), 0); - } else { - Chain = CurDAG->getCopyToReg(Chain, dl, Alpha::R27, Addr, InFlag); - InFlag = Chain.getValue(1); - Chain = SDValue(CurDAG->getMachineNode(Alpha::JSR, dl, MVT::Other, - MVT::Glue, Chain, InFlag), 0); - } - InFlag = Chain.getValue(1); - - ReplaceUses(SDValue(N, 0), Chain); - ReplaceUses(SDValue(N, 1), InFlag); -} - - -/// createAlphaISelDag - This pass converts a legalized DAG into a -/// Alpha-specific DAG, ready for instruction scheduling. -/// -FunctionPass *llvm::createAlphaISelDag(AlphaTargetMachine &TM) { - return new AlphaDAGToDAGISel(TM); -} diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp deleted file mode 100644 index 3057eb8..0000000 --- a/lib/Target/Alpha/AlphaISelLowering.cpp +++ /dev/null @@ -1,962 +0,0 @@ -//===-- AlphaISelLowering.cpp - Alpha DAG Lowering Implementation ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the AlphaISelLowering class. 
-// -//===----------------------------------------------------------------------===// - -#include "AlphaISelLowering.h" -#include "AlphaTargetMachine.h" -#include "AlphaMachineFunctionInfo.h" -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Module.h" -#include "llvm/Intrinsics.h" -#include "llvm/Type.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -/// AddLiveIn - This helper function adds the specified physical register to the -/// MachineFunction as a live in value. It also creates a corresponding virtual -/// register for it. -static unsigned AddLiveIn(MachineFunction &MF, unsigned PReg, - TargetRegisterClass *RC) { - assert(RC->contains(PReg) && "Not the correct regclass!"); - unsigned VReg = MF.getRegInfo().createVirtualRegister(RC); - MF.getRegInfo().addLiveIn(PReg, VReg); - return VReg; -} - -AlphaTargetLowering::AlphaTargetLowering(TargetMachine &TM) - : TargetLowering(TM, new TargetLoweringObjectFileELF()) { - // Set up the TargetLowering object. - //I am having problems with shr n i8 1 - setBooleanContents(ZeroOrOneBooleanContent); - setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? - - addRegisterClass(MVT::i64, Alpha::GPRCRegisterClass); - addRegisterClass(MVT::f64, Alpha::F8RCRegisterClass); - addRegisterClass(MVT::f32, Alpha::F4RCRegisterClass); - - // We want to custom lower some of our intrinsics. 
- setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - - setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand); - - setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::ZEXTLOAD, MVT::i32, Expand); - - setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); - setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand); - setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand); - - setTruncStoreAction(MVT::f64, MVT::f32, Expand); - - // setOperationAction(ISD::BRIND, MVT::Other, Expand); - setOperationAction(ISD::BR_JT, MVT::Other, Expand); - setOperationAction(ISD::BR_CC, MVT::Other, Expand); - setOperationAction(ISD::SELECT_CC, MVT::Other, Expand); - - setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand); - - setOperationAction(ISD::FREM, MVT::f32, Expand); - setOperationAction(ISD::FREM, MVT::f64, Expand); - - setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand); - setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); - setOperationAction(ISD::FP_TO_UINT, MVT::i64, Expand); - setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); - - if (!TM.getSubtarget().hasCT()) { - setOperationAction(ISD::CTPOP , MVT::i64 , Expand); - setOperationAction(ISD::CTTZ , MVT::i64 , Expand); - setOperationAction(ISD::CTLZ , MVT::i64 , Expand); - } - setOperationAction(ISD::BSWAP , MVT::i64, Expand); - setOperationAction(ISD::ROTL , MVT::i64, Expand); - setOperationAction(ISD::ROTR , MVT::i64, Expand); - - setOperationAction(ISD::SREM , MVT::i64, Custom); - setOperationAction(ISD::UREM , MVT::i64, Custom); - setOperationAction(ISD::SDIV , MVT::i64, Custom); - setOperationAction(ISD::UDIV , MVT::i64, Custom); - - setOperationAction(ISD::ADDC , MVT::i64, Expand); - setOperationAction(ISD::ADDE , MVT::i64, Expand); - setOperationAction(ISD::SUBC , MVT::i64, Expand); - setOperationAction(ISD::SUBE , MVT::i64, Expand); - - setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand); - setOperationAction(ISD::SMUL_LOHI, 
MVT::i64, Expand); - - setOperationAction(ISD::SRL_PARTS, MVT::i64, Custom); - setOperationAction(ISD::SRA_PARTS, MVT::i64, Expand); - setOperationAction(ISD::SHL_PARTS, MVT::i64, Expand); - - // We don't support sin/cos/sqrt/pow - setOperationAction(ISD::FSIN , MVT::f64, Expand); - setOperationAction(ISD::FCOS , MVT::f64, Expand); - setOperationAction(ISD::FSIN , MVT::f32, Expand); - setOperationAction(ISD::FCOS , MVT::f32, Expand); - - setOperationAction(ISD::FSQRT, MVT::f64, Expand); - setOperationAction(ISD::FSQRT, MVT::f32, Expand); - - setOperationAction(ISD::FPOW , MVT::f32, Expand); - setOperationAction(ISD::FPOW , MVT::f64, Expand); - - setOperationAction(ISD::FMA, MVT::f64, Expand); - setOperationAction(ISD::FMA, MVT::f32, Expand); - - setOperationAction(ISD::SETCC, MVT::f32, Promote); - - setOperationAction(ISD::BITCAST, MVT::f32, Promote); - - setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); - - // Not implemented yet. - setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); - setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Expand); - - // We want to legalize GlobalAddress and ConstantPool and - // ExternalSymbols nodes into the appropriate instructions to - // materialize the address. 
- setOperationAction(ISD::GlobalAddress, MVT::i64, Custom); - setOperationAction(ISD::ConstantPool, MVT::i64, Custom); - setOperationAction(ISD::ExternalSymbol, MVT::i64, Custom); - setOperationAction(ISD::GlobalTLSAddress, MVT::i64, Custom); - - setOperationAction(ISD::VASTART, MVT::Other, Custom); - setOperationAction(ISD::VAEND, MVT::Other, Expand); - setOperationAction(ISD::VACOPY, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::Other, Custom); - setOperationAction(ISD::VAARG, MVT::i32, Custom); - - setOperationAction(ISD::JumpTable, MVT::i64, Custom); - setOperationAction(ISD::JumpTable, MVT::i32, Custom); - - setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand); - setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand); - - setStackPointerRegisterToSaveRestore(Alpha::R30); - - setJumpBufSize(272); - setJumpBufAlignment(16); - - setMinFunctionAlignment(4); - - setInsertFencesForAtomic(true); - - computeRegisterProperties(); -} - -EVT AlphaTargetLowering::getSetCCResultType(EVT VT) const { - return MVT::i64; -} - -const char *AlphaTargetLowering::getTargetNodeName(unsigned Opcode) const { - switch (Opcode) { - default: return 0; - case AlphaISD::CVTQT_: return "Alpha::CVTQT_"; - case AlphaISD::CVTQS_: return "Alpha::CVTQS_"; - case AlphaISD::CVTTQ_: return "Alpha::CVTTQ_"; - case AlphaISD::GPRelHi: return "Alpha::GPRelHi"; - case AlphaISD::GPRelLo: return "Alpha::GPRelLo"; - case AlphaISD::RelLit: return "Alpha::RelLit"; - case AlphaISD::GlobalRetAddr: return "Alpha::GlobalRetAddr"; - case AlphaISD::CALL: return "Alpha::CALL"; - case AlphaISD::DivCall: return "Alpha::DivCall"; - case AlphaISD::RET_FLAG: return "Alpha::RET_FLAG"; - case AlphaISD::COND_BRANCH_I: return "Alpha::COND_BRANCH_I"; - case AlphaISD::COND_BRANCH_F: return "Alpha::COND_BRANCH_F"; - } -} - -static SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) { - EVT PtrVT = Op.getValueType(); - JumpTableSDNode *JT = cast(Op); - SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), 
PtrVT); - // FIXME there isn't really any debug info here - DebugLoc dl = Op.getDebugLoc(); - - SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, JTI, - DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); - SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, JTI, Hi); - return Lo; -} - -//http://www.cs.arizona.edu/computer.help/policy/DIGITAL_unix/ -//AA-PY8AC-TET1_html/callCH3.html#BLOCK21 - -//For now, just use variable size stack frame format - -//In a standard call, the first six items are passed in registers $16 -//- $21 and/or registers $f16 - $f21. (See Section 4.1.2 for details -//of argument-to-register correspondence.) The remaining items are -//collected in a memory argument list that is a naturally aligned -//array of quadwords. In a standard call, this list, if present, must -//be passed at 0(SP). -//7 ... n 0(SP) ... (n-7)*8(SP) - -// //#define FP $15 -// //#define RA $26 -// //#define PV $27 -// //#define GP $29 -// //#define SP $30 - -#include "AlphaGenCallingConv.inc" - -SDValue -AlphaTargetLowering::LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, - bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - // Alpha target does not yet support tail call optimization. - isTailCall = false; - - // Analyze operands of the call, assigning locations to each operand. - SmallVector ArgLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), ArgLocs, *DAG.getContext()); - - CCInfo.AnalyzeCallOperands(Outs, CC_Alpha); - - // Get a count of how many bytes are to be pushed on the stack. 
- unsigned NumBytes = CCInfo.getNextStackOffset(); - - Chain = DAG.getCALLSEQ_START(Chain, DAG.getConstant(NumBytes, - getPointerTy(), true)); - - SmallVector, 4> RegsToPass; - SmallVector MemOpChains; - SDValue StackPtr; - - // Walk the register/memloc assignments, inserting copies/loads. - for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { - CCValAssign &VA = ArgLocs[i]; - - SDValue Arg = OutVals[i]; - - // Promote the value if needed. - switch (VA.getLocInfo()) { - default: assert(0 && "Unknown loc info!"); - case CCValAssign::Full: break; - case CCValAssign::SExt: - Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); - break; - case CCValAssign::ZExt: - Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); - break; - case CCValAssign::AExt: - Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); - break; - } - - // Arguments that can be passed on register must be kept at RegsToPass - // vector - if (VA.isRegLoc()) { - RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); - } else { - assert(VA.isMemLoc()); - - if (StackPtr.getNode() == 0) - StackPtr = DAG.getCopyFromReg(Chain, dl, Alpha::R30, MVT::i64); - - SDValue PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), - StackPtr, - DAG.getIntPtrConstant(VA.getLocMemOffset())); - - MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, - MachinePointerInfo(),false, false, 0)); - } - } - - // Transform all store nodes into one single node because all store nodes are - // independent of each other. - if (!MemOpChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - &MemOpChains[0], MemOpChains.size()); - - // Build a sequence of copy-to-reg nodes chained together with token chain and - // flag operands which copy the outgoing args into registers. The InFlag in - // necessary since all emitted instructions must be stuck together. 
- SDValue InFlag; - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { - Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, - RegsToPass[i].second, InFlag); - InFlag = Chain.getValue(1); - } - - // Returns a chain & a flag for retval copy to use. - SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); - SmallVector Ops; - Ops.push_back(Chain); - Ops.push_back(Callee); - - // Add argument registers to the end of the list so that they are - // known live into the call. - for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) - Ops.push_back(DAG.getRegister(RegsToPass[i].first, - RegsToPass[i].second.getValueType())); - - if (InFlag.getNode()) - Ops.push_back(InFlag); - - Chain = DAG.getNode(AlphaISD::CALL, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - - // Create the CALLSEQ_END node. - Chain = DAG.getCALLSEQ_END(Chain, - DAG.getConstant(NumBytes, getPointerTy(), true), - DAG.getConstant(0, getPointerTy(), true), - InFlag); - InFlag = Chain.getValue(1); - - // Handle result values, copying them out of physregs into vregs that we - // return. - return LowerCallResult(Chain, InFlag, CallConv, isVarArg, - Ins, dl, DAG, InVals); -} - -/// LowerCallResult - Lower the result values of a call into the -/// appropriate copies out of appropriate physical registers. -/// -SDValue -AlphaTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const { - - // Assign locations to each value returned by this call. - SmallVector RVLocs; - CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), - getTargetMachine(), RVLocs, *DAG.getContext()); - - CCInfo.AnalyzeCallResult(Ins, RetCC_Alpha); - - // Copy all of the result registers out of their specified physreg. 
- for (unsigned i = 0; i != RVLocs.size(); ++i) { - CCValAssign &VA = RVLocs[i]; - - Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), - VA.getLocVT(), InFlag).getValue(1); - SDValue RetValue = Chain.getValue(0); - InFlag = Chain.getValue(2); - - // If this is an 8/16/32-bit value, it is really passed promoted to 64 - // bits. Insert an assert[sz]ext to capture this, then truncate to the - // right size. - if (VA.getLocInfo() == CCValAssign::SExt) - RetValue = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), RetValue, - DAG.getValueType(VA.getValVT())); - else if (VA.getLocInfo() == CCValAssign::ZExt) - RetValue = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), RetValue, - DAG.getValueType(VA.getValVT())); - - if (VA.getLocInfo() != CCValAssign::Full) - RetValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), RetValue); - - InVals.push_back(RetValue); - } - - return Chain; -} - -SDValue -AlphaTargetLowering::LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl - &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) - const { - - MachineFunction &MF = DAG.getMachineFunction(); - MachineFrameInfo *MFI = MF.getFrameInfo(); - AlphaMachineFunctionInfo *FuncInfo = MF.getInfo(); - - unsigned args_int[] = { - Alpha::R16, Alpha::R17, Alpha::R18, Alpha::R19, Alpha::R20, Alpha::R21}; - unsigned args_float[] = { - Alpha::F16, Alpha::F17, Alpha::F18, Alpha::F19, Alpha::F20, Alpha::F21}; - - for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) { - SDValue argt; - EVT ObjectVT = Ins[ArgNo].VT; - SDValue ArgVal; - - if (ArgNo < 6) { - switch (ObjectVT.getSimpleVT().SimpleTy) { - default: - assert(false && "Invalid value type!"); - case MVT::f64: - args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo], - &Alpha::F8RCRegClass); - ArgVal = DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT); - break; - case MVT::f32: - args_float[ArgNo] = AddLiveIn(MF, args_float[ArgNo], - &Alpha::F4RCRegClass); - ArgVal 
= DAG.getCopyFromReg(Chain, dl, args_float[ArgNo], ObjectVT); - break; - case MVT::i64: - args_int[ArgNo] = AddLiveIn(MF, args_int[ArgNo], - &Alpha::GPRCRegClass); - ArgVal = DAG.getCopyFromReg(Chain, dl, args_int[ArgNo], MVT::i64); - break; - } - } else { //more args - // Create the frame index object for this incoming parameter... - int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true); - - // Create the SelectionDAG nodes corresponding to a load - //from this parameter - SDValue FIN = DAG.getFrameIndex(FI, MVT::i64); - ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), - false, false, 0); - } - InVals.push_back(ArgVal); - } - - // If the functions takes variable number of arguments, copy all regs to stack - if (isVarArg) { - FuncInfo->setVarArgsOffset(Ins.size() * 8); - std::vector LS; - for (int i = 0; i < 6; ++i) { - if (TargetRegisterInfo::isPhysicalRegister(args_int[i])) - args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass); - SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64); - int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true); - if (i == 0) FuncInfo->setVarArgsBase(FI); - SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(), - false, false, 0)); - - if (TargetRegisterInfo::isPhysicalRegister(args_float[i])) - args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass); - argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64); - FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true); - SDFI = DAG.getFrameIndex(FI, MVT::i64); - LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, MachinePointerInfo(), - false, false, 0)); - } - - //Set up a token factor with all the stack traffic - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LS[0], LS.size()); - } - - return Chain; -} - -SDValue -AlphaTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const 
SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { - - SDValue Copy = DAG.getCopyToReg(Chain, dl, Alpha::R26, - DAG.getNode(AlphaISD::GlobalRetAddr, - DebugLoc(), MVT::i64), - SDValue()); - switch (Outs.size()) { - default: - llvm_unreachable("Do not know how to return this many arguments!"); - case 0: - break; - //return SDValue(); // ret void is legal - case 1: { - EVT ArgVT = Outs[0].VT; - unsigned ArgReg; - if (ArgVT.isInteger()) - ArgReg = Alpha::R0; - else { - assert(ArgVT.isFloatingPoint()); - ArgReg = Alpha::F0; - } - Copy = DAG.getCopyToReg(Copy, dl, ArgReg, - OutVals[0], Copy.getValue(1)); - if (DAG.getMachineFunction().getRegInfo().liveout_empty()) - DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg); - break; - } - case 2: { - EVT ArgVT = Outs[0].VT; - unsigned ArgReg1, ArgReg2; - if (ArgVT.isInteger()) { - ArgReg1 = Alpha::R0; - ArgReg2 = Alpha::R1; - } else { - assert(ArgVT.isFloatingPoint()); - ArgReg1 = Alpha::F0; - ArgReg2 = Alpha::F1; - } - Copy = DAG.getCopyToReg(Copy, dl, ArgReg1, - OutVals[0], Copy.getValue(1)); - if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), - DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg1) - == DAG.getMachineFunction().getRegInfo().liveout_end()) - DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg1); - Copy = DAG.getCopyToReg(Copy, dl, ArgReg2, - OutVals[1], Copy.getValue(1)); - if (std::find(DAG.getMachineFunction().getRegInfo().liveout_begin(), - DAG.getMachineFunction().getRegInfo().liveout_end(), ArgReg2) - == DAG.getMachineFunction().getRegInfo().liveout_end()) - DAG.getMachineFunction().getRegInfo().addLiveOut(ArgReg2); - break; - } - } - return DAG.getNode(AlphaISD::RET_FLAG, dl, - MVT::Other, Copy, Copy.getValue(1)); -} - -void AlphaTargetLowering::LowerVAARG(SDNode *N, SDValue &Chain, - SDValue &DataPtr, - SelectionDAG &DAG) const { - Chain = N->getOperand(0); - SDValue VAListP = N->getOperand(1); - const Value *VAListS = 
cast(N->getOperand(2))->getValue(); - DebugLoc dl = N->getDebugLoc(); - - SDValue Base = DAG.getLoad(MVT::i64, dl, Chain, VAListP, - MachinePointerInfo(VAListS), - false, false, 0); - SDValue Tmp = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, - DAG.getConstant(8, MVT::i64)); - SDValue Offset = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Base.getValue(1), - Tmp, MachinePointerInfo(), - MVT::i32, false, false, 0); - DataPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Base, Offset); - if (N->getValueType(0).isFloatingPoint()) - { - //if fp && Offset < 6*8, then subtract 6*8 from DataPtr - SDValue FPDataPtr = DAG.getNode(ISD::SUB, dl, MVT::i64, DataPtr, - DAG.getConstant(8*6, MVT::i64)); - SDValue CC = DAG.getSetCC(dl, MVT::i64, Offset, - DAG.getConstant(8*6, MVT::i64), ISD::SETLT); - DataPtr = DAG.getNode(ISD::SELECT, dl, MVT::i64, CC, FPDataPtr, DataPtr); - } - - SDValue NewOffset = DAG.getNode(ISD::ADD, dl, MVT::i64, Offset, - DAG.getConstant(8, MVT::i64)); - Chain = DAG.getTruncStore(Offset.getValue(1), dl, NewOffset, Tmp, - MachinePointerInfo(), - MVT::i32, false, false, 0); -} - -/// LowerOperation - Provide custom lowering hooks for some operations. -/// -SDValue AlphaTargetLowering::LowerOperation(SDValue Op, - SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); - switch (Op.getOpcode()) { - default: llvm_unreachable("Wasn't expecting to be able to lower this!"); - case ISD::JumpTable: return LowerJumpTable(Op, DAG); - - case ISD::INTRINSIC_WO_CHAIN: { - unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); - switch (IntNo) { - default: break; // Don't custom lower most intrinsics. 
- case Intrinsic::alpha_umulh: - return DAG.getNode(ISD::MULHU, dl, MVT::i64, - Op.getOperand(1), Op.getOperand(2)); - } - } - - case ISD::SRL_PARTS: { - SDValue ShOpLo = Op.getOperand(0); - SDValue ShOpHi = Op.getOperand(1); - SDValue ShAmt = Op.getOperand(2); - SDValue bm = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(64, MVT::i64), ShAmt); - SDValue BMCC = DAG.getSetCC(dl, MVT::i64, bm, - DAG.getConstant(0, MVT::i64), ISD::SETLE); - // if 64 - shAmt <= 0 - SDValue Hi_Neg = DAG.getConstant(0, MVT::i64); - SDValue ShAmt_Neg = DAG.getNode(ISD::SUB, dl, MVT::i64, - DAG.getConstant(0, MVT::i64), bm); - SDValue Lo_Neg = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt_Neg); - // else - SDValue carries = DAG.getNode(ISD::SHL, dl, MVT::i64, ShOpHi, bm); - SDValue Hi_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpHi, ShAmt); - SDValue Lo_Pos = DAG.getNode(ISD::SRL, dl, MVT::i64, ShOpLo, ShAmt); - Lo_Pos = DAG.getNode(ISD::OR, dl, MVT::i64, Lo_Pos, carries); - // Merge - SDValue Hi = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Hi_Neg, Hi_Pos); - SDValue Lo = DAG.getNode(ISD::SELECT, dl, MVT::i64, BMCC, Lo_Neg, Lo_Pos); - SDValue Ops[2] = { Lo, Hi }; - return DAG.getMergeValues(Ops, 2, dl); - } - // case ISD::SRA_PARTS: - - // case ISD::SHL_PARTS: - - - case ISD::SINT_TO_FP: { - assert(Op.getOperand(0).getValueType() == MVT::i64 && - "Unhandled SINT_TO_FP type in custom expander!"); - SDValue LD; - bool isDouble = Op.getValueType() == MVT::f64; - LD = DAG.getNode(ISD::BITCAST, dl, MVT::f64, Op.getOperand(0)); - SDValue FP = DAG.getNode(isDouble?AlphaISD::CVTQT_:AlphaISD::CVTQS_, dl, - isDouble?MVT::f64:MVT::f32, LD); - return FP; - } - case ISD::FP_TO_SINT: { - bool isDouble = Op.getOperand(0).getValueType() == MVT::f64; - SDValue src = Op.getOperand(0); - - if (!isDouble) //Promote - src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, src); - - src = DAG.getNode(AlphaISD::CVTTQ_, dl, MVT::f64, src); - - return DAG.getNode(ISD::BITCAST, dl, MVT::i64, src); - } - case 
ISD::ConstantPool: { - ConstantPoolSDNode *CP = cast(Op); - const Constant *C = CP->getConstVal(); - SDValue CPI = DAG.getTargetConstantPool(C, MVT::i64, CP->getAlignment()); - // FIXME there isn't really any debug info here - - SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, CPI, - DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); - SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, CPI, Hi); - return Lo; - } - case ISD::GlobalTLSAddress: - llvm_unreachable("TLS not implemented for Alpha."); - case ISD::GlobalAddress: { - GlobalAddressSDNode *GSDN = cast(Op); - const GlobalValue *GV = GSDN->getGlobal(); - SDValue GA = DAG.getTargetGlobalAddress(GV, dl, MVT::i64, - GSDN->getOffset()); - // FIXME there isn't really any debug info here - - // if (!GV->hasWeakLinkage() && !GV->isDeclaration() - // && !GV->hasLinkOnceLinkage()) { - if (GV->hasLocalLinkage()) { - SDValue Hi = DAG.getNode(AlphaISD::GPRelHi, dl, MVT::i64, GA, - DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); - SDValue Lo = DAG.getNode(AlphaISD::GPRelLo, dl, MVT::i64, GA, Hi); - return Lo; - } else - return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, GA, - DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); - } - case ISD::ExternalSymbol: { - return DAG.getNode(AlphaISD::RelLit, dl, MVT::i64, - DAG.getTargetExternalSymbol(cast(Op) - ->getSymbol(), MVT::i64), - DAG.getGLOBAL_OFFSET_TABLE(MVT::i64)); - } - - case ISD::UREM: - case ISD::SREM: - //Expand only on constant case - if (Op.getOperand(1).getOpcode() == ISD::Constant) { - EVT VT = Op.getNode()->getValueType(0); - SDValue Tmp1 = Op.getNode()->getOpcode() == ISD::UREM ? 
- BuildUDIV(Op.getNode(), DAG, NULL) : - BuildSDIV(Op.getNode(), DAG, NULL); - Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Op.getOperand(1)); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Op.getOperand(0), Tmp1); - return Tmp1; - } - //fall through - case ISD::SDIV: - case ISD::UDIV: - if (Op.getValueType().isInteger()) { - if (Op.getOperand(1).getOpcode() == ISD::Constant) - return Op.getOpcode() == ISD::SDIV ? BuildSDIV(Op.getNode(), DAG, NULL) - : BuildUDIV(Op.getNode(), DAG, NULL); - const char* opstr = 0; - switch (Op.getOpcode()) { - case ISD::UREM: opstr = "__remqu"; break; - case ISD::SREM: opstr = "__remq"; break; - case ISD::UDIV: opstr = "__divqu"; break; - case ISD::SDIV: opstr = "__divq"; break; - } - SDValue Tmp1 = Op.getOperand(0), - Tmp2 = Op.getOperand(1), - Addr = DAG.getExternalSymbol(opstr, MVT::i64); - return DAG.getNode(AlphaISD::DivCall, dl, MVT::i64, Addr, Tmp1, Tmp2); - } - break; - - case ISD::VAARG: { - SDValue Chain, DataPtr; - LowerVAARG(Op.getNode(), Chain, DataPtr, DAG); - - SDValue Result; - if (Op.getValueType() == MVT::i32) - Result = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Chain, DataPtr, - MachinePointerInfo(), MVT::i32, false, false, 0); - else - Result = DAG.getLoad(Op.getValueType(), dl, Chain, DataPtr, - MachinePointerInfo(), - false, false, 0); - return Result; - } - case ISD::VACOPY: { - SDValue Chain = Op.getOperand(0); - SDValue DestP = Op.getOperand(1); - SDValue SrcP = Op.getOperand(2); - const Value *DestS = cast(Op.getOperand(3))->getValue(); - const Value *SrcS = cast(Op.getOperand(4))->getValue(); - - SDValue Val = DAG.getLoad(getPointerTy(), dl, Chain, SrcP, - MachinePointerInfo(SrcS), - false, false, 0); - SDValue Result = DAG.getStore(Val.getValue(1), dl, Val, DestP, - MachinePointerInfo(DestS), - false, false, 0); - SDValue NP = DAG.getNode(ISD::ADD, dl, MVT::i64, SrcP, - DAG.getConstant(8, MVT::i64)); - Val = DAG.getExtLoad(ISD::SEXTLOAD, dl, MVT::i64, Result, - NP, MachinePointerInfo(), MVT::i32, false, false, 0); 
- SDValue NPD = DAG.getNode(ISD::ADD, dl, MVT::i64, DestP, - DAG.getConstant(8, MVT::i64)); - return DAG.getTruncStore(Val.getValue(1), dl, Val, NPD, - MachinePointerInfo(), MVT::i32, - false, false, 0); - } - case ISD::VASTART: { - MachineFunction &MF = DAG.getMachineFunction(); - AlphaMachineFunctionInfo *FuncInfo = MF.getInfo(); - - SDValue Chain = Op.getOperand(0); - SDValue VAListP = Op.getOperand(1); - const Value *VAListS = cast(Op.getOperand(2))->getValue(); - - // vastart stores the address of the VarArgsBase and VarArgsOffset - SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsBase(), MVT::i64); - SDValue S1 = DAG.getStore(Chain, dl, FR, VAListP, - MachinePointerInfo(VAListS), false, false, 0); - SDValue SA2 = DAG.getNode(ISD::ADD, dl, MVT::i64, VAListP, - DAG.getConstant(8, MVT::i64)); - return DAG.getTruncStore(S1, dl, - DAG.getConstant(FuncInfo->getVarArgsOffset(), - MVT::i64), - SA2, MachinePointerInfo(), - MVT::i32, false, false, 0); - } - case ISD::RETURNADDR: - return DAG.getNode(AlphaISD::GlobalRetAddr, DebugLoc(), MVT::i64); - //FIXME: implement - case ISD::FRAMEADDR: break; - } - - return SDValue(); -} - -void AlphaTargetLowering::ReplaceNodeResults(SDNode *N, - SmallVectorImpl&Results, - SelectionDAG &DAG) const { - DebugLoc dl = N->getDebugLoc(); - assert(N->getValueType(0) == MVT::i32 && - N->getOpcode() == ISD::VAARG && - "Unknown node to custom promote!"); - - SDValue Chain, DataPtr; - LowerVAARG(N, Chain, DataPtr, DAG); - SDValue Res = DAG.getLoad(N->getValueType(0), dl, Chain, DataPtr, - MachinePointerInfo(), - false, false, 0); - Results.push_back(Res); - Results.push_back(SDValue(Res.getNode(), 1)); -} - - -//Inline Asm - -/// getConstraintType - Given a constraint letter, return the type of -/// constraint it is for this target. 
-AlphaTargetLowering::ConstraintType -AlphaTargetLowering::getConstraintType(const std::string &Constraint) const { - if (Constraint.size() == 1) { - switch (Constraint[0]) { - default: break; - case 'f': - case 'r': - return C_RegisterClass; - } - } - return TargetLowering::getConstraintType(Constraint); -} - -/// Examine constraint type and operand type and determine a weight value. -/// This object must already have been set up with the operand type -/// and the current alternative constraint selected. -TargetLowering::ConstraintWeight -AlphaTargetLowering::getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const { - ConstraintWeight weight = CW_Invalid; - Value *CallOperandVal = info.CallOperandVal; - // If we don't have a value, we can't do a match, - // but allow it at the lowest weight. - if (CallOperandVal == NULL) - return CW_Default; - // Look at the constraint type. - switch (*constraint) { - default: - weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); - break; - case 'f': - weight = CW_Register; - break; - } - return weight; -} - -/// Given a register class constraint, like 'r', if this corresponds directly -/// to an LLVM register class, return a register of 0 and the register class -/// pointer. -std::pair AlphaTargetLowering:: -getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const -{ - if (Constraint.size() == 1) { - switch (Constraint[0]) { - case 'r': - return std::make_pair(0U, Alpha::GPRCRegisterClass); - case 'f': - return VT == MVT::f64 ? 
std::make_pair(0U, Alpha::F8RCRegisterClass) : - std::make_pair(0U, Alpha::F4RCRegisterClass); - } - } - return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); -} - -//===----------------------------------------------------------------------===// -// Other Lowering Code -//===----------------------------------------------------------------------===// - -MachineBasicBlock * -AlphaTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const { - const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); - assert((MI->getOpcode() == Alpha::CAS32 || - MI->getOpcode() == Alpha::CAS64 || - MI->getOpcode() == Alpha::LAS32 || - MI->getOpcode() == Alpha::LAS64 || - MI->getOpcode() == Alpha::SWAP32 || - MI->getOpcode() == Alpha::SWAP64) && - "Unexpected instr type to insert"); - - bool is32 = MI->getOpcode() == Alpha::CAS32 || - MI->getOpcode() == Alpha::LAS32 || - MI->getOpcode() == Alpha::SWAP32; - - //Load locked store conditional for atomic ops take on the same form - //start: - //ll - //do stuff (maybe branch to exit) - //sc - //test sc and maybe branck to start - //exit: - const BasicBlock *LLVM_BB = BB->getBasicBlock(); - DebugLoc dl = MI->getDebugLoc(); - MachineFunction::iterator It = BB; - ++It; - - MachineBasicBlock *thisMBB = BB; - MachineFunction *F = BB->getParent(); - MachineBasicBlock *llscMBB = F->CreateMachineBasicBlock(LLVM_BB); - MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB); - - sinkMBB->splice(sinkMBB->begin(), thisMBB, - llvm::next(MachineBasicBlock::iterator(MI)), - thisMBB->end()); - sinkMBB->transferSuccessorsAndUpdatePHIs(thisMBB); - - F->insert(It, llscMBB); - F->insert(It, sinkMBB); - - BuildMI(thisMBB, dl, TII->get(Alpha::BR)).addMBB(llscMBB); - - unsigned reg_res = MI->getOperand(0).getReg(), - reg_ptr = MI->getOperand(1).getReg(), - reg_v2 = MI->getOperand(2).getReg(), - reg_store = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass); - - BuildMI(llscMBB, dl, 
TII->get(is32 ? Alpha::LDL_L : Alpha::LDQ_L), - reg_res).addImm(0).addReg(reg_ptr); - switch (MI->getOpcode()) { - case Alpha::CAS32: - case Alpha::CAS64: { - unsigned reg_cmp - = F->getRegInfo().createVirtualRegister(&Alpha::GPRCRegClass); - BuildMI(llscMBB, dl, TII->get(Alpha::CMPEQ), reg_cmp) - .addReg(reg_v2).addReg(reg_res); - BuildMI(llscMBB, dl, TII->get(Alpha::BEQ)) - .addImm(0).addReg(reg_cmp).addMBB(sinkMBB); - BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store) - .addReg(Alpha::R31).addReg(MI->getOperand(3).getReg()); - break; - } - case Alpha::LAS32: - case Alpha::LAS64: { - BuildMI(llscMBB, dl,TII->get(is32 ? Alpha::ADDLr : Alpha::ADDQr), reg_store) - .addReg(reg_res).addReg(reg_v2); - break; - } - case Alpha::SWAP32: - case Alpha::SWAP64: { - BuildMI(llscMBB, dl, TII->get(Alpha::BISr), reg_store) - .addReg(reg_v2).addReg(reg_v2); - break; - } - } - BuildMI(llscMBB, dl, TII->get(is32 ? Alpha::STL_C : Alpha::STQ_C), reg_store) - .addReg(reg_store).addImm(0).addReg(reg_ptr); - BuildMI(llscMBB, dl, TII->get(Alpha::BEQ)) - .addImm(0).addReg(reg_store).addMBB(llscMBB); - BuildMI(llscMBB, dl, TII->get(Alpha::BR)).addMBB(sinkMBB); - - thisMBB->addSuccessor(llscMBB); - llscMBB->addSuccessor(llscMBB); - llscMBB->addSuccessor(sinkMBB); - MI->eraseFromParent(); // The pseudo instruction is gone now. - - return sinkMBB; -} - -bool -AlphaTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { - // The Alpha target isn't yet aware of offsets. 
- return false; -} - -bool AlphaTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const { - if (VT != MVT::f32 && VT != MVT::f64) - return false; - // +0.0 F31 - // +0.0f F31 - // -0.0 -F31 - // -0.0f -F31 - return Imm.isZero() || Imm.isNegZero(); -} diff --git a/lib/Target/Alpha/AlphaISelLowering.h b/lib/Target/Alpha/AlphaISelLowering.h deleted file mode 100644 index 80f8efa..0000000 --- a/lib/Target/Alpha/AlphaISelLowering.h +++ /dev/null @@ -1,142 +0,0 @@ -//===-- AlphaISelLowering.h - Alpha DAG Lowering Interface ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interfaces that Alpha uses to lower LLVM code into a -// selection DAG. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H -#define LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H - -#include "llvm/ADT/VectorExtras.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "Alpha.h" - -namespace llvm { - - namespace AlphaISD { - enum NodeType { - // Start the numbering where the builting ops and target ops leave off. - FIRST_NUMBER = ISD::BUILTIN_OP_END, - //These corrospond to the identical Instruction - CVTQT_, CVTQS_, CVTTQ_, - - /// GPRelHi/GPRelLo - These represent the high and low 16-bit - /// parts of a global address respectively. - GPRelHi, GPRelLo, - - /// RetLit - Literal Relocation of a Global - RelLit, - - /// GlobalRetAddr - used to restore the return address - GlobalRetAddr, - - /// CALL - Normal call. 
- CALL, - - /// DIVCALL - used for special library calls for div and rem - DivCall, - - /// return flag operand - RET_FLAG, - - /// CHAIN = COND_BRANCH CHAIN, OPC, (G|F)PRC, DESTBB [, INFLAG] - This - /// corresponds to the COND_BRANCH pseudo instruction. - /// *PRC is the input register to compare to zero, - /// OPC is the branch opcode to use (e.g. Alpha::BEQ), - /// DESTBB is the destination block to branch to, and INFLAG is - /// an optional input flag argument. - COND_BRANCH_I, COND_BRANCH_F - - }; - } - - class AlphaTargetLowering : public TargetLowering { - public: - explicit AlphaTargetLowering(TargetMachine &TM); - - virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i64; } - - /// getSetCCResultType - Get the SETCC result ValueType - virtual EVT getSetCCResultType(EVT VT) const; - - /// LowerOperation - Provide custom lowering hooks for some operations. - /// - virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; - - /// ReplaceNodeResults - Replace the results of node with an illegal result - /// type with new values built out of custom code. - /// - virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl&Results, - SelectionDAG &DAG) const; - - // Friendly names for dumps - const char *getTargetNodeName(unsigned Opcode) const; - - SDValue LowerCallResult(SDValue Chain, SDValue InFlag, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - ConstraintType getConstraintType(const std::string &Constraint) const; - - /// Examine constraint string and operand type and determine a weight value. - /// The operand object must already have been set up with the operand type. 
- ConstraintWeight getSingleConstraintMatchWeight( - AsmOperandInfo &info, const char *constraint) const; - - std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; - - MachineBasicBlock * - EmitInstrWithCustomInserter(MachineInstr *MI, - MachineBasicBlock *BB) const; - - virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; - - /// isFPImmLegal - Returns true if the target can instruction select the - /// specified FP immediate natively. If false, the legalizer will - /// materialize the FP immediate as a load from a constant pool. - virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const; - - private: - // Helpers for custom lowering. - void LowerVAARG(SDNode *N, SDValue &Chain, SDValue &DataPtr, - SelectionDAG &DAG) const; - - virtual SDValue - LowerFormalArguments(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - virtual SDValue - LowerCall(SDValue Chain, SDValue Callee, - CallingConv::ID CallConv, bool isVarArg, bool &isTailCall, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, - SmallVectorImpl &InVals) const; - - virtual SDValue - LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; - }; -} - -#endif // LLVM_TARGET_ALPHA_ALPHAISELLOWERING_H diff --git a/lib/Target/Alpha/AlphaInstrFormats.td b/lib/Target/Alpha/AlphaInstrFormats.td deleted file mode 100644 index 6f4ebf2..0000000 --- a/lib/Target/Alpha/AlphaInstrFormats.td +++ /dev/null @@ -1,268 +0,0 @@ -//===- AlphaInstrFormats.td - Alpha Instruction Formats ----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -//3.3: -//Memory -//Branch -//Operate -//Floating-point -//PALcode - -def u8imm : Operand; -def s14imm : Operand; -def s16imm : Operand; -def s21imm : Operand; -def s64imm : Operand; -def u64imm : Operand; - -//===----------------------------------------------------------------------===// -// Instruction format superclass -//===----------------------------------------------------------------------===// -// Alpha instruction baseline -class InstAlpha op, string asmstr, InstrItinClass itin> : Instruction { - field bits<32> Inst; - let Namespace = "Alpha"; - let AsmString = asmstr; - let Inst{31-26} = op; - let Itinerary = itin; -} - - -//3.3.1 -class MForm opcode, bit load, string asmstr, list pattern, InstrItinClass itin> - : InstAlpha { - let Pattern = pattern; - let canFoldAsLoad = load; - let Defs = [R28]; //We may use this for frame index calculations, so reserve it here - - bits<5> Ra; - bits<16> disp; - bits<5> Rb; - - let Inst{25-21} = Ra; - let Inst{20-16} = Rb; - let Inst{15-0} = disp; -} -class MfcForm opcode, bits<16> fc, string asmstr, InstrItinClass itin> - : InstAlpha { - bits<5> Ra; - - let OutOperandList = (outs GPRC:$RA); - let InOperandList = (ins); - let Inst{25-21} = Ra; - let Inst{20-16} = 0; - let Inst{15-0} = fc; -} -class MfcPForm opcode, bits<16> fc, string asmstr, InstrItinClass itin> - : InstAlpha { - let OutOperandList = (outs); - let InOperandList = (ins); - let Inst{25-21} = 0; - let Inst{20-16} = 0; - let Inst{15-0} = fc; -} - -class MbrForm opcode, bits<2> TB, dag OL, string asmstr, InstrItinClass itin> - : InstAlpha { - bits<5> Ra; - bits<5> Rb; - bits<14> disp; - - let OutOperandList = (outs); - let InOperandList = OL; - - let Inst{25-21} = Ra; - let Inst{20-16} = Rb; - let Inst{15-14} = TB; - let Inst{13-0} = disp; -} -class MbrpForm opcode, bits<2> TB, 
dag OL, string asmstr, list pattern, InstrItinClass itin> - : InstAlpha { - let Pattern=pattern; - bits<5> Ra; - bits<5> Rb; - bits<14> disp; - - let OutOperandList = (outs); - let InOperandList = OL; - - let Inst{25-21} = Ra; - let Inst{20-16} = Rb; - let Inst{15-14} = TB; - let Inst{13-0} = disp; -} - -//3.3.2 -def target : Operand {} - -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { -class BFormN opcode, dag OL, string asmstr, InstrItinClass itin> - : InstAlpha { - let OutOperandList = (outs); - let InOperandList = OL; - bits<64> Opc; //dummy - bits<5> Ra; - bits<21> disp; - - let Inst{25-21} = Ra; - let Inst{20-0} = disp; -} -} - -let isBranch = 1, isTerminator = 1 in -class BFormD opcode, string asmstr, list pattern, InstrItinClass itin> - : InstAlpha { - let Pattern = pattern; - let OutOperandList = (outs); - let InOperandList = (ins target:$DISP); - bits<5> Ra; - bits<21> disp; - - let Inst{25-21} = Ra; - let Inst{20-0} = disp; -} - -//3.3.3 -class OForm opcode, bits<7> fun, string asmstr, list pattern, InstrItinClass itin> - : InstAlpha { - let Pattern = pattern; - let OutOperandList = (outs GPRC:$RC); - let InOperandList = (ins GPRC:$RA, GPRC:$RB); - - bits<5> Rc; - bits<5> Ra; - bits<5> Rb; - bits<7> Function = fun; - - let Inst{25-21} = Ra; - let Inst{20-16} = Rb; - let Inst{15-13} = 0; - let Inst{12} = 0; - let Inst{11-5} = Function; - let Inst{4-0} = Rc; -} - -class OForm2 opcode, bits<7> fun, string asmstr, list pattern, InstrItinClass itin> - : InstAlpha { - let Pattern = pattern; - let OutOperandList = (outs GPRC:$RC); - let InOperandList = (ins GPRC:$RB); - - bits<5> Rc; - bits<5> Rb; - bits<7> Function = fun; - - let Inst{25-21} = 31; - let Inst{20-16} = Rb; - let Inst{15-13} = 0; - let Inst{12} = 0; - let Inst{11-5} = Function; - let Inst{4-0} = Rc; -} - -class OForm4 opcode, bits<7> fun, string asmstr, list pattern, InstrItinClass itin> - : InstAlpha { - let Pattern = pattern; - let OutOperandList = (outs GPRC:$RDEST); - let 
InOperandList = (ins GPRC:$RCOND, GPRC:$RTRUE, GPRC:$RFALSE); - let Constraints = "$RFALSE = $RDEST"; - let DisableEncoding = "$RFALSE"; - - bits<5> Rc; - bits<5> Ra; - bits<5> Rb; - bits<7> Function = fun; - -// let Constraints = "$RFALSE = $RDEST"; - let Inst{25-21} = Ra; - let Inst{20-16} = Rb; - let Inst{15-13} = 0; - let Inst{12} = 0; - let Inst{11-5} = Function; - let Inst{4-0} = Rc; -} - - -class OFormL opcode, bits<7> fun, string asmstr, list pattern, InstrItinClass itin> - : InstAlpha { - let Pattern = pattern; - let OutOperandList = (outs GPRC:$RC); - let InOperandList = (ins GPRC:$RA, u8imm:$L); - - bits<5> Rc; - bits<5> Ra; - bits<8> LIT; - bits<7> Function = fun; - - let Inst{25-21} = Ra; - let Inst{20-13} = LIT; - let Inst{12} = 1; - let Inst{11-5} = Function; - let Inst{4-0} = Rc; -} - -class OForm4L opcode, bits<7> fun, string asmstr, list pattern, InstrItinClass itin> - : InstAlpha { - let Pattern = pattern; - let OutOperandList = (outs GPRC:$RDEST); - let InOperandList = (ins GPRC:$RCOND, s64imm:$RTRUE, GPRC:$RFALSE); - let Constraints = "$RFALSE = $RDEST"; - let DisableEncoding = "$RFALSE"; - - bits<5> Rc; - bits<5> Ra; - bits<8> LIT; - bits<7> Function = fun; - -// let Constraints = "$RFALSE = $RDEST"; - let Inst{25-21} = Ra; - let Inst{20-13} = LIT; - let Inst{12} = 1; - let Inst{11-5} = Function; - let Inst{4-0} = Rc; -} - -//3.3.4 -class FPForm opcode, bits<11> fun, string asmstr, list pattern, InstrItinClass itin> - : InstAlpha { - let Pattern = pattern; - - bits<5> Fc; - bits<5> Fa; - bits<5> Fb; - bits<11> Function = fun; - - let Inst{25-21} = Fa; - let Inst{20-16} = Fb; - let Inst{15-5} = Function; - let Inst{4-0} = Fc; -} - -//3.3.5 -class PALForm opcode, dag OL, string asmstr, InstrItinClass itin> - : InstAlpha { - let OutOperandList = (outs); - let InOperandList = OL; - bits<26> Function; - - let Inst{25-0} = Function; -} - - -// Pseudo instructions. 
-class PseudoInstAlpha pattern, InstrItinClass itin> - : InstAlpha<0, nm, itin> { - let OutOperandList = OOL; - let InOperandList = IOL; - let Pattern = pattern; - -} diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp deleted file mode 100644 index 8df2ed7..0000000 --- a/lib/Target/Alpha/AlphaInstrInfo.cpp +++ /dev/null @@ -1,382 +0,0 @@ -//===- AlphaInstrInfo.cpp - Alpha Instruction Information -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Alpha implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#include "Alpha.h" -#include "AlphaInstrInfo.h" -#include "AlphaMachineFunctionInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ErrorHandling.h" - -#define GET_INSTRINFO_CTOR -#include "AlphaGenInstrInfo.inc" -using namespace llvm; - -AlphaInstrInfo::AlphaInstrInfo() - : AlphaGenInstrInfo(Alpha::ADJUSTSTACKDOWN, Alpha::ADJUSTSTACKUP), - RI(*this) { -} - - -unsigned -AlphaInstrInfo::isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - case Alpha::LDL: - case Alpha::LDQ: - case Alpha::LDBU: - case Alpha::LDWU: - case Alpha::LDS: - case Alpha::LDT: - if (MI->getOperand(1).isFI()) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; -} - -unsigned -AlphaInstrInfo::isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const { - switch (MI->getOpcode()) { - case Alpha::STL: - case Alpha::STQ: - case Alpha::STB: - case Alpha::STW: - case Alpha::STS: - case 
Alpha::STT: - if (MI->getOperand(1).isFI()) { - FrameIndex = MI->getOperand(1).getIndex(); - return MI->getOperand(0).getReg(); - } - break; - } - return 0; -} - -static bool isAlphaIntCondCode(unsigned Opcode) { - switch (Opcode) { - case Alpha::BEQ: - case Alpha::BNE: - case Alpha::BGE: - case Alpha::BGT: - case Alpha::BLE: - case Alpha::BLT: - case Alpha::BLBC: - case Alpha::BLBS: - return true; - default: - return false; - } -} - -unsigned AlphaInstrInfo::InsertBranch(MachineBasicBlock &MBB, - MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const { - assert(TBB && "InsertBranch must not be told to insert a fallthrough"); - assert((Cond.size() == 2 || Cond.size() == 0) && - "Alpha branch conditions have two components!"); - - // One-way branch. - if (FBB == 0) { - if (Cond.empty()) // Unconditional branch - BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(TBB); - else // Conditional branch - if (isAlphaIntCondCode(Cond[0].getImm())) - BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I)) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); - else - BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F)) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); - return 1; - } - - // Two-way Conditional Branch. 
- if (isAlphaIntCondCode(Cond[0].getImm())) - BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_I)) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); - else - BuildMI(&MBB, DL, get(Alpha::COND_BRANCH_F)) - .addImm(Cond[0].getImm()).addReg(Cond[1].getReg()).addMBB(TBB); - BuildMI(&MBB, DL, get(Alpha::BR)).addMBB(FBB); - return 2; -} - -void AlphaInstrInfo::copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const { - if (Alpha::GPRCRegClass.contains(DestReg, SrcReg)) { - BuildMI(MBB, MI, DL, get(Alpha::BISr), DestReg) - .addReg(SrcReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else if (Alpha::F4RCRegClass.contains(DestReg, SrcReg)) { - BuildMI(MBB, MI, DL, get(Alpha::CPYSS), DestReg) - .addReg(SrcReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else if (Alpha::F8RCRegClass.contains(DestReg, SrcReg)) { - BuildMI(MBB, MI, DL, get(Alpha::CPYST), DestReg) - .addReg(SrcReg) - .addReg(SrcReg, getKillRegState(KillSrc)); - } else { - llvm_unreachable("Attempt to copy register that is not GPR or FPR"); - } -} - -void -AlphaInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned SrcReg, bool isKill, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - //cerr << "Trying to store " << getPrettyName(SrcReg) << " to " - // << FrameIdx << "\n"; - //BuildMI(MBB, MI, Alpha::WTF, 0).addReg(SrcReg); - - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - - if (RC == Alpha::F4RCRegisterClass) - BuildMI(MBB, MI, DL, get(Alpha::STS)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FrameIdx).addReg(Alpha::F31); - else if (RC == Alpha::F8RCRegisterClass) - BuildMI(MBB, MI, DL, get(Alpha::STT)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FrameIdx).addReg(Alpha::F31); - else if (RC == Alpha::GPRCRegisterClass) - BuildMI(MBB, MI, DL, get(Alpha::STQ)) - .addReg(SrcReg, 
getKillRegState(isKill)) - .addFrameIndex(FrameIdx).addReg(Alpha::F31); - else - llvm_unreachable("Unhandled register class"); -} - -void -AlphaInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, - unsigned DestReg, int FrameIdx, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const { - //cerr << "Trying to load " << getPrettyName(DestReg) << " to " - // << FrameIdx << "\n"; - DebugLoc DL; - if (MI != MBB.end()) DL = MI->getDebugLoc(); - - if (RC == Alpha::F4RCRegisterClass) - BuildMI(MBB, MI, DL, get(Alpha::LDS), DestReg) - .addFrameIndex(FrameIdx).addReg(Alpha::F31); - else if (RC == Alpha::F8RCRegisterClass) - BuildMI(MBB, MI, DL, get(Alpha::LDT), DestReg) - .addFrameIndex(FrameIdx).addReg(Alpha::F31); - else if (RC == Alpha::GPRCRegisterClass) - BuildMI(MBB, MI, DL, get(Alpha::LDQ), DestReg) - .addFrameIndex(FrameIdx).addReg(Alpha::F31); - else - llvm_unreachable("Unhandled register class"); -} - -static unsigned AlphaRevCondCode(unsigned Opcode) { - switch (Opcode) { - case Alpha::BEQ: return Alpha::BNE; - case Alpha::BNE: return Alpha::BEQ; - case Alpha::BGE: return Alpha::BLT; - case Alpha::BGT: return Alpha::BLE; - case Alpha::BLE: return Alpha::BGT; - case Alpha::BLT: return Alpha::BGE; - case Alpha::BLBC: return Alpha::BLBS; - case Alpha::BLBS: return Alpha::BLBC; - case Alpha::FBEQ: return Alpha::FBNE; - case Alpha::FBNE: return Alpha::FBEQ; - case Alpha::FBGE: return Alpha::FBLT; - case Alpha::FBGT: return Alpha::FBLE; - case Alpha::FBLE: return Alpha::FBGT; - case Alpha::FBLT: return Alpha::FBGE; - default: - llvm_unreachable("Unknown opcode"); - } - return 0; // Not reached -} - -// Branch analysis. -bool AlphaInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. 
- MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) - return false; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - if (!isUnpredicatedTerminator(I)) - return false; - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - - // If there is only one terminator instruction, process it. - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (LastInst->getOpcode() == Alpha::BR) { - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else if (LastInst->getOpcode() == Alpha::COND_BRANCH_I || - LastInst->getOpcode() == Alpha::COND_BRANCH_F) { - // Block ends with fall-through condbranch. - TBB = LastInst->getOperand(2).getMBB(); - Cond.push_back(LastInst->getOperand(0)); - Cond.push_back(LastInst->getOperand(1)); - return false; - } - // Otherwise, don't know what this is. - return true; - } - - // Get the instruction before it if it's a terminator. - MachineInstr *SecondLastInst = I; - - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && - isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with Alpha::BR and Alpha::COND_BRANCH_*, handle it. - if ((SecondLastInst->getOpcode() == Alpha::COND_BRANCH_I || - SecondLastInst->getOpcode() == Alpha::COND_BRANCH_F) && - LastInst->getOpcode() == Alpha::BR) { - TBB = SecondLastInst->getOperand(2).getMBB(); - Cond.push_back(SecondLastInst->getOperand(0)); - Cond.push_back(SecondLastInst->getOperand(1)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } - - // If the block ends with two Alpha::BRs, handle it. The second one is not - // executed, so remove it. - if (SecondLastInst->getOpcode() == Alpha::BR && - LastInst->getOpcode() == Alpha::BR) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } - - // Otherwise, can't handle this. 
- return true; -} - -unsigned AlphaInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const { - MachineBasicBlock::iterator I = MBB.end(); - if (I == MBB.begin()) return 0; - --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return 0; - --I; - } - if (I->getOpcode() != Alpha::BR && - I->getOpcode() != Alpha::COND_BRANCH_I && - I->getOpcode() != Alpha::COND_BRANCH_F) - return 0; - - // Remove the branch. - I->eraseFromParent(); - - I = MBB.end(); - - if (I == MBB.begin()) return 1; - --I; - if (I->getOpcode() != Alpha::COND_BRANCH_I && - I->getOpcode() != Alpha::COND_BRANCH_F) - return 1; - - // Remove the branch. - I->eraseFromParent(); - return 2; -} - -void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const { - DebugLoc DL; - BuildMI(MBB, MI, DL, get(Alpha::BISr), Alpha::R31) - .addReg(Alpha::R31) - .addReg(Alpha::R31); -} - -bool AlphaInstrInfo:: -ReverseBranchCondition(SmallVectorImpl &Cond) const { - assert(Cond.size() == 2 && "Invalid Alpha branch opcode!"); - Cond[0].setImm(AlphaRevCondCode(Cond[0].getImm())); - return false; -} - -/// getGlobalBaseReg - Return a virtual register initialized with the -/// the global base register value. Output instructions required to -/// initialize the register in the function entry block, if necessary. 
-/// -unsigned AlphaInstrInfo::getGlobalBaseReg(MachineFunction *MF) const { - AlphaMachineFunctionInfo *AlphaFI = MF->getInfo(); - unsigned GlobalBaseReg = AlphaFI->getGlobalBaseReg(); - if (GlobalBaseReg != 0) - return GlobalBaseReg; - - // Insert the set of GlobalBaseReg into the first MBB of the function - MachineBasicBlock &FirstMBB = MF->front(); - MachineBasicBlock::iterator MBBI = FirstMBB.begin(); - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - - GlobalBaseReg = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); - BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), - GlobalBaseReg).addReg(Alpha::R29); - RegInfo.addLiveIn(Alpha::R29); - - AlphaFI->setGlobalBaseReg(GlobalBaseReg); - return GlobalBaseReg; -} - -/// getGlobalRetAddr - Return a virtual register initialized with the -/// the global base register value. Output instructions required to -/// initialize the register in the function entry block, if necessary. 
-/// -unsigned AlphaInstrInfo::getGlobalRetAddr(MachineFunction *MF) const { - AlphaMachineFunctionInfo *AlphaFI = MF->getInfo(); - unsigned GlobalRetAddr = AlphaFI->getGlobalRetAddr(); - if (GlobalRetAddr != 0) - return GlobalRetAddr; - - // Insert the set of GlobalRetAddr into the first MBB of the function - MachineBasicBlock &FirstMBB = MF->front(); - MachineBasicBlock::iterator MBBI = FirstMBB.begin(); - MachineRegisterInfo &RegInfo = MF->getRegInfo(); - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - - GlobalRetAddr = RegInfo.createVirtualRegister(&Alpha::GPRCRegClass); - BuildMI(FirstMBB, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), - GlobalRetAddr).addReg(Alpha::R26); - RegInfo.addLiveIn(Alpha::R26); - - AlphaFI->setGlobalRetAddr(GlobalRetAddr); - return GlobalRetAddr; -} diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h deleted file mode 100644 index 337a85c..0000000 --- a/lib/Target/Alpha/AlphaInstrInfo.h +++ /dev/null @@ -1,85 +0,0 @@ -//===- AlphaInstrInfo.h - Alpha Instruction Information ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Alpha implementation of the TargetInstrInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef ALPHAINSTRUCTIONINFO_H -#define ALPHAINSTRUCTIONINFO_H - -#include "llvm/Target/TargetInstrInfo.h" -#include "AlphaRegisterInfo.h" - -#define GET_INSTRINFO_HEADER -#include "AlphaGenInstrInfo.inc" - -namespace llvm { - -class AlphaInstrInfo : public AlphaGenInstrInfo { - const AlphaRegisterInfo RI; -public: - AlphaInstrInfo(); - - /// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. 
As - /// such, whenever a client has an instance of instruction info, it should - /// always be able to get register info as well (through this method). - /// - virtual const AlphaRegisterInfo &getRegisterInfo() const { return RI; } - - virtual unsigned isLoadFromStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - virtual unsigned isStoreToStackSlot(const MachineInstr *MI, - int &FrameIndex) const; - - virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, - MachineBasicBlock *FBB, - const SmallVectorImpl &Cond, - DebugLoc DL) const; - virtual void copyPhysReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI, DebugLoc DL, - unsigned DestReg, unsigned SrcReg, - bool KillSrc) const; - virtual void storeRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned SrcReg, bool isKill, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - virtual void loadRegFromStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MBBI, - unsigned DestReg, int FrameIndex, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) const; - - bool AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, - MachineBasicBlock *&FBB, - SmallVectorImpl &Cond, - bool AllowModify) const; - unsigned RemoveBranch(MachineBasicBlock &MBB) const; - void insertNoop(MachineBasicBlock &MBB, - MachineBasicBlock::iterator MI) const; - bool ReverseBranchCondition(SmallVectorImpl &Cond) const; - - /// getGlobalBaseReg - Return a virtual register initialized with the - /// the global base register value. Output instructions required to - /// initialize the register in the function entry block, if necessary. - /// - unsigned getGlobalBaseReg(MachineFunction *MF) const; - - /// getGlobalRetAddr - Return a virtual register initialized with the - /// the global return address register value. 
Output instructions required to - /// initialize the register in the function entry block, if necessary. - /// - unsigned getGlobalRetAddr(MachineFunction *MF) const; -}; - -} - -#endif diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td deleted file mode 100644 index c8c9377..0000000 --- a/lib/Target/Alpha/AlphaInstrInfo.td +++ /dev/null @@ -1,1159 +0,0 @@ -//===- AlphaInstrInfo.td - The Alpha Instruction Set -------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -include "AlphaInstrFormats.td" - -//******************** -//Custom DAG Nodes -//******************** - -def SDTFPUnaryOpUnC : SDTypeProfile<1, 1, [ - SDTCisFP<1>, SDTCisFP<0> -]>; -def Alpha_cvtqt : SDNode<"AlphaISD::CVTQT_", SDTFPUnaryOpUnC, []>; -def Alpha_cvtqs : SDNode<"AlphaISD::CVTQS_", SDTFPUnaryOpUnC, []>; -def Alpha_cvttq : SDNode<"AlphaISD::CVTTQ_" , SDTFPUnaryOp, []>; -def Alpha_gprello : SDNode<"AlphaISD::GPRelLo", SDTIntBinOp, []>; -def Alpha_gprelhi : SDNode<"AlphaISD::GPRelHi", SDTIntBinOp, []>; -def Alpha_rellit : SDNode<"AlphaISD::RelLit", SDTIntBinOp, [SDNPMayLoad]>; - -def retflag : SDNode<"AlphaISD::RET_FLAG", SDTNone, - [SDNPHasChain, SDNPOptInGlue]>; - -// These are target-independent nodes, but have target-specific formats. 
-def SDT_AlphaCallSeqStart : SDCallSeqStart<[ SDTCisVT<0, i64> ]>; -def SDT_AlphaCallSeqEnd : SDCallSeqEnd<[ SDTCisVT<0, i64>, - SDTCisVT<1, i64> ]>; - -def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_AlphaCallSeqStart, - [SDNPHasChain, SDNPOutGlue]>; -def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_AlphaCallSeqEnd, - [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; - -//******************** -//Paterns for matching -//******************** -def invX : SDNodeXFormgetZExtValue()); -}]>; -def negX : SDNodeXFormgetZExtValue() + 1); -}]>; -def SExt32 : SDNodeXFormgetZExtValue() << 32) >> 32); -}]>; -def SExt16 : SDNodeXFormgetZExtValue() << 48) >> 48); -}]>; -def LL16 : SDNodeXFormgetZExtValue())); -}]>; -def LH16 : SDNodeXFormgetZExtValue())); -}]>; -def iZAPX : SDNodeXForm(N->getOperand(1)); - return getI64Imm(get_zapImm(SDValue(), RHS->getZExtValue())); -}]>; -def nearP2X : SDNodeXFormgetZExtValue()))); -}]>; -def nearP2RemX : SDNodeXFormgetZExtValue() - getNearPower2((uint64_t)N->getZExtValue())); - return getI64Imm(Log2_64(x)); -}]>; - -def immUExt8 : PatLeaf<(imm), [{ //imm fits in 8 bit zero extended field - return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue(); -}]>; -def immUExt8inv : PatLeaf<(imm), [{ //inverted imm fits in 8 bit zero extended field - return (uint64_t)~N->getZExtValue() == (uint8_t)~N->getZExtValue(); -}], invX>; -def immUExt8neg : PatLeaf<(imm), [{ //negated imm fits in 8 bit zero extended field - return ((uint64_t)~N->getZExtValue() + 1) == - (uint8_t)((uint64_t)~N->getZExtValue() + 1); -}], negX>; -def immSExt16 : PatLeaf<(imm), [{ //imm fits in 16 bit sign extended field - return ((int64_t)N->getZExtValue() << 48) >> 48 == - (int64_t)N->getZExtValue(); -}]>; -def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended field - return ((int64_t)N->getZExtValue() << 48) >> 48 == - ((int64_t)N->getZExtValue() << 32) >> 32; -}], SExt16>; - -def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{ - 
ConstantSDNode *RHS = dyn_cast(N->getOperand(1)); - if (!RHS) return 0; - uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue()); - return build != 0; -}]>; - -def immFPZ : PatLeaf<(fpimm), [{ //the only fpconstant nodes are +/- 0.0 - (void)N; // silence warning. - return true; -}]>; - -def immRem1 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,0);}]>; -def immRem2 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,0);}]>; -def immRem3 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,0);}]>; -def immRem4 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,0);}]>; -def immRem5 :PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,0);}]>; -def immRem1n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),1,1);}]>; -def immRem2n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),2,1);}]>; -def immRem3n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),3,1);}]>; -def immRem4n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),4,1);}]>; -def immRem5n:PatLeaf<(imm),[{return chkRemNearPower2(N->getZExtValue(),5,1);}]>; - -def immRemP2n : PatLeaf<(imm), [{ - return isPowerOf2_64(getNearPower2((uint64_t)N->getZExtValue()) - - N->getZExtValue()); -}]>; -def immRemP2 : PatLeaf<(imm), [{ - return isPowerOf2_64(N->getZExtValue() - - getNearPower2((uint64_t)N->getZExtValue())); -}]>; -def immUExt8ME : PatLeaf<(imm), [{ //use this imm for mulqi - int64_t d = abs64((int64_t)N->getZExtValue() - - (int64_t)getNearPower2((uint64_t)N->getZExtValue())); - if (isPowerOf2_64(d)) return false; - switch (d) { - case 1: case 3: case 5: return false; - default: return (uint64_t)N->getZExtValue() == (uint8_t)N->getZExtValue(); - }; -}]>; - -def intop : PatFrag<(ops node:$op), (sext_inreg node:$op, i32)>; -def add4 : PatFrag<(ops node:$op1, node:$op2), - (add (shl node:$op1, 2), node:$op2)>; -def sub4 : PatFrag<(ops node:$op1, node:$op2), - (sub (shl node:$op1, 2), node:$op2)>; -def add8 : 
PatFrag<(ops node:$op1, node:$op2), - (add (shl node:$op1, 3), node:$op2)>; -def sub8 : PatFrag<(ops node:$op1, node:$op2), - (sub (shl node:$op1, 3), node:$op2)>; -class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; -class CmpOpFrag : PatFrag<(ops node:$R), res>; - -//Pseudo ops for selection - -def WTF : PseudoInstAlpha<(outs), (ins variable_ops), "#wtf", [], s_pseudo>; - -let hasCtrlDep = 1, Defs = [R30], Uses = [R30] in { -def ADJUSTSTACKUP : PseudoInstAlpha<(outs), (ins s64imm:$amt), - "; ADJUP $amt", - [(callseq_start timm:$amt)], s_pseudo>; -def ADJUSTSTACKDOWN : PseudoInstAlpha<(outs), (ins s64imm:$amt1, s64imm:$amt2), - "; ADJDOWN $amt1", - [(callseq_end timm:$amt1, timm:$amt2)], s_pseudo>; -} - -def ALTENT : PseudoInstAlpha<(outs), (ins s64imm:$TARGET), "$$$TARGET..ng:\n", [], s_pseudo>; -def PCLABEL : PseudoInstAlpha<(outs), (ins s64imm:$num), "PCMARKER_$num:\n",[], s_pseudo>; -def MEMLABEL : PseudoInstAlpha<(outs), (ins s64imm:$i, s64imm:$j, s64imm:$k, s64imm:$m), - "LSMARKER$$$i$$$j$$$k$$$m:", [], s_pseudo>; - - -let usesCustomInserter = 1 in { // Expanded after instruction selection. 
-def CAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "", - [(set GPRC:$dst, (atomic_cmp_swap_32 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>; -def CAS64 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$cmp, GPRC:$swp), "", - [(set GPRC:$dst, (atomic_cmp_swap_64 GPRC:$ptr, GPRC:$cmp, GPRC:$swp))], s_pseudo>; - -def LAS32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", - [(set GPRC:$dst, (atomic_load_add_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>; -def LAS64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", - [(set GPRC:$dst, (atomic_load_add_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>; - -def SWAP32 : PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", - [(set GPRC:$dst, (atomic_swap_32 GPRC:$ptr, GPRC:$swp))], s_pseudo>; -def SWAP64 :PseudoInstAlpha<(outs GPRC:$dst), (ins GPRC:$ptr, GPRC:$swp), "", - [(set GPRC:$dst, (atomic_swap_64 GPRC:$ptr, GPRC:$swp))], s_pseudo>; -} - -//*********************** -//Real instructions -//*********************** - -//Operation Form: - -//conditional moves, int - -multiclass cmov_inst fun, string asmstr, PatFrag OpNode> { -def r : OForm4<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"), - [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), GPRC:$RTRUE, GPRC:$RFALSE))], s_cmov>; -def i : OForm4L<0x11, fun, !strconcat(asmstr, " $RCOND,$RTRUE,$RDEST"), - [(set GPRC:$RDEST, (select (OpNode GPRC:$RCOND), immUExt8:$RTRUE, GPRC:$RFALSE))], s_cmov>; -} - -defm CMOVEQ : cmov_inst<0x24, "cmoveq", CmpOpFrag<(seteq node:$R, 0)>>; -defm CMOVNE : cmov_inst<0x26, "cmovne", CmpOpFrag<(setne node:$R, 0)>>; -defm CMOVLT : cmov_inst<0x44, "cmovlt", CmpOpFrag<(setlt node:$R, 0)>>; -defm CMOVLE : cmov_inst<0x64, "cmovle", CmpOpFrag<(setle node:$R, 0)>>; -defm CMOVGT : cmov_inst<0x66, "cmovgt", CmpOpFrag<(setgt node:$R, 0)>>; -defm CMOVGE : cmov_inst<0x46, "cmovge", CmpOpFrag<(setge node:$R, 0)>>; -defm CMOVLBC : cmov_inst<0x16, "cmovlbc", CmpOpFrag<(xor node:$R, 1)>>; 
-defm CMOVLBS : cmov_inst<0x14, "cmovlbs", CmpOpFrag<(and node:$R, 1)>>; - -//General pattern for cmov -def : Pat<(select GPRC:$which, GPRC:$src1, GPRC:$src2), - (CMOVNEr GPRC:$src2, GPRC:$src1, GPRC:$which)>; -def : Pat<(select GPRC:$which, GPRC:$src1, immUExt8:$src2), - (CMOVEQi GPRC:$src1, immUExt8:$src2, GPRC:$which)>; - -//Invert sense when we can for constants: -def : Pat<(select (setne GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE), - (CMOVEQi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; -def : Pat<(select (setgt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE), - (CMOVLEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; -def : Pat<(select (setge GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE), - (CMOVLTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; -def : Pat<(select (setlt GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE), - (CMOVGEi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; -def : Pat<(select (setle GPRC:$RCOND, 0), GPRC:$RTRUE, immUExt8:$RFALSE), - (CMOVGTi GPRC:$RCOND, immUExt8:$RFALSE, GPRC:$RTRUE)>; - -multiclass all_inst opc, bits<7> funl, bits<7> funq, - string asmstr, PatFrag OpNode, InstrItinClass itin> { - def Lr : OForm< opc, funl, !strconcat(asmstr, "l $RA,$RB,$RC"), - [(set GPRC:$RC, (intop (OpNode GPRC:$RA, GPRC:$RB)))], itin>; - def Li : OFormL; - def Qr : OForm< opc, funq, !strconcat(asmstr, "q $RA,$RB,$RC"), - [(set GPRC:$RC, (OpNode GPRC:$RA, GPRC:$RB))], itin>; - def Qi : OFormL; -} - -defm MUL : all_inst<0x13, 0x00, 0x20, "mul", BinOpFrag<(mul node:$LHS, node:$RHS)>, s_imul>; -defm ADD : all_inst<0x10, 0x00, 0x20, "add", BinOpFrag<(add node:$LHS, node:$RHS)>, s_iadd>; -defm S4ADD : all_inst<0x10, 0x02, 0x22, "s4add", add4, s_iadd>; -defm S8ADD : all_inst<0x10, 0x12, 0x32, "s8add", add8, s_iadd>; -defm S4SUB : all_inst<0x10, 0x0B, 0x2B, "s4sub", sub4, s_iadd>; -defm S8SUB : all_inst<0x10, 0x1B, 0x3B, "s8sub", sub8, s_iadd>; -defm SUB : all_inst<0x10, 0x09, 0x29, "sub", BinOpFrag<(sub node:$LHS, node:$RHS)>, s_iadd>; -//Const cases since 
legalize does sub x, int -> add x, inv(int) + 1 -def : Pat<(intop (add GPRC:$RA, immUExt8neg:$L)), (SUBLi GPRC:$RA, immUExt8neg:$L)>; -def : Pat<(add GPRC:$RA, immUExt8neg:$L), (SUBQi GPRC:$RA, immUExt8neg:$L)>; -def : Pat<(intop (add4 GPRC:$RA, immUExt8neg:$L)), (S4SUBLi GPRC:$RA, immUExt8neg:$L)>; -def : Pat<(add4 GPRC:$RA, immUExt8neg:$L), (S4SUBQi GPRC:$RA, immUExt8neg:$L)>; -def : Pat<(intop (add8 GPRC:$RA, immUExt8neg:$L)), (S8SUBLi GPRC:$RA, immUExt8neg:$L)>; -def : Pat<(add8 GPRC:$RA, immUExt8neg:$L), (S8SUBQi GPRC:$RA, immUExt8neg:$L)>; - -multiclass log_inst opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> { -def r : OForm; -def i : OFormL; -} -multiclass inv_inst opc, bits<7> fun, string asmstr, SDNode OpNode, InstrItinClass itin> { -def r : OForm; -def i : OFormL; -} - -defm AND : log_inst<0x11, 0x00, "and", and, s_ilog>; -defm BIC : inv_inst<0x11, 0x08, "bic", and, s_ilog>; -defm BIS : log_inst<0x11, 0x20, "bis", or, s_ilog>; -defm ORNOT : inv_inst<0x11, 0x28, "ornot", or, s_ilog>; -defm XOR : log_inst<0x11, 0x40, "xor", xor, s_ilog>; -defm EQV : inv_inst<0x11, 0x48, "eqv", xor, s_ilog>; - -defm SL : log_inst<0x12, 0x39, "sll", shl, s_ishf>; -defm SRA : log_inst<0x12, 0x3c, "sra", sra, s_ishf>; -defm SRL : log_inst<0x12, 0x34, "srl", srl, s_ishf>; -defm UMULH : log_inst<0x13, 0x30, "umulh", mulhu, s_imul>; - -def CTLZ : OForm2<0x1C, 0x32, "CTLZ $RB,$RC", - [(set GPRC:$RC, (ctlz GPRC:$RB))], s_imisc>; -def CTPOP : OForm2<0x1C, 0x30, "CTPOP $RB,$RC", - [(set GPRC:$RC, (ctpop GPRC:$RB))], s_imisc>; -def CTTZ : OForm2<0x1C, 0x33, "CTTZ $RB,$RC", - [(set GPRC:$RC, (cttz GPRC:$RB))], s_imisc>; -def EXTBL : OForm< 0x12, 0x06, "EXTBL $RA,$RB,$RC", - [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 255))], s_ishf>; -def EXTWL : OForm< 0x12, 0x16, "EXTWL $RA,$RB,$RC", - [(set GPRC:$RC, (and (srl GPRC:$RA, (shl GPRC:$RB, 3)), 65535))], s_ishf>; -def EXTLL : OForm< 0x12, 0x26, "EXTLL $RA,$RB,$RC", - [(set GPRC:$RC, (and (srl GPRC:$RA, 
(shl GPRC:$RB, 3)), 4294967295))], s_ishf>; -def SEXTB : OForm2<0x1C, 0x00, "sextb $RB,$RC", - [(set GPRC:$RC, (sext_inreg GPRC:$RB, i8))], s_ishf>; -def SEXTW : OForm2<0x1C, 0x01, "sextw $RB,$RC", - [(set GPRC:$RC, (sext_inreg GPRC:$RB, i16))], s_ishf>; - -//def EXTBLi : OFormL<0x12, 0x06, "EXTBL $RA,$L,$RC", []>; //Extract byte low -//def EXTLH : OForm< 0x12, 0x6A, "EXTLH $RA,$RB,$RC", []>; //Extract longword high -//def EXTLHi : OFormL<0x12, 0x6A, "EXTLH $RA,$L,$RC", []>; //Extract longword high -//def EXTLLi : OFormL<0x12, 0x26, "EXTLL $RA,$L,$RC", []>; //Extract longword low -//def EXTQH : OForm< 0x12, 0x7A, "EXTQH $RA,$RB,$RC", []>; //Extract quadword high -//def EXTQHi : OFormL<0x12, 0x7A, "EXTQH $RA,$L,$RC", []>; //Extract quadword high -//def EXTQ : OForm< 0x12, 0x36, "EXTQ $RA,$RB,$RC", []>; //Extract quadword low -//def EXTQi : OFormL<0x12, 0x36, "EXTQ $RA,$L,$RC", []>; //Extract quadword low -//def EXTWH : OForm< 0x12, 0x5A, "EXTWH $RA,$RB,$RC", []>; //Extract word high -//def EXTWHi : OFormL<0x12, 0x5A, "EXTWH $RA,$L,$RC", []>; //Extract word high -//def EXTWLi : OFormL<0x12, 0x16, "EXTWL $RA,$L,$RC", []>; //Extract word low - -//def INSBL : OForm< 0x12, 0x0B, "INSBL $RA,$RB,$RC", []>; //Insert byte low -//def INSBLi : OFormL<0x12, 0x0B, "INSBL $RA,$L,$RC", []>; //Insert byte low -//def INSLH : OForm< 0x12, 0x67, "INSLH $RA,$RB,$RC", []>; //Insert longword high -//def INSLHi : OFormL<0x12, 0x67, "INSLH $RA,$L,$RC", []>; //Insert longword high -//def INSLL : OForm< 0x12, 0x2B, "INSLL $RA,$RB,$RC", []>; //Insert longword low -//def INSLLi : OFormL<0x12, 0x2B, "INSLL $RA,$L,$RC", []>; //Insert longword low -//def INSQH : OForm< 0x12, 0x77, "INSQH $RA,$RB,$RC", []>; //Insert quadword high -//def INSQHi : OFormL<0x12, 0x77, "INSQH $RA,$L,$RC", []>; //Insert quadword high -//def INSQL : OForm< 0x12, 0x3B, "INSQL $RA,$RB,$RC", []>; //Insert quadword low -//def INSQLi : OFormL<0x12, 0x3B, "INSQL $RA,$L,$RC", []>; //Insert quadword low -//def INSWH : OForm< 
0x12, 0x57, "INSWH $RA,$RB,$RC", []>; //Insert word high -//def INSWHi : OFormL<0x12, 0x57, "INSWH $RA,$L,$RC", []>; //Insert word high -//def INSWL : OForm< 0x12, 0x1B, "INSWL $RA,$RB,$RC", []>; //Insert word low -//def INSWLi : OFormL<0x12, 0x1B, "INSWL $RA,$L,$RC", []>; //Insert word low - -//def MSKBL : OForm< 0x12, 0x02, "MSKBL $RA,$RB,$RC", []>; //Mask byte low -//def MSKBLi : OFormL<0x12, 0x02, "MSKBL $RA,$L,$RC", []>; //Mask byte low -//def MSKLH : OForm< 0x12, 0x62, "MSKLH $RA,$RB,$RC", []>; //Mask longword high -//def MSKLHi : OFormL<0x12, 0x62, "MSKLH $RA,$L,$RC", []>; //Mask longword high -//def MSKLL : OForm< 0x12, 0x22, "MSKLL $RA,$RB,$RC", []>; //Mask longword low -//def MSKLLi : OFormL<0x12, 0x22, "MSKLL $RA,$L,$RC", []>; //Mask longword low -//def MSKQH : OForm< 0x12, 0x72, "MSKQH $RA,$RB,$RC", []>; //Mask quadword high -//def MSKQHi : OFormL<0x12, 0x72, "MSKQH $RA,$L,$RC", []>; //Mask quadword high -//def MSKQL : OForm< 0x12, 0x32, "MSKQL $RA,$RB,$RC", []>; //Mask quadword low -//def MSKQLi : OFormL<0x12, 0x32, "MSKQL $RA,$L,$RC", []>; //Mask quadword low -//def MSKWH : OForm< 0x12, 0x52, "MSKWH $RA,$RB,$RC", []>; //Mask word high -//def MSKWHi : OFormL<0x12, 0x52, "MSKWH $RA,$L,$RC", []>; //Mask word high -//def MSKWL : OForm< 0x12, 0x12, "MSKWL $RA,$RB,$RC", []>; //Mask word low -//def MSKWLi : OFormL<0x12, 0x12, "MSKWL $RA,$L,$RC", []>; //Mask word low - -def ZAPNOTi : OFormL<0x12, 0x31, "zapnot $RA,$L,$RC", [], s_ishf>; - -// Define the pattern that produces ZAPNOTi. 
-def : Pat<(zappat:$imm GPRC:$RA), - (ZAPNOTi GPRC:$RA, (iZAPX GPRC:$imm))>; - - -//Comparison, int -//So this is a waste of what this instruction can do, but it still saves something -def CMPBGE : OForm< 0x10, 0x0F, "cmpbge $RA,$RB,$RC", - [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), (and GPRC:$RB, 255)))], s_ilog>; -def CMPBGEi : OFormL<0x10, 0x0F, "cmpbge $RA,$L,$RC", - [(set GPRC:$RC, (setuge (and GPRC:$RA, 255), immUExt8:$L))], s_ilog>; -def CMPEQ : OForm< 0x10, 0x2D, "cmpeq $RA,$RB,$RC", - [(set GPRC:$RC, (seteq GPRC:$RA, GPRC:$RB))], s_iadd>; -def CMPEQi : OFormL<0x10, 0x2D, "cmpeq $RA,$L,$RC", - [(set GPRC:$RC, (seteq GPRC:$RA, immUExt8:$L))], s_iadd>; -def CMPLE : OForm< 0x10, 0x6D, "cmple $RA,$RB,$RC", - [(set GPRC:$RC, (setle GPRC:$RA, GPRC:$RB))], s_iadd>; -def CMPLEi : OFormL<0x10, 0x6D, "cmple $RA,$L,$RC", - [(set GPRC:$RC, (setle GPRC:$RA, immUExt8:$L))], s_iadd>; -def CMPLT : OForm< 0x10, 0x4D, "cmplt $RA,$RB,$RC", - [(set GPRC:$RC, (setlt GPRC:$RA, GPRC:$RB))], s_iadd>; -def CMPLTi : OFormL<0x10, 0x4D, "cmplt $RA,$L,$RC", - [(set GPRC:$RC, (setlt GPRC:$RA, immUExt8:$L))], s_iadd>; -def CMPULE : OForm< 0x10, 0x3D, "cmpule $RA,$RB,$RC", - [(set GPRC:$RC, (setule GPRC:$RA, GPRC:$RB))], s_iadd>; -def CMPULEi : OFormL<0x10, 0x3D, "cmpule $RA,$L,$RC", - [(set GPRC:$RC, (setule GPRC:$RA, immUExt8:$L))], s_iadd>; -def CMPULT : OForm< 0x10, 0x1D, "cmpult $RA,$RB,$RC", - [(set GPRC:$RC, (setult GPRC:$RA, GPRC:$RB))], s_iadd>; -def CMPULTi : OFormL<0x10, 0x1D, "cmpult $RA,$L,$RC", - [(set GPRC:$RC, (setult GPRC:$RA, immUExt8:$L))], s_iadd>; - -//Patterns for unsupported int comparisons -def : Pat<(setueq GPRC:$X, GPRC:$Y), (CMPEQ GPRC:$X, GPRC:$Y)>; -def : Pat<(setueq GPRC:$X, immUExt8:$Y), (CMPEQi GPRC:$X, immUExt8:$Y)>; - -def : Pat<(setugt GPRC:$X, GPRC:$Y), (CMPULT GPRC:$Y, GPRC:$X)>; -def : Pat<(setugt immUExt8:$X, GPRC:$Y), (CMPULTi GPRC:$Y, immUExt8:$X)>; - -def : Pat<(setuge GPRC:$X, GPRC:$Y), (CMPULE GPRC:$Y, GPRC:$X)>; -def : Pat<(setuge 
immUExt8:$X, GPRC:$Y), (CMPULEi GPRC:$Y, immUExt8:$X)>; - -def : Pat<(setgt GPRC:$X, GPRC:$Y), (CMPLT GPRC:$Y, GPRC:$X)>; -def : Pat<(setgt immUExt8:$X, GPRC:$Y), (CMPLTi GPRC:$Y, immUExt8:$X)>; - -def : Pat<(setge GPRC:$X, GPRC:$Y), (CMPLE GPRC:$Y, GPRC:$X)>; -def : Pat<(setge immUExt8:$X, GPRC:$Y), (CMPLEi GPRC:$Y, immUExt8:$X)>; - -def : Pat<(setne GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>; -def : Pat<(setne GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQi GPRC:$X, immUExt8:$Y), 0)>; - -def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>; -def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>; - - -let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in { - def RETDAG : MbrForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", s_jsr>; //Return from subroutine - def RETDAGp : MbrpForm< 0x1A, 0x02, (ins), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine -} - -let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1, Ra = 31, disp = 0 in -def JMP : MbrpForm< 0x1A, 0x00, (ins GPRC:$RS), "jmp $$31,($RS),0", - [(brind GPRC:$RS)], s_jsr>; //Jump - -let isCall = 1, Ra = 26, - Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, - R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, - F0, F1, - F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, - F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R29] in { - def BSR : BFormD<0x34, "bsr $$26,$$$DISP..ng", [], s_jsr>; //Branch to subroutine -} -let isCall = 1, Ra = 26, Rb = 27, disp = 0, - Defs = [R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, - R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, - F0, F1, - F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, - F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30], Uses = [R27, R29] in { - def JSR : MbrForm< 0x1A, 0x01, (ins), "jsr $$26,($$27),0", s_jsr>; //Jump to subroutine -} - -let isCall = 1, Ra = 23, Rb = 27, disp = 0, - 
Defs = [R23, R24, R25, R27, R28], Uses = [R24, R25, R27] in - def JSRs : MbrForm< 0x1A, 0x01, (ins), "jsr $$23,($$27),0", s_jsr>; //Jump to div or rem - - -def JSR_COROUTINE : MbrForm< 0x1A, 0x03, (ins GPRC:$RD, GPRC:$RS, s14imm:$DISP), "jsr_coroutine $RD,($RS),$DISP", s_jsr>; //Jump to subroutine return - - -let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { -def LDQ : MForm<0x29, 1, "ldq $RA,$DISP($RB)", - [(set GPRC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_ild>; -def LDQr : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!gprellow", - [(set GPRC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>; -def LDL : MForm<0x28, 1, "ldl $RA,$DISP($RB)", - [(set GPRC:$RA, (sextloadi32 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>; -def LDLr : MForm<0x28, 1, "ldl $RA,$DISP($RB)\t\t!gprellow", - [(set GPRC:$RA, (sextloadi32 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>; -def LDBU : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)", - [(set GPRC:$RA, (zextloadi8 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>; -def LDBUr : MForm<0x0A, 1, "ldbu $RA,$DISP($RB)\t\t!gprellow", - [(set GPRC:$RA, (zextloadi8 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>; -def LDWU : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)", - [(set GPRC:$RA, (zextloadi16 (add GPRC:$RB, immSExt16:$DISP)))], s_ild>; -def LDWUr : MForm<0x0C, 1, "ldwu $RA,$DISP($RB)\t\t!gprellow", - [(set GPRC:$RA, (zextloadi16 (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_ild>; -} - - -let OutOperandList = (outs), InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB) in { -def STB : MForm<0x0E, 0, "stb $RA,$DISP($RB)", - [(truncstorei8 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>; -def STBr : MForm<0x0E, 0, "stb $RA,$DISP($RB)\t\t!gprellow", - [(truncstorei8 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>; -def STW : MForm<0x0D, 0, "stw $RA,$DISP($RB)", - [(truncstorei16 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>; -def STWr : MForm<0x0D, 0, "stw 
$RA,$DISP($RB)\t\t!gprellow", - [(truncstorei16 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>; -def STL : MForm<0x2C, 0, "stl $RA,$DISP($RB)", - [(truncstorei32 GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>; -def STLr : MForm<0x2C, 0, "stl $RA,$DISP($RB)\t\t!gprellow", - [(truncstorei32 GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>; -def STQ : MForm<0x2D, 0, "stq $RA,$DISP($RB)", - [(store GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_ist>; -def STQr : MForm<0x2D, 0, "stq $RA,$DISP($RB)\t\t!gprellow", - [(store GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_ist>; -} - -//Load address -let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { -def LDA : MForm<0x08, 0, "lda $RA,$DISP($RB)", - [(set GPRC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_lda>; -def LDAr : MForm<0x08, 0, "lda $RA,$DISP($RB)\t\t!gprellow", - [(set GPRC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address -def LDAH : MForm<0x09, 0, "ldah $RA,$DISP($RB)", - [], s_lda>; //Load address high -def LDAHr : MForm<0x09, 0, "ldah $RA,$DISP($RB)\t\t!gprelhigh", - [(set GPRC:$RA, (Alpha_gprelhi tglobaladdr:$DISP, GPRC:$RB))], s_lda>; //Load address high -} - -let OutOperandList = (outs), InOperandList = (ins F4RC:$RA, s64imm:$DISP, GPRC:$RB) in { -def STS : MForm<0x26, 0, "sts $RA,$DISP($RB)", - [(store F4RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>; -def STSr : MForm<0x26, 0, "sts $RA,$DISP($RB)\t\t!gprellow", - [(store F4RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>; -} -let OutOperandList = (outs F4RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { -def LDS : MForm<0x22, 1, "lds $RA,$DISP($RB)", - [(set F4RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>; -def LDSr : MForm<0x22, 1, "lds $RA,$DISP($RB)\t\t!gprellow", - [(set F4RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>; -} -let OutOperandList = (outs), InOperandList = (ins F8RC:$RA, 
s64imm:$DISP, GPRC:$RB) in { -def STT : MForm<0x27, 0, "stt $RA,$DISP($RB)", - [(store F8RC:$RA, (add GPRC:$RB, immSExt16:$DISP))], s_fst>; -def STTr : MForm<0x27, 0, "stt $RA,$DISP($RB)\t\t!gprellow", - [(store F8RC:$RA, (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB))], s_fst>; -} -let OutOperandList = (outs F8RC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in { -def LDT : MForm<0x23, 1, "ldt $RA,$DISP($RB)", - [(set F8RC:$RA, (load (add GPRC:$RB, immSExt16:$DISP)))], s_fld>; -def LDTr : MForm<0x23, 1, "ldt $RA,$DISP($RB)\t\t!gprellow", - [(set F8RC:$RA, (load (Alpha_gprello tglobaladdr:$DISP, GPRC:$RB)))], s_fld>; -} - - -//constpool rels -def : Pat<(i64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), - (LDQr tconstpool:$DISP, GPRC:$RB)>; -def : Pat<(i64 (sextloadi32 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), - (LDLr tconstpool:$DISP, GPRC:$RB)>; -def : Pat<(i64 (zextloadi8 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), - (LDBUr tconstpool:$DISP, GPRC:$RB)>; -def : Pat<(i64 (zextloadi16 (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), - (LDWUr tconstpool:$DISP, GPRC:$RB)>; -def : Pat<(i64 (Alpha_gprello tconstpool:$DISP, GPRC:$RB)), - (LDAr tconstpool:$DISP, GPRC:$RB)>; -def : Pat<(i64 (Alpha_gprelhi tconstpool:$DISP, GPRC:$RB)), - (LDAHr tconstpool:$DISP, GPRC:$RB)>; -def : Pat<(f32 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), - (LDSr tconstpool:$DISP, GPRC:$RB)>; -def : Pat<(f64 (load (Alpha_gprello tconstpool:$DISP, GPRC:$RB))), - (LDTr tconstpool:$DISP, GPRC:$RB)>; - -//jumptable rels -def : Pat<(i64 (Alpha_gprelhi tjumptable:$DISP, GPRC:$RB)), - (LDAHr tjumptable:$DISP, GPRC:$RB)>; -def : Pat<(i64 (Alpha_gprello tjumptable:$DISP, GPRC:$RB)), - (LDAr tjumptable:$DISP, GPRC:$RB)>; - - -//misc ext patterns -def : Pat<(i64 (extloadi8 (add GPRC:$RB, immSExt16:$DISP))), - (LDBU immSExt16:$DISP, GPRC:$RB)>; -def : Pat<(i64 (extloadi16 (add GPRC:$RB, immSExt16:$DISP))), - (LDWU immSExt16:$DISP, GPRC:$RB)>; -def : Pat<(i64 (extloadi32 (add GPRC:$RB, 
immSExt16:$DISP))), - (LDL immSExt16:$DISP, GPRC:$RB)>; - -//0 disp patterns -def : Pat<(i64 (load GPRC:$addr)), - (LDQ 0, GPRC:$addr)>; -def : Pat<(f64 (load GPRC:$addr)), - (LDT 0, GPRC:$addr)>; -def : Pat<(f32 (load GPRC:$addr)), - (LDS 0, GPRC:$addr)>; -def : Pat<(i64 (sextloadi32 GPRC:$addr)), - (LDL 0, GPRC:$addr)>; -def : Pat<(i64 (zextloadi16 GPRC:$addr)), - (LDWU 0, GPRC:$addr)>; -def : Pat<(i64 (zextloadi8 GPRC:$addr)), - (LDBU 0, GPRC:$addr)>; -def : Pat<(i64 (extloadi8 GPRC:$addr)), - (LDBU 0, GPRC:$addr)>; -def : Pat<(i64 (extloadi16 GPRC:$addr)), - (LDWU 0, GPRC:$addr)>; -def : Pat<(i64 (extloadi32 GPRC:$addr)), - (LDL 0, GPRC:$addr)>; - -def : Pat<(store GPRC:$DATA, GPRC:$addr), - (STQ GPRC:$DATA, 0, GPRC:$addr)>; -def : Pat<(store F8RC:$DATA, GPRC:$addr), - (STT F8RC:$DATA, 0, GPRC:$addr)>; -def : Pat<(store F4RC:$DATA, GPRC:$addr), - (STS F4RC:$DATA, 0, GPRC:$addr)>; -def : Pat<(truncstorei32 GPRC:$DATA, GPRC:$addr), - (STL GPRC:$DATA, 0, GPRC:$addr)>; -def : Pat<(truncstorei16 GPRC:$DATA, GPRC:$addr), - (STW GPRC:$DATA, 0, GPRC:$addr)>; -def : Pat<(truncstorei8 GPRC:$DATA, GPRC:$addr), - (STB GPRC:$DATA, 0, GPRC:$addr)>; - - -//load address, rellocated gpdist form -let OutOperandList = (outs GPRC:$RA), - InOperandList = (ins s16imm:$DISP, GPRC:$RB, s16imm:$NUM), - mayLoad = 1 in { -def LDAg : MForm<0x08, 1, "lda $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address -def LDAHg : MForm<0x09, 1, "ldah $RA,0($RB)\t\t!gpdisp!$NUM", [], s_lda>; //Load address -} - -//Load quad, rellocated literal form -let OutOperandList = (outs GPRC:$RA), InOperandList = (ins s64imm:$DISP, GPRC:$RB) in -def LDQl : MForm<0x29, 1, "ldq $RA,$DISP($RB)\t\t!literal", - [(set GPRC:$RA, (Alpha_rellit tglobaladdr:$DISP, GPRC:$RB))], s_ild>; -def : Pat<(Alpha_rellit texternalsym:$ext, GPRC:$RB), - (LDQl texternalsym:$ext, GPRC:$RB)>; - -let OutOperandList = (outs GPRC:$RR), - InOperandList = (ins GPRC:$RA, s64imm:$DISP, GPRC:$RB), - Constraints = "$RA = $RR", - 
DisableEncoding = "$RR" in { -def STQ_C : MForm<0x2F, 0, "stq_l $RA,$DISP($RB)", [], s_ist>; -def STL_C : MForm<0x2E, 0, "stl_l $RA,$DISP($RB)", [], s_ist>; -} -let OutOperandList = (outs GPRC:$RA), - InOperandList = (ins s64imm:$DISP, GPRC:$RB), - mayLoad = 1 in { -def LDQ_L : MForm<0x2B, 1, "ldq_l $RA,$DISP($RB)", [], s_ild>; -def LDL_L : MForm<0x2A, 1, "ldl_l $RA,$DISP($RB)", [], s_ild>; -} - -def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle counter -def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier -def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier - -def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)), - (WMB)>; -def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)), - (MB)>; - -def : Pat<(atomic_fence (imm), (imm)), (MB)>; - -//Basic Floating point ops - -//Floats - -let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in -def SQRTS : FPForm<0x14, 0x58B, "sqrts/su $RB,$RC", - [(set F4RC:$RC, (fsqrt F4RC:$RB))], s_fsqrts>; - -let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F4RC:$RA, F4RC:$RB) in { -def ADDS : FPForm<0x16, 0x580, "adds/su $RA,$RB,$RC", - [(set F4RC:$RC, (fadd F4RC:$RA, F4RC:$RB))], s_fadd>; -def SUBS : FPForm<0x16, 0x581, "subs/su $RA,$RB,$RC", - [(set F4RC:$RC, (fsub F4RC:$RA, F4RC:$RB))], s_fadd>; -def DIVS : FPForm<0x16, 0x583, "divs/su $RA,$RB,$RC", - [(set F4RC:$RC, (fdiv F4RC:$RA, F4RC:$RB))], s_fdivs>; -def MULS : FPForm<0x16, 0x582, "muls/su $RA,$RB,$RC", - [(set F4RC:$RC, (fmul F4RC:$RA, F4RC:$RB))], s_fmul>; - -def CPYSS : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", - [(set F4RC:$RC, (fcopysign F4RC:$RB, F4RC:$RA))], s_fadd>; -def CPYSES : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent -def CPYSNS : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", - [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F4RC:$RA)))], s_fadd>; -} - -//Doubles - -let OutOperandList = (outs F8RC:$RC), 
InOperandList = (ins F8RC:$RB), Fa = 31 in -def SQRTT : FPForm<0x14, 0x5AB, "sqrtt/su $RB,$RC", - [(set F8RC:$RC, (fsqrt F8RC:$RB))], s_fsqrtt>; - -let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RA, F8RC:$RB) in { -def ADDT : FPForm<0x16, 0x5A0, "addt/su $RA,$RB,$RC", - [(set F8RC:$RC, (fadd F8RC:$RA, F8RC:$RB))], s_fadd>; -def SUBT : FPForm<0x16, 0x5A1, "subt/su $RA,$RB,$RC", - [(set F8RC:$RC, (fsub F8RC:$RA, F8RC:$RB))], s_fadd>; -def DIVT : FPForm<0x16, 0x5A3, "divt/su $RA,$RB,$RC", - [(set F8RC:$RC, (fdiv F8RC:$RA, F8RC:$RB))], s_fdivt>; -def MULT : FPForm<0x16, 0x5A2, "mult/su $RA,$RB,$RC", - [(set F8RC:$RC, (fmul F8RC:$RA, F8RC:$RB))], s_fmul>; - -def CPYST : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", - [(set F8RC:$RC, (fcopysign F8RC:$RB, F8RC:$RA))], s_fadd>; -def CPYSET : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent -def CPYSNT : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", - [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F8RC:$RA)))], s_fadd>; - -def CMPTEQ : FPForm<0x16, 0x5A5, "cmpteq/su $RA,$RB,$RC", [], s_fadd>; -// [(set F8RC:$RC, (seteq F8RC:$RA, F8RC:$RB))]>; -def CMPTLE : FPForm<0x16, 0x5A7, "cmptle/su $RA,$RB,$RC", [], s_fadd>; -// [(set F8RC:$RC, (setle F8RC:$RA, F8RC:$RB))]>; -def CMPTLT : FPForm<0x16, 0x5A6, "cmptlt/su $RA,$RB,$RC", [], s_fadd>; -// [(set F8RC:$RC, (setlt F8RC:$RA, F8RC:$RB))]>; -def CMPTUN : FPForm<0x16, 0x5A4, "cmptun/su $RA,$RB,$RC", [], s_fadd>; -// [(set F8RC:$RC, (setuo F8RC:$RA, F8RC:$RB))]>; -} - -//More CPYS forms: -let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RA, F8RC:$RB) in { -def CPYSTs : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", - [(set F8RC:$RC, (fcopysign F8RC:$RB, F4RC:$RA))], s_fadd>; -def CPYSNTs : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", - [(set F8RC:$RC, (fneg (fcopysign F8RC:$RB, F4RC:$RA)))], s_fadd>; -} -let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RA, F4RC:$RB) in { -def CPYSSt : FPForm<0x17, 0x020, "cpys $RA,$RB,$RC", - 
[(set F4RC:$RC, (fcopysign F4RC:$RB, F8RC:$RA))], s_fadd>; -def CPYSESt : FPForm<0x17, 0x022, "cpyse $RA,$RB,$RC",[], s_fadd>; //Copy sign and exponent -def CPYSNSt : FPForm<0x17, 0x021, "cpysn $RA,$RB,$RC", - [(set F4RC:$RC, (fneg (fcopysign F4RC:$RB, F8RC:$RA)))], s_fadd>; -} - -//conditional moves, floats -let OutOperandList = (outs F4RC:$RDEST), - InOperandList = (ins F4RC:$RFALSE, F4RC:$RTRUE, F8RC:$RCOND), - Constraints = "$RTRUE = $RDEST" in { -def FCMOVEQS : FPForm<0x17, 0x02A, - "fcmoveq $RCOND,$RTRUE,$RDEST", - [], s_fcmov>; //FCMOVE if = zero -def FCMOVGES : FPForm<0x17, 0x02D, - "fcmovge $RCOND,$RTRUE,$RDEST", - [], s_fcmov>; //FCMOVE if >= zero -def FCMOVGTS : FPForm<0x17, 0x02F, - "fcmovgt $RCOND,$RTRUE,$RDEST", - [], s_fcmov>; //FCMOVE if > zero -def FCMOVLES : FPForm<0x17, 0x02E, - "fcmovle $RCOND,$RTRUE,$RDEST", - [], s_fcmov>; //FCMOVE if <= zero -def FCMOVLTS : FPForm<0x17, 0x02C, - "fcmovlt $RCOND,$RTRUE,$RDEST", - [], s_fcmov>; // FCMOVE if < zero -def FCMOVNES : FPForm<0x17, 0x02B, - "fcmovne $RCOND,$RTRUE,$RDEST", - [], s_fcmov>; //FCMOVE if != zero -} -//conditional moves, doubles -let OutOperandList = (outs F8RC:$RDEST), - InOperandList = (ins F8RC:$RFALSE, F8RC:$RTRUE, F8RC:$RCOND), - Constraints = "$RTRUE = $RDEST" in { -def FCMOVEQT : FPForm<0x17, 0x02A, "fcmoveq $RCOND,$RTRUE,$RDEST", [], s_fcmov>; -def FCMOVGET : FPForm<0x17, 0x02D, "fcmovge $RCOND,$RTRUE,$RDEST", [], s_fcmov>; -def FCMOVGTT : FPForm<0x17, 0x02F, "fcmovgt $RCOND,$RTRUE,$RDEST", [], s_fcmov>; -def FCMOVLET : FPForm<0x17, 0x02E, "fcmovle $RCOND,$RTRUE,$RDEST", [], s_fcmov>; -def FCMOVLTT : FPForm<0x17, 0x02C, "fcmovlt $RCOND,$RTRUE,$RDEST", [], s_fcmov>; -def FCMOVNET : FPForm<0x17, 0x02B, "fcmovne $RCOND,$RTRUE,$RDEST", [], s_fcmov>; -} - -//misc FP selects -//Select double - -def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F8RC:$st, 
F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; - -def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVEQT F8RC:$sf, F8RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; - -def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; -def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; -def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; - -def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>; -def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>; -def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>; - -def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>; - -def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - 
(FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F8RC:$st, F8RC:$sf), - (FCMOVNET F8RC:$sf, F8RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>; - -//Select single -def : Pat<(select (seteq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setoeq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setueq F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; - -def : Pat<(select (setne F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setone F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setune F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVEQS F4RC:$sf, F4RC:$st, (CMPTEQ F8RC:$RA, F8RC:$RB))>; - -def : Pat<(select (setgt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; -def : Pat<(select (setogt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; -def : Pat<(select (setugt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RB, F8RC:$RA))>; - -def : Pat<(select (setge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>; -def : Pat<(select (setoge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>; -def : Pat<(select (setuge F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RB, F8RC:$RA))>; - -def : Pat<(select (setlt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setolt F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), 
- (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setult F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLT F8RC:$RA, F8RC:$RB))>; - -def : Pat<(select (setle F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setole F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>; -def : Pat<(select (setule F8RC:$RA, F8RC:$RB), F4RC:$st, F4RC:$sf), - (FCMOVNES F4RC:$sf, F4RC:$st, (CMPTLE F8RC:$RA, F8RC:$RB))>; - - - -let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F4RC:$RA), Fb = 31 in -def FTOIS : FPForm<0x1C, 0x078, "ftois $RA,$RC", - [(set GPRC:$RC, (bitconvert F4RC:$RA))], s_ftoi>; //Floating to integer move, S_floating -let OutOperandList = (outs GPRC:$RC), InOperandList = (ins F8RC:$RA), Fb = 31 in -def FTOIT : FPForm<0x1C, 0x070, "ftoit $RA,$RC", - [(set GPRC:$RC, (bitconvert F8RC:$RA))], s_ftoi>; //Floating to integer move -let OutOperandList = (outs F4RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in -def ITOFS : FPForm<0x14, 0x004, "itofs $RA,$RC", - [(set F4RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move, S_floating -let OutOperandList = (outs F8RC:$RC), InOperandList = (ins GPRC:$RA), Fb = 31 in -def ITOFT : FPForm<0x14, 0x024, "itoft $RA,$RC", - [(set F8RC:$RC, (bitconvert GPRC:$RA))], s_itof>; //Integer to floating move - - -let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in -def CVTQS : FPForm<0x16, 0x7BC, "cvtqs/sui $RB,$RC", - [(set F4RC:$RC, (Alpha_cvtqs F8RC:$RB))], s_fadd>; -let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in -def CVTQT : FPForm<0x16, 0x7BE, "cvtqt/sui $RB,$RC", - [(set F8RC:$RC, (Alpha_cvtqt F8RC:$RB))], s_fadd>; -let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in -def CVTTQ : FPForm<0x16, 0x52F, "cvttq/svc $RB,$RC", - [(set 
F8RC:$RC, (Alpha_cvttq F8RC:$RB))], s_fadd>; -let OutOperandList = (outs F8RC:$RC), InOperandList = (ins F4RC:$RB), Fa = 31 in -def CVTST : FPForm<0x16, 0x6AC, "cvtst/s $RB,$RC", - [(set F8RC:$RC, (fextend F4RC:$RB))], s_fadd>; -let OutOperandList = (outs F4RC:$RC), InOperandList = (ins F8RC:$RB), Fa = 31 in -def CVTTS : FPForm<0x16, 0x7AC, "cvtts/sui $RB,$RC", - [(set F4RC:$RC, (fround F8RC:$RB))], s_fadd>; - -def : Pat<(select GPRC:$RC, F8RC:$st, F8RC:$sf), - (f64 (FCMOVEQT F8RC:$st, F8RC:$sf, (ITOFT GPRC:$RC)))>; -def : Pat<(select GPRC:$RC, F4RC:$st, F4RC:$sf), - (f32 (FCMOVEQS F4RC:$st, F4RC:$sf, (ITOFT GPRC:$RC)))>; - -///////////////////////////////////////////////////////// -//Branching -///////////////////////////////////////////////////////// -class br_icc opc, string asmstr> - : BFormN; -class br_fcc opc, string asmstr> - : BFormN; - -let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { -let Ra = 31, isBarrier = 1 in -def BR : BFormD<0x30, "br $$31,$DISP", [(br bb:$DISP)], s_ubr>; - -def COND_BRANCH_I : BFormN<0, (ins u64imm:$opc, GPRC:$R, target:$dst), - "{:comment} COND_BRANCH imm:$opc, GPRC:$R, bb:$dst", - s_icbr>; -def COND_BRANCH_F : BFormN<0, (ins u64imm:$opc, F8RC:$R, target:$dst), - "{:comment} COND_BRANCH imm:$opc, F8RC:$R, bb:$dst", - s_fbr>; -//Branches, int -def BEQ : br_icc<0x39, "beq">; -def BGE : br_icc<0x3E, "bge">; -def BGT : br_icc<0x3F, "bgt">; -def BLBC : br_icc<0x38, "blbc">; -def BLBS : br_icc<0x3C, "blbs">; -def BLE : br_icc<0x3B, "ble">; -def BLT : br_icc<0x3A, "blt">; -def BNE : br_icc<0x3D, "bne">; - -//Branches, float -def FBEQ : br_fcc<0x31, "fbeq">; -def FBGE : br_fcc<0x36, "fbge">; -def FBGT : br_fcc<0x37, "fbgt">; -def FBLE : br_fcc<0x33, "fble">; -def FBLT : br_fcc<0x32, "fblt">; -def FBNE : br_fcc<0x36, "fbne">; -} - -//An ugly trick to get the opcode as an imm I can use -def immBRCond : SDNodeXFormgetZExtValue()) { - default: assert(0 && "Unknown branch type"); - case 0: return getI64Imm(Alpha::BEQ); - case 1: return 
getI64Imm(Alpha::BNE); - case 2: return getI64Imm(Alpha::BGE); - case 3: return getI64Imm(Alpha::BGT); - case 4: return getI64Imm(Alpha::BLE); - case 5: return getI64Imm(Alpha::BLT); - case 6: return getI64Imm(Alpha::BLBS); - case 7: return getI64Imm(Alpha::BLBC); - case 20: return getI64Imm(Alpha::FBEQ); - case 21: return getI64Imm(Alpha::FBNE); - case 22: return getI64Imm(Alpha::FBGE); - case 23: return getI64Imm(Alpha::FBGT); - case 24: return getI64Imm(Alpha::FBLE); - case 25: return getI64Imm(Alpha::FBLT); - } -}]>; - -//Int cond patterns -def : Pat<(brcond (seteq GPRC:$RA, 0), bb:$DISP), - (COND_BRANCH_I (immBRCond 0), GPRC:$RA, bb:$DISP)>; -def : Pat<(brcond (setge GPRC:$RA, 0), bb:$DISP), - (COND_BRANCH_I (immBRCond 2), GPRC:$RA, bb:$DISP)>; -def : Pat<(brcond (setgt GPRC:$RA, 0), bb:$DISP), - (COND_BRANCH_I (immBRCond 3), GPRC:$RA, bb:$DISP)>; -def : Pat<(brcond (and GPRC:$RA, 1), bb:$DISP), - (COND_BRANCH_I (immBRCond 6), GPRC:$RA, bb:$DISP)>; -def : Pat<(brcond (setle GPRC:$RA, 0), bb:$DISP), - (COND_BRANCH_I (immBRCond 4), GPRC:$RA, bb:$DISP)>; -def : Pat<(brcond (setlt GPRC:$RA, 0), bb:$DISP), - (COND_BRANCH_I (immBRCond 5), GPRC:$RA, bb:$DISP)>; -def : Pat<(brcond (setne GPRC:$RA, 0), bb:$DISP), - (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>; - -def : Pat<(brcond GPRC:$RA, bb:$DISP), - (COND_BRANCH_I (immBRCond 1), GPRC:$RA, bb:$DISP)>; -def : Pat<(brcond (setne GPRC:$RA, GPRC:$RB), bb:$DISP), - (COND_BRANCH_I (immBRCond 0), (CMPEQ GPRC:$RA, GPRC:$RB), bb:$DISP)>; -def : Pat<(brcond (setne GPRC:$RA, immUExt8:$L), bb:$DISP), - (COND_BRANCH_I (immBRCond 0), (CMPEQi GPRC:$RA, immUExt8:$L), bb:$DISP)>; - -//FP cond patterns -def : Pat<(brcond (seteq F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 20), F8RC:$RA, bb:$DISP)>; -def : Pat<(brcond (setne F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), F8RC:$RA, bb:$DISP)>; -def : Pat<(brcond (setge F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 22), F8RC:$RA, 
bb:$DISP)>; -def : Pat<(brcond (setgt F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 23), F8RC:$RA, bb:$DISP)>; -def : Pat<(brcond (setle F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 24), F8RC:$RA, bb:$DISP)>; -def : Pat<(brcond (setlt F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 25), F8RC:$RA, bb:$DISP)>; - - -def : Pat<(brcond (seteq F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; -def : Pat<(brcond (setoeq F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; -def : Pat<(brcond (setueq F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; - -def : Pat<(brcond (setlt F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>; -def : Pat<(brcond (setolt F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>; -def : Pat<(brcond (setult F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RA, F8RC:$RB), bb:$DISP)>; - -def : Pat<(brcond (setle F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>; -def : Pat<(brcond (setole F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>; -def : Pat<(brcond (setule F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RA, F8RC:$RB), bb:$DISP)>; - -def : Pat<(brcond (setgt F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>; -def : Pat<(brcond (setogt F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>; -def : Pat<(brcond (setugt F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLT F8RC:$RB, F8RC:$RA), bb:$DISP)>; - -def : Pat<(brcond (setge F8RC:$RA, F8RC:$RB), 
bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>; -def : Pat<(brcond (setoge F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>; -def : Pat<(brcond (setuge F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), (CMPTLE F8RC:$RB, F8RC:$RA), bb:$DISP)>; - -def : Pat<(brcond (setne F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; -def : Pat<(brcond (setone F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; -def : Pat<(brcond (setune F8RC:$RA, F8RC:$RB), bb:$DISP), - (COND_BRANCH_F (immBRCond 20), (CMPTEQ F8RC:$RA, F8RC:$RB), bb:$DISP)>; - - -def : Pat<(brcond (setoeq F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>; -def : Pat<(brcond (setueq F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 20), F8RC:$RA,bb:$DISP)>; - -def : Pat<(brcond (setoge F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>; -def : Pat<(brcond (setuge F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 22), F8RC:$RA,bb:$DISP)>; - -def : Pat<(brcond (setogt F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>; -def : Pat<(brcond (setugt F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 23), F8RC:$RA,bb:$DISP)>; - -def : Pat<(brcond (setole F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>; -def : Pat<(brcond (setule F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 24), F8RC:$RA,bb:$DISP)>; - -def : Pat<(brcond (setolt F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>; -def : Pat<(brcond (setult F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 25), F8RC:$RA,bb:$DISP)>; - -def : Pat<(brcond (setone F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>; -def : 
Pat<(brcond (setune F8RC:$RA, immFPZ), bb:$DISP), - (COND_BRANCH_F (immBRCond 21), F8RC:$RA,bb:$DISP)>; - -//End Branches - -//S_floating : IEEE Single -//T_floating : IEEE Double - -//Unused instructions -//Mnemonic Format Opcode Description -//CALL_PAL Pcd 00 Trap to PALcode -//ECB Mfc 18.E800 Evict cache block -//EXCB Mfc 18.0400 Exception barrier -//FETCH Mfc 18.8000 Prefetch data -//FETCH_M Mfc 18.A000 Prefetch data, modify intent -//LDQ_U Mem 0B Load unaligned quadword -//MB Mfc 18.4000 Memory barrier -//STQ_U Mem 0F Store unaligned quadword -//TRAPB Mfc 18.0000 Trap barrier -//WH64 Mfc 18.F800 Write hint  64 bytes -//WMB Mfc 18.4400 Write memory barrier -//MF_FPCR F-P 17.025 Move from FPCR -//MT_FPCR F-P 17.024 Move to FPCR -//There are in the Multimedia extensions, so let's not use them yet -//def MAXSB8 : OForm<0x1C, 0x3E, "MAXSB8 $RA,$RB,$RC">; //Vector signed byte maximum -//def MAXSW4 : OForm< 0x1C, 0x3F, "MAXSW4 $RA,$RB,$RC">; //Vector signed word maximum -//def MAXUB8 : OForm<0x1C, 0x3C, "MAXUB8 $RA,$RB,$RC">; //Vector unsigned byte maximum -//def MAXUW4 : OForm< 0x1C, 0x3D, "MAXUW4 $RA,$RB,$RC">; //Vector unsigned word maximum -//def MINSB8 : OForm< 0x1C, 0x38, "MINSB8 $RA,$RB,$RC">; //Vector signed byte minimum -//def MINSW4 : OForm< 0x1C, 0x39, "MINSW4 $RA,$RB,$RC">; //Vector signed word minimum -//def MINUB8 : OForm< 0x1C, 0x3A, "MINUB8 $RA,$RB,$RC">; //Vector unsigned byte minimum -//def MINUW4 : OForm< 0x1C, 0x3B, "MINUW4 $RA,$RB,$RC">; //Vector unsigned word minimum -//def PERR : OForm< 0x1C, 0x31, "PERR $RA,$RB,$RC">; //Pixel error -//def PKLB : OForm< 0x1C, 0x37, "PKLB $RA,$RB,$RC">; //Pack longwords to bytes -//def PKWB : OForm<0x1C, 0x36, "PKWB $RA,$RB,$RC">; //Pack words to bytes -//def UNPKBL : OForm< 0x1C, 0x35, "UNPKBL $RA,$RB,$RC">; //Unpack bytes to longwords -//def UNPKBW : OForm< 0x1C, 0x34, "UNPKBW $RA,$RB,$RC">; //Unpack bytes to words -//CVTLQ F-P 17.010 Convert longword to quadword -//CVTQL F-P 17.030 Convert quadword to 
longword - - -//Constant handling - -def immConst2Part : PatLeaf<(imm), [{ - //true if imm fits in a LDAH LDA pair - int64_t val = (int64_t)N->getZExtValue(); - return (val <= IMM_FULLHIGH && val >= IMM_FULLLOW); -}]>; -def immConst2PartInt : PatLeaf<(imm), [{ - //true if imm fits in a LDAH LDA pair with zeroext - uint64_t uval = N->getZExtValue(); - int32_t val32 = (int32_t)uval; - return ((uval >> 32) == 0 && //empty upper bits - val32 <= IMM_FULLHIGH); -// val32 >= IMM_FULLLOW + IMM_LOW * IMM_MULT); //Always True -}], SExt32>; - -def : Pat<(i64 immConst2Part:$imm), - (LDA (LL16 immConst2Part:$imm), (LDAH (LH16 immConst2Part:$imm), R31))>; - -def : Pat<(i64 immSExt16:$imm), - (LDA immSExt16:$imm, R31)>; - -def : Pat<(i64 immSExt16int:$imm), - (ZAPNOTi (LDA (SExt16 immSExt16int:$imm), R31), 15)>; -def : Pat<(i64 immConst2PartInt:$imm), - (ZAPNOTi (LDA (LL16 (i64 (SExt32 immConst2PartInt:$imm))), - (LDAH (LH16 (i64 (SExt32 immConst2PartInt:$imm))), R31)), 15)>; - - -//TODO: I want to just define these like this! 
-//def : Pat<(i64 0), -// (R31)>; -//def : Pat<(f64 0.0), -// (F31)>; -//def : Pat<(f64 -0.0), -// (CPYSNT F31, F31)>; -//def : Pat<(f32 0.0), -// (F31)>; -//def : Pat<(f32 -0.0), -// (CPYSNS F31, F31)>; - -//Misc Patterns: - -def : Pat<(sext_inreg GPRC:$RB, i32), - (ADDLi GPRC:$RB, 0)>; - -def : Pat<(fabs F8RC:$RB), - (CPYST F31, F8RC:$RB)>; -def : Pat<(fabs F4RC:$RB), - (CPYSS F31, F4RC:$RB)>; -def : Pat<(fneg F8RC:$RB), - (CPYSNT F8RC:$RB, F8RC:$RB)>; -def : Pat<(fneg F4RC:$RB), - (CPYSNS F4RC:$RB, F4RC:$RB)>; - -def : Pat<(fcopysign F4RC:$A, (fneg F4RC:$B)), - (CPYSNS F4RC:$B, F4RC:$A)>; -def : Pat<(fcopysign F8RC:$A, (fneg F8RC:$B)), - (CPYSNT F8RC:$B, F8RC:$A)>; -def : Pat<(fcopysign F4RC:$A, (fneg F8RC:$B)), - (CPYSNSt F8RC:$B, F4RC:$A)>; -def : Pat<(fcopysign F8RC:$A, (fneg F4RC:$B)), - (CPYSNTs F4RC:$B, F8RC:$A)>; - -//Yes, signed multiply high is ugly -def : Pat<(mulhs GPRC:$RA, GPRC:$RB), - (SUBQr (UMULHr GPRC:$RA, GPRC:$RB), (ADDQr (CMOVGEr GPRC:$RB, R31, GPRC:$RA), - (CMOVGEr GPRC:$RA, R31, GPRC:$RB)))>; - -//Stupid crazy arithmetic stuff: -let AddedComplexity = 1 in { -def : Pat<(mul GPRC:$RA, 5), (S4ADDQr GPRC:$RA, GPRC:$RA)>; -def : Pat<(mul GPRC:$RA, 9), (S8ADDQr GPRC:$RA, GPRC:$RA)>; -def : Pat<(mul GPRC:$RA, 3), (S4SUBQr GPRC:$RA, GPRC:$RA)>; -def : Pat<(mul GPRC:$RA, 7), (S8SUBQr GPRC:$RA, GPRC:$RA)>; - -//slight tree expansion if we are multiplying near to a power of 2 -//n is above a power of 2 -def : Pat<(mul GPRC:$RA, immRem1:$imm), - (ADDQr (SLr GPRC:$RA, (nearP2X immRem1:$imm)), GPRC:$RA)>; -def : Pat<(mul GPRC:$RA, immRem2:$imm), - (ADDQr (SLr GPRC:$RA, (nearP2X immRem2:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>; -def : Pat<(mul GPRC:$RA, immRem3:$imm), - (ADDQr (SLr GPRC:$RA, (nearP2X immRem3:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>; -def : Pat<(mul GPRC:$RA, immRem4:$imm), - (S4ADDQr GPRC:$RA, (SLr GPRC:$RA, (nearP2X immRem4:$imm)))>; -def : Pat<(mul GPRC:$RA, immRem5:$imm), - (ADDQr (SLr GPRC:$RA, (nearP2X immRem5:$imm)), (S4ADDQr GPRC:$RA, 
GPRC:$RA))>; -def : Pat<(mul GPRC:$RA, immRemP2:$imm), - (ADDQr (SLr GPRC:$RA, (nearP2X immRemP2:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2:$imm)))>; - -//n is below a power of 2 -//FIXME: figure out why something is truncating the imm to 32bits -// this will fix 2007-11-27-mulneg3 -//def : Pat<(mul GPRC:$RA, immRem1n:$imm), -// (SUBQr (SLr GPRC:$RA, (nearP2X immRem1n:$imm)), GPRC:$RA)>; -//def : Pat<(mul GPRC:$RA, immRem2n:$imm), -// (SUBQr (SLr GPRC:$RA, (nearP2X immRem2n:$imm)), (ADDQr GPRC:$RA, GPRC:$RA))>; -//def : Pat<(mul GPRC:$RA, immRem3n:$imm), -// (SUBQr (SLr GPRC:$RA, (nearP2X immRem3n:$imm)), (S4SUBQr GPRC:$RA, GPRC:$RA))>; -//def : Pat<(mul GPRC:$RA, immRem4n:$imm), -// (SUBQr (SLr GPRC:$RA, (nearP2X immRem4n:$imm)), (SLi GPRC:$RA, 2))>; -//def : Pat<(mul GPRC:$RA, immRem5n:$imm), -// (SUBQr (SLr GPRC:$RA, (nearP2X immRem5n:$imm)), (S4ADDQr GPRC:$RA, GPRC:$RA))>; -//def : Pat<(mul GPRC:$RA, immRemP2n:$imm), -// (SUBQr (SLr GPRC:$RA, (nearP2X immRemP2n:$imm)), (SLi GPRC:$RA, (nearP2RemX immRemP2n:$imm)))>; -} //Added complexity diff --git a/lib/Target/Alpha/AlphaLLRP.cpp b/lib/Target/Alpha/AlphaLLRP.cpp deleted file mode 100644 index 85fbfd1..0000000 --- a/lib/Target/Alpha/AlphaLLRP.cpp +++ /dev/null @@ -1,158 +0,0 @@ -//===-- AlphaLLRP.cpp - Alpha Load Load Replay Trap elimination pass. -- --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Here we check for potential replay traps introduced by the spiller -// We also align some branch targets if we can do so for free. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "alpha-nops" -#include "Alpha.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -using namespace llvm; - -STATISTIC(nopintro, "Number of nops inserted"); -STATISTIC(nopalign, "Number of nops inserted for alignment"); - -namespace { - cl::opt - AlignAll("alpha-align-all", cl::Hidden, - cl::desc("Align all blocks")); - - struct AlphaLLRPPass : public MachineFunctionPass { - /// Target machine description which we query for reg. names, data - /// layout, etc. - /// - AlphaTargetMachine &TM; - - static char ID; - AlphaLLRPPass(AlphaTargetMachine &tm) - : MachineFunctionPass(ID), TM(tm) { } - - virtual const char *getPassName() const { - return "Alpha NOP inserter"; - } - - bool runOnMachineFunction(MachineFunction &F) { - const TargetInstrInfo *TII = F.getTarget().getInstrInfo(); - bool Changed = false; - MachineInstr* prev[3] = {0,0,0}; - DebugLoc dl; - unsigned count = 0; - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); - FI != FE; ++FI) { - MachineBasicBlock& MBB = *FI; - bool ub = false; - for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) { - if (count%4 == 0) - prev[0] = prev[1] = prev[2] = 0; //Slots cleared at fetch boundary - ++count; - MachineInstr *MI = I++; - switch (MI->getOpcode()) { - case Alpha::LDQ: case Alpha::LDL: - case Alpha::LDWU: case Alpha::LDBU: - case Alpha::LDT: case Alpha::LDS: - case Alpha::STQ: case Alpha::STL: - case Alpha::STW: case Alpha::STB: - case Alpha::STT: case Alpha::STS: - if (MI->getOperand(2).getReg() == Alpha::R30) { - if (prev[0] && - prev[0]->getOperand(2).getReg() == MI->getOperand(2).getReg()&& - prev[0]->getOperand(1).getImm() == 
MI->getOperand(1).getImm()){ - prev[0] = prev[1]; - prev[1] = prev[2]; - prev[2] = 0; - BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) - .addReg(Alpha::R31) - .addReg(Alpha::R31); - Changed = true; nopintro += 1; - count += 1; - } else if (prev[1] - && prev[1]->getOperand(2).getReg() == - MI->getOperand(2).getReg() - && prev[1]->getOperand(1).getImm() == - MI->getOperand(1).getImm()) { - prev[0] = prev[2]; - prev[1] = prev[2] = 0; - BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) - .addReg(Alpha::R31) - .addReg(Alpha::R31); - BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) - .addReg(Alpha::R31) - .addReg(Alpha::R31); - Changed = true; nopintro += 2; - count += 2; - } else if (prev[2] - && prev[2]->getOperand(2).getReg() == - MI->getOperand(2).getReg() - && prev[2]->getOperand(1).getImm() == - MI->getOperand(1).getImm()) { - prev[0] = prev[1] = prev[2] = 0; - BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) - .addReg(Alpha::R31).addReg(Alpha::R31); - BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) - .addReg(Alpha::R31).addReg(Alpha::R31); - BuildMI(MBB, MI, dl, TII->get(Alpha::BISr), Alpha::R31) - .addReg(Alpha::R31).addReg(Alpha::R31); - Changed = true; nopintro += 3; - count += 3; - } - prev[0] = prev[1]; - prev[1] = prev[2]; - prev[2] = MI; - break; - } - prev[0] = prev[1]; - prev[1] = prev[2]; - prev[2] = 0; - break; - case Alpha::ALTENT: - case Alpha::MEMLABEL: - case Alpha::PCLABEL: - --count; - break; - case Alpha::BR: - case Alpha::JMP: - ub = true; - //fall through - default: - prev[0] = prev[1]; - prev[1] = prev[2]; - prev[2] = 0; - break; - } - } - if (ub || AlignAll) { - //we can align stuff for free at this point - while (count % 4) { - BuildMI(MBB, MBB.end(), dl, TII->get(Alpha::BISr), Alpha::R31) - .addReg(Alpha::R31).addReg(Alpha::R31); - ++count; - ++nopalign; - prev[0] = prev[1]; - prev[1] = prev[2]; - prev[2] = 0; - } - } - } - return Changed; - } - }; - char AlphaLLRPPass::ID = 0; -} // end of anonymous 
namespace - -FunctionPass *llvm::createAlphaLLRPPass(AlphaTargetMachine &tm) { - return new AlphaLLRPPass(tm); -} diff --git a/lib/Target/Alpha/AlphaMachineFunctionInfo.h b/lib/Target/Alpha/AlphaMachineFunctionInfo.h deleted file mode 100644 index 186738c..0000000 --- a/lib/Target/Alpha/AlphaMachineFunctionInfo.h +++ /dev/null @@ -1,62 +0,0 @@ -//====- AlphaMachineFuctionInfo.h - Alpha machine function info -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares Alpha-specific per-machine-function information. -// -//===----------------------------------------------------------------------===// - -#ifndef ALPHAMACHINEFUNCTIONINFO_H -#define ALPHAMACHINEFUNCTIONINFO_H - -#include "llvm/CodeGen/MachineFunction.h" - -namespace llvm { - -/// AlphaMachineFunctionInfo - This class is derived from MachineFunction -/// private Alpha target-specific information for each MachineFunction. -class AlphaMachineFunctionInfo : public MachineFunctionInfo { - /// GlobalBaseReg - keeps track of the virtual register initialized for - /// use as the global base register. This is used for PIC in some PIC - /// relocation models. - unsigned GlobalBaseReg; - - /// GlobalRetAddr = keeps track of the virtual register initialized for - /// the return address value. 
- unsigned GlobalRetAddr; - - /// VarArgsOffset - What is the offset to the first vaarg - int VarArgsOffset; - /// VarArgsBase - What is the base FrameIndex - int VarArgsBase; - -public: - AlphaMachineFunctionInfo() : GlobalBaseReg(0), GlobalRetAddr(0), - VarArgsOffset(0), VarArgsBase(0) {} - - explicit AlphaMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0), - GlobalRetAddr(0), - VarArgsOffset(0), - VarArgsBase(0) {} - - unsigned getGlobalBaseReg() const { return GlobalBaseReg; } - void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; } - - unsigned getGlobalRetAddr() const { return GlobalRetAddr; } - void setGlobalRetAddr(unsigned Reg) { GlobalRetAddr = Reg; } - - int getVarArgsOffset() const { return VarArgsOffset; } - void setVarArgsOffset(int Offset) { VarArgsOffset = Offset; } - - int getVarArgsBase() const { return VarArgsBase; } - void setVarArgsBase(int Base) { VarArgsBase = Base; } -}; - -} // End llvm namespace - -#endif diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp deleted file mode 100644 index 8b6230f..0000000 --- a/lib/Target/Alpha/AlphaRegisterInfo.cpp +++ /dev/null @@ -1,199 +0,0 @@ -//===- AlphaRegisterInfo.cpp - Alpha Register Information -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Alpha implementation of the TargetRegisterInfo class. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "reginfo" -#include "Alpha.h" -#include "AlphaRegisterInfo.h" -#include "llvm/Constants.h" -#include "llvm/Type.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" -#include - -#define GET_REGINFO_TARGET_DESC -#include "AlphaGenRegisterInfo.inc" - -using namespace llvm; - -AlphaRegisterInfo::AlphaRegisterInfo(const TargetInstrInfo &tii) - : AlphaGenRegisterInfo(Alpha::R26), TII(tii) { -} - -static long getUpper16(long l) { - long y = l / Alpha::IMM_MULT; - if (l % Alpha::IMM_MULT > Alpha::IMM_HIGH) - ++y; - return y; -} - -static long getLower16(long l) { - long h = getUpper16(l); - return l - h * Alpha::IMM_MULT; -} - -const unsigned* AlphaRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) - const { - static const unsigned CalleeSavedRegs[] = { - Alpha::R9, Alpha::R10, - Alpha::R11, Alpha::R12, - Alpha::R13, Alpha::R14, - Alpha::F2, Alpha::F3, - Alpha::F4, Alpha::F5, - Alpha::F6, Alpha::F7, - Alpha::F8, Alpha::F9, 0 - }; - return CalleeSavedRegs; -} - -BitVector AlphaRegisterInfo::getReservedRegs(const MachineFunction &MF) const { - BitVector Reserved(getNumRegs()); - Reserved.set(Alpha::R15); - Reserved.set(Alpha::R29); - Reserved.set(Alpha::R30); - Reserved.set(Alpha::R31); - return Reserved; -} - -//===----------------------------------------------------------------------===// -// Stack Frame Processing methods 
-//===----------------------------------------------------------------------===// - -void AlphaRegisterInfo:: -eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - if (TFI->hasFP(MF)) { - // If we have a frame pointer, turn the adjcallstackup instruction into a - // 'sub ESP, ' and the adjcallstackdown instruction into 'add ESP, - // ' - MachineInstr *Old = I; - uint64_t Amount = Old->getOperand(0).getImm(); - if (Amount != 0) { - // We need to keep the stack aligned properly. To do this, we round the - // amount of space needed for the outgoing arguments up to the next - // alignment boundary. - unsigned Align = TFI->getStackAlignment(); - Amount = (Amount+Align-1)/Align*Align; - - MachineInstr *New; - if (Old->getOpcode() == Alpha::ADJUSTSTACKDOWN) { - New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30) - .addImm(-Amount).addReg(Alpha::R30); - } else { - assert(Old->getOpcode() == Alpha::ADJUSTSTACKUP); - New=BuildMI(MF, Old->getDebugLoc(), TII.get(Alpha::LDA), Alpha::R30) - .addImm(Amount).addReg(Alpha::R30); - } - - // Replace the pseudo instruction with a new instruction... 
- MBB.insert(I, New); - } - } - - MBB.erase(I); -} - -//Alpha has a slightly funny stack: -//Args -//<- incoming SP -//fixed locals (and spills, callee saved, etc) -//<- FP -//variable locals -//<- SP - -void -AlphaRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS) const { - assert(SPAdj == 0 && "Unexpected"); - - unsigned i = 0; - MachineInstr &MI = *II; - MachineBasicBlock &MBB = *MI.getParent(); - MachineFunction &MF = *MBB.getParent(); - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - bool FP = TFI->hasFP(MF); - - while (!MI.getOperand(i).isFI()) { - ++i; - assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!"); - } - - int FrameIndex = MI.getOperand(i).getIndex(); - - // Add the base register of R30 (SP) or R15 (FP). - MI.getOperand(i + 1).ChangeToRegister(FP ? Alpha::R15 : Alpha::R30, false); - - // Now add the frame object offset to the offset from the virtual frame index. - int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex); - - DEBUG(errs() << "FI: " << FrameIndex << " Offset: " << Offset << "\n"); - - Offset += MF.getFrameInfo()->getStackSize(); - - DEBUG(errs() << "Corrected Offset " << Offset - << " for stack size: " << MF.getFrameInfo()->getStackSize() << "\n"); - - if (Offset > Alpha::IMM_HIGH || Offset < Alpha::IMM_LOW) { - DEBUG(errs() << "Unconditionally using R28 for evil purposes Offset: " - << Offset << "\n"); - //so in this case, we need to use a temporary register, and move the - //original inst off the SP/FP - //fix up the old: - MI.getOperand(i + 1).ChangeToRegister(Alpha::R28, false); - MI.getOperand(i).ChangeToImmediate(getLower16(Offset)); - //insert the new - MachineInstr* nMI=BuildMI(MF, MI.getDebugLoc(), - TII.get(Alpha::LDAH), Alpha::R28) - .addImm(getUpper16(Offset)).addReg(FP ? 
Alpha::R15 : Alpha::R30); - MBB.insert(II, nMI); - } else { - MI.getOperand(i).ChangeToImmediate(Offset); - } -} - -unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const { - const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); - - return TFI->hasFP(MF) ? Alpha::R15 : Alpha::R30; -} - -unsigned AlphaRegisterInfo::getEHExceptionRegister() const { - llvm_unreachable("What is the exception register"); - return 0; -} - -unsigned AlphaRegisterInfo::getEHHandlerRegister() const { - llvm_unreachable("What is the exception handler register"); - return 0; -} - -std::string AlphaRegisterInfo::getPrettyName(unsigned reg) -{ - std::string s(AlphaRegDesc[reg].Name); - return s; -} diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h deleted file mode 100644 index e35be27..0000000 --- a/lib/Target/Alpha/AlphaRegisterInfo.h +++ /dev/null @@ -1,56 +0,0 @@ -//===- AlphaRegisterInfo.h - Alpha Register Information Impl ----*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the Alpha implementation of the TargetRegisterInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef ALPHAREGISTERINFO_H -#define ALPHAREGISTERINFO_H - -#include "llvm/Target/TargetRegisterInfo.h" - -#define GET_REGINFO_HEADER -#include "AlphaGenRegisterInfo.inc" - -namespace llvm { - -class TargetInstrInfo; -class Type; - -struct AlphaRegisterInfo : public AlphaGenRegisterInfo { - const TargetInstrInfo &TII; - - AlphaRegisterInfo(const TargetInstrInfo &tii); - - /// Code Generation virtual methods... 
- const unsigned *getCalleeSavedRegs(const MachineFunction *MF = 0) const; - - BitVector getReservedRegs(const MachineFunction &MF) const; - - void eliminateCallFramePseudoInstr(MachineFunction &MF, - MachineBasicBlock &MBB, - MachineBasicBlock::iterator I) const; - - void eliminateFrameIndex(MachineBasicBlock::iterator II, - int SPAdj, RegScavenger *RS = NULL) const; - - // Debug information queries. - unsigned getFrameRegister(const MachineFunction &MF) const; - - // Exception handling queries. - unsigned getEHExceptionRegister() const; - unsigned getEHHandlerRegister() const; - - static std::string getPrettyName(unsigned reg); -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/Alpha/AlphaRegisterInfo.td b/lib/Target/Alpha/AlphaRegisterInfo.td deleted file mode 100644 index 32120d7..0000000 --- a/lib/Target/Alpha/AlphaRegisterInfo.td +++ /dev/null @@ -1,133 +0,0 @@ -//===- AlphaRegisterInfo.td - The Alpha Register File ------*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file describes the Alpha register set. -// -//===----------------------------------------------------------------------===// - -class AlphaReg : Register { - field bits<5> Num; - let Namespace = "Alpha"; -} - -// We identify all our registers with a 5-bit ID, for consistency's sake. 
- -// GPR - One of the 32 32-bit general-purpose registers -class GPR num, string n> : AlphaReg { - let Num = num; -} - -// FPR - One of the 32 64-bit floating-point registers -class FPR num, string n> : AlphaReg { - let Num = num; -} - -//#define FP $15 -//#define RA $26 -//#define PV $27 -//#define GP $29 -//#define SP $30 - -// General-purpose registers -def R0 : GPR< 0, "$0">, DwarfRegNum<[0]>; -def R1 : GPR< 1, "$1">, DwarfRegNum<[1]>; -def R2 : GPR< 2, "$2">, DwarfRegNum<[2]>; -def R3 : GPR< 3, "$3">, DwarfRegNum<[3]>; -def R4 : GPR< 4, "$4">, DwarfRegNum<[4]>; -def R5 : GPR< 5, "$5">, DwarfRegNum<[5]>; -def R6 : GPR< 6, "$6">, DwarfRegNum<[6]>; -def R7 : GPR< 7, "$7">, DwarfRegNum<[7]>; -def R8 : GPR< 8, "$8">, DwarfRegNum<[8]>; -def R9 : GPR< 9, "$9">, DwarfRegNum<[9]>; -def R10 : GPR<10, "$10">, DwarfRegNum<[10]>; -def R11 : GPR<11, "$11">, DwarfRegNum<[11]>; -def R12 : GPR<12, "$12">, DwarfRegNum<[12]>; -def R13 : GPR<13, "$13">, DwarfRegNum<[13]>; -def R14 : GPR<14, "$14">, DwarfRegNum<[14]>; -def R15 : GPR<15, "$15">, DwarfRegNum<[15]>; -def R16 : GPR<16, "$16">, DwarfRegNum<[16]>; -def R17 : GPR<17, "$17">, DwarfRegNum<[17]>; -def R18 : GPR<18, "$18">, DwarfRegNum<[18]>; -def R19 : GPR<19, "$19">, DwarfRegNum<[19]>; -def R20 : GPR<20, "$20">, DwarfRegNum<[20]>; -def R21 : GPR<21, "$21">, DwarfRegNum<[21]>; -def R22 : GPR<22, "$22">, DwarfRegNum<[22]>; -def R23 : GPR<23, "$23">, DwarfRegNum<[23]>; -def R24 : GPR<24, "$24">, DwarfRegNum<[24]>; -def R25 : GPR<25, "$25">, DwarfRegNum<[25]>; -def R26 : GPR<26, "$26">, DwarfRegNum<[26]>; -def R27 : GPR<27, "$27">, DwarfRegNum<[27]>; -def R28 : GPR<28, "$28">, DwarfRegNum<[28]>; -def R29 : GPR<29, "$29">, DwarfRegNum<[29]>; -def R30 : GPR<30, "$30">, DwarfRegNum<[30]>; -def R31 : GPR<31, "$31">, DwarfRegNum<[31]>; - -// Floating-point registers -def F0 : FPR< 0, "$f0">, DwarfRegNum<[33]>; -def F1 : FPR< 1, "$f1">, DwarfRegNum<[34]>; -def F2 : FPR< 2, "$f2">, DwarfRegNum<[35]>; -def F3 : FPR< 3, "$f3">, 
DwarfRegNum<[36]>; -def F4 : FPR< 4, "$f4">, DwarfRegNum<[37]>; -def F5 : FPR< 5, "$f5">, DwarfRegNum<[38]>; -def F6 : FPR< 6, "$f6">, DwarfRegNum<[39]>; -def F7 : FPR< 7, "$f7">, DwarfRegNum<[40]>; -def F8 : FPR< 8, "$f8">, DwarfRegNum<[41]>; -def F9 : FPR< 9, "$f9">, DwarfRegNum<[42]>; -def F10 : FPR<10, "$f10">, DwarfRegNum<[43]>; -def F11 : FPR<11, "$f11">, DwarfRegNum<[44]>; -def F12 : FPR<12, "$f12">, DwarfRegNum<[45]>; -def F13 : FPR<13, "$f13">, DwarfRegNum<[46]>; -def F14 : FPR<14, "$f14">, DwarfRegNum<[47]>; -def F15 : FPR<15, "$f15">, DwarfRegNum<[48]>; -def F16 : FPR<16, "$f16">, DwarfRegNum<[49]>; -def F17 : FPR<17, "$f17">, DwarfRegNum<[50]>; -def F18 : FPR<18, "$f18">, DwarfRegNum<[51]>; -def F19 : FPR<19, "$f19">, DwarfRegNum<[52]>; -def F20 : FPR<20, "$f20">, DwarfRegNum<[53]>; -def F21 : FPR<21, "$f21">, DwarfRegNum<[54]>; -def F22 : FPR<22, "$f22">, DwarfRegNum<[55]>; -def F23 : FPR<23, "$f23">, DwarfRegNum<[56]>; -def F24 : FPR<24, "$f24">, DwarfRegNum<[57]>; -def F25 : FPR<25, "$f25">, DwarfRegNum<[58]>; -def F26 : FPR<26, "$f26">, DwarfRegNum<[59]>; -def F27 : FPR<27, "$f27">, DwarfRegNum<[60]>; -def F28 : FPR<28, "$f28">, DwarfRegNum<[61]>; -def F29 : FPR<29, "$f29">, DwarfRegNum<[62]>; -def F30 : FPR<30, "$f30">, DwarfRegNum<[63]>; -def F31 : FPR<31, "$f31">, DwarfRegNum<[64]>; - - // //#define FP $15 - // //#define RA $26 - // //#define PV $27 - // //#define GP $29 - // //#define SP $30 - // $28 is undefined after any and all calls - -/// Register classes -def GPRC : RegisterClass<"Alpha", [i64], 64, (add - // Volatile - R0, R1, R2, R3, R4, R5, R6, R7, R8, R16, R17, R18, R19, R20, R21, R22, - R23, R24, R25, R28, - //Special meaning, but volatile - R27, //procedure address - R26, //return address - R29, //global offset table address - // Non-volatile - R9, R10, R11, R12, R13, R14, -// Don't allocate 15, 30, 31 - R15, R30, R31)>; //zero - -def F4RC : RegisterClass<"Alpha", [f32], 64, (add F0, F1, - F10, F11, F12, F13, F14, F15, F16, F17, F18, 
F19, - F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, - // Saved: - F2, F3, F4, F5, F6, F7, F8, F9, - F31)>; //zero - -def F8RC : RegisterClass<"Alpha", [f64], 64, (add F4RC)>; diff --git a/lib/Target/Alpha/AlphaRelocations.h b/lib/Target/Alpha/AlphaRelocations.h deleted file mode 100644 index 4c92045..0000000 --- a/lib/Target/Alpha/AlphaRelocations.h +++ /dev/null @@ -1,31 +0,0 @@ -//===- AlphaRelocations.h - Alpha Code Relocations --------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the Alpha target-specific relocation types. -// -//===----------------------------------------------------------------------===// - -#ifndef ALPHARELOCATIONS_H -#define ALPHARELOCATIONS_H - -#include "llvm/CodeGen/MachineRelocation.h" - -namespace llvm { - namespace Alpha { - enum RelocationType { - reloc_literal, - reloc_gprellow, - reloc_gprelhigh, - reloc_gpdist, - reloc_bsr - }; - } -} - -#endif diff --git a/lib/Target/Alpha/AlphaSchedule.td b/lib/Target/Alpha/AlphaSchedule.td deleted file mode 100644 index 3703dd4..0000000 --- a/lib/Target/Alpha/AlphaSchedule.td +++ /dev/null @@ -1,85 +0,0 @@ -//===- AlphaSchedule.td - Alpha Scheduling Definitions -----*- tablegen -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -//This is table 2-2 from the 21264 compiler writers guide -//modified some - -//Pipelines - -def L0 : FuncUnit; -def L1 : FuncUnit; -def FST0 : FuncUnit; -def FST1 : FuncUnit; -def U0 : FuncUnit; -def U1 : FuncUnit; -def FA : FuncUnit; -def FM : FuncUnit; - -def s_ild : InstrItinClass; -def s_fld : InstrItinClass; -def s_ist : InstrItinClass; -def s_fst : InstrItinClass; -def s_lda : InstrItinClass; -def s_rpcc : InstrItinClass; -def s_rx : InstrItinClass; -def s_mxpr : InstrItinClass; -def s_icbr : InstrItinClass; -def s_ubr : InstrItinClass; -def s_jsr : InstrItinClass; -def s_iadd : InstrItinClass; -def s_ilog : InstrItinClass; -def s_ishf : InstrItinClass; -def s_cmov : InstrItinClass; -def s_imul : InstrItinClass; -def s_imisc : InstrItinClass; -def s_fbr : InstrItinClass; -def s_fadd : InstrItinClass; -def s_fmul : InstrItinClass; -def s_fcmov : InstrItinClass; -def s_fdivt : InstrItinClass; -def s_fdivs : InstrItinClass; -def s_fsqrts: InstrItinClass; -def s_fsqrtt: InstrItinClass; -def s_ftoi : InstrItinClass; -def s_itof : InstrItinClass; -def s_pseudo : InstrItinClass; - -//Table 2-4 Instruction Class Latency in Cycles -//modified some - -def Alpha21264Itineraries : ProcessorItineraries< - [L0, L1, FST0, FST1, U0, U1, FA, FM], [], [ - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]>, - InstrItinData]> -]>; diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp b/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp deleted file mode 100644 
index f1958fe..0000000 --- a/lib/Target/Alpha/AlphaSelectionDAGInfo.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- AlphaSelectionDAGInfo.cpp - Alpha SelectionDAG Info ---------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the AlphaSelectionDAGInfo class. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "alpha-selectiondag-info" -#include "AlphaTargetMachine.h" -using namespace llvm; - -AlphaSelectionDAGInfo::AlphaSelectionDAGInfo(const AlphaTargetMachine &TM) - : TargetSelectionDAGInfo(TM) { -} - -AlphaSelectionDAGInfo::~AlphaSelectionDAGInfo() { -} diff --git a/lib/Target/Alpha/AlphaSelectionDAGInfo.h b/lib/Target/Alpha/AlphaSelectionDAGInfo.h deleted file mode 100644 index 3405cc0..0000000 --- a/lib/Target/Alpha/AlphaSelectionDAGInfo.h +++ /dev/null @@ -1,31 +0,0 @@ -//===-- AlphaSelectionDAGInfo.h - Alpha SelectionDAG Info -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the Alpha subclass for TargetSelectionDAGInfo. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ALPHASELECTIONDAGINFO_H -#define ALPHASELECTIONDAGINFO_H - -#include "llvm/Target/TargetSelectionDAGInfo.h" - -namespace llvm { - -class AlphaTargetMachine; - -class AlphaSelectionDAGInfo : public TargetSelectionDAGInfo { -public: - explicit AlphaSelectionDAGInfo(const AlphaTargetMachine &TM); - ~AlphaSelectionDAGInfo(); -}; - -} - -#endif diff --git a/lib/Target/Alpha/AlphaSubtarget.cpp b/lib/Target/Alpha/AlphaSubtarget.cpp deleted file mode 100644 index bd55ce9..0000000 --- a/lib/Target/Alpha/AlphaSubtarget.cpp +++ /dev/null @@ -1,35 +0,0 @@ -//===- AlphaSubtarget.cpp - Alpha Subtarget Information ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the Alpha specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#include "AlphaSubtarget.h" -#include "Alpha.h" - -#define GET_SUBTARGETINFO_TARGET_DESC -#define GET_SUBTARGETINFO_CTOR -#include "AlphaGenSubtargetInfo.inc" - -using namespace llvm; - -AlphaSubtarget::AlphaSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS) - : AlphaGenSubtargetInfo(TT, CPU, FS), HasCT(false) { - std::string CPUName = CPU; - if (CPUName.empty()) - CPUName = "generic"; - - // Parse features string. - ParseSubtargetFeatures(CPUName, FS); - - // Initialize scheduling itinerary for the specified CPU. 
- InstrItins = getInstrItineraryForCPU(CPUName); -} diff --git a/lib/Target/Alpha/AlphaSubtarget.h b/lib/Target/Alpha/AlphaSubtarget.h deleted file mode 100644 index 70b3116..0000000 --- a/lib/Target/Alpha/AlphaSubtarget.h +++ /dev/null @@ -1,49 +0,0 @@ -//=====-- AlphaSubtarget.h - Define Subtarget for the Alpha --*- C++ -*--====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the Alpha specific subclass of TargetSubtargetInfo. -// -//===----------------------------------------------------------------------===// - -#ifndef ALPHASUBTARGET_H -#define ALPHASUBTARGET_H - -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/MC/MCInstrItineraries.h" -#include - -#define GET_SUBTARGETINFO_HEADER -#include "AlphaGenSubtargetInfo.inc" - -namespace llvm { -class StringRe; - -class AlphaSubtarget : public AlphaGenSubtargetInfo { -protected: - - bool HasCT; - - InstrItineraryData InstrItins; - -public: - /// This constructor initializes the data members to match that - /// of the specified triple. - /// - AlphaSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS); - - /// ParseSubtargetFeatures - Parses features string setting specified - /// subtarget options. Definition of function is auto generated by tblgen. 
- void ParseSubtargetFeatures(StringRef CPU, StringRef FS); - - bool hasCT() const { return HasCT; } -}; -} // End llvm namespace - -#endif diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp deleted file mode 100644 index fc9a677..0000000 --- a/lib/Target/Alpha/AlphaTargetMachine.cpp +++ /dev/null @@ -1,51 +0,0 @@ -//===-- AlphaTargetMachine.cpp - Define TargetMachine for Alpha -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// -//===----------------------------------------------------------------------===// - -#include "Alpha.h" -#include "AlphaTargetMachine.h" -#include "llvm/PassManager.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -extern "C" void LLVMInitializeAlphaTarget() { - // Register the target. 
- RegisterTargetMachine X(TheAlphaTarget); -} - -AlphaTargetMachine::AlphaTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM) - : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), - DataLayout("e-f128:128:128-n64"), - FrameLowering(Subtarget), - Subtarget(TT, CPU, FS), - TLInfo(*this), - TSInfo(*this) { -} - -//===----------------------------------------------------------------------===// -// Pass Pipeline Configuration -//===----------------------------------------------------------------------===// - -bool AlphaTargetMachine::addInstSelector(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - PM.add(createAlphaISelDag(*this)); - return false; -} -bool AlphaTargetMachine::addPreEmitPass(PassManagerBase &PM, - CodeGenOpt::Level OptLevel) { - // Must run branch selection immediately preceding the asm printer - PM.add(createAlphaBranchSelectionPass()); - PM.add(createAlphaLLRPPass(*this)); - return false; -} diff --git a/lib/Target/Alpha/AlphaTargetMachine.h b/lib/Target/Alpha/AlphaTargetMachine.h deleted file mode 100644 index 48bb948..0000000 --- a/lib/Target/Alpha/AlphaTargetMachine.h +++ /dev/null @@ -1,66 +0,0 @@ -//===-- AlphaTargetMachine.h - Define TargetMachine for Alpha ---*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the Alpha-specific subclass of TargetMachine. 
-// -//===----------------------------------------------------------------------===// - -#ifndef ALPHA_TARGETMACHINE_H -#define ALPHA_TARGETMACHINE_H - -#include "AlphaInstrInfo.h" -#include "AlphaISelLowering.h" -#include "AlphaFrameLowering.h" -#include "AlphaSelectionDAGInfo.h" -#include "AlphaSubtarget.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetFrameLowering.h" - -namespace llvm { - -class GlobalValue; - -class AlphaTargetMachine : public LLVMTargetMachine { - const TargetData DataLayout; // Calculates type size & alignment - AlphaInstrInfo InstrInfo; - AlphaFrameLowering FrameLowering; - AlphaSubtarget Subtarget; - AlphaTargetLowering TLInfo; - AlphaSelectionDAGInfo TSInfo; - -public: - AlphaTargetMachine(const Target &T, StringRef TT, - StringRef CPU, StringRef FS, - Reloc::Model RM, CodeModel::Model CM); - - virtual const AlphaInstrInfo *getInstrInfo() const { return &InstrInfo; } - virtual const TargetFrameLowering *getFrameLowering() const { - return &FrameLowering; - } - virtual const AlphaSubtarget *getSubtargetImpl() const{ return &Subtarget; } - virtual const AlphaRegisterInfo *getRegisterInfo() const { - return &InstrInfo.getRegisterInfo(); - } - virtual const AlphaTargetLowering* getTargetLowering() const { - return &TLInfo; - } - virtual const AlphaSelectionDAGInfo* getSelectionDAGInfo() const { - return &TSInfo; - } - virtual const TargetData *getTargetData() const { return &DataLayout; } - - // Pass Pipeline Configuration - virtual bool addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel); - virtual bool addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel); -}; - -} // end namespace llvm - -#endif diff --git a/lib/Target/Alpha/CMakeLists.txt b/lib/Target/Alpha/CMakeLists.txt deleted file mode 100644 index a6d5516..0000000 --- a/lib/Target/Alpha/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -set(LLVM_TARGET_DEFINITIONS Alpha.td) - 
-llvm_tablegen(AlphaGenRegisterInfo.inc -gen-register-info) -llvm_tablegen(AlphaGenInstrInfo.inc -gen-instr-info) -llvm_tablegen(AlphaGenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(AlphaGenDAGISel.inc -gen-dag-isel) -llvm_tablegen(AlphaGenCallingConv.inc -gen-callingconv) -llvm_tablegen(AlphaGenSubtargetInfo.inc -gen-subtarget) -add_public_tablegen_target(AlphaCommonTableGen) - -add_llvm_target(AlphaCodeGen - AlphaAsmPrinter.cpp - AlphaBranchSelector.cpp - AlphaInstrInfo.cpp - AlphaISelDAGToDAG.cpp - AlphaISelLowering.cpp - AlphaFrameLowering.cpp - AlphaLLRP.cpp - AlphaRegisterInfo.cpp - AlphaSubtarget.cpp - AlphaTargetMachine.cpp - AlphaSelectionDAGInfo.cpp - ) - -add_llvm_library_dependencies(LLVMAlphaCodeGen - LLVMAlphaDesc - LLVMAlphaInfo - LLVMAsmPrinter - LLVMCodeGen - LLVMCore - LLVMMC - LLVMSelectionDAG - LLVMSupport - LLVMTarget - ) - -add_subdirectory(TargetInfo) -add_subdirectory(MCTargetDesc) diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp deleted file mode 100644 index a35e884..0000000 --- a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.cpp +++ /dev/null @@ -1,23 +0,0 @@ -//===-- AlphaMCAsmInfo.cpp - Alpha asm properties ---------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declarations of the AlphaMCAsmInfo properties. 
-// -//===----------------------------------------------------------------------===// - -#include "AlphaMCAsmInfo.h" -using namespace llvm; - -AlphaMCAsmInfo::AlphaMCAsmInfo(const Target &T, StringRef TT) { - AlignmentIsInBytes = false; - PrivateGlobalPrefix = "$"; - GPRel32Directive = ".gprel32"; - WeakRefDirective = "\t.weak\t"; - HasSetDirective = false; -} diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h b/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h deleted file mode 100644 index 837844b..0000000 --- a/lib/Target/Alpha/MCTargetDesc/AlphaMCAsmInfo.h +++ /dev/null @@ -1,29 +0,0 @@ -//=====-- AlphaMCAsmInfo.h - Alpha asm properties -------------*- C++ -*--====// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the declaration of the AlphaMCAsmInfo class. -// -//===----------------------------------------------------------------------===// - -#ifndef ALPHATARGETASMINFO_H -#define ALPHATARGETASMINFO_H - -#include "llvm/ADT/StringRef.h" -#include "llvm/MC/MCAsmInfo.h" - -namespace llvm { - class Target; - - struct AlphaMCAsmInfo : public MCAsmInfo { - explicit AlphaMCAsmInfo(const Target &T, StringRef TT); - }; - -} // namespace llvm - -#endif diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp deleted file mode 100644 index 4ad021c..0000000 --- a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.cpp +++ /dev/null @@ -1,78 +0,0 @@ -//===-- AlphaMCTargetDesc.cpp - Alpha Target Descriptions -------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -// -// This file provides Alpha specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#include "AlphaMCTargetDesc.h" -#include "AlphaMCAsmInfo.h" -#include "llvm/MC/MCCodeGenInfo.h" -#include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" -#include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/Support/TargetRegistry.h" - -#define GET_INSTRINFO_MC_DESC -#include "AlphaGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_MC_DESC -#include "AlphaGenSubtargetInfo.inc" - -#define GET_REGINFO_MC_DESC -#include "AlphaGenRegisterInfo.inc" - -using namespace llvm; - - -static MCInstrInfo *createAlphaMCInstrInfo() { - MCInstrInfo *X = new MCInstrInfo(); - InitAlphaMCInstrInfo(X); - return X; -} - -static MCRegisterInfo *createAlphaMCRegisterInfo(StringRef TT) { - MCRegisterInfo *X = new MCRegisterInfo(); - InitAlphaMCRegisterInfo(X, Alpha::R26); - return X; -} - -static MCSubtargetInfo *createAlphaMCSubtargetInfo(StringRef TT, StringRef CPU, - StringRef FS) { - MCSubtargetInfo *X = new MCSubtargetInfo(); - InitAlphaMCSubtargetInfo(X, TT, CPU, FS); - return X; -} - -static MCCodeGenInfo *createAlphaMCCodeGenInfo(StringRef TT, Reloc::Model RM, - CodeModel::Model CM) { - MCCodeGenInfo *X = new MCCodeGenInfo(); - X->InitMCCodeGenInfo(Reloc::PIC_, CM); - return X; -} - -// Force static initialization. -extern "C" void LLVMInitializeAlphaTargetMC() { - // Register the MC asm info. - RegisterMCAsmInfo X(TheAlphaTarget); - - // Register the MC codegen info. - TargetRegistry::RegisterMCCodeGenInfo(TheAlphaTarget, - createAlphaMCCodeGenInfo); - - // Register the MC instruction info. - TargetRegistry::RegisterMCInstrInfo(TheAlphaTarget, createAlphaMCInstrInfo); - - // Register the MC register info. - TargetRegistry::RegisterMCRegInfo(TheAlphaTarget, createAlphaMCRegisterInfo); - - // Register the MC subtarget info. 
- TargetRegistry::RegisterMCSubtargetInfo(TheAlphaTarget, - createAlphaMCSubtargetInfo); -} diff --git a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h b/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h deleted file mode 100644 index b0619e6..0000000 --- a/lib/Target/Alpha/MCTargetDesc/AlphaMCTargetDesc.h +++ /dev/null @@ -1,40 +0,0 @@ -//===-- AlphaMCTargetDesc.h - Alpha Target Descriptions ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides Alpha specific target descriptions. -// -//===----------------------------------------------------------------------===// - -#ifndef ALPHAMCTARGETDESC_H -#define ALPHAMCTARGETDESC_H - -namespace llvm { -class MCSubtargetInfo; -class Target; -class StringRef; - -extern Target TheAlphaTarget; - -} // End llvm namespace - -// Defines symbolic names for Alpha registers. This defines a mapping from -// register name to register number. -// -#define GET_REGINFO_ENUM -#include "AlphaGenRegisterInfo.inc" - -// Defines symbolic names for the Alpha instructions. 
-// -#define GET_INSTRINFO_ENUM -#include "AlphaGenInstrInfo.inc" - -#define GET_SUBTARGETINFO_ENUM -#include "AlphaGenSubtargetInfo.inc" - -#endif diff --git a/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt b/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt deleted file mode 100644 index f745ecb..0000000 --- a/lib/Target/Alpha/MCTargetDesc/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -add_llvm_library(LLVMAlphaDesc - AlphaMCTargetDesc.cpp - AlphaMCAsmInfo.cpp - ) - -add_llvm_library_dependencies(LLVMAlphaDesc - LLVMAlphaInfo - LLVMMC - ) - -add_dependencies(LLVMAlphaDesc AlphaCommonTableGen) diff --git a/lib/Target/Alpha/MCTargetDesc/Makefile b/lib/Target/Alpha/MCTargetDesc/Makefile deleted file mode 100644 index d55175f..0000000 --- a/lib/Target/Alpha/MCTargetDesc/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -##===- lib/Target/Alpha/TargetDesc/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../../.. -LIBRARYNAME = LLVMAlphaDesc - -# Hack: we need to include 'main' target directory to grab private headers -CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/Alpha/Makefile b/lib/Target/Alpha/Makefile deleted file mode 100644 index f48847a..0000000 --- a/lib/Target/Alpha/Makefile +++ /dev/null @@ -1,21 +0,0 @@ -##===- lib/Target/Alpha/Makefile -------------------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## - -LEVEL = ../../.. -LIBRARYNAME = LLVMAlphaCodeGen -TARGET = Alpha - -# Make sure that tblgen is run, first thing. 
-BUILT_SOURCES = AlphaGenRegisterInfo.inc AlphaGenInstrInfo.inc \ - AlphaGenAsmWriter.inc AlphaGenDAGISel.inc \ - AlphaGenCallingConv.inc AlphaGenSubtargetInfo.inc - -DIRS = TargetInfo MCTargetDesc - -include $(LEVEL)/Makefile.common diff --git a/lib/Target/Alpha/README.txt b/lib/Target/Alpha/README.txt deleted file mode 100644 index cc170e3..0000000 --- a/lib/Target/Alpha/README.txt +++ /dev/null @@ -1,42 +0,0 @@ -*** - -add gcc builtins for alpha instructions - - -*** - -custom expand byteswap into nifty -extract/insert/mask byte/word/longword/quadword low/high -sequences - -*** - -see if any of the extract/insert/mask operations can be added - -*** - -match more interesting things for cmovlbc cmovlbs (move if low bit clear/set) - -*** - -lower srem and urem - -remq(i,j): i - (j * divq(i,j)) if j != 0 -remqu(i,j): i - (j * divqu(i,j)) if j != 0 -reml(i,j): i - (j * divl(i,j)) if j != 0 -remlu(i,j): i - (j * divlu(i,j)) if j != 0 - -*** - -add crazy vector instructions (MVI): - -(MIN|MAX)(U|S)(B8|W4) min and max, signed and unsigned, byte and word -PKWB, UNPKBW pack/unpack word to byte -PKLB UNPKBL pack/unpack long to byte -PERR pixel error (sum across bytes of bytewise abs(i8v8 a - i8v8 b)) - -cmpbytes bytewise cmpeq of i8v8 a and i8v8 b (not part of MVI extensions) - -this has some good examples for other operations that can be synthesised well -from these rather meager vector ops (such as saturating add). -http://www.alphalinux.org/docs/MVI-full.html diff --git a/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp b/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp deleted file mode 100644 index bdc69e7..0000000 --- a/lib/Target/Alpha/TargetInfo/AlphaTargetInfo.cpp +++ /dev/null @@ -1,20 +0,0 @@ -//===-- AlphaTargetInfo.cpp - Alpha Target Implementation -----------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#include "Alpha.h" -#include "llvm/Module.h" -#include "llvm/Support/TargetRegistry.h" -using namespace llvm; - -llvm::Target llvm::TheAlphaTarget; - -extern "C" void LLVMInitializeAlphaTargetInfo() { - RegisterTarget - X(TheAlphaTarget, "alpha", "Alpha [experimental]"); -} diff --git a/lib/Target/Alpha/TargetInfo/CMakeLists.txt b/lib/Target/Alpha/TargetInfo/CMakeLists.txt deleted file mode 100644 index cac3178..0000000 --- a/lib/Target/Alpha/TargetInfo/CMakeLists.txt +++ /dev/null @@ -1,13 +0,0 @@ -include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. ) - -add_llvm_library(LLVMAlphaInfo - AlphaTargetInfo.cpp - ) - -add_llvm_library_dependencies(LLVMAlphaInfo - LLVMMC - LLVMSupport - LLVMTarget - ) - -add_dependencies(LLVMAlphaInfo AlphaCommonTableGen) diff --git a/lib/Target/Alpha/TargetInfo/Makefile b/lib/Target/Alpha/TargetInfo/Makefile deleted file mode 100644 index de01d7f..0000000 --- a/lib/Target/Alpha/TargetInfo/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -##===- lib/Target/Alpha/TargetInfo/Makefile ----------------*- Makefile -*-===## -# -# The LLVM Compiler Infrastructure -# -# This file is distributed under the University of Illinois Open Source -# License. See LICENSE.TXT for details. -# -##===----------------------------------------------------------------------===## -LEVEL = ../../../.. -LIBRARYNAME = LLVMAlphaInfo - -# Hack: we need to include 'main' target directory to grab private headers -CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/.. - -include $(LEVEL)/Makefile.common -- cgit v1.1 From c73d73eb881ebe7493e934c00ca1c474ffd0ed2d Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 28 Oct 2011 00:06:50 +0000 Subject: ARM Allow 'q' registers in VLD/VST vector lists. Just treat it as if the constituent D registers were specified. 
rdar://10348896 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143167 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 51 ++++++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 8803b68..dbdce29 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -2440,6 +2440,29 @@ parseRegisterList(SmallVectorImpl &Operands) { return false; } +// Return the low-subreg of a given Q register. +static unsigned getDRegFromQReg(unsigned QReg) { + switch (QReg) { + default: llvm_unreachable("expected a Q register!"); + case ARM::Q0: return ARM::D0; + case ARM::Q1: return ARM::D2; + case ARM::Q2: return ARM::D4; + case ARM::Q3: return ARM::D6; + case ARM::Q4: return ARM::D8; + case ARM::Q5: return ARM::D10; + case ARM::Q6: return ARM::D12; + case ARM::Q7: return ARM::D14; + case ARM::Q8: return ARM::D16; + case ARM::Q9: return ARM::D18; + case ARM::Q10: return ARM::D20; + case ARM::Q11: return ARM::D22; + case ARM::Q12: return ARM::D24; + case ARM::Q13: return ARM::D26; + case ARM::Q14: return ARM::D28; + case ARM::Q15: return ARM::D30; + } +} + // parse a vector register list ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseVectorList(SmallVectorImpl &Operands) { @@ -2455,9 +2478,16 @@ parseVectorList(SmallVectorImpl &Operands) { Error(RegLoc, "register expected"); return MatchOperand_ParseFail; } - - unsigned FirstReg = Reg; unsigned Count = 1; + unsigned FirstReg = Reg; + // The list is of D registers, but we also allow Q regs and just interpret + // them as the two D sub-registers. + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + FirstReg = Reg = getDRegFromQReg(Reg); + ++Reg; + ++Count; + } + while (Parser.getTok().is(AsmToken::Comma)) { Parser.Lex(); // Eat the comma. 
RegLoc = Parser.getTok().getLoc(); @@ -2467,14 +2497,27 @@ parseVectorList(SmallVectorImpl &Operands) { Error(RegLoc, "register expected"); return MatchOperand_ParseFail; } - // vector register lists must also be contiguous. + // vector register lists must be contiguous. // It's OK to use the enumeration values directly here rather, as the // VFP register classes have the enum sorted properly. + // + // The list is of D registers, but we also allow Q regs and just interpret + // them as the two D sub-registers. + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + Reg = getDRegFromQReg(Reg); + if (Reg != OldReg + 1) { + Error(RegLoc, "non-contiguous register range"); + return MatchOperand_ParseFail; + } + ++Reg; + Count += 2; + continue; + } + // Normal D register. Just check that it's contiguous and keep going. if (Reg != OldReg + 1) { Error(RegLoc, "non-contiguous register range"); return MatchOperand_ParseFail; } - ++Count; } -- cgit v1.1 From 2ba60e593012ba9b2a9d20b86733eadca288bcb2 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 28 Oct 2011 01:29:32 +0000 Subject: Eliminate LegalizeOps' LegalizedNodes map and have it just call RAUW on every node as it legalizes them. This makes it easier to use hasOneUse() heuristics, since unneeded nodes can be removed from the DAG earlier. Make LegalizeOps visit the DAG in an operands-last order. It previously used operands-first, because LegalizeTypes has to go operands-first, and LegalizeTypes used to be part of LegalizeOps, but they're now split. The operands-last order is more natural for several legalization tasks. For example, it allows lowering code for nodes with floating-point or vector constants to see those constants directly instead of seeing the lowered form (often constant-pool loads). This makes some things somewhat more complicated today, though it ought to allow things to be simpler in the future. It also fixes some bugs exposed by Legalizing using RAUW aggressively. 
Remove the part of LegalizeOps that attempted to patch up invalid chain operands on libcalls generated by LegalizeTypes, since it doesn't work with the new LegalizeOps traversal order. Instead, define what LegalizeTypes is doing to be correct, and transfer the responsibility of keeping calls from having overlapping calling sequences into the scheduler. Teach the scheduler to model callseq_begin/end pairs as having a physical register definition/use to prevent calls from having overlapping calling sequences. This is also somewhat complicated, though there are ways it might be simplified in the future. This addresses rdar://9816668, rdar://10043614, rdar://8434668, and others. Please direct high-level questions about this patch to management. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143177 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 787 +++++++++---------------- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 1 - lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 134 +++++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 + lib/Target/ARM/ARMISelLowering.cpp | 19 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 4 +- lib/Target/X86/X86ISelLowering.cpp | 194 +++++- lib/Target/XCore/XCoreISelLowering.cpp | 9 + 8 files changed, 635 insertions(+), 529 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a8bccda..263333d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -46,37 +46,18 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. /// namespace { -class SelectionDAGLegalize { +class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - // Libcall insertion helpers. - - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been - /// legalized. 
We use this to ensure that calls are properly serialized - /// against each other, including inserted libcalls. - SDValue LastCALLSEQ_END; - - /// IsLegalizingCall - This member is used *only* for purposes of providing - /// helpful assertions that a libcall isn't created while another call is - /// being legalized (which could lead to non-serialized call sequences). - bool IsLegalizingCall; + /// LegalizePosition - The iterator for walking through the node list. + SelectionDAG::allnodes_iterator LegalizePosition; - /// LegalizedNodes - For nodes that are of legal width, and that have more - /// than one use, this map indicates what regularized operand to use. This - /// allows us to avoid legalizing the same thing more than once. - DenseMap LegalizedNodes; + /// LegalizedNodes - The set of nodes which have already been legalized. + SmallPtrSet LegalizedNodes; - void AddLegalizedOperand(SDValue From, SDValue To) { - LegalizedNodes.insert(std::make_pair(From, To)); - // If someone requests legalization of the new node, return itself. - if (From != To) - LegalizedNodes.insert(std::make_pair(To, To)); - - // Transfer SDDbgValues. - DAG.TransferDbgValues(From, To); - } + // Libcall insertion helpers. public: explicit SelectionDAGLegalize(SelectionDAG &DAG); @@ -84,9 +65,8 @@ public: void LegalizeDAG(); private: - /// LegalizeOp - Return a legal replacement for the given operation, with - /// all legal operands. - SDValue LegalizeOp(SDValue O); + /// LegalizeOp - Legalizes the given operation. 
+ void LegalizeOp(SDNode *Node); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -107,9 +87,6 @@ private: SDValue N1, SDValue N2, SmallVectorImpl &Mask) const; - bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet &NodesLeadingTo); - void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); @@ -150,10 +127,21 @@ private: SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); + SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + std::pair ExpandAtomic(SDNode *Node); - void ExpandNode(SDNode *Node, SmallVectorImpl &Results); - void PromoteNode(SDNode *Node, SmallVectorImpl &Results); + void ExpandNode(SDNode *Node); + void PromoteNode(SDNode *Node); + + // DAGUpdateListener implementation. + virtual void NodeDeleted(SDNode *N, SDNode *E) { + LegalizedNodes.erase(N); + if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) + ++LegalizePosition; + } + + virtual void NodeUpdated(SDNode *N) {} }; } @@ -195,145 +183,54 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; - - // The legalize process is inherently a bottom-up recursive process (users - // legalize their uses before themselves). Given infinite stack space, we - // could just start legalizing on the root and traverse the whole graph. In - // practice however, this causes us to run out of stack space on large basic - // blocks. To avoid this problem, compute an ordering of the nodes where each - // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) - LegalizeOp(SDValue(I, 0)); - - // Finally, it's possible the root changed. Get the new root. 
- SDValue OldRoot = DAG.getRoot(); - assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); - DAG.setRoot(LegalizedNodes[OldRoot]); - - LegalizedNodes.clear(); - - // Remove dead nodes now. - DAG.RemoveDeadNodes(); -} - -/// FindCallEndFromCallStart - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_END node that terminates the call sequence. -static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. - if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) - return Node; - } - if (Node->use_empty()) - return 0; // No CallSeqEnd - - // The chain is usually at the end. - SDValue TheChain(Node, Node->getNumValues()-1); - if (TheChain.getValueType() != MVT::Other) { - // Sometimes it's at the beginning. - TheChain = SDValue(Node, 0); - if (TheChain.getValueType() != MVT::Other) { - // Otherwise, hunt for it. - for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) - if (Node->getValueType(i) == MVT::Other) { - TheChain = SDValue(Node, i); - break; - } - - // Otherwise, we walked into a node without a chain. - if (TheChain.getValueType() != MVT::Other) - return 0; - } - } - - for (SDNode::use_iterator UI = Node->use_begin(), - E = Node->use_end(); UI != E; ++UI) { - - // Make sure to only follow users of our token chain. 
- SDNode *User = *UI; - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) - if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) - return Result; +#if 0 + SDValue LastChain = DAG.getEntryNode(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = DAG.allnodes_end(); I != E; ++I) { + SDNode *N = I; + if (N->getOpcode() == ISD::CALLSEQ_START) { + SmallVector Ops(N->op_begin(), N->op_end()); + Ops[0] = LastChain; + SDNode *New = DAG.UpdateNodeOperands(N, Ops.data(), Ops.size()); + assert(New == N && "CALLSEQ_START got CSE'd!"); + } + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) + if (N->getValueType(i) == MVT::Other) + LastChain = SDValue(N, i); } - return 0; -} +#endif -/// FindCallStartFromCallEnd - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_START node that initiates the call sequence. -static SDNode *FindCallStartFromCallEnd(SDNode *Node) { - int nested = 0; - assert(Node && "Didn't find callseq_start for a call??"); - while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { - Node = Node->getOperand(0).getNode(); - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - switch (Node->getOpcode()) { - default: - break; - case ISD::CALLSEQ_START: - if (!nested) - return Node; - nested--; - break; - case ISD::CALLSEQ_END: - nested++; - break; + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. 
+ for (;;) { + bool AnyLegalized = false; + for (LegalizePosition = DAG.allnodes_end(); + LegalizePosition != DAG.allnodes_begin(); ) { + --LegalizePosition; + + SDNode *N = LegalizePosition; + if (LegalizedNodes.insert(N)) { + AnyLegalized = true; + LegalizeOp(N); + } } - } - return 0; -} - -/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to -/// see if any uses can reach Dest. If no dest operands can get to dest, -/// legalize them, legalize ourself, and return false, otherwise, return true. -/// -/// Keep track of the nodes we fine that actually do lead to Dest in -/// NodesLeadingTo. This avoids retraversing them exponential number of times. -/// -bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet &NodesLeadingTo) { - if (N == Dest) return true; // N certainly leads to Dest :) - - // If we've already processed this node and it does lead to Dest, there is no - // need to reprocess it. - if (NodesLeadingTo.count(N)) return true; - - // If the first result of this node has been already legalized, then it cannot - // reach N. - if (LegalizedNodes.count(SDValue(N, 0))) return false; - - // Okay, this node has not already been legalized. Check and legalize all - // operands. If none lead to Dest, then we can legalize this node. - bool OperandsLeadToDest = false; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - OperandsLeadToDest |= // If an operand leads to Dest, so do we. - LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, - NodesLeadingTo); + if (!AnyLegalized) + break; - if (OperandsLeadToDest) { - NodesLeadingTo.insert(N); - return true; } - // Okay, this node looks safe, legalize it and return false. - LegalizeOp(SDValue(N, 0)); - return false; + // Remove dead nodes now. + DAG.RemoveDeadNodes(); } /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or /// a load from the constant pool. 
-static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, - SelectionDAG &DAG, const TargetLowering &TLI) { +SDValue +SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { bool Extend = false; DebugLoc dl = CFP->getDebugLoc(); @@ -369,20 +266,25 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); - if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); - return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, - Alignment); + if (Extend) { + SDValue Result = + DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, + DAG.getEntryNode(), + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); + return Result; + } + SDValue Result = + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), false, false, + Alignment); + return Result; } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. -static -SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI, + SelectionDAG::DAGUpdateListener *DUL) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -397,8 +299,10 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! 
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); - return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), Alignment); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); + DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + return; } // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -458,8 +362,11 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + return; } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -488,13 +395,16 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. 
-static -SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void +ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI, + SDValue &ValResult, SDValue &ChainResult) { SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -512,8 +422,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); - SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = Chain; + return; } // Copy the value to a (aligned) stack slot using (unaligned) integer @@ -572,8 +483,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, MachinePointerInfo(), LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. - SDValue Ops[] = { Load, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Load; + ChainResult = TF; + return; } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -626,8 +538,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - SDValue Ops[] = { Result, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = TF; } /// PerformInsertVectorEltInMemory - Some target cannot handle a variable @@ -763,11 +675,10 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. -SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { - if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. - return Op; +void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. 
+ return; - SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -782,13 +693,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - DenseMap::iterator I = LegalizedNodes.find(Op); - if (I != LegalizedNodes.end()) return I->second; - SDValue Tmp1, Tmp2, Tmp3, Tmp4; - SDValue Result = Op; bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode @@ -882,17 +787,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; - case ISD::BUILD_VECTOR: - // A weird case: legalization for BUILD_VECTOR never legalizes the - // operands! - // FIXME: This really sucks... changing it isn't semantically incorrect, - // but it massively pessimizes the code for floating-point BUILD_VECTORs - // because ConstantFP operands get legalized into constant pool loads - // before the BUILD_VECTOR code can see them. It doesn't usually bite, - // though, because BUILD_VECTORS usually get lowered into other nodes - // which get legalized properly. 
- SimpleFinishLegalizing = false; - break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -903,22 +797,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } if (SimpleFinishLegalizing) { - SmallVector Ops, ResultVals; + SmallVector Ops; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - Ops.push_back(LegalizeOp(Node->getOperand(i))); + Ops.push_back(Node->getOperand(i)); switch (Node->getOpcode()) { default: break; - case ISD::BR: - case ISD::BRIND: - case ISD::BR_JT: - case ISD::BR_CC: - case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END - Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); - Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); - break; case ISD::SHL: case ISD::SRL: case ISD::SRA: @@ -926,57 +809,66 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[1].getValueType().isVector()) - Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[1])); + if (!Ops[1].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[1] = Handle.getValue(); + } break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. 
- if (!Ops[2].getValueType().isVector()) - Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[2])); + if (!Ops[2].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[2] = Handle.getValue(); + } break; } - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), - Ops.size()), 0); + SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + if (NewNode != Node) { + DAG.ReplaceAllUsesWith(Node, NewNode, this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); + DAG.RemoveDeadNode(Node, this); + Node = NewNode; + } switch (Action) { case TargetLowering::Legal: - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - ResultVals.push_back(Result.getValue(i)); - break; + return; case TargetLowering::Custom: // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
- Tmp1 = TLI.LowerOperation(Result, DAG); + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp1.getNode()) { + SmallVector ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) ResultVals.push_back(Tmp1); else ResultVals.push_back(Tmp1.getValue(i)); } - break; + if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { + DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); + DAG.RemoveDeadNode(Node, this); + } + return; } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Result.getNode(), ResultVals); - break; + ExpandNode(Node); + return; case TargetLowering::Promote: - PromoteNode(Result.getNode(), ResultVals); - break; - } - if (!ResultVals.empty()) { - for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { - if (ResultVals[i] != SDValue(Node, i)) - ResultVals[i] = LegalizeOp(ResultVals[i]); - AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); - } - return ResultVals[Op.getResNo()]; + PromoteNode(Node); + return; } } @@ -989,155 +881,20 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { #endif assert(0 && "Do not know how to legalize this operator!"); - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: { - // Scalarize vector SRA/SRL/SHL. 
- EVT VT = Node->getValueType(0); - assert(VT.isVector() && "Unable to legalize non-vector shift"); - assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); - unsigned NumElem = VT.getVectorNumElements(); - - SmallVector Scalars; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); - SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); - Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, - VT.getScalarType(), Ex, Sh)); - } - Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); - break; - } - - case ISD::BUILD_VECTOR: - switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: assert(0 && "This action is not supported yet!"); - case TargetLowering::Custom: - Tmp3 = TLI.LowerOperation(Result, DAG); - if (Tmp3.getNode()) { - Result = Tmp3; - break; - } - // FALLTHROUGH - case TargetLowering::Expand: - Result = ExpandBUILD_VECTOR(Result.getNode()); - break; - } - break; - case ISD::CALLSEQ_START: { - SDNode *CallEnd = FindCallEndFromCallStart(Node); - - // Recursively Legalize all of the inputs of the call end that do not lead - // to this call start. This ensures that any libcalls that need be inserted - // are inserted *before* the CALLSEQ_START. - {SmallPtrSet NodesLeadingTo; - for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) - LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, - NodesLeadingTo); - } - - // Now that we have legalized all of the inputs (which may have inserted - // libcalls), create the new CALLSEQ_START node. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - - // Merge in the last call to ensure that this call starts after the last - // call ended. 
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { - Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); - Tmp1 = LegalizeOp(Tmp1); - } - - // Do not try to legalize the target-specific arguments (#1+). - if (Tmp1 != Node->getOperand(0)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], - Ops.size()), Result.getResNo()); - } - - // Remember that the CALLSEQ_START is legalized. - AddLegalizedOperand(Op.getValue(0), Result); - if (Node->getNumValues() == 2) // If this has a flag result, remember it. - AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); - - // Now that the callseq_start and all of the non-call nodes above this call - // sequence have been legalized, legalize the call itself. During this - // process, no libcalls can/will be inserted, guaranteeing that no calls - // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); - // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; - - // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); - return Result; - } + case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; - } - - // Otherwise, the call start has been legalized and everything is going - // according to plan. Just legalize ourselves normally here. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. 
- // Do not try to legalize the target-specific arguments (#1+), except for - // an optional flag input. - if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ - if (Tmp1 != Node->getOperand(0)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } else { - Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); - if (Tmp1 != Node->getOperand(0) || - Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Ops.back() = Tmp2; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); - // This finishes up call legalization. - IsLegalizingCall = false; - - // If the CALLSEQ_END node has a flag, remember that we legalized it. - AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); - if (Node->getNumValues() == 2) - AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); - return Result.getValue(Op.getResNo()); + break; case ISD::LOAD: { LoadSDNode *LD = cast(Node); - Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. + Tmp1 = LD->getChain(); // Legalize the chain. + Tmp2 = LD->getBasePtr(); // Legalize the base pointer. 
ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp3 = Result.getValue(0); - Tmp4 = Result.getValue(1); + Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp2, LD->getOffset()); + Tmp3 = SDValue(Node, 0); + Tmp4 = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: assert(0 && "This action is not supported yet!"); @@ -1148,20 +905,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), - DAG, TLI); - Tmp3 = Result.getOperand(0); - Tmp4 = Result.getOperand(1); - Tmp3 = LegalizeOp(Tmp3); - Tmp4 = LegalizeOp(Tmp4); + ExpandUnalignedLoad(cast(Node), + DAG, TLI, Tmp3, Tmp4); } } break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(Tmp3, DAG); if (Tmp1.getNode()) { - Tmp3 = LegalizeOp(Tmp1); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + Tmp3 = Tmp1; + Tmp4 = Tmp1.getValue(1); } break; case TargetLowering::Promote: { @@ -1173,16 +926,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); + Tmp4 = Tmp1.getValue(1); break; } } // Since loads produce two values, make sure to remember that we // legalized both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp3); - AddLegalizedOperand(SDValue(Node, 1), Tmp4); - return Op.getResNo() ? 
Tmp4 : Tmp3; + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); + return; } EVT SrcVT = LD->getMemoryVT(); @@ -1213,9 +966,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -1230,8 +984,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp1 = Result; + Tmp2 = Ch; } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); @@ -1274,7 +1028,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } else { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 @@ -1304,11 +1058,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. 
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp2 = Ch; } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1316,17 +1069,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp1 = Result.getValue(0); - Tmp2 = Result.getValue(1); + Node = DAG.UpdateNodeOperands(Node, + Tmp1, Tmp2, LD->getOffset()); + Tmp1 = SDValue(Node, 0); + Tmp2 = SDValue(Node, 1); if (isCustom) { - Tmp3 = TLI.LowerOperation(Result, DAG); + Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp3.getNode()) { - Tmp1 = LegalizeOp(Tmp3); - Tmp2 = LegalizeOp(Tmp3.getValue(1)); + Tmp1 = Tmp3; + Tmp2 = Tmp3.getValue(1); } } else { // If this is an unaligned load and the target doesn't support it, @@ -1337,12 +1089,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), - DAG, TLI); - Tmp1 = Result.getOperand(0); - Tmp2 = Result.getOperand(1); - Tmp1 = LegalizeOp(Tmp1); - Tmp2 = LegalizeOp(Tmp2); + ExpandUnalignedLoad(cast(Node), + DAG, TLI, Tmp1, Tmp2); } } } @@ -1363,9 +1111,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; default: llvm_unreachable("Unexpected extend load type!"); } - Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp1 = LegalizeOp(Result); // Relegalize new nodes. 
- Tmp2 = LegalizeOp(Load.getValue(1)); + Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp2 = Load.getValue(1); break; } @@ -1380,10 +1127,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1391,38 +1138,37 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result, DAG.getValueType(SrcVT)); else ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + Tmp1 = ValRes; + Tmp2 = Result.getValue(1); break; } } // Since loads produce two values, make sure to remember that we legalized // both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp1); - AddLegalizedOperand(SDValue(Node, 1), Tmp2); - return Op.getResNo() ? Tmp2 : Tmp1; + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); + break; } case ISD::STORE: { StoreSDNode *ST = cast(Node); - Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. 
+ Tmp1 = ST->getChain(); + Tmp2 = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - Result = SDValue(OptStore, 0); + DAG.ReplaceAllUsesWith(ST, OptStore, this); break; } { - Tmp3 = LegalizeOp(ST->getValue()); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); + Tmp3 = ST->getValue(); + Node = DAG.UpdateNodeOperands(Node, + Tmp1, Tmp3, Tmp2, + ST->getOffset()); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { @@ -1434,27 +1180,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast(Node), + DAG, TLI, this); } break; case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(Result, DAG); - if (Tmp1.getNode()) Result = Tmp1; + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Tmp1.getNode()) + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Tmp1, this); break; - case TargetLowering::Promote: + case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); break; } + } break; } } else { - Tmp3 = LegalizeOp(ST->getValue()); + Tmp3 = ST->getValue(); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -1466,8 
+1216,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1521,14 +1273,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } // The order of the stores doesn't matter. - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); + Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp3, Tmp2, + ST->getOffset()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1539,12 +1290,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast(Node), DAG, TLI, this); } break; case TargetLowering::Custom: - Result = TLI.LowerOperation(Result, DAG); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), + TLI.LowerOperation(SDValue(Node, 0), DAG), + this); break; case TargetLowering::Expand: 
assert(!StVT.isVector() && @@ -1553,8 +1305,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // TRUNCSTORE:i16 i32 -> STORE i16 assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); break; } } @@ -1562,17 +1316,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } } - assert(Result.getValueType() == Op.getValueType() && - "Bad legalization!"); - - // Make sure that the generated code is itself legal. - if (Result != Op) - Result = LegalizeOp(Result); - - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - AddLegalizedOperand(Op, Result); - return Result; } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { @@ -2011,7 +1754,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -2030,7 +1772,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. 
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); // isTailCall may be true since the callee does not reference caller stack @@ -2046,10 +1787,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo.first; } @@ -2079,11 +1816,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); - return CallInfo.first; } @@ -2093,7 +1825,6 @@ std::pair SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -2110,7 +1841,6 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, @@ -2118,10 +1848,6 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); - // Legalize the call sequence, starting with the chain. 
This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo; } @@ -2247,20 +1973,14 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. DebugLoc dl = Node->getDebugLoc(); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); - // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, + SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo(), false, false, 0); Results.push_back(CallInfo.first); Results.push_back(Rem); @@ -2452,11 +2172,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, MachinePointerInfo::getConstantPool(), false, false, Alignment); else { - FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment); + HandleSDNode Handle(Load); + LegalizeOp(Load.getNode()); + FudgeInReg = Handle.getValue(); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2780,8 +2502,8 @@ std::pair SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { return ExpandChainLibCall(LC, 
Node, false); } -void SelectionDAGLegalize::ExpandNode(SDNode *Node, - SmallVectorImpl &Results) { +void SelectionDAGLegalize::ExpandNode(SDNode *Node) { + SmallVector Results; DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3, Tmp4; switch (Node->getOpcode()) { @@ -3229,10 +2951,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, ConstantFPSDNode *CFP = cast(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. - if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) - Results.push_back(SDValue(Node, 0)); - else - Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI)); + if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + Results.push_back(ExpandConstantFP(CFP, true)); break; } case ISD::EHSELECTION: { @@ -3478,6 +3198,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); + // Ret is a node with an illegal type. Because such things are not + // generally permitted during this phase of legalization, delete the + // node. The above EXTRACT_ELEMENT nodes should have been folded. + DAG.DeleteNode(Ret.getNode()); } if (isSigned) { @@ -3618,7 +3342,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3628,6 +3351,35 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1); break; } + case ISD::BUILD_VECTOR: + Results.push_back(ExpandBUILD_VECTOR(Node)); + break; + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. 
+ EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + SDValue Result = + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Scalars[0], Scalars.size()); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + break; + } case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3638,13 +3390,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - Results.push_back(SDValue(Node, i)); break; } + + // Replace the original node with the legalized result. + if (!Results.empty()) + DAG.ReplaceAllUsesWith(Node, Results.data(), this); } -void SelectionDAGLegalize::PromoteNode(SDNode *Node, - SmallVectorImpl &Results) { + +void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + SmallVector Results; EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || @@ -3772,6 +3527,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; } } + + // Replace the original node with the legalized result. + if (!Results.empty()) + DAG.ReplaceAllUsesWith(Node, Results.data(), this); } // SelectionDAG::Legalize - This is the entry point for the file. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a4bb577..7ed1b98 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1084,7 +1084,6 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index a1abdb4..fd768b1 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -386,6 +386,90 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { } } +/// IsChainDependent - Test if Outer is reachable from Inner through +/// chain dependencies. +static bool IsChainDependent(SDNode *Outer, SDNode *Inner) { + SDNode *N = Outer; + for (;;) { + if (N == Inner) + return true; + if (N->getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (IsChainDependent(N->getOperand(i).getNode(), Inner)) + return true; + return false; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return false; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return false; + } +} + +/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate +/// the corresponding (lowered) CALLSEQ_BEGIN node. +/// +/// NestLevel and MaxNest are used in recursion to indicate the current level +/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum +/// level seen so far.
+/// +/// TODO: It would be better to give CALLSEQ_END an explicit operand to point +/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. +static SDNode * +FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, + const TargetInstrInfo *TII) { + for (;;) { + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. + if (N->getOpcode() == ISD::TokenFactor) { + SDNode *Best = 0; + unsigned BestMaxNest = MaxNest; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + unsigned MyNestLevel = NestLevel; + unsigned MyMaxNest = MaxNest; + if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(), + MyNestLevel, MyMaxNest, TII)) + if (!Best || (MyMaxNest > BestMaxNest)) { + Best = New; + BestMaxNest = MyMaxNest; + } + } + assert(Best); + MaxNest = BestMaxNest; + return Best; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. + if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + MaxNest = std::max(MaxNest, NestLevel); + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + --NestLevel; + if (NestLevel == 0) + return N; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return 0; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return 0; + } +} + /// Call ReleasePred for each predecessor, then update register live def/gen. /// Always update LiveRegDefs for a register dependence even if the current SU /// also defines the register. 
This effectively create one large live range @@ -423,6 +507,26 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } } + + // If we're scheduling a lowered CALLSEQ_END, find the corresponding CALLSEQ_BEGIN. + // Inject an artificial physical register dependence between these nodes, to + // prevent other calls from being interscheduled with them. + const TargetLowering *TLI = TM.getTargetLowering(); + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + if (!LiveRegDefs[SP]) + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + unsigned NestLevel = 0; + unsigned MaxNest = 0; + SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); + + SUnit *Def = &SUnits[N->getNodeId()]; + ++NumLiveRegs; + LiveRegDefs[SP] = Def; + LiveRegGens[SP] = SU; + break; + } } /// Check to see if any of the pending instructions are ready to issue. If @@ -605,6 +709,22 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = NULL; } } + // Release the special call resource dependence, if this is the beginning + // of a call. + const TargetLowering *TLI = TM.getTargetLowering(); + unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); + if (LiveRegDefs[SP] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode() && + LiveRegDefs[SP] == SU) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[SP] = NULL; + LiveRegGens[SP] = NULL; + } + } resetVRegCycle(SU); @@ -1083,6 +1203,20 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { if (!Node->isMachineOpcode()) continue; + // If we're in the middle of scheduling a call, don't begin scheduling + // another call. 
+ if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode() || + Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) + if (LiveRegDefs[i]) { + SDNode *Gen = LiveRegGens[i]->getNode(); + while (SDNode *Glued = Gen->getGluedNode()) + Gen = Glued; + if (!IsChainDependent(Gen, Node) && RegAdded.insert(i)) + LRegs.push_back(i); + } + continue; + } const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 07d2db6..010a740 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5290,6 +5290,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (FromN == getRoot()) + setRoot(To); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5335,6 +5339,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To, getRoot().getResNo())); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5373,6 +5381,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. 
+ if (From == getRoot().getNode()) + setRoot(SDValue(To[getRoot().getResNo()])); } /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving @@ -5431,6 +5443,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot()) + setRoot(To); } namespace { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 31e522d..bfc1690 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1353,12 +1353,10 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, MVT::i32); - // TODO: Disable AlwaysInline when it becomes possible - // to emit a nested call sequence. MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, - /*AlwaysInline=*/true, + /*AlwaysInline=*/false, MachinePointerInfo(0), MachinePointerInfo(0))); @@ -4350,9 +4348,24 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If this is undef splat, generate it via "just" vdup, if possible. if (Lane == -1) Lane = 0; + // Test if V1 is a SCALAR_TO_VECTOR. if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR + // (and probably will turn into a SCALAR_TO_VECTOR once legalization + // reaches it). 
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && + !isa(V1.getOperand(0))) { + bool IsScalarToVector = true; + for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); + } return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, MVT::i32)); } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 02b0ff2..3d75de0 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2114,7 +2114,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { HasNoSignedComparisonUses(Node)) // Look past the truncate if CMP is the only use of it. N0 = N0.getOperand(0); - if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + if ((N0.getNode()->getOpcode() == ISD::AND || + (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) && + N0.getNode()->hasOneUse() && N0.getValueType() != MVT::i8 && X86::isZeroNode(N1)) { ConstantSDNode *C = dyn_cast(N0.getNode()->getOperand(1)); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2ec0814..927a307 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4220,6 +4220,29 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { return true; } +// Test whether the given value is a vector value which will be legalized +// into a load. +static bool WillBeConstantPoolLoad(SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + // Check for any non-constant elements. 
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + switch (N->getOperand(i).getNode()->getOpcode()) { + case ISD::UNDEF: + case ISD::ConstantFP: + case ISD::Constant: + break; + default: + return false; + } + + // Vectors of all-zeros and all-ones are materialized with special + // instructions rather than being loaded. + return !ISD::isBuildVectorAllZeros(N) && + !ISD::isBuildVectorAllOnes(N); +} + /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to /// match movlp{s|d}. The lower half elements should come from lower half of /// V1 (and in order), and the upper half elements should come from the upper @@ -4235,7 +4258,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, return false; // Is V2 is a vector load, don't do this transformation. We will try to use // load folding shufps op. - if (ISD::isNON_EXTLoad(V2)) + if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2)) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -6351,6 +6374,8 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op)) CanFoldLoad = true; + ShuffleVectorSDNode *SVOp = cast(Op); + // Both of them can't be memory operations though. if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) CanFoldLoad = false; @@ -6360,10 +6385,11 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); if (NumElems == 4) - return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); + // If we don't care about the second element, procede to use movss. 
+ if (SVOp->getMaskElt(1) != -1) + return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); } - ShuffleVectorSDNode *SVOp = cast(Op); // movl and movlp will both match v2i64, but v2i64 is never matched by // movl earlier because we make it strict to avoid messing with the movlp load // folding logic (see the code above getMOVLP call). Match it here then, @@ -8681,8 +8707,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. - if (Cond.getOpcode() == X86ISD::SETCC || - Cond.getOpcode() == X86ISD::SETCC_CARRY) { + unsigned CondOpcode = Cond.getOpcode(); + if (CondOpcode == X86ISD::SETCC || + CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -8699,6 +8726,39 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = Cmp; addTest = false; } + } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || + CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || + ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && + Cond.getOperand(0).getValueType() != MVT::i8)) { + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + unsigned X86Opcode; + unsigned X86Cond; + SDVTList VTs; + switch (CondOpcode) { + case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; + case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; + case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; + case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; + case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; + case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; + default: llvm_unreachable("unexpected overflowing operator"); + } + if (CondOpcode == ISD::UMULO) + VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), + 
MVT::i32); + else + VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + + SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS); + + if (CondOpcode == ISD::UMULO) + Cond = X86Op.getValue(2); + else + Cond = X86Op.getValue(1); + + CC = DAG.getConstant(X86Cond, MVT::i8); + addTest = false; } if (addTest) { @@ -8780,11 +8840,27 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); SDValue CC; + bool Inverted = false; if (Cond.getOpcode() == ISD::SETCC) { - SDValue NewCond = LowerSETCC(Cond, DAG); - if (NewCond.getNode()) - Cond = NewCond; + // Check for setcc([su]{add,sub,mul}o == 0). + if (cast(Cond.getOperand(2))->get() == ISD::SETEQ && + isa(Cond.getOperand(1)) && + cast(Cond.getOperand(1))->isNullValue() && + Cond.getOperand(0).getResNo() == 1 && + (Cond.getOperand(0).getOpcode() == ISD::SADDO || + Cond.getOperand(0).getOpcode() == ISD::UADDO || + Cond.getOperand(0).getOpcode() == ISD::SSUBO || + Cond.getOperand(0).getOpcode() == ISD::USUBO || + Cond.getOperand(0).getOpcode() == ISD::SMULO || + Cond.getOperand(0).getOpcode() == ISD::UMULO)) { + Inverted = true; + Cond = Cond.getOperand(0); + } else { + SDValue NewCond = LowerSETCC(Cond, DAG); + if (NewCond.getNode()) + Cond = NewCond; + } } #if 0 // FIXME: LowerXALUO doesn't handle these!! @@ -8805,8 +8881,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. 
- if (Cond.getOpcode() == X86ISD::SETCC || - Cond.getOpcode() == X86ISD::SETCC_CARRY) { + unsigned CondOpcode = Cond.getOpcode(); + if (CondOpcode == X86ISD::SETCC || + CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -8827,6 +8904,43 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { break; } } + } + CondOpcode = Cond.getOpcode(); + if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || + CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || + ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && + Cond.getOperand(0).getValueType() != MVT::i8)) { + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + unsigned X86Opcode; + unsigned X86Cond; + SDVTList VTs; + switch (CondOpcode) { + case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; + case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; + case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; + case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; + case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; + case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; + default: llvm_unreachable("unexpected overflowing operator"); + } + if (Inverted) + X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond); + if (CondOpcode == ISD::UMULO) + VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), + MVT::i32); + else + VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + + SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS); + + if (CondOpcode == ISD::UMULO) + Cond = X86Op.getValue(2); + else + Cond = X86Op.getValue(1); + + CC = DAG.getConstant(X86Cond, MVT::i8); + addTest = false; } else { unsigned CondOpc; if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) { @@ -8890,6 +9004,66 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { CC = 
DAG.getConstant(CCode, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); addTest = false; + } else if (Cond.getOpcode() == ISD::SETCC && + cast(Cond.getOperand(2))->get() == ISD::SETOEQ) { + // For FCMP_OEQ, we can emit + // two branches instead of an explicit AND instruction with a + // separate test. However, we only do this if this block doesn't + // have a fall-through edge, because this requires an explicit + // jmp when the condition is false. + if (Op.getNode()->hasOneUse()) { + SDNode *User = *Op.getNode()->use_begin(); + // Look for an unconditional branch following this conditional branch. + // We need this because we need to reverse the successors in order + // to implement FCMP_OEQ. + if (User->getOpcode() == ISD::BR) { + SDValue FalseBB = User->getOperand(1); + SDNode *NewBR = + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); + assert(NewBR == User); + (void)NewBR; + Dest = FalseBB; + + SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, + Cond.getOperand(0), Cond.getOperand(1)); + CC = DAG.getConstant(X86::COND_NE, MVT::i8); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), + Chain, Dest, CC, Cmp); + CC = DAG.getConstant(X86::COND_P, MVT::i8); + Cond = Cmp; + addTest = false; + } + } + } else if (Cond.getOpcode() == ISD::SETCC && + cast(Cond.getOperand(2))->get() == ISD::SETUNE) { + // For FCMP_UNE, we can emit + // two branches instead of an explicit AND instruction with a + // separate test. However, we only do this if this block doesn't + // have a fall-through edge, because this requires an explicit + // jmp when the condition is false. + if (Op.getNode()->hasOneUse()) { + SDNode *User = *Op.getNode()->use_begin(); + // Look for an unconditional branch following this conditional branch. + // We need this because we need to reverse the successors in order + // to implement FCMP_UNE. 
+ if (User->getOpcode() == ISD::BR) { + SDValue FalseBB = User->getOperand(1); + SDNode *NewBR = + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); + assert(NewBR == User); + (void)NewBR; + + SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, + Cond.getOperand(0), Cond.getOperand(1)); + CC = DAG.getConstant(X86::COND_NE, MVT::i8); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), + Chain, Dest, CC, Cmp); + CC = DAG.getConstant(X86::COND_NP, MVT::i8); + Cond = Cmp; + addTest = false; + Dest = FalseBB; + } + } } } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 2afe0e3..4b74f96 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -386,6 +386,15 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, Offset = off; return true; } + // Check for an aligned global variable. + if (GlobalAddressSDNode *GA = dyn_cast(*Root)) { + const GlobalValue *GV = GA->getGlobal(); + if (GA->getOffset() == 0 && GV->getAlignment() >= 4) { + AlignedBase = Base; + Offset = off; + return true; + } + } return false; } -- cgit v1.1 From 3799efab8c028c8928bf55ddd7d39da0e6b62cea Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 28 Oct 2011 01:41:21 +0000 Subject: Delete #if 0 code accidentally left in. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143179 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 263333d..7425669 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -185,23 +185,6 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) void SelectionDAGLegalize::LegalizeDAG() { DAG.AssignTopologicalOrder(); -#if 0 - SDValue LastChain = DAG.getEntryNode(); - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = DAG.allnodes_end(); I != E; ++I) { - SDNode *N = I; - if (N->getOpcode() == ISD::CALLSEQ_START) { - SmallVector Ops(N->op_begin(), N->op_end()); - Ops[0] = LastChain; - SDNode *New = DAG.UpdateNodeOperands(N, Ops.data(), Ops.size()); - assert(New == N && "CALLSEQ_START got CSE'd!"); - } - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) - if (N->getValueType(i) == MVT::Other) - LastChain = SDValue(N, i); - } -#endif - // Visit all the nodes. We start in topological order, so that we see // nodes with their original operands intact. Legalization can produce // new nodes which may themselves need to be legalized. Iterate until all -- cgit v1.1 From 6f2dd7ebcf5225bb98594c3504ded9a46c7ed11a Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 28 Oct 2011 03:45:11 +0000 Subject: LFTR should avoid a type mismatch with null pointer IVs. 
Fixes rdar://10359193 Indvar LinearFunctionTestReplace assertion git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143183 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/IndVarSimplify.cpp | 34 +++++++++++++++++--------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 874a135..dce7f87 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1558,8 +1558,7 @@ LinearFunctionTestReplace(Loop *L, } // For unit stride, IVLimit = Start + BECount with 2's complement overflow. - // So for, non-zero start compute the IVLimit here. - bool isPtrIV = false; + // So for non-zero start compute the IVLimit here. Type *CmpTy = CntTy; const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE->getSCEV(IndVar)); assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter"); @@ -1571,8 +1570,7 @@ LinearFunctionTestReplace(Loop *L, // Note that for without EnableIVRewrite, we never run SCEVExpander on a // pointer type, because we must preserve the existing GEPs. Instead we // directly generate a GEP later. - if (IVInit->getType()->isPointerTy()) { - isPtrIV = true; + if (CmpIndVar->getType()->isPointerTy()) { CmpTy = SE->getEffectiveSCEVType(IVInit->getType()); IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy); } @@ -1590,21 +1588,25 @@ LinearFunctionTestReplace(Loop *L, assert(SE->isLoopInvariant(IVLimit, L) && "Computed iteration count is not loop invariant!"); + assert( !IVLimit->getType()->isPointerTy() && + "Should not expand pointer types" ); Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI); // Create a gep for IVInit + IVLimit from on an existing pointer base.
- assert(isPtrIV == IndVar->getType()->isPointerTy() && - "IndVar type must match IVInit type"); - if (isPtrIV) { - Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); - assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter"); - assert(SE->getSizeOfExpr( - cast<PointerType>(IVStart->getType())->getElementType())->isOne() - && "unit stride pointer IV must be i8*"); - - Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); - ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit"); - Builder.SetInsertPoint(BI); + // + // In the presence of null pointer values, the SCEV expression may be an + // integer type while the IV is a pointer type. Ensure that the compare + // operands are always the same type by checking the IV type here. + if (CmpIndVar->getType()->isPointerTy()) { + Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); + assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter"); + assert(SE->getSizeOfExpr( + cast<PointerType>(IVStart->getType())->getElementType())->isOne() + && "unit stride pointer IV must be i8*"); + + Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); + ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit"); + Builder.SetInsertPoint(BI); } // Insert a new icmp_ne or icmp_eq instruction before the branch. -- cgit v1.1 From 6a7efcfc02ea5370fb0da66d750165a3ffe93ab7 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Fri, 28 Oct 2011 05:29:47 +0000 Subject: Always use the string pool, even when it makes the .o larger. This may help tools that read the debug info in the .o files by making the DIE sizes more consistent.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143186 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DIE.cpp | 18 ------------------ lib/CodeGen/AsmPrinter/DIE.h | 27 --------------------------- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 26 +++++++++++--------------- 3 files changed, 11 insertions(+), 60 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index fad3d0d..a2dc873 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -236,24 +236,6 @@ void DIEInteger::print(raw_ostream &O) { #endif //===----------------------------------------------------------------------===// -// DIEString Implementation -//===----------------------------------------------------------------------===// - -/// EmitValue - Emit string value. -/// -void DIEString::EmitValue(AsmPrinter *AP, unsigned Form) const { - AP->OutStreamer.EmitBytes(Str, /*addrspace*/0); - // Emit nul terminator. - AP->OutStreamer.EmitIntValue(0, 1, /*addrspace*/0); -} - -#ifndef NDEBUG -void DIEString::print(raw_ostream &O) { - O << "Str: \"" << Str << "\""; -} -#endif - -//===----------------------------------------------------------------------===// // DIELabel Implementation //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 12448af..265446c 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -275,33 +275,6 @@ namespace llvm { }; //===--------------------------------------------------------------------===// - /// DIEString - A string value DIE. This DIE keeps string reference only. - /// - class DIEString : public DIEValue { - const StringRef Str; - public: - explicit DIEString(const StringRef S) : DIEValue(isString), Str(S) {} - - /// EmitValue - Emit string value. 
- /// - virtual void EmitValue(AsmPrinter *AP, unsigned Form) const; - - /// SizeOf - Determine size of string value in bytes. - /// - virtual unsigned SizeOf(AsmPrinter *AP, unsigned /*Form*/) const { - return Str.size() + sizeof(char); // sizeof('\0'); - } - - // Implement isa/cast/dyncast. - static bool classof(const DIEString *) { return true; } - static bool classof(const DIEValue *S) { return S->getType() == isString; } - -#ifndef NDEBUG - virtual void print(raw_ostream &O); -#endif - }; - - //===--------------------------------------------------------------------===// /// DIELabel - A label expression DIE. // class DIELabel : public DIEValue { diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index a6a004d..f9f1642 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -67,23 +67,19 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, Die->addValue(Attribute, Form, Value); } -/// addString - Add a string attribute data and value. DIEString only -/// keeps string reference. +/// addString - Add a string attribute data and value. We always emit a +/// reference to the string pool instead of immediate strings so that DIEs have +/// more predictable sizes. 
void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { - if (String.size() > 3) { - MCSymbol *Symb = DD->getStringPoolEntry(String); - DIEValue *Value; - if (Asm->needsRelocationsForDwarfStringPool()) - Value = new (DIEValueAllocator) DIELabel(Symb); - else { - MCSymbol *StringPool = DD->getStringPool(); - Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); - } - Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); - } else { - DIEValue *Value = new (DIEValueAllocator) DIEString(String); - Die->addValue(Attribute, dwarf::DW_FORM_string, Value); + MCSymbol *Symb = DD->getStringPoolEntry(String); + DIEValue *Value; + if (Asm->needsRelocationsForDwarfStringPool()) + Value = new (DIEValueAllocator) DIELabel(Symb); + else { + MCSymbol *StringPool = DD->getStringPool(); + Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); } + Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); } /// addLabel - Add a Dwarf label attribute data and value. -- cgit v1.1 From 62c1d00dfd38996f381edae55e1028b8e52a1107 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Fri, 28 Oct 2011 09:55:57 +0000 Subject: Speculatively disable Dan's commits 143177 and 143179 to see if it fixes the dragonegg self-host (it looks like gcc is miscompiled). Original commit messages: Eliminate LegalizeOps' LegalizedNodes map and have it just call RAUW on every node as it legalizes them. This makes it easier to use hasOneUse() heuristics, since unneeded nodes can be removed from the DAG earlier. Make LegalizeOps visit the DAG in an operands-last order. It previously used operands-first, because LegalizeTypes has to go operands-first, and LegalizeTypes used to be part of LegalizeOps, but they're now split. The operands-last order is more natural for several legalization tasks. For example, it allows lowering code for nodes with floating-point or vector constants to see those constants directly instead of seeing the lowered form (often constant-pool loads). 
This makes some things somewhat more complicated today, though it ought to allow things to be simpler in the future. It also fixes some bugs exposed by Legalizing using RAUW aggressively. Remove the part of LegalizeOps that attempted to patch up invalid chain operands on libcalls generated by LegalizeTypes, since it doesn't work with the new LegalizeOps traversal order. Instead, define what LegalizeTypes is doing to be correct, and transfer the responsibility of keeping calls from having overlapping calling sequences into the scheduler. Teach the scheduler to model callseq_begin/end pairs as having a physical register definition/use to prevent calls from having overlapping calling sequences. This is also somewhat complicated, though there are ways it might be simplified in the future. This addresses rdar://9816668, rdar://10043614, rdar://8434668, and others. Please direct high-level questions about this patch to management. Delete #if 0 code accidentally left in. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143188 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 772 +++++++++++++++++-------- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 1 + lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 134 ----- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 - lib/Target/ARM/ARMISelLowering.cpp | 19 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 4 +- lib/Target/X86/X86ISelLowering.cpp | 194 +------ lib/Target/XCore/XCoreISelLowering.cpp | 9 - 8 files changed, 530 insertions(+), 619 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7425669..a8bccda 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -46,18 +46,37 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. 
/// namespace { -class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { +class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - /// LegalizePosition - The iterator for walking through the node list. - SelectionDAG::allnodes_iterator LegalizePosition; + // Libcall insertion helpers. - /// LegalizedNodes - The set of nodes which have already been legalized. - SmallPtrSet LegalizedNodes; + /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been + /// legalized. We use this to ensure that calls are properly serialized + /// against each other, including inserted libcalls. + SDValue LastCALLSEQ_END; - // Libcall insertion helpers. + /// IsLegalizingCall - This member is used *only* for purposes of providing + /// helpful assertions that a libcall isn't created while another call is + /// being legalized (which could lead to non-serialized call sequences). + bool IsLegalizingCall; + + /// LegalizedNodes - For nodes that are of legal width, and that have more + /// than one use, this map indicates what regularized operand to use. This + /// allows us to avoid legalizing the same thing more than once. + DenseMap LegalizedNodes; + + void AddLegalizedOperand(SDValue From, SDValue To) { + LegalizedNodes.insert(std::make_pair(From, To)); + // If someone requests legalization of the new node, return itself. + if (From != To) + LegalizedNodes.insert(std::make_pair(To, To)); + + // Transfer SDDbgValues. + DAG.TransferDbgValues(From, To); + } public: explicit SelectionDAGLegalize(SelectionDAG &DAG); @@ -65,8 +84,9 @@ public: void LegalizeDAG(); private: - /// LegalizeOp - Legalizes the given operation. - void LegalizeOp(SDNode *Node); + /// LegalizeOp - Return a legal replacement for the given operation, with + /// all legal operands. 
+ SDValue LegalizeOp(SDValue O); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -87,6 +107,9 @@ private: SDValue N1, SDValue N2, SmallVectorImpl &Mask) const; + bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, + SmallPtrSet &NodesLeadingTo); + void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); @@ -127,21 +150,10 @@ private: SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); - SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); - std::pair ExpandAtomic(SDNode *Node); - void ExpandNode(SDNode *Node); - void PromoteNode(SDNode *Node); - - // DAGUpdateListener implementation. - virtual void NodeDeleted(SDNode *N, SDNode *E) { - LegalizedNodes.erase(N); - if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) - ++LegalizePosition; - } - - virtual void NodeUpdated(SDNode *N) {} + void ExpandNode(SDNode *Node, SmallVectorImpl &Results); + void PromoteNode(SDNode *Node, SmallVectorImpl &Results); }; } @@ -183,37 +195,145 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) } void SelectionDAGLegalize::LegalizeDAG() { + LastCALLSEQ_END = DAG.getEntryNode(); + IsLegalizingCall = false; + + // The legalize process is inherently a bottom-up recursive process (users + // legalize their uses before themselves). Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) + LegalizeOp(SDValue(I, 0)); - // Visit all the nodes. We start in topological order, so that we see - // nodes with their original operands intact. 
Legalization can produce - // new nodes which may themselves need to be legalized. Iterate until all - // nodes have been legalized. - for (;;) { - bool AnyLegalized = false; - for (LegalizePosition = DAG.allnodes_end(); - LegalizePosition != DAG.allnodes_begin(); ) { - --LegalizePosition; - - SDNode *N = LegalizePosition; - if (LegalizedNodes.insert(N)) { - AnyLegalized = true; - LegalizeOp(N); - } + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. + DAG.RemoveDeadNodes(); +} + + +/// FindCallEndFromCallStart - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_END node that terminates the call sequence. +static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { + // Nested CALLSEQ_START/END constructs aren't yet legal, + // but we can DTRT and handle them correctly here. + if (Node->getOpcode() == ISD::CALLSEQ_START) + depth++; + else if (Node->getOpcode() == ISD::CALLSEQ_END) { + depth--; + if (depth == 0) + return Node; + } + if (Node->use_empty()) + return 0; // No CallSeqEnd + + // The chain is usually at the end. + SDValue TheChain(Node, Node->getNumValues()-1); + if (TheChain.getValueType() != MVT::Other) { + // Sometimes it's at the beginning. + TheChain = SDValue(Node, 0); + if (TheChain.getValueType() != MVT::Other) { + // Otherwise, hunt for it. + for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) + if (Node->getValueType(i) == MVT::Other) { + TheChain = SDValue(Node, i); + break; + } + + // Otherwise, we walked into a node without a chain. + if (TheChain.getValueType() != MVT::Other) + return 0; } - if (!AnyLegalized) + } + + for (SDNode::use_iterator UI = Node->use_begin(), + E = Node->use_end(); UI != E; ++UI) { + + // Make sure to only follow users of our token chain. 
+ SDNode *User = *UI; + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) + if (User->getOperand(i) == TheChain) + if (SDNode *Result = FindCallEndFromCallStart(User, depth)) + return Result; + } + return 0; +} + +/// FindCallStartFromCallEnd - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_START node that initiates the call sequence. +static SDNode *FindCallStartFromCallEnd(SDNode *Node) { + int nested = 0; + assert(Node && "Didn't find callseq_start for a call??"); + while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { + Node = Node->getOperand(0).getNode(); + assert(Node->getOperand(0).getValueType() == MVT::Other && + "Node doesn't have a token chain argument!"); + switch (Node->getOpcode()) { + default: break; + case ISD::CALLSEQ_START: + if (!nested) + return Node; + nested--; + break; + case ISD::CALLSEQ_END: + nested++; + break; + } + } + return 0; +} + +/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to +/// see if any uses can reach Dest. If no dest operands can get to dest, +/// legalize them, legalize ourself, and return false, otherwise, return true. +/// +/// Keep track of the nodes we fine that actually do lead to Dest in +/// NodesLeadingTo. This avoids retraversing them exponential number of times. +/// +bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, + SmallPtrSet &NodesLeadingTo) { + if (N == Dest) return true; // N certainly leads to Dest :) + + // If we've already processed this node and it does lead to Dest, there is no + // need to reprocess it. + if (NodesLeadingTo.count(N)) return true; + // If the first result of this node has been already legalized, then it cannot + // reach N. + if (LegalizedNodes.count(SDValue(N, 0))) return false; + + // Okay, this node has not already been legalized. Check and legalize all + // operands. If none lead to Dest, then we can legalize this node. 
+ bool OperandsLeadToDest = false; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + OperandsLeadToDest |= // If an operand leads to Dest, so do we. + LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, + NodesLeadingTo); + + if (OperandsLeadToDest) { + NodesLeadingTo.insert(N); + return true; } - // Remove dead nodes now. - DAG.RemoveDeadNodes(); + // Okay, this node looks safe, legalize it and return false. + LegalizeOp(SDValue(N, 0)); + return false; } /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or /// a load from the constant pool. -SDValue -SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { +static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, + SelectionDAG &DAG, const TargetLowering &TLI) { bool Extend = false; DebugLoc dl = CFP->getDebugLoc(); @@ -249,25 +369,20 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); - if (Extend) { - SDValue Result = - DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); - return Result; - } - SDValue Result = - DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, - Alignment); - return Result; + if (Extend) + return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, + DAG.getEntryNode(), + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); + return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), false, false, + Alignment); } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. 
-static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, - const TargetLowering &TLI, - SelectionDAG::DAGUpdateListener *DUL) { +static +SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -282,10 +397,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); - Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), Alignment); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - return; + return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); } // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -345,11 +458,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. 
- SDValue Result = - DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - return; + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -378,16 +488,13 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); - SDValue Result = - DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. -static void -ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, - const TargetLowering &TLI, - SDValue &ValResult, SDValue &ChainResult) { +static +SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI) { SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -405,9 +512,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); - ValResult = Result; - ChainResult = Chain; - return; + SDValue Ops[] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, dl); } // Copy the value to a (aligned) stack slot using (unaligned) integer @@ -466,9 +572,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, MachinePointerInfo(), LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. 
- ValResult = Load; - ChainResult = TF; - return; + SDValue Ops[] = { Load, TF }; + return DAG.getMergeValues(Ops, 2, dl); } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -521,8 +626,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - ValResult = Result; - ChainResult = TF; + SDValue Ops[] = { Result, TF }; + return DAG.getMergeValues(Ops, 2, dl); } /// PerformInsertVectorEltInMemory - Some target cannot handle a variable @@ -658,10 +763,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. -void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { - if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. - return; +SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { + if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. + return Op; + SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -676,7 +782,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + DenseMap::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + SDValue Result = Op; bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode @@ -770,6 +882,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::BUILD_VECTOR: + // A weird case: legalization for BUILD_VECTOR never legalizes the + // operands! 
+ // FIXME: This really sucks... changing it isn't semantically incorrect, + // but it massively pessimizes the code for floating-point BUILD_VECTORs + // because ConstantFP operands get legalized into constant pool loads + // before the BUILD_VECTOR code can see them. It doesn't usually bite, + // though, because BUILD_VECTORS usually get lowered into other nodes + // which get legalized properly. + SimpleFinishLegalizing = false; + break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -780,11 +903,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } if (SimpleFinishLegalizing) { - SmallVector Ops; + SmallVector Ops, ResultVals; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - Ops.push_back(Node->getOperand(i)); + Ops.push_back(LegalizeOp(Node->getOperand(i))); switch (Node->getOpcode()) { default: break; + case ISD::BR: + case ISD::BRIND: + case ISD::BR_JT: + case ISD::BR_CC: + case ISD::BRCOND: + // Branches tweak the chain to include LastCALLSEQ_END + Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], + LastCALLSEQ_END); + Ops[0] = LegalizeOp(Ops[0]); + LastCALLSEQ_END = DAG.getEntryNode(); + break; case ISD::SHL: case ISD::SRL: case ISD::SRA: @@ -792,66 +926,57 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[1].getValueType().isVector()) { - SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]); - HandleSDNode Handle(SAO); - LegalizeOp(SAO.getNode()); - Ops[1] = Handle.getValue(); - } + if (!Ops[1].getValueType().isVector()) + Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[1])); break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. 
- if (!Ops[2].getValueType().isVector()) { - SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]); - HandleSDNode Handle(SAO); - LegalizeOp(SAO.getNode()); - Ops[2] = Handle.getValue(); - } + if (!Ops[2].getValueType().isVector()) + Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[2])); break; } - SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); - if (NewNode != Node) { - DAG.ReplaceAllUsesWith(Node, NewNode, this); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); - DAG.RemoveDeadNode(Node, this); - Node = NewNode; - } + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), + Ops.size()), 0); switch (Action) { case TargetLowering::Legal: - return; + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + ResultVals.push_back(Result.getValue(i)); + break; case TargetLowering::Custom: // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
- Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); + Tmp1 = TLI.LowerOperation(Result, DAG); if (Tmp1.getNode()) { - SmallVector ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) ResultVals.push_back(Tmp1); else ResultVals.push_back(Tmp1.getValue(i)); } - if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { - DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); - DAG.RemoveDeadNode(Node, this); - } - return; + break; } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Node); - return; + ExpandNode(Result.getNode(), ResultVals); + break; case TargetLowering::Promote: - PromoteNode(Node); - return; + PromoteNode(Result.getNode(), ResultVals); + break; + } + if (!ResultVals.empty()) { + for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { + if (ResultVals[i] != SDValue(Node, i)) + ResultVals[i] = LegalizeOp(ResultVals[i]); + AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); + } + return ResultVals[Op.getResNo()]; } } @@ -864,20 +989,155 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #endif assert(0 && "Do not know how to legalize this operator!"); - case ISD::CALLSEQ_START: - case ISD::CALLSEQ_END: + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. 
+ EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Scalars[0], Scalars.size()); + break; + } + + case ISD::BUILD_VECTOR: + switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Result = Tmp3; + break; + } + // FALLTHROUGH + case TargetLowering::Expand: + Result = ExpandBUILD_VECTOR(Result.getNode()); + break; + } break; + case ISD::CALLSEQ_START: { + SDNode *CallEnd = FindCallEndFromCallStart(Node); + + // Recursively Legalize all of the inputs of the call end that do not lead + // to this call start. This ensures that any libcalls that need be inserted + // are inserted *before* the CALLSEQ_START. + {SmallPtrSet NodesLeadingTo; + for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) + LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, + NodesLeadingTo); + } + + // Now that we have legalized all of the inputs (which may have inserted + // libcalls), create the new CALLSEQ_START node. + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + + // Merge in the last call to ensure that this call starts after the last + // call ended. 
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { + Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Tmp1, LastCALLSEQ_END); + Tmp1 = LegalizeOp(Tmp1); + } + + // Do not try to legalize the target-specific arguments (#1+). + if (Tmp1 != Node->getOperand(0)) { + SmallVector Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], + Ops.size()), Result.getResNo()); + } + + // Remember that the CALLSEQ_START is legalized. + AddLegalizedOperand(Op.getValue(0), Result); + if (Node->getNumValues() == 2) // If this has a flag result, remember it. + AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); + + // Now that the callseq_start and all of the non-call nodes above this call + // sequence have been legalized, legalize the call itself. During this + // process, no libcalls can/will be inserted, guaranteeing that no calls + // can overlap. + assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); + // Note that we are selecting this call! + LastCALLSEQ_END = SDValue(CallEnd, 0); + IsLegalizingCall = true; + + // Legalize the call, starting from the CALLSEQ_END. + LegalizeOp(LastCALLSEQ_END); + assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); + return Result; + } + case ISD::CALLSEQ_END: + // If the CALLSEQ_START node hasn't been legalized first, legalize it. This + // will cause this node to be legalized as well as handling libcalls right. + if (LastCALLSEQ_END.getNode() != Node) { + LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); + DenseMap::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + // Otherwise, the call start has been legalized and everything is going + // according to plan. Just legalize ourselves normally here. + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. 
+ // Do not try to legalize the target-specific arguments (#1+), except for + // an optional flag input. + if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ + if (Tmp1 != Node->getOperand(0)) { + SmallVector Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); + } + } else { + Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); + if (Tmp1 != Node->getOperand(0) || + Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { + SmallVector Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Ops.back() = Tmp2; + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); + } + } + assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); + // This finishes up call legalization. + IsLegalizingCall = false; + + // If the CALLSEQ_END node has a flag, remember that we legalized it. + AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); + if (Node->getNumValues() == 2) + AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); + return Result.getValue(Op.getResNo()); case ISD::LOAD: { LoadSDNode *LD = cast(Node); - Tmp1 = LD->getChain(); // Legalize the chain. - Tmp2 = LD->getBasePtr(); // Legalize the base pointer. + Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. 
ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp2, LD->getOffset()); - Tmp3 = SDValue(Node, 0); - Tmp4 = SDValue(Node, 1); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); + Tmp3 = Result.getValue(0); + Tmp4 = Result.getValue(1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: assert(0 && "This action is not supported yet!"); @@ -888,16 +1148,20 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast(Node), - DAG, TLI, Tmp3, Tmp4); + Result = ExpandUnalignedLoad(cast(Result.getNode()), + DAG, TLI); + Tmp3 = Result.getOperand(0); + Tmp4 = Result.getOperand(1); + Tmp3 = LegalizeOp(Tmp3); + Tmp4 = LegalizeOp(Tmp4); } } break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(Tmp3, DAG); if (Tmp1.getNode()) { - Tmp3 = Tmp1; - Tmp4 = Tmp1.getValue(1); + Tmp3 = LegalizeOp(Tmp1); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); } break; case TargetLowering::Promote: { @@ -909,16 +1173,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); - Tmp4 = Tmp1.getValue(1); + Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); break; } } // Since loads produce two values, make sure to remember that we // legalized both of them. - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); - return; + AddLegalizedOperand(SDValue(Node, 0), Tmp3); + AddLegalizedOperand(SDValue(Node, 1), Tmp4); + return Op.getResNo() ? 
Tmp4 : Tmp3; } EVT SrcVT = LD->getMemoryVT(); @@ -949,10 +1213,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - SDValue Result = - DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -967,8 +1230,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); - Tmp1 = Result; - Tmp2 = Ch; + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); @@ -1011,7 +1274,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. - Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } else { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 @@ -1041,10 +1304,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. 
- Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } - Tmp2 = Ch; + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1052,16 +1316,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Node = DAG.UpdateNodeOperands(Node, - Tmp1, Tmp2, LD->getOffset()); - Tmp1 = SDValue(Node, 0); - Tmp2 = SDValue(Node, 1); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); if (isCustom) { - Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); + Tmp3 = TLI.LowerOperation(Result, DAG); if (Tmp3.getNode()) { - Tmp1 = Tmp3; - Tmp2 = Tmp3.getValue(1); + Tmp1 = LegalizeOp(Tmp3); + Tmp2 = LegalizeOp(Tmp3.getValue(1)); } } else { // If this is an unaligned load and the target doesn't support it, @@ -1072,8 +1337,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast(Node), - DAG, TLI, Tmp1, Tmp2); + Result = ExpandUnalignedLoad(cast(Result.getNode()), + DAG, TLI); + Tmp1 = Result.getOperand(0); + Tmp2 = Result.getOperand(1); + Tmp1 = LegalizeOp(Tmp1); + Tmp2 = LegalizeOp(Tmp2); } } } @@ -1094,8 +1363,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; default: llvm_unreachable("Unexpected extend load type!"); } - Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp2 = Load.getValue(1); + Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp1 = LegalizeOp(Result); // Relegalize new nodes. 
+ Tmp2 = LegalizeOp(Load.getValue(1)); break; } @@ -1110,10 +1380,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. - SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1121,37 +1391,38 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Result, DAG.getValueType(SrcVT)); else ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = ValRes; - Tmp2 = Result.getValue(1); + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. break; } } // Since loads produce two values, make sure to remember that we legalized // both of them. - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); - break; + AddLegalizedOperand(SDValue(Node, 0), Tmp1); + AddLegalizedOperand(SDValue(Node, 1), Tmp2); + return Op.getResNo() ? Tmp2 : Tmp1; } case ISD::STORE: { StoreSDNode *ST = cast(Node); - Tmp1 = ST->getChain(); - Tmp2 = ST->getBasePtr(); + Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. 
unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - DAG.ReplaceAllUsesWith(ST, OptStore, this); + Result = SDValue(OptStore, 0); break; } { - Tmp3 = ST->getValue(); - Node = DAG.UpdateNodeOperands(Node, - Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Tmp3 = LegalizeOp(ST->getValue()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { @@ -1163,31 +1434,27 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast(Node), - DAG, TLI, this); + Result = ExpandUnalignedStore(cast(Result.getNode()), + DAG, TLI); } break; case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Tmp1, this); + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.getNode()) Result = Tmp1; break; - case TargetLowering::Promote: { + case TargetLowering::Promote: assert(VT.isVector() && "Unknown legal promote case!"); Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - SDValue Result = - DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); break; } - } break; } } else { - Tmp3 = ST->getValue(); + Tmp3 = LegalizeOp(ST->getValue()); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -1199,10 +1466,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode 
*Node) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - SDValue Result = - DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1256,13 +1521,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } // The order of the stores doesn't matter. - SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) - Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1273,13 +1539,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast(Node), DAG, TLI, this); + Result = ExpandUnalignedStore(cast(Result.getNode()), + DAG, TLI); } break; case TargetLowering::Custom: - DAG.ReplaceAllUsesWith(SDValue(Node, 0), - TLI.LowerOperation(SDValue(Node, 0), DAG), - this); + Result = TLI.LowerOperation(Result, DAG); break; case TargetLowering::Expand: assert(!StVT.isVector() && @@ -1288,10 +1553,8 @@ void 
SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // TRUNCSTORE:i16 i32 -> STORE i16 assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - SDValue Result = - DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); break; } } @@ -1299,6 +1562,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; } } + assert(Result.getValueType() == Op.getValueType() && + "Bad legalization!"); + + // Make sure that the generated code is itself legal. + if (Result != Op) + Result = LegalizeOp(Result); + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { @@ -1737,6 +2011,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -1755,6 +2030,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); + // Splice the libcall in wherever FindInputOutputChains tells us to. 
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); // isTailCall may be true since the callee does not reference caller stack @@ -1770,6 +2046,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); return CallInfo.first; } @@ -1799,6 +2079,11 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + return CallInfo.first; } @@ -1808,6 +2093,7 @@ std::pair SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -1824,6 +2110,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); + // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, @@ -1831,6 +2118,10 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); + // Legalize the call sequence, starting with the chain. 
This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); return CallInfo; } @@ -1956,14 +2247,20 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); + // Splice the libcall in wherever FindInputOutputChains tells us to. DebugLoc dl = Node->getDebugLoc(); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, + SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, MachinePointerInfo(), false, false, 0); Results.push_back(CallInfo.first); Results.push_back(Rem); @@ -2155,13 +2452,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, MachinePointerInfo::getConstantPool(), false, false, Alignment); else { - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment); - HandleSDNode Handle(Load); - LegalizeOp(Load.getNode()); - FudgeInReg = Handle.getValue(); + FudgeInReg = + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment)); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2485,8 +2780,8 @@ std::pair SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { return ExpandChainLibCall(LC, 
Node, false); } -void SelectionDAGLegalize::ExpandNode(SDNode *Node) { - SmallVector Results; +void SelectionDAGLegalize::ExpandNode(SDNode *Node, + SmallVectorImpl &Results) { DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3, Tmp4; switch (Node->getOpcode()) { @@ -2934,8 +3229,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ConstantFPSDNode *CFP = cast(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. - if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) - Results.push_back(ExpandConstantFP(CFP, true)); + if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + Results.push_back(SDValue(Node, 0)); + else + Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI)); break; } case ISD::EHSELECTION: { @@ -3181,10 +3478,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); - // Ret is a node with an illegal type. Because such things are not - // generally permitted during this phase of legalization, delete the - // node. The above EXTRACT_ELEMENT nodes should have been folded. - DAG.DeleteNode(Ret.getNode()); } if (isSigned) { @@ -3325,6 +3618,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); + LastCALLSEQ_END = DAG.getEntryNode(); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3334,35 +3628,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::BUILD_VECTOR: - Results.push_back(ExpandBUILD_VECTOR(Node)); - break; - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: { - // Scalarize vector SRA/SRL/SHL. 
- EVT VT = Node->getValueType(0); - assert(VT.isVector() && "Unable to legalize non-vector shift"); - assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); - unsigned NumElem = VT.getVectorNumElements(); - - SmallVector Scalars; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); - SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); - Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, - VT.getScalarType(), Ex, Sh)); - } - SDValue Result = - DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - break; - } case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3373,16 +3638,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + Results.push_back(SDValue(Node, i)); break; } - - // Replace the original node with the legalized result. - if (!Results.empty()) - DAG.ReplaceAllUsesWith(Node, Results.data(), this); } - -void SelectionDAGLegalize::PromoteNode(SDNode *Node) { - SmallVector Results; +void SelectionDAGLegalize::PromoteNode(SDNode *Node, + SmallVectorImpl &Results) { EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || @@ -3510,10 +3772,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; } } - - // Replace the original node with the legalized result. - if (!Results.empty()) - DAG.ReplaceAllUsesWith(Node, Results.data(), this); } // SelectionDAG::Legalize - This is the entry point for the file. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 7ed1b98..a4bb577 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1084,6 +1084,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); + // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index fd768b1..a1abdb4 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -386,90 +386,6 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { } } -/// IsChainDependent - Test if Outer is reachable from Inner through -/// chain dependencies. -static bool IsChainDependent(SDNode *Outer, SDNode *Inner) { - SDNode *N = Outer; - for (;;) { - if (N == Inner) - return true; - if (N->getOpcode() == ISD::TokenFactor) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (IsChainDependent(N->getOperand(i).getNode(), Inner)) - return true; - return false; - } - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i).getValueType() == MVT::Other) { - N = N->getOperand(i).getNode(); - goto found_chain_operand; - } - return false; - found_chain_operand:; - if (N->getOpcode() == ISD::EntryToken) - return false; - } -} - -/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate -/// the corresponding (lowered) CALLSEQ_BEGIN node. -/// -/// NestLevel and MaxNested are used in recursion to indcate the current level -/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum -/// level seen so far. 
-/// -/// TODO: It would be better to give CALLSEQ_END an explicit operand to point -/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. -static SDNode * -FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, - const TargetInstrInfo *TII) { - for (;;) { - // For a TokenFactor, examine each operand. There may be multiple ways - // to get to the CALLSEQ_BEGIN, but we need to find the path with the - // most nesting in order to ensure that we find the corresponding match. - if (N->getOpcode() == ISD::TokenFactor) { - SDNode *Best = 0; - unsigned BestMaxNest = MaxNest; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - unsigned MyNestLevel = NestLevel; - unsigned MyMaxNest = MaxNest; - if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(), - MyNestLevel, MyMaxNest, TII)) - if (!Best || (MyMaxNest > BestMaxNest)) { - Best = New; - BestMaxNest = MyMaxNest; - } - } - assert(Best); - MaxNest = BestMaxNest; - return Best; - } - // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. - if (N->isMachineOpcode()) { - if (N->getMachineOpcode() == - (unsigned)TII->getCallFrameDestroyOpcode()) { - ++NestLevel; - MaxNest = std::max(MaxNest, NestLevel); - } else if (N->getMachineOpcode() == - (unsigned)TII->getCallFrameSetupOpcode()) { - --NestLevel; - if (NestLevel == 0) - return N; - } - } - // Otherwise, find the chain and continue climbing. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i).getValueType() == MVT::Other) { - N = N->getOperand(i).getNode(); - goto found_chain_operand; - } - return 0; - found_chain_operand:; - if (N->getOpcode() == ISD::EntryToken) - return 0; - } -} - /// Call ReleasePred for each predecessor, then update register live def/gen. /// Always update LiveRegDefs for a register dependence even if the current SU /// also defines the register. 
This effectively create one large live range @@ -507,26 +423,6 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } } - - // If we're scheduling a lowered CALLSEQ_END, find the corresponding CALLSEQ_BEGIN. - // Inject an artificial physical register dependence between these nodes, to - // prevent other calls from being interscheduled with them. - const TargetLowering *TLI = TM.getTargetLowering(); - unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - if (!LiveRegDefs[SP]) - for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) - if (Node->isMachineOpcode() && - Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { - unsigned NestLevel = 0; - unsigned MaxNest = 0; - SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); - - SUnit *Def = &SUnits[N->getNodeId()]; - ++NumLiveRegs; - LiveRegDefs[SP] = Def; - LiveRegGens[SP] = SU; - break; - } } /// Check to see if any of the pending instructions are ready to issue. If @@ -709,22 +605,6 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = NULL; } } - // Release the special call resource dependence, if this is the beginning - // of a call. - const TargetLowering *TLI = TM.getTargetLowering(); - unsigned SP = TLI->getStackPointerRegisterToSaveRestore(); - if (LiveRegDefs[SP] == SU) - for (const SDNode *SUNode = SU->getNode(); SUNode; - SUNode = SUNode->getGluedNode()) { - if (SUNode->isMachineOpcode() && - SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode() && - LiveRegDefs[SP] == SU) { - assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); - --NumLiveRegs; - LiveRegDefs[SP] = NULL; - LiveRegGens[SP] = NULL; - } - } resetVRegCycle(SU); @@ -1203,20 +1083,6 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { if (!Node->isMachineOpcode()) continue; - // If we're in the middle of scheduling a call, don't begin scheduling - // another call. 
- if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode() || - Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) - if (LiveRegDefs[i]) { - SDNode *Gen = LiveRegGens[i]->getNode(); - while (SDNode *Glued = Gen->getGluedNode()) - Gen = Glued; - if (!IsChainDependent(Gen, Node) && RegAdded.insert(i)) - LRegs.push_back(i); - } - continue; - } const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 010a740..07d2db6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5290,10 +5290,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } - - // If we just RAUW'd the root, take note. - if (FromN == getRoot()) - setRoot(To); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5339,10 +5335,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } - - // If we just RAUW'd the root, take note. - if (From == getRoot().getNode()) - setRoot(SDValue(To, getRoot().getResNo())); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5381,10 +5373,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } - - // If we just RAUW'd the root, take note. 
- if (From == getRoot().getNode()) - setRoot(SDValue(To[getRoot().getResNo()])); } /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving @@ -5443,10 +5431,6 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } - - // If we just RAUW'd the root, take note. - if (From == getRoot()) - setRoot(To); } namespace { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index bfc1690..31e522d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1353,10 +1353,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, MVT::i32); + // TODO: Disable AlwaysInline when it becomes possible + // to emit a nested call sequence. MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, - /*AlwaysInline=*/false, + /*AlwaysInline=*/true, MachinePointerInfo(0), MachinePointerInfo(0))); @@ -4348,24 +4350,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If this is undef splat, generate it via "just" vdup, if possible. if (Lane == -1) Lane = 0; - // Test if V1 is a SCALAR_TO_VECTOR. if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } - // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR - // (and probably will turn into a SCALAR_TO_VECTOR once legalization - // reaches it). 
- if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && - !isa(V1.getOperand(0))) { - bool IsScalarToVector = true; - for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) - if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { - IsScalarToVector = false; - break; - } - if (IsScalarToVector) - return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); - } return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, MVT::i32)); } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 3d75de0..02b0ff2 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2114,9 +2114,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { HasNoSignedComparisonUses(Node)) // Look past the truncate if CMP is the only use of it. N0 = N0.getOperand(0); - if ((N0.getNode()->getOpcode() == ISD::AND || - (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) && - N0.getNode()->hasOneUse() && + if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && N0.getValueType() != MVT::i8 && X86::isZeroNode(N1)) { ConstantSDNode *C = dyn_cast(N0.getNode()->getOperand(1)); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 927a307..2ec0814 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4220,29 +4220,6 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { return true; } -// Test whether the given value is a vector value which will be legalized -// into a load. -static bool WillBeConstantPoolLoad(SDNode *N) { - if (N->getOpcode() != ISD::BUILD_VECTOR) - return false; - - // Check for any non-constant elements. 
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - switch (N->getOperand(i).getNode()->getOpcode()) { - case ISD::UNDEF: - case ISD::ConstantFP: - case ISD::Constant: - break; - default: - return false; - } - - // Vectors of all-zeros and all-ones are materialized with special - // instructions rather than being loaded. - return !ISD::isBuildVectorAllZeros(N) && - !ISD::isBuildVectorAllOnes(N); -} - /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to /// match movlp{s|d}. The lower half elements should come from lower half of /// V1 (and in order), and the upper half elements should come from the upper @@ -4258,7 +4235,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, return false; // Is V2 is a vector load, don't do this transformation. We will try to use // load folding shufps op. - if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2)) + if (ISD::isNON_EXTLoad(V2)) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -6374,8 +6351,6 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op)) CanFoldLoad = true; - ShuffleVectorSDNode *SVOp = cast(Op); - // Both of them can't be memory operations though. if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) CanFoldLoad = false; @@ -6385,11 +6360,10 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); if (NumElems == 4) - // If we don't care about the second element, procede to use movss. 
- if (SVOp->getMaskElt(1) != -1) - return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); + return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); } + ShuffleVectorSDNode *SVOp = cast(Op); // movl and movlp will both match v2i64, but v2i64 is never matched by // movl earlier because we make it strict to avoid messing with the movlp load // folding logic (see the code above getMOVLP call). Match it here then, @@ -8707,9 +8681,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. - unsigned CondOpcode = Cond.getOpcode(); - if (CondOpcode == X86ISD::SETCC || - CondOpcode == X86ISD::SETCC_CARRY) { + if (Cond.getOpcode() == X86ISD::SETCC || + Cond.getOpcode() == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -8726,39 +8699,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = Cmp; addTest = false; } - } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || - CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || - ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && - Cond.getOperand(0).getValueType() != MVT::i8)) { - SDValue LHS = Cond.getOperand(0); - SDValue RHS = Cond.getOperand(1); - unsigned X86Opcode; - unsigned X86Cond; - SDVTList VTs; - switch (CondOpcode) { - case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; - case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; - case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; - case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; - case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; - case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; - default: llvm_unreachable("unexpected overflowing operator"); - } - if (CondOpcode == ISD::UMULO) - 
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), - MVT::i32); - else - VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); - - SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS); - - if (CondOpcode == ISD::UMULO) - Cond = X86Op.getValue(2); - else - Cond = X86Op.getValue(1); - - CC = DAG.getConstant(X86Cond, MVT::i8); - addTest = false; } if (addTest) { @@ -8840,27 +8780,11 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); SDValue CC; - bool Inverted = false; if (Cond.getOpcode() == ISD::SETCC) { - // Check for setcc([su]{add,sub,mul}o == 0). - if (cast(Cond.getOperand(2))->get() == ISD::SETEQ && - isa(Cond.getOperand(1)) && - cast(Cond.getOperand(1))->isNullValue() && - Cond.getOperand(0).getResNo() == 1 && - (Cond.getOperand(0).getOpcode() == ISD::SADDO || - Cond.getOperand(0).getOpcode() == ISD::UADDO || - Cond.getOperand(0).getOpcode() == ISD::SSUBO || - Cond.getOperand(0).getOpcode() == ISD::USUBO || - Cond.getOperand(0).getOpcode() == ISD::SMULO || - Cond.getOperand(0).getOpcode() == ISD::UMULO)) { - Inverted = true; - Cond = Cond.getOperand(0); - } else { - SDValue NewCond = LowerSETCC(Cond, DAG); - if (NewCond.getNode()) - Cond = NewCond; - } + SDValue NewCond = LowerSETCC(Cond, DAG); + if (NewCond.getNode()) + Cond = NewCond; } #if 0 // FIXME: LowerXALUO doesn't handle these!! @@ -8881,9 +8805,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. 
- unsigned CondOpcode = Cond.getOpcode(); - if (CondOpcode == X86ISD::SETCC || - CondOpcode == X86ISD::SETCC_CARRY) { + if (Cond.getOpcode() == X86ISD::SETCC || + Cond.getOpcode() == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -8904,43 +8827,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { break; } } - } - CondOpcode = Cond.getOpcode(); - if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || - CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || - ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && - Cond.getOperand(0).getValueType() != MVT::i8)) { - SDValue LHS = Cond.getOperand(0); - SDValue RHS = Cond.getOperand(1); - unsigned X86Opcode; - unsigned X86Cond; - SDVTList VTs; - switch (CondOpcode) { - case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; - case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; - case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; - case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; - case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; - case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; - default: llvm_unreachable("unexpected overflowing operator"); - } - if (Inverted) - X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond); - if (CondOpcode == ISD::UMULO) - VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), - MVT::i32); - else - VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); - - SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS); - - if (CondOpcode == ISD::UMULO) - Cond = X86Op.getValue(2); - else - Cond = X86Op.getValue(1); - - CC = DAG.getConstant(X86Cond, MVT::i8); - addTest = false; } else { unsigned CondOpc; if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) { @@ -9004,66 +8890,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { CC = 
DAG.getConstant(CCode, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); addTest = false; - } else if (Cond.getOpcode() == ISD::SETCC && - cast(Cond.getOperand(2))->get() == ISD::SETOEQ) { - // For FCMP_OEQ, we can emit - // two branches instead of an explicit AND instruction with a - // separate test. However, we only do this if this block doesn't - // have a fall-through edge, because this requires an explicit - // jmp when the condition is false. - if (Op.getNode()->hasOneUse()) { - SDNode *User = *Op.getNode()->use_begin(); - // Look for an unconditional branch following this conditional branch. - // We need this because we need to reverse the successors in order - // to implement FCMP_OEQ. - if (User->getOpcode() == ISD::BR) { - SDValue FalseBB = User->getOperand(1); - SDNode *NewBR = - DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); - assert(NewBR == User); - (void)NewBR; - Dest = FalseBB; - - SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, - Cond.getOperand(0), Cond.getOperand(1)); - CC = DAG.getConstant(X86::COND_NE, MVT::i8); - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); - CC = DAG.getConstant(X86::COND_P, MVT::i8); - Cond = Cmp; - addTest = false; - } - } - } else if (Cond.getOpcode() == ISD::SETCC && - cast(Cond.getOperand(2))->get() == ISD::SETUNE) { - // For FCMP_UNE, we can emit - // two branches instead of an explicit AND instruction with a - // separate test. However, we only do this if this block doesn't - // have a fall-through edge, because this requires an explicit - // jmp when the condition is false. - if (Op.getNode()->hasOneUse()) { - SDNode *User = *Op.getNode()->use_begin(); - // Look for an unconditional branch following this conditional branch. - // We need this because we need to reverse the successors in order - // to implement FCMP_UNE. 
- if (User->getOpcode() == ISD::BR) { - SDValue FalseBB = User->getOperand(1); - SDNode *NewBR = - DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); - assert(NewBR == User); - (void)NewBR; - - SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, - Cond.getOperand(0), Cond.getOperand(1)); - CC = DAG.getConstant(X86::COND_NE, MVT::i8); - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); - CC = DAG.getConstant(X86::COND_NP, MVT::i8); - Cond = Cmp; - addTest = false; - Dest = FalseBB; - } - } } } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 4b74f96..2afe0e3 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -386,15 +386,6 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, Offset = off; return true; } - // Check for an aligned global variable. - if (GlobalAddressSDNode *GA = dyn_cast(*Root)) { - const GlobalValue *GV = GA->getGlobal(); - if (GA->getOffset() == 0 && GV->getAlignment() >= 4) { - AlignedBase = Base; - Offset = off; - return true; - } - } return false; } -- cgit v1.1 From 91bbe237167bf84ce41d01eff3c028ff2b10be26 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 28 Oct 2011 11:14:31 +0000 Subject: Use BranchProbability compare operators. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143190 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBranchProbabilityInfo.cpp | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index c13fa6b..9874037 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -55,12 +55,8 @@ MachineBranchProbabilityInfo::getEdgeWeight(MachineBasicBlock *Src, bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { // Hot probability is at least 4/5 = 80% - uint32_t Weight = getEdgeWeight(Src, Dst); - uint32_t Sum = getSumForBlock(Src); - - // FIXME: Implement BranchProbability::compare then change this code to - // compare this BranchProbability against a static "hot" BranchProbability. - return (uint64_t)Weight * 5 > (uint64_t)Sum * 4; + // FIXME: Compare against a static "hot" BranchProbability. + return getEdgeProbability(Src, Dst) > BranchProbability(4, 5); } MachineBasicBlock * @@ -84,8 +80,7 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { } } - // FIXME: Use BranchProbability::compare. - if ((uint64_t)MaxWeight * 5 >= (uint64_t)Sum * 4) + if (BranchProbability(MaxWeight, Sum) >= BranchProbability(4, 5)) return MaxSucc; return 0; -- cgit v1.1 From c3e48c38bf87ad081904eccf16e4ddd99c36d070 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Fri, 28 Oct 2011 14:12:22 +0000 Subject: Dwarf: [PR11022] Fix emitting DW_AT_const_value(>i64), to be host-endian-neutral. Don't assume APInt::getRawData() would hold target-aware endianness nor host-compliant endianness. rawdata[0] holds most lower i64, even on big endian host. FIXME: Add a testcase for big endian target. FIXME: Ditto on CompileUnit::addConstantFPValue() ? 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143194 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index f9f1642..95f1f92 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -534,18 +534,20 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, // Get the raw data form of the large APInt. const APInt Val = CI->getValue(); - const char *Ptr = (const char*)Val.getRawData(); + const uint64_t *Ptr64 = Val.getRawData(); int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. bool LittleEndian = Asm->getTargetData().isLittleEndian(); - int Incr = (LittleEndian ? 1 : -1); - int Start = (LittleEndian ? 0 : NumBytes - 1); - int Stop = (LittleEndian ? NumBytes : -1); // Output the constant to DWARF one byte at a time. - for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & Ptr[Start]); + for (int i = 0; i < NumBytes; i++) { + uint8_t c; + if (LittleEndian) + c = Ptr64[i / 8] >> (8 * (i & 7)); + else + c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)); + addUInt(Block, 0, dwarf::DW_FORM_data1, c); + } addBlock(Die, dwarf::DW_AT_const_value, 0, Block); return true; -- cgit v1.1 From 5d0492cfc4521ccb13b4961227b279991a17c393 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 28 Oct 2011 16:57:07 +0000 Subject: Thumb2 ADD/SUB instructions encoding selection outside IT block. Outside an IT block, "add r3, #2" should select a 32-bit wide encoding rather than generating an error indicating the 16-bit encoding is only legal in an IT block (outside, the 'S' suffix is required for the 16-bit encoding).
rdar://10348481 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143201 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 7d98182..9a48b18 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3861,6 +3861,16 @@ def : t2InstAlias<"add${s}${p} $Rd, $Rn, $Rm", def : t2InstAlias<"add${s}${p} $Rd, $Rn, $ShiftedRm", (t2ADDrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; +// ... and with the destination and source register combined. +def : t2InstAlias<"add${s}${p} $Rdn, $imm", + (t2ADDri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${p} $Rdn, $imm", + (t2ADDri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"add${s}${p} $Rdn, $Rm", + (t2ADDrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"add${s}${p} $Rdn, $ShiftedRm", + (t2ADDrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; // Aliases for SUB without the ".w" optional width specifier. def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $imm", @@ -3872,6 +3882,17 @@ def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $Rm", def : t2InstAlias<"sub${s}${p} $Rd, $Rn, $ShiftedRm", (t2SUBrs GPRnopc:$Rd, GPRnopc:$Rn, t2_so_reg:$ShiftedRm, pred:$p, cc_out:$s)>; +// ... and with the destination and source register combined. 
+def : t2InstAlias<"sub${s}${p} $Rdn, $imm", + (t2SUBri GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_imm:$imm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${p} $Rdn, $imm", + (t2SUBri12 GPRnopc:$Rdn, GPRnopc:$Rdn, imm0_4095:$imm, pred:$p)>; +def : t2InstAlias<"sub${s}${p} $Rdn, $Rm", + (t2SUBrr GPRnopc:$Rdn, GPRnopc:$Rdn, rGPR:$Rm, pred:$p, cc_out:$s)>; +def : t2InstAlias<"sub${s}${p} $Rdn, $ShiftedRm", + (t2SUBrs GPRnopc:$Rdn, GPRnopc:$Rdn, t2_so_reg:$ShiftedRm, + pred:$p, cc_out:$s)>; + // Alias for compares without the ".w" optional width specifier. def : t2InstAlias<"cmn${p} $Rn, $Rm", -- cgit v1.1 From 7ccee5610a7100ddffe05f5eda1950f7d2cf66a6 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Fri, 28 Oct 2011 17:29:39 +0000 Subject: Specify fixed bits on CPS instructions to enable roundtripping. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143202 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b9cbc83..9373972 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1640,7 +1640,7 @@ class CPS let Inst{27-20} = 0b00010000; let Inst{19-18} = imod; let Inst{17} = M; // Enabled if mode is set; - let Inst{16} = 0; + let Inst{16-9} = 0b00000000; let Inst{8-6} = iflags; let Inst{5} = 0; let Inst{4-0} = mode; -- cgit v1.1 From 82418ff4d1156dfd30d89a4874a365509a0798de Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Fri, 28 Oct 2011 17:38:30 +0000 Subject: Revert r143202. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143203 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 9373972..b9cbc83 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1640,7 +1640,7 @@ class CPS let Inst{27-20} = 0b00010000; let Inst{19-18} = imod; let Inst{17} = M; // Enabled if mode is set; - let Inst{16-9} = 0b00000000; + let Inst{16} = 0; let Inst{8-6} = iflags; let Inst{5} = 0; let Inst{4-0} = mode; -- cgit v1.1 From bf923b815d6da97367e3eedab69230918bf128a3 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 28 Oct 2011 17:55:38 +0000 Subject: Reapply r143177 and r143179 (reverting r143188), with scheduler fixes: Use a separate register, instead of SP, as the calling-convention resource, to avoid spurious conflicts with actual uses of SP. Also, fix unscheduling of calling sequences, which can be triggered by pseudo-two-address dependencies. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143206 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 772 ++++++++----------------- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 1 - lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 165 +++++- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 + lib/Target/ARM/ARMISelLowering.cpp | 19 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 4 +- lib/Target/X86/X86ISelLowering.cpp | 194 ++++++- lib/Target/XCore/XCoreISelLowering.cpp | 9 + 8 files changed, 648 insertions(+), 532 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a8bccda..7425669 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -46,37 +46,18 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. 
/// namespace { -class SelectionDAGLegalize { +class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - // Libcall insertion helpers. - - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been - /// legalized. We use this to ensure that calls are properly serialized - /// against each other, including inserted libcalls. - SDValue LastCALLSEQ_END; - - /// IsLegalizingCall - This member is used *only* for purposes of providing - /// helpful assertions that a libcall isn't created while another call is - /// being legalized (which could lead to non-serialized call sequences). - bool IsLegalizingCall; + /// LegalizePosition - The iterator for walking through the node list. + SelectionDAG::allnodes_iterator LegalizePosition; - /// LegalizedNodes - For nodes that are of legal width, and that have more - /// than one use, this map indicates what regularized operand to use. This - /// allows us to avoid legalizing the same thing more than once. - DenseMap LegalizedNodes; + /// LegalizedNodes - The set of nodes which have already been legalized. + SmallPtrSet LegalizedNodes; - void AddLegalizedOperand(SDValue From, SDValue To) { - LegalizedNodes.insert(std::make_pair(From, To)); - // If someone requests legalization of the new node, return itself. - if (From != To) - LegalizedNodes.insert(std::make_pair(To, To)); - - // Transfer SDDbgValues. - DAG.TransferDbgValues(From, To); - } + // Libcall insertion helpers. public: explicit SelectionDAGLegalize(SelectionDAG &DAG); @@ -84,9 +65,8 @@ public: void LegalizeDAG(); private: - /// LegalizeOp - Return a legal replacement for the given operation, with - /// all legal operands. - SDValue LegalizeOp(SDValue O); + /// LegalizeOp - Legalizes the given operation. 
+ void LegalizeOp(SDNode *Node); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -107,9 +87,6 @@ private: SDValue N1, SDValue N2, SmallVectorImpl &Mask) const; - bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet &NodesLeadingTo); - void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); @@ -150,10 +127,21 @@ private: SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); + SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + std::pair ExpandAtomic(SDNode *Node); - void ExpandNode(SDNode *Node, SmallVectorImpl &Results); - void PromoteNode(SDNode *Node, SmallVectorImpl &Results); + void ExpandNode(SDNode *Node); + void PromoteNode(SDNode *Node); + + // DAGUpdateListener implementation. + virtual void NodeDeleted(SDNode *N, SDNode *E) { + LegalizedNodes.erase(N); + if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) + ++LegalizePosition; + } + + virtual void NodeUpdated(SDNode *N) {} }; } @@ -195,145 +183,37 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; - - // The legalize process is inherently a bottom-up recursive process (users - // legalize their uses before themselves). Given infinite stack space, we - // could just start legalizing on the root and traverse the whole graph. In - // practice however, this causes us to run out of stack space on large basic - // blocks. To avoid this problem, compute an ordering of the nodes where each - // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) - LegalizeOp(SDValue(I, 0)); - - // Finally, it's possible the root changed. Get the new root. 
- SDValue OldRoot = DAG.getRoot(); - assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); - DAG.setRoot(LegalizedNodes[OldRoot]); - - LegalizedNodes.clear(); - - // Remove dead nodes now. - DAG.RemoveDeadNodes(); -} - -/// FindCallEndFromCallStart - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_END node that terminates the call sequence. -static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. - if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) - return Node; - } - if (Node->use_empty()) - return 0; // No CallSeqEnd - - // The chain is usually at the end. - SDValue TheChain(Node, Node->getNumValues()-1); - if (TheChain.getValueType() != MVT::Other) { - // Sometimes it's at the beginning. - TheChain = SDValue(Node, 0); - if (TheChain.getValueType() != MVT::Other) { - // Otherwise, hunt for it. - for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) - if (Node->getValueType(i) == MVT::Other) { - TheChain = SDValue(Node, i); - break; - } - - // Otherwise, we walked into a node without a chain. - if (TheChain.getValueType() != MVT::Other) - return 0; + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. 
+ for (;;) { + bool AnyLegalized = false; + for (LegalizePosition = DAG.allnodes_end(); + LegalizePosition != DAG.allnodes_begin(); ) { + --LegalizePosition; + + SDNode *N = LegalizePosition; + if (LegalizedNodes.insert(N)) { + AnyLegalized = true; + LegalizeOp(N); + } } - } - - for (SDNode::use_iterator UI = Node->use_begin(), - E = Node->use_end(); UI != E; ++UI) { - - // Make sure to only follow users of our token chain. - SDNode *User = *UI; - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) - if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) - return Result; - } - return 0; -} - -/// FindCallStartFromCallEnd - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_START node that initiates the call sequence. -static SDNode *FindCallStartFromCallEnd(SDNode *Node) { - int nested = 0; - assert(Node && "Didn't find callseq_start for a call??"); - while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { - Node = Node->getOperand(0).getNode(); - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - switch (Node->getOpcode()) { - default: + if (!AnyLegalized) break; - case ISD::CALLSEQ_START: - if (!nested) - return Node; - nested--; - break; - case ISD::CALLSEQ_END: - nested++; - break; - } - } - return 0; -} - -/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to -/// see if any uses can reach Dest. If no dest operands can get to dest, -/// legalize them, legalize ourself, and return false, otherwise, return true. -/// -/// Keep track of the nodes we fine that actually do lead to Dest in -/// NodesLeadingTo. This avoids retraversing them exponential number of times. 
-/// -bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet &NodesLeadingTo) { - if (N == Dest) return true; // N certainly leads to Dest :) - - // If we've already processed this node and it does lead to Dest, there is no - // need to reprocess it. - if (NodesLeadingTo.count(N)) return true; - // If the first result of this node has been already legalized, then it cannot - // reach N. - if (LegalizedNodes.count(SDValue(N, 0))) return false; - - // Okay, this node has not already been legalized. Check and legalize all - // operands. If none lead to Dest, then we can legalize this node. - bool OperandsLeadToDest = false; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - OperandsLeadToDest |= // If an operand leads to Dest, so do we. - LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, - NodesLeadingTo); - - if (OperandsLeadToDest) { - NodesLeadingTo.insert(N); - return true; } - // Okay, this node looks safe, legalize it and return false. - LegalizeOp(SDValue(N, 0)); - return false; + // Remove dead nodes now. + DAG.RemoveDeadNodes(); } /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or /// a load from the constant pool. 
-static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, - SelectionDAG &DAG, const TargetLowering &TLI) { +SDValue +SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { bool Extend = false; DebugLoc dl = CFP->getDebugLoc(); @@ -369,20 +249,25 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); - if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); - return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, - Alignment); + if (Extend) { + SDValue Result = + DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, + DAG.getEntryNode(), + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); + return Result; + } + SDValue Result = + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), false, false, + Alignment); + return Result; } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. -static -SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI, + SelectionDAG::DAGUpdateListener *DUL) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -397,8 +282,10 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! 
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); - return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), Alignment); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); + DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + return; } // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -458,8 +345,11 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + return; } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -488,13 +378,16 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. 
-static -SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void +ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI, + SDValue &ValResult, SDValue &ChainResult) { SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -512,8 +405,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); - SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = Chain; + return; } // Copy the value to a (aligned) stack slot using (unaligned) integer @@ -572,8 +466,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, MachinePointerInfo(), LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. - SDValue Ops[] = { Load, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Load; + ChainResult = TF; + return; } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -626,8 +521,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - SDValue Ops[] = { Result, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = TF; } /// PerformInsertVectorEltInMemory - Some target cannot handle a variable @@ -763,11 +658,10 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. -SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { - if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. - return Op; +void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. 
+ return; - SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -782,13 +676,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - DenseMap::iterator I = LegalizedNodes.find(Op); - if (I != LegalizedNodes.end()) return I->second; - SDValue Tmp1, Tmp2, Tmp3, Tmp4; - SDValue Result = Op; bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode @@ -882,17 +770,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; - case ISD::BUILD_VECTOR: - // A weird case: legalization for BUILD_VECTOR never legalizes the - // operands! - // FIXME: This really sucks... changing it isn't semantically incorrect, - // but it massively pessimizes the code for floating-point BUILD_VECTORs - // because ConstantFP operands get legalized into constant pool loads - // before the BUILD_VECTOR code can see them. It doesn't usually bite, - // though, because BUILD_VECTORS usually get lowered into other nodes - // which get legalized properly. 
- SimpleFinishLegalizing = false; - break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -903,22 +780,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } if (SimpleFinishLegalizing) { - SmallVector Ops, ResultVals; + SmallVector Ops; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - Ops.push_back(LegalizeOp(Node->getOperand(i))); + Ops.push_back(Node->getOperand(i)); switch (Node->getOpcode()) { default: break; - case ISD::BR: - case ISD::BRIND: - case ISD::BR_JT: - case ISD::BR_CC: - case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END - Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); - Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); - break; case ISD::SHL: case ISD::SRL: case ISD::SRA: @@ -926,57 +792,66 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[1].getValueType().isVector()) - Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[1])); + if (!Ops[1].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[1] = Handle.getValue(); + } break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. 
- if (!Ops[2].getValueType().isVector()) - Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[2])); + if (!Ops[2].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[2] = Handle.getValue(); + } break; } - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), - Ops.size()), 0); + SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + if (NewNode != Node) { + DAG.ReplaceAllUsesWith(Node, NewNode, this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); + DAG.RemoveDeadNode(Node, this); + Node = NewNode; + } switch (Action) { case TargetLowering::Legal: - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - ResultVals.push_back(Result.getValue(i)); - break; + return; case TargetLowering::Custom: // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
- Tmp1 = TLI.LowerOperation(Result, DAG); + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp1.getNode()) { + SmallVector ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) ResultVals.push_back(Tmp1); else ResultVals.push_back(Tmp1.getValue(i)); } - break; + if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { + DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); + DAG.RemoveDeadNode(Node, this); + } + return; } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Result.getNode(), ResultVals); - break; + ExpandNode(Node); + return; case TargetLowering::Promote: - PromoteNode(Result.getNode(), ResultVals); - break; - } - if (!ResultVals.empty()) { - for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { - if (ResultVals[i] != SDValue(Node, i)) - ResultVals[i] = LegalizeOp(ResultVals[i]); - AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); - } - return ResultVals[Op.getResNo()]; + PromoteNode(Node); + return; } } @@ -989,155 +864,20 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { #endif assert(0 && "Do not know how to legalize this operator!"); - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: { - // Scalarize vector SRA/SRL/SHL. 
- EVT VT = Node->getValueType(0); - assert(VT.isVector() && "Unable to legalize non-vector shift"); - assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); - unsigned NumElem = VT.getVectorNumElements(); - - SmallVector Scalars; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); - SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); - Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, - VT.getScalarType(), Ex, Sh)); - } - Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); - break; - } - - case ISD::BUILD_VECTOR: - switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: assert(0 && "This action is not supported yet!"); - case TargetLowering::Custom: - Tmp3 = TLI.LowerOperation(Result, DAG); - if (Tmp3.getNode()) { - Result = Tmp3; - break; - } - // FALLTHROUGH - case TargetLowering::Expand: - Result = ExpandBUILD_VECTOR(Result.getNode()); - break; - } - break; - case ISD::CALLSEQ_START: { - SDNode *CallEnd = FindCallEndFromCallStart(Node); - - // Recursively Legalize all of the inputs of the call end that do not lead - // to this call start. This ensures that any libcalls that need be inserted - // are inserted *before* the CALLSEQ_START. - {SmallPtrSet NodesLeadingTo; - for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) - LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, - NodesLeadingTo); - } - - // Now that we have legalized all of the inputs (which may have inserted - // libcalls), create the new CALLSEQ_START node. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - - // Merge in the last call to ensure that this call starts after the last - // call ended. 
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { - Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); - Tmp1 = LegalizeOp(Tmp1); - } - - // Do not try to legalize the target-specific arguments (#1+). - if (Tmp1 != Node->getOperand(0)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], - Ops.size()), Result.getResNo()); - } - - // Remember that the CALLSEQ_START is legalized. - AddLegalizedOperand(Op.getValue(0), Result); - if (Node->getNumValues() == 2) // If this has a flag result, remember it. - AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); - - // Now that the callseq_start and all of the non-call nodes above this call - // sequence have been legalized, legalize the call itself. During this - // process, no libcalls can/will be inserted, guaranteeing that no calls - // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); - // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; - - // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); - return Result; - } + case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; - } - - // Otherwise, the call start has been legalized and everything is going - // according to plan. Just legalize ourselves normally here. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. 
- // Do not try to legalize the target-specific arguments (#1+), except for - // an optional flag input. - if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ - if (Tmp1 != Node->getOperand(0)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } else { - Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); - if (Tmp1 != Node->getOperand(0) || - Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Ops.back() = Tmp2; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); - // This finishes up call legalization. - IsLegalizingCall = false; - - // If the CALLSEQ_END node has a flag, remember that we legalized it. - AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); - if (Node->getNumValues() == 2) - AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); - return Result.getValue(Op.getResNo()); + break; case ISD::LOAD: { LoadSDNode *LD = cast(Node); - Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. + Tmp1 = LD->getChain(); // Legalize the chain. + Tmp2 = LD->getBasePtr(); // Legalize the base pointer. 
ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp3 = Result.getValue(0); - Tmp4 = Result.getValue(1); + Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp2, LD->getOffset()); + Tmp3 = SDValue(Node, 0); + Tmp4 = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: assert(0 && "This action is not supported yet!"); @@ -1148,20 +888,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), - DAG, TLI); - Tmp3 = Result.getOperand(0); - Tmp4 = Result.getOperand(1); - Tmp3 = LegalizeOp(Tmp3); - Tmp4 = LegalizeOp(Tmp4); + ExpandUnalignedLoad(cast(Node), + DAG, TLI, Tmp3, Tmp4); } } break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(Tmp3, DAG); if (Tmp1.getNode()) { - Tmp3 = LegalizeOp(Tmp1); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + Tmp3 = Tmp1; + Tmp4 = Tmp1.getValue(1); } break; case TargetLowering::Promote: { @@ -1173,16 +909,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); + Tmp4 = Tmp1.getValue(1); break; } } // Since loads produce two values, make sure to remember that we // legalized both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp3); - AddLegalizedOperand(SDValue(Node, 1), Tmp4); - return Op.getResNo() ? 
Tmp4 : Tmp3; + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); + return; } EVT SrcVT = LD->getMemoryVT(); @@ -1213,9 +949,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -1230,8 +967,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp1 = Result; + Tmp2 = Ch; } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); @@ -1274,7 +1011,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } else { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 @@ -1304,11 +1041,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. 
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp2 = Ch; } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1316,17 +1052,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp1 = Result.getValue(0); - Tmp2 = Result.getValue(1); + Node = DAG.UpdateNodeOperands(Node, + Tmp1, Tmp2, LD->getOffset()); + Tmp1 = SDValue(Node, 0); + Tmp2 = SDValue(Node, 1); if (isCustom) { - Tmp3 = TLI.LowerOperation(Result, DAG); + Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp3.getNode()) { - Tmp1 = LegalizeOp(Tmp3); - Tmp2 = LegalizeOp(Tmp3.getValue(1)); + Tmp1 = Tmp3; + Tmp2 = Tmp3.getValue(1); } } else { // If this is an unaligned load and the target doesn't support it, @@ -1337,12 +1072,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), - DAG, TLI); - Tmp1 = Result.getOperand(0); - Tmp2 = Result.getOperand(1); - Tmp1 = LegalizeOp(Tmp1); - Tmp2 = LegalizeOp(Tmp2); + ExpandUnalignedLoad(cast(Node), + DAG, TLI, Tmp1, Tmp2); } } } @@ -1363,9 +1094,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; default: llvm_unreachable("Unexpected extend load type!"); } - Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp1 = LegalizeOp(Result); // Relegalize new nodes. 
- Tmp2 = LegalizeOp(Load.getValue(1)); + Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp2 = Load.getValue(1); break; } @@ -1380,10 +1110,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1391,38 +1121,37 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result, DAG.getValueType(SrcVT)); else ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + Tmp1 = ValRes; + Tmp2 = Result.getValue(1); break; } } // Since loads produce two values, make sure to remember that we legalized // both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp1); - AddLegalizedOperand(SDValue(Node, 1), Tmp2); - return Op.getResNo() ? Tmp2 : Tmp1; + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); + break; } case ISD::STORE: { StoreSDNode *ST = cast(Node); - Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. 
+ Tmp1 = ST->getChain(); + Tmp2 = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - Result = SDValue(OptStore, 0); + DAG.ReplaceAllUsesWith(ST, OptStore, this); break; } { - Tmp3 = LegalizeOp(ST->getValue()); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); + Tmp3 = ST->getValue(); + Node = DAG.UpdateNodeOperands(Node, + Tmp1, Tmp3, Tmp2, + ST->getOffset()); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { @@ -1434,27 +1163,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast(Node), + DAG, TLI, this); } break; case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(Result, DAG); - if (Tmp1.getNode()) Result = Tmp1; + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Tmp1.getNode()) + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Tmp1, this); break; - case TargetLowering::Promote: + case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); break; } + } break; } } else { - Tmp3 = LegalizeOp(ST->getValue()); + Tmp3 = ST->getValue(); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -1466,8 
+1199,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1521,14 +1256,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } // The order of the stores doesn't matter. - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); + Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp3, Tmp2, + ST->getOffset()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1539,12 +1273,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast(Node), DAG, TLI, this); } break; case TargetLowering::Custom: - Result = TLI.LowerOperation(Result, DAG); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), + TLI.LowerOperation(SDValue(Node, 0), DAG), + this); break; case TargetLowering::Expand: 
assert(!StVT.isVector() && @@ -1553,8 +1288,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // TRUNCSTORE:i16 i32 -> STORE i16 assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); break; } } @@ -1562,17 +1299,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } } - assert(Result.getValueType() == Op.getValueType() && - "Bad legalization!"); - - // Make sure that the generated code is itself legal. - if (Result != Op) - Result = LegalizeOp(Result); - - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - AddLegalizedOperand(Op, Result); - return Result; } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { @@ -2011,7 +1737,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -2030,7 +1755,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. 
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); // isTailCall may be true since the callee does not reference caller stack @@ -2046,10 +1770,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo.first; } @@ -2079,11 +1799,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); - return CallInfo.first; } @@ -2093,7 +1808,6 @@ std::pair SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -2110,7 +1824,6 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, @@ -2118,10 +1831,6 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); - // Legalize the call sequence, starting with the chain. 
This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo; } @@ -2247,20 +1956,14 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. DebugLoc dl = Node->getDebugLoc(); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); - // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, + SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo(), false, false, 0); Results.push_back(CallInfo.first); Results.push_back(Rem); @@ -2452,11 +2155,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, MachinePointerInfo::getConstantPool(), false, false, Alignment); else { - FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment); + HandleSDNode Handle(Load); + LegalizeOp(Load.getNode()); + FudgeInReg = Handle.getValue(); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2780,8 +2485,8 @@ std::pair SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { return ExpandChainLibCall(LC, 
Node, false); } -void SelectionDAGLegalize::ExpandNode(SDNode *Node, - SmallVectorImpl &Results) { +void SelectionDAGLegalize::ExpandNode(SDNode *Node) { + SmallVector Results; DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3, Tmp4; switch (Node->getOpcode()) { @@ -3229,10 +2934,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, ConstantFPSDNode *CFP = cast(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. - if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) - Results.push_back(SDValue(Node, 0)); - else - Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI)); + if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + Results.push_back(ExpandConstantFP(CFP, true)); break; } case ISD::EHSELECTION: { @@ -3478,6 +3181,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); + // Ret is a node with an illegal type. Because such things are not + // generally permitted during this phase of legalization, delete the + // node. The above EXTRACT_ELEMENT nodes should have been folded. + DAG.DeleteNode(Ret.getNode()); } if (isSigned) { @@ -3618,7 +3325,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3628,6 +3334,35 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1); break; } + case ISD::BUILD_VECTOR: + Results.push_back(ExpandBUILD_VECTOR(Node)); + break; + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. 
+ EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + SDValue Result = + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Scalars[0], Scalars.size()); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + break; + } case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3638,13 +3373,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - Results.push_back(SDValue(Node, i)); break; } + + // Replace the original node with the legalized result. + if (!Results.empty()) + DAG.ReplaceAllUsesWith(Node, Results.data(), this); } -void SelectionDAGLegalize::PromoteNode(SDNode *Node, - SmallVectorImpl &Results) { + +void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + SmallVector Results; EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || @@ -3772,6 +3510,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; } } + + // Replace the original node with the legalized result. + if (!Results.empty()) + DAG.ReplaceAllUsesWith(Node, Results.data(), this); } // SelectionDAG::Legalize - This is the entry point for the file. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a4bb577..7ed1b98 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1084,7 +1084,6 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index a1abdb4..b8cf998 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -315,8 +315,10 @@ void ScheduleDAGRRList::Schedule() { IssueCount = 0; MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); - LiveRegGens.resize(TRI->getNumRegs(), NULL); + // Allocate slots for each physical register, plus one for a special register + // to track the virtual resource of a calling sequence. + LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); + LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); // Build the scheduling graph. BuildSchedGraph(NULL); @@ -386,6 +388,90 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { } } +/// IsChainDependent - Test if Outer is reachable from Inner through +/// chain dependencies. 
+static bool IsChainDependent(SDNode *Outer, SDNode *Inner) { + SDNode *N = Outer; + for (;;) { + if (N == Inner) + return true; + if (N->getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (IsChainDependent(N->getOperand(i).getNode(), Inner)) + return true; + return false; + } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return false; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return false; + } +} + +/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate +/// the corresponding (lowered) CALLSEQ_BEGIN node. +/// +/// NestLevel and MaxNest are used in recursion to indicate the current level +/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum +/// level seen so far. +/// +/// TODO: It would be better to give CALLSEQ_END an explicit operand to point +/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. +static SDNode * +FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, + const TargetInstrInfo *TII) { + for (;;) { + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. + if (N->getOpcode() == ISD::TokenFactor) { + SDNode *Best = 0; + unsigned BestMaxNest = MaxNest; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + unsigned MyNestLevel = NestLevel; + unsigned MyMaxNest = MaxNest; + if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(), + MyNestLevel, MyMaxNest, TII)) + if (!Best || (MyMaxNest > BestMaxNest)) { + Best = New; + BestMaxNest = MyMaxNest; + } + } + assert(Best); + MaxNest = BestMaxNest; + return Best; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END.
+ if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + MaxNest = std::max(MaxNest, NestLevel); + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + --NestLevel; + if (NestLevel == 0) + return N; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return 0; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return 0; + } +} + /// Call ReleasePred for each predecessor, then update register live def/gen. /// Always update LiveRegDefs for a register dependence even if the current SU /// also defines the register. This effectively create one large live range @@ -423,6 +509,25 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } } + + // If we're scheduling a lowered CALLSEQ_END, find the corresponding CALLSEQ_BEGIN. + // Inject an artificial physical register dependence between these nodes, to + // prevent other calls from being interscheduled with them. + unsigned CallResource = TRI->getNumRegs(); + if (!LiveRegDefs[CallResource]) + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + unsigned NestLevel = 0; + unsigned MaxNest = 0; + SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); + + SUnit *Def = &SUnits[N->getNodeId()]; + ++NumLiveRegs; + LiveRegDefs[CallResource] = Def; + LiveRegGens[CallResource] = SU; + break; + } } /// Check to see if any of the pending instructions are ready to issue. If @@ -605,6 +710,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = NULL; } } + // Release the special call resource dependence, if this is the beginning + // of a call. 
+ unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } resetVRegCycle(SU); @@ -661,6 +780,33 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { } } + // Reclaim the special call resource dependence, if this is the beginning + // of a call. + unsigned CallResource = TRI->getNumRegs(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + ++NumLiveRegs; + LiveRegDefs[CallResource] = SU; + LiveRegGens[CallResource] = NULL; + } + } + + // Release the special call resource dependence, if this is the end + // of a call. + if (LiveRegGens[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { @@ -1083,6 +1229,21 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { if (!Node->isMachineOpcode()) continue; + // If we're in the middle of scheduling a call, don't begin scheduling + // another call. Also, don't allow any physical registers to be live across + // the call. 
+ if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + // Add one here so that we include the special calling-sequence resource. + for (unsigned i = 0, e = TRI->getNumRegs() + 1; i != e; ++i) + if (LiveRegDefs[i]) { + SDNode *Gen = LiveRegGens[i]->getNode(); + while (SDNode *Glued = Gen->getGluedNode()) + Gen = Glued; + if (!IsChainDependent(Gen, Node) && RegAdded.insert(i)) + LRegs.push_back(i); + } + continue; + } const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 07d2db6..010a740 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5290,6 +5290,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (FromN == getRoot()) + setRoot(To); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5335,6 +5339,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To, getRoot().getResNo())); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5373,6 +5381,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. 
+ if (From == getRoot().getNode()) + setRoot(SDValue(To[getRoot().getResNo()])); } /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving @@ -5431,6 +5443,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot()) + setRoot(To); } namespace { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 31e522d..bfc1690 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1353,12 +1353,10 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, MVT::i32); - // TODO: Disable AlwaysInline when it becomes possible - // to emit a nested call sequence. MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, - /*AlwaysInline=*/true, + /*AlwaysInline=*/false, MachinePointerInfo(0), MachinePointerInfo(0))); @@ -4350,9 +4348,24 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If this is undef splat, generate it via "just" vdup, if possible. if (Lane == -1) Lane = 0; + // Test if V1 is a SCALAR_TO_VECTOR. if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR + // (and probably will turn into a SCALAR_TO_VECTOR once legalization + // reaches it). 
+ if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && + !isa(V1.getOperand(0))) { + bool IsScalarToVector = true; + for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); + } return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, MVT::i32)); } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 02b0ff2..3d75de0 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2114,7 +2114,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { HasNoSignedComparisonUses(Node)) // Look past the truncate if CMP is the only use of it. N0 = N0.getOperand(0); - if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + if ((N0.getNode()->getOpcode() == ISD::AND || + (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) && + N0.getNode()->hasOneUse() && N0.getValueType() != MVT::i8 && X86::isZeroNode(N1)) { ConstantSDNode *C = dyn_cast(N0.getNode()->getOperand(1)); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2ec0814..927a307 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4220,6 +4220,29 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { return true; } +// Test whether the given value is a vector value which will be legalized +// into a load. +static bool WillBeConstantPoolLoad(SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + // Check for any non-constant elements. 
+ for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + switch (N->getOperand(i).getNode()->getOpcode()) { + case ISD::UNDEF: + case ISD::ConstantFP: + case ISD::Constant: + break; + default: + return false; + } + + // Vectors of all-zeros and all-ones are materialized with special + // instructions rather than being loaded. + return !ISD::isBuildVectorAllZeros(N) && + !ISD::isBuildVectorAllOnes(N); +} + /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to /// match movlp{s|d}. The lower half elements should come from lower half of /// V1 (and in order), and the upper half elements should come from the upper @@ -4235,7 +4258,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, return false; // Is V2 is a vector load, don't do this transformation. We will try to use // load folding shufps op. - if (ISD::isNON_EXTLoad(V2)) + if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2)) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -6351,6 +6374,8 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op)) CanFoldLoad = true; + ShuffleVectorSDNode *SVOp = cast(Op); + // Both of them can't be memory operations though. if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) CanFoldLoad = false; @@ -6360,10 +6385,11 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); if (NumElems == 4) - return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); + // If we don't care about the second element, procede to use movss. 
+ if (SVOp->getMaskElt(1) != -1) + return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); } - ShuffleVectorSDNode *SVOp = cast(Op); // movl and movlp will both match v2i64, but v2i64 is never matched by // movl earlier because we make it strict to avoid messing with the movlp load // folding logic (see the code above getMOVLP call). Match it here then, @@ -8681,8 +8707,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. - if (Cond.getOpcode() == X86ISD::SETCC || - Cond.getOpcode() == X86ISD::SETCC_CARRY) { + unsigned CondOpcode = Cond.getOpcode(); + if (CondOpcode == X86ISD::SETCC || + CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -8699,6 +8726,39 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = Cmp; addTest = false; } + } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || + CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || + ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && + Cond.getOperand(0).getValueType() != MVT::i8)) { + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + unsigned X86Opcode; + unsigned X86Cond; + SDVTList VTs; + switch (CondOpcode) { + case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; + case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; + case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; + case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; + case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; + case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; + default: llvm_unreachable("unexpected overflowing operator"); + } + if (CondOpcode == ISD::UMULO) + VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), + 
MVT::i32); + else + VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + + SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS); + + if (CondOpcode == ISD::UMULO) + Cond = X86Op.getValue(2); + else + Cond = X86Op.getValue(1); + + CC = DAG.getConstant(X86Cond, MVT::i8); + addTest = false; } if (addTest) { @@ -8780,11 +8840,27 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); SDValue CC; + bool Inverted = false; if (Cond.getOpcode() == ISD::SETCC) { - SDValue NewCond = LowerSETCC(Cond, DAG); - if (NewCond.getNode()) - Cond = NewCond; + // Check for setcc([su]{add,sub,mul}o == 0). + if (cast(Cond.getOperand(2))->get() == ISD::SETEQ && + isa(Cond.getOperand(1)) && + cast(Cond.getOperand(1))->isNullValue() && + Cond.getOperand(0).getResNo() == 1 && + (Cond.getOperand(0).getOpcode() == ISD::SADDO || + Cond.getOperand(0).getOpcode() == ISD::UADDO || + Cond.getOperand(0).getOpcode() == ISD::SSUBO || + Cond.getOperand(0).getOpcode() == ISD::USUBO || + Cond.getOperand(0).getOpcode() == ISD::SMULO || + Cond.getOperand(0).getOpcode() == ISD::UMULO)) { + Inverted = true; + Cond = Cond.getOperand(0); + } else { + SDValue NewCond = LowerSETCC(Cond, DAG); + if (NewCond.getNode()) + Cond = NewCond; + } } #if 0 // FIXME: LowerXALUO doesn't handle these!! @@ -8805,8 +8881,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. 
- if (Cond.getOpcode() == X86ISD::SETCC || - Cond.getOpcode() == X86ISD::SETCC_CARRY) { + unsigned CondOpcode = Cond.getOpcode(); + if (CondOpcode == X86ISD::SETCC || + CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -8827,6 +8904,43 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { break; } } + } + CondOpcode = Cond.getOpcode(); + if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || + CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || + ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && + Cond.getOperand(0).getValueType() != MVT::i8)) { + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + unsigned X86Opcode; + unsigned X86Cond; + SDVTList VTs; + switch (CondOpcode) { + case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; + case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; + case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; + case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; + case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; + case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; + default: llvm_unreachable("unexpected overflowing operator"); + } + if (Inverted) + X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond); + if (CondOpcode == ISD::UMULO) + VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), + MVT::i32); + else + VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + + SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS); + + if (CondOpcode == ISD::UMULO) + Cond = X86Op.getValue(2); + else + Cond = X86Op.getValue(1); + + CC = DAG.getConstant(X86Cond, MVT::i8); + addTest = false; } else { unsigned CondOpc; if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) { @@ -8890,6 +9004,66 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { CC = 
DAG.getConstant(CCode, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); addTest = false; + } else if (Cond.getOpcode() == ISD::SETCC && + cast(Cond.getOperand(2))->get() == ISD::SETOEQ) { + // For FCMP_OEQ, we can emit + // two branches instead of an explicit AND instruction with a + // separate test. However, we only do this if this block doesn't + // have a fall-through edge, because this requires an explicit + // jmp when the condition is false. + if (Op.getNode()->hasOneUse()) { + SDNode *User = *Op.getNode()->use_begin(); + // Look for an unconditional branch following this conditional branch. + // We need this because we need to reverse the successors in order + // to implement FCMP_OEQ. + if (User->getOpcode() == ISD::BR) { + SDValue FalseBB = User->getOperand(1); + SDNode *NewBR = + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); + assert(NewBR == User); + (void)NewBR; + Dest = FalseBB; + + SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, + Cond.getOperand(0), Cond.getOperand(1)); + CC = DAG.getConstant(X86::COND_NE, MVT::i8); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), + Chain, Dest, CC, Cmp); + CC = DAG.getConstant(X86::COND_P, MVT::i8); + Cond = Cmp; + addTest = false; + } + } + } else if (Cond.getOpcode() == ISD::SETCC && + cast(Cond.getOperand(2))->get() == ISD::SETUNE) { + // For FCMP_UNE, we can emit + // two branches instead of an explicit AND instruction with a + // separate test. However, we only do this if this block doesn't + // have a fall-through edge, because this requires an explicit + // jmp when the condition is false. + if (Op.getNode()->hasOneUse()) { + SDNode *User = *Op.getNode()->use_begin(); + // Look for an unconditional branch following this conditional branch. + // We need this because we need to reverse the successors in order + // to implement FCMP_UNE. 
+ if (User->getOpcode() == ISD::BR) { + SDValue FalseBB = User->getOperand(1); + SDNode *NewBR = + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); + assert(NewBR == User); + (void)NewBR; + + SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, + Cond.getOperand(0), Cond.getOperand(1)); + CC = DAG.getConstant(X86::COND_NE, MVT::i8); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), + Chain, Dest, CC, Cmp); + CC = DAG.getConstant(X86::COND_NP, MVT::i8); + Cond = Cmp; + addTest = false; + Dest = FalseBB; + } + } } } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 2afe0e3..4b74f96 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -386,6 +386,15 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, Offset = off; return true; } + // Check for an aligned global variable. + if (GlobalAddressSDNode *GA = dyn_cast(*Root)) { + const GlobalValue *GV = GA->getGlobal(); + if (GA->getOffset() == 0 && GV->getAlignment() >= 4) { + AlignedBase = Base; + Offset = off; + return true; + } + } return false; } -- cgit v1.1 From cb9fed665550376b7c65c7e1157a58911193e2e2 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Fri, 28 Oct 2011 18:02:13 +0000 Subject: Reapply r143202, with a manual decoding hook for SWP. This change inadvertantly exposed a decoding ambiguity between SWP and CPS that the auto-generated decoder can't handle. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143208 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 2 ++ lib/Target/ARM/ARMInstrInfo.td | 2 +- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 24 ++++++++++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 7cbc911..43f1194 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -481,6 +481,8 @@ class AIswp pattern> let Inst{15-12} = Rt; let Inst{11-4} = 0b00001001; let Inst{3-0} = Rt2; + + let DecoderMethod = "DecodeSwap"; } // addrmode1 instructions diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index b9cbc83..9373972 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1640,7 +1640,7 @@ class CPS let Inst{27-20} = 0b00010000; let Inst{19-18} = imod; let Inst{17} = M; // Enabled if mode is set; - let Inst{16} = 0; + let Inst{16-9} = 0b00000000; let Inst{8-6} = iflags; let Inst{5} = 0; let Inst{4-0} = mode; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 0e63d9c..92a4cdc 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -249,6 +249,8 @@ static DecodeStatus DecodeVMOVSRR(llvm::MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); static DecodeStatus DecodeVMOVRRS(llvm::MCInst &Inst, unsigned Insn, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeSwap(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeThumbAddSpecialReg(llvm::MCInst &Inst, uint16_t Insn, uint64_t Address, const void *Decoder); @@ -4043,3 +4045,25 @@ static DecodeStatus DecodeT2ShifterImmOperand(llvm::MCInst &Inst, uint32_t Val, return S; } +static DecodeStatus 
DecodeSwap(llvm::MCInst &Inst, unsigned Insn, + uint64_t Address, const void *Decoder) { + unsigned Rt = fieldFromInstruction32(Insn, 12, 4); + unsigned Rt2 = fieldFromInstruction32(Insn, 0, 4); + unsigned Rn = fieldFromInstruction32(Insn, 16, 4); + unsigned pred = fieldFromInstruction32(Insn, 28, 4); + + if (pred == 0xF) + return DecodeCPSInstruction(Inst, Insn, Address, Decoder); + + DecodeStatus S = MCDisassembler::Success; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rt2, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodeGPRnopcRegisterClass(Inst, Rn, Address, Decoder))) + return MCDisassembler::Fail; + if (!Check(S, DecodePredicateOperand(Inst, pred, Address, Decoder))) + return MCDisassembler::Fail; + + return S; +} -- cgit v1.1 From c65c747bc4ee7d3ca8463d33708bbb2aed38a809 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Fri, 28 Oct 2011 18:17:44 +0000 Subject: Fold icmp ugt (udiv X, Y), X to false. Spotted by my super-optimizer in 186.crafty. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143209 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 31cbbba..6bef0ae 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1574,6 +1574,9 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // 'srem x, CI2' produces (-|CI2|, |CI2|). Upper = CI2->getValue().abs(); Lower = (-Upper) + 1; + } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) { + // 'udiv CI2, x' produces [0, CI2]. + Upper = CI2->getValue(); } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) { // 'udiv x, CI2' produces [0, UINT_MAX / CI2]. 
APInt NegOne = APInt::getAllOnesValue(Width); @@ -1880,6 +1883,15 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // x udiv y <=u x. + if (LBO && match(LBO, m_UDiv(m_Specific(RHS), m_Value()))) { + // icmp pred (X /u Y), X + if (Pred == ICmpInst::ICMP_UGT) + return getFalse(ITy); + if (Pred == ICmpInst::ICMP_ULE) + return getTrue(ITy); + } + if (MaxRecurse && LBO && RBO && LBO->getOpcode() == RBO->getOpcode() && LBO->getOperand(1) == RBO->getOperand(1)) { switch (LBO->getOpcode()) { -- cgit v1.1 From 4604fc7791314af7ba7b66999e4c7fb75a4d9f6e Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Fri, 28 Oct 2011 18:30:05 +0000 Subject: A shift of a power of two is a power of two or zero. For completeness - not spotted in the wild. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143211 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ValueTracking.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 90757f9..9f7b5b5 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -769,6 +769,12 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, bool OrZero, if (Depth++ == MaxDepth) return false; + Value *X = 0, *Y = 0; + // A shift of a power of two is a power of two or zero. + if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || + match(V, m_Shr(m_Value(X), m_Value())))) + return isPowerOfTwo(X, TD, /*OrZero*/true, Depth); + if (ZExtInst *ZI = dyn_cast(V)) return isPowerOfTwo(ZI->getOperand(0), TD, OrZero, Depth); @@ -776,7 +782,6 @@ bool llvm::isPowerOfTwo(Value *V, const TargetData *TD, bool OrZero, return isPowerOfTwo(SI->getTrueValue(), TD, OrZero, Depth) && isPowerOfTwo(SI->getFalseValue(), TD, OrZero, Depth); - Value *X = 0, *Y = 0; if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { // A power of two and'd with anything is a power of two or zero. 
if (isPowerOfTwo(X, TD, /*OrZero*/true, Depth) || -- cgit v1.1 From 2ec69faf2615ccdffffacff9033b2228c589971c Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 28 Oct 2011 18:47:24 +0000 Subject: Add variable IsO32 to MipsTargetLowering. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143213 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 19 ++++++++++--------- lib/Target/Mips/MipsISelLowering.h | 2 +- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 5271332..fd3e120 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -84,7 +84,8 @@ MipsTargetLowering:: MipsTargetLowering(MipsTargetMachine &TM) : TargetLowering(TM, new MipsTargetObjectFile()), Subtarget(&TM.getSubtarget()), - HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()) { + HasMips64(Subtarget->hasMips64()), IsN64(Subtarget->isABI_N64()), + IsO32(Subtarget->isABI_O32()) { // Mips does not have i1 type, so use i32 for // setcc operations results (slt, sgt, ...). @@ -1926,7 +1927,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - if (Subtarget->isABI_O32()) + if (IsO32) CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); else CCInfo.AnalyzeCallOperands(Outs, CC_Mips); @@ -1954,7 +1955,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // Update size of the maximum argument space. // For O32, a minimum of four words (16 bytes) of argument space is // allocated. 
- if (Subtarget->isABI_O32()) + if (IsO32) NextStackOffset = std::max(NextStackOffset, (unsigned)16); unsigned MaxCallFrameSize = MipsFI->getMaxCallFrameSize(); @@ -1990,7 +1991,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: - if (Subtarget->isABI_O32() && VA.isRegLoc()) { + if (IsO32 && VA.isRegLoc()) { if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32) Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) { @@ -2032,7 +2033,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // ByVal Arg. ISD::ArgFlagsTy Flags = Outs[i].Flags; if (Flags.isByVal()) { - assert(Subtarget->isABI_O32() && + assert(IsO32 && "No support for ByVal args by ABIs other than O32 yet."); assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); @@ -2243,7 +2244,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), getTargetMachine(), ArgLocs, *DAG.getContext()); - if (Subtarget->isABI_O32()) + if (IsO32) CCInfo.AnalyzeFormalArguments(Ins, CC_MipsO32); else CCInfo.AnalyzeFormalArguments(Ins, CC_Mips); @@ -2291,7 +2292,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, } // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64 - if (Subtarget->isABI_O32()) { + if (IsO32) { if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32) ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue); if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) { @@ -2314,7 +2315,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, ISD::ArgFlagsTy Flags = Ins[i].Flags; if (Flags.isByVal()) { - assert(Subtarget->isABI_O32() && + assert(IsO32 && "No support for ByVal args by ABIs other than O32 yet."); assert(Flags.getByValSize() && "ByVal args of size 0 should have been ignored by front-end."); @@ -2353,7 
+2354,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } - if (isVarArg && Subtarget->isABI_O32()) { + if (isVarArg && IsO32) { // Record the frame index of the first variable argument // which is a value necessary to VASTART. unsigned NextStackOffset = CCInfo.getNextStackOffset(); diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 4be3fed..62e7f09 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -115,7 +115,7 @@ namespace llvm { // Subtarget Info const MipsSubtarget *Subtarget; - bool HasMips64, IsN64; + bool HasMips64, IsN64, IsO32; // Lower Operand helpers SDValue LowerCallResult(SDValue Chain, SDValue InFlag, -- cgit v1.1 From 012f8547f70c7cefe552282f28581a434ebe7324 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Fri, 28 Oct 2011 19:01:20 +0000 Subject: The expression icmp eq (select (icmp eq x, 0), 1, x), 0 folds to false. Spotted by my super-optimizer in 186.crafty and 450.soplex. We really need a proper infrastructure for handling generalizations of this kind of thing (which occur a lot), however this case is so simple that I decided to go ahead and implement it directly. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143214 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 74 ++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 6bef0ae..83881de 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -416,39 +416,55 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, } assert(isa(LHS) && "Not comparing with a select instruction!"); SelectInst *SI = cast(LHS); + Value *Cond = SI->getCondition(); + Value *TV = SI->getTrueValue(); + Value *FV = SI->getFalseValue(); // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. // Does "cmp TV, RHS" simplify? - if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT, - MaxRecurse)) { - // It does! Does "cmp FV, RHS" simplify? - if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT, - MaxRecurse)) { - // It does! If they simplified to the same value, then use it as the - // result of the original comparison. - if (TCmp == FCmp) - return TCmp; - Value *Cond = SI->getCondition(); - // If the false value simplified to false, then the result of the compare - // is equal to "Cond && TCmp". This also catches the case when the false - // value simplified to false and the true value to true, returning "Cond". - if (match(FCmp, m_Zero())) - if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse)) - return V; - // If the true value simplified to true, then the result of the compare - // is equal to "Cond || FCmp". - if (match(TCmp, m_One())) - if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse)) - return V; - // Finally, if the false value simplified to true and the true value to - // false, then the result of the compare is equal to "!Cond". 
- if (match(FCmp, m_One()) && match(TCmp, m_Zero())) - if (Value *V = - SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), - TD, DT, MaxRecurse)) - return V; - } + Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, DT, MaxRecurse); + if (!TCmp) { + // It didn't simplify. However if "cmp TV, RHS" is equal to the select + // condition itself then we can replace it with 'true'. + if (match(Cond, m_ICmp(Pred, m_Specific(TV), m_Specific(RHS)))) + TCmp = getTrue(Cond->getType()); + } + if (!TCmp) + return 0; + + // Does "cmp FV, RHS" simplify? + Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, DT, MaxRecurse); + if (!FCmp) { + // It didn't simplify. However if "cmp FV, RHS" is equal to the select + // condition itself then we can replace it with 'false'. + if (match(Cond, m_ICmp(Pred, m_Specific(FV), m_Specific(RHS)))) + FCmp = getFalse(Cond->getType()); } + if (!FCmp) + return 0; + + // If both sides simplified to the same value, then use it as the result of + // the original comparison. + if (TCmp == FCmp) + return TCmp; + // If the false value simplified to false, then the result of the compare + // is equal to "Cond && TCmp". This also catches the case when the false + // value simplified to false and the true value to true, returning "Cond". + if (match(FCmp, m_Zero())) + if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse)) + return V; + // If the true value simplified to true, then the result of the compare + // is equal to "Cond || FCmp". + if (match(TCmp, m_One())) + if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse)) + return V; + // Finally, if the false value simplified to true and the true value to + // false, then the result of the compare is equal to "!Cond". 
+ if (match(FCmp, m_One()) && match(TCmp, m_Zero())) + if (Value *V = + SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), + TD, DT, MaxRecurse)) + return V; return 0; } -- cgit v1.1 From e42f33bd159413e48d75c4c4783e52224c0b5532 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 28 Oct 2011 19:49:00 +0000 Subject: Make changes necessary in LowerCall to support Mips64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143217 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 63 ++++++++++++++++++++++-------------- 1 file changed, 38 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index fd3e120..86e3bc8 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1945,7 +1945,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // If this is the first call, create a stack frame object that points to // a location to which .cprestore saves $gp. - if (IsPIC && !MipsFI->getGPFI()) + if (IsO32 && IsPIC && !MipsFI->getGPFI()) MipsFI->setGPFI(MFI->CreateFixedObject(4, 0, true)); // Get the frame index of the stack frame object that points to the location @@ -1970,7 +1970,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, NextStackOffset = (NextStackOffset + StackAlignment - 1) / StackAlignment * StackAlignment; - if (IsPIC) + if (MipsFI->needGPSaveRestore()) MFI->setObjectOffset(MipsFI->getGPFI(), NextStackOffset); MFI->setObjectOffset(DynAllocFI, NextStackOffset); @@ -1986,15 +1986,17 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { SDValue Arg = OutVals[i]; CCValAssign &VA = ArgLocs[i]; - + MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT(); + // Promote the value if needed. 
switch (VA.getLocInfo()) { default: llvm_unreachable("Unknown loc info!"); case CCValAssign::Full: - if (IsO32 && VA.isRegLoc()) { - if (VA.getValVT() == MVT::f32 && VA.getLocVT() == MVT::i32) - Arg = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg); - if (VA.getValVT() == MVT::f64 && VA.getLocVT() == MVT::i32) { + if (VA.isRegLoc()) { + if ((ValVT == MVT::f32 && LocVT == MVT::i32) || + (ValVT == MVT::f64 && LocVT == MVT::i64)) + Arg = DAG.getNode(ISD::BITCAST, dl, LocVT, Arg); + else if (ValVT == MVT::f64 && LocVT == MVT::i32) { SDValue Lo = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, Arg, DAG.getConstant(0, MVT::i32)); SDValue Hi = DAG.getNode(MipsISD::ExtractElementF64, dl, MVT::i32, @@ -2010,13 +2012,13 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, } break; case CCValAssign::SExt: - Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, LocVT, Arg); break; case CCValAssign::ZExt: - Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, LocVT, Arg); break; case CCValAssign::AExt: - Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg); + Arg = DAG.getNode(ISD::ANY_EXTEND, dl, LocVT, Arg); break; } @@ -2043,7 +2045,7 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, } // Create the frame index object for this incoming parameter - LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, + LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); @@ -2075,17 +2077,21 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol // node so that legalize doesn't hack it. - unsigned char OpFlag = IsPIC ? 
MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG; + unsigned char OpFlag; + bool IsPICCall = (IsN64 || IsPIC); // true if calls are translated to jalr $25 bool LoadSymAddr = false; SDValue CalleeLo; if (GlobalAddressSDNode *G = dyn_cast(Callee)) { - if (IsPIC && G->getGlobal()->hasInternalLinkage()) { - Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, - getPointerTy(), 0,MipsII:: MO_GOT); + if (IsPICCall && G->getGlobal()->hasInternalLinkage()) { + OpFlag = IsO32 ? MipsII::MO_GOT : MipsII::MO_GOT_PAGE; + unsigned char LoFlag = IsO32 ? MipsII::MO_ABS_LO : MipsII::MO_GOT_OFST; + Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, + OpFlag); CalleeLo = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), - 0, MipsII::MO_ABS_LO); + 0, LoFlag); } else { + OpFlag = IsPICCall ? MipsII::MO_GOT_CALL : MipsII::MO_NO_FLAG; Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, getPointerTy(), 0, OpFlag); } @@ -2093,34 +2099,41 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, LoadSymAddr = true; } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { + if (IsN64 || (!IsO32 && IsPIC)) + OpFlag = MipsII::MO_GOT_DISP; + else if (!IsPIC) // !N64 && static + OpFlag = MipsII::MO_NO_FLAG; + else // O32 & PIC + OpFlag = MipsII::MO_GOT_CALL; Callee = DAG.getTargetExternalSymbol(S->getSymbol(), - getPointerTy(), OpFlag); + getPointerTy(), OpFlag); LoadSymAddr = true; } SDValue InFlag; // Create nodes that load address of callee and copy it to T9 - if (IsPIC) { + if (IsPICCall) { if (LoadSymAddr) { // Load callee address - Callee = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, Callee); - SDValue LoadValue = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), Callee, - MachinePointerInfo::getGOT(), + Callee = DAG.getNode(MipsISD::WrapperPIC, dl, getPointerTy(), Callee); + SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), + Callee, MachinePointerInfo::getGOT(), false, false, 0); // Use GOT+LO if callee has internal 
linkage. if (CalleeLo.getNode()) { - SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CalleeLo); - Callee = DAG.getNode(ISD::ADD, dl, MVT::i32, LoadValue, Lo); + SDValue Lo = DAG.getNode(MipsISD::Lo, dl, getPointerTy(), CalleeLo); + Callee = DAG.getNode(ISD::ADD, dl, getPointerTy(), LoadValue, Lo); } else Callee = LoadValue; } // copy to T9 - Chain = DAG.getCopyToReg(Chain, dl, Mips::T9, Callee, SDValue(0, 0)); + unsigned T9Reg = IsN64 ? Mips::T9_64 : Mips::T9; + Chain = DAG.getCopyToReg(Chain, dl, T9Reg, Callee, SDValue(0, 0)); InFlag = Chain.getValue(1); - Callee = DAG.getRegister(Mips::T9, MVT::i32); + Callee = DAG.getRegister(T9Reg, getPointerTy()); } // Build a sequence of copy-to-reg nodes chained together with token -- cgit v1.1 From feaa4c316f125144b6978073885fbd25c8b369aa Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 28 Oct 2011 19:55:48 +0000 Subject: Make changes necessary in LowerFormalArguments to support Mips64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143218 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 86e3bc8..31378a7 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2266,6 +2266,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; + EVT ValVT = VA.getValVT(); // Arguments stored on registers if (VA.isRegLoc()) { @@ -2300,23 +2301,22 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, Opcode = ISD::AssertZext; if (Opcode) ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue, - DAG.getValueType(VA.getValVT())); - ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); + DAG.getValueType(ValVT)); + ArgValue = 
DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue); } - // Handle O32 ABI cases: i32->f32 and (i32,i32)->f64 - if (IsO32) { - if (RegVT == MVT::i32 && VA.getValVT() == MVT::f32) - ArgValue = DAG.getNode(ISD::BITCAST, dl, MVT::f32, ArgValue); - if (RegVT == MVT::i32 && VA.getValVT() == MVT::f64) { - unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), - getNextIntArgReg(ArgReg), RC); - SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT); - if (!Subtarget->isLittle()) - std::swap(ArgValue, ArgValue2); - ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, - ArgValue, ArgValue2); - } + // Handle floating point arguments passed in integer registers. + if ((RegVT == MVT::i32 && ValVT == MVT::f32) || + (RegVT == MVT::i64 && ValVT == MVT::f64)) + ArgValue = DAG.getNode(ISD::BITCAST, dl, ValVT, ArgValue); + else if (IsO32 && RegVT == MVT::i32 && ValVT == MVT::f64) { + unsigned Reg2 = AddLiveIn(DAG.getMachineFunction(), + getNextIntArgReg(ArgReg), RC); + SDValue ArgValue2 = DAG.getCopyFromReg(Chain, dl, Reg2, RegVT); + if (!Subtarget->isLittle()) + std::swap(ArgValue, ArgValue2); + ArgValue = DAG.getNode(MipsISD::BuildPairF64, dl, MVT::f64, + ArgValue, ArgValue2); } InVals.push_back(ArgValue); @@ -2343,12 +2343,12 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, } // The stack pointer offset is relative to the caller stack frame. 
- LastFI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8, + LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); // Create load nodes to retrieve arguments from the stack SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); - InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, + InVals.push_back(DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo::getFixedStack(LastFI), false, false, 0)); } -- cgit v1.1 From b3727fe3ec3b3a718935a6d6c6561e9a58a14546 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Fri, 28 Oct 2011 20:43:24 +0000 Subject: Specify that the high bit of the alignment field is fixed to 0 on these instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143220 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 75418aa..3023a34 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -389,7 +389,7 @@ multiclass VLD1D3WB op7_4, string Dt> { "vld1", Dt, "$Vd, $Rn!", "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. - let Inst{5-4} = Rn{5-4}; + let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbFixed"; } @@ -397,7 +397,7 @@ multiclass VLD1D3WB op7_4, string Dt> { (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; + let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLDInstruction"; let AsmMatchConverter = "cvtVLDwbRegister"; } -- cgit v1.1 From 89a633708542de5847e807f98f86edfefc9fc019 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 28 Oct 2011 22:36:30 +0000 Subject: Add Thumb2 alias for "mov Rd, #imm" to "mvn Rd, #~imm". When '~imm' is encodable as a t2_so_imm but plain 'imm' is not. 
For example, mov r2, #-3 becomes mvn r2, #2 rdar://10349224 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143235 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 14 +++++++++++++- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 16 ++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 9a48b18..d065107 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -76,10 +76,16 @@ def t2_so_imm : Operand, ImmLeaf and InstAlias<>). The actual encoding +// is handled by the destination instructions, which use t2_so_imm. +def t2_so_imm_not_asmoperand : AsmOperandClass { let Name = "T2SOImmNot"; } def t2_so_imm_not : Operand, PatLeaf<(imm), [{ return ARM_AM::getT2SOImmVal(~((uint32_t)N->getZExtValue())) != -1; -}], t2_so_imm_not_XFORM>; +}], t2_so_imm_not_XFORM> { + let ParserMatchClass = t2_so_imm_not_asmoperand; +} // t2_so_imm_neg - Match an immediate that is a negation of a t2_so_imm. def t2_so_imm_neg : Operand, @@ -4066,3 +4072,9 @@ def : t2InstAlias<"sxtb16${p} $Rd, $Rm$rot", (t2SXTB16 rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; def : t2InstAlias<"sxth${p} $Rd, $Rm$rot", (t2SXTH rGPR:$Rd, rGPR:$Rm, rot_imm:$rot, pred:$p)>; + + +// "mov Rd, t2_so_imm_not" can be handled via "mvn" in assembly, just like +// for isel. 
+def : t2InstAlias<"mov${p} $Rd, $imm", + (t2MVNi rGPR:$Rd, t2_so_imm_not:$imm, pred:$p, zero_reg)>; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index dbdce29..ad5f061 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -665,6 +665,14 @@ public: int64_t Value = CE->getValue(); return ARM_AM::getT2SOImmVal(Value) != -1; } + bool isT2SOImmNot() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getT2SOImmVal(~Value) != -1; + } bool isSetEndImm() const { if (Kind != k_Immediate) return false; @@ -1241,6 +1249,14 @@ public: addExpr(Inst, getImm()); } + void addT2SOImmNotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually a t2_so_imm, but we have its bitwise + // negation in the assembly source, so twiddle it here. + const MCConstantExpr *CE = dyn_cast(getImm()); + Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); + } + void addSetEndImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); -- cgit v1.1 From e70ec8463720b5990f0d1ab8d9b6ab56ca1d01c3 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 28 Oct 2011 22:50:54 +0000 Subject: ARM mode 'mov' to 'mvn' assembler alias. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143237 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 16 ++++++++++++++-- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 16 ++++++++++++++++ 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 9373972..c08c363 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -258,10 +258,16 @@ def so_imm_neg : return ARM_AM::getSOImmVal(-(uint32_t)N->getZExtValue()) != -1; }], so_imm_neg_XFORM>; +// Note: this pattern doesn't require an encoder method and such, as it's +// only used on aliases (Pat<> and InstAlias<>). The actual encoding +// is handled by the destination instructions, which use t2_so_imm. +def so_imm_not_asmoperand : AsmOperandClass { let Name = "ARMSOImmNot"; } def so_imm_not : - PatLeaf<(imm), [{ + Operand, PatLeaf<(imm), [{ return ARM_AM::getSOImmVal(~(uint32_t)N->getZExtValue()) != -1; - }], so_imm_not_XFORM>; + }], so_imm_not_XFORM> { + let ParserMatchClass = so_imm_not_asmoperand; +} // sext_16_node predicate - True if the SDNode is sign-extended 16 or more bits. def sext_16_node : PatLeaf<(i32 GPR:$a), [{ @@ -4988,3 +4994,9 @@ def : MnemonicAlias<"usubaddx", "usax">; // (LDRHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>; //def : InstAlias<"ldrsht${p} $Rt, $addr", // (LDRSHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>; + + +// "mov Rd, so_imm_not" can be handled via "mvn" in assembly, just like +// for isel. 
+def : ARMInstAlias<"mov${s}${p} $Rd, $imm", + (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index ad5f061..03fba5a 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -657,6 +657,14 @@ public: int64_t Value = CE->getValue(); return ARM_AM::getSOImmVal(Value) != -1; } + bool isARMSOImmNot() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return ARM_AM::getSOImmVal(~Value) != -1; + } bool isT2SOImm() const { if (Kind != k_Immediate) return false; @@ -1257,6 +1265,14 @@ public: Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); } + void addARMSOImmNotOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + // The operand is actually a so_imm, but we have its bitwise + // negation in the assembly source, so twiddle it here. + const MCConstantExpr *CE = dyn_cast(getImm()); + Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); + } + void addSetEndImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); -- cgit v1.1 From 6f3ddef7c51f03945644ad0e69068dfb24d4b092 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Sat, 29 Oct 2011 00:41:52 +0000 Subject: Revert r143206, as there are still some failing tests. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143262 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 772 +++++++++++++++++-------- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 1 + lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 165 +----- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 - lib/Target/ARM/ARMISelLowering.cpp | 19 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 4 +- lib/Target/X86/X86ISelLowering.cpp | 194 +------ lib/Target/XCore/XCoreISelLowering.cpp | 9 - 8 files changed, 532 insertions(+), 648 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7425669..a8bccda 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -46,18 +46,37 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. /// namespace { -class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { +class SelectionDAGLegalize { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - /// LegalizePosition - The iterator for walking through the node list. - SelectionDAG::allnodes_iterator LegalizePosition; + // Libcall insertion helpers. - /// LegalizedNodes - The set of nodes which have already been legalized. - SmallPtrSet LegalizedNodes; + /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been + /// legalized. We use this to ensure that calls are properly serialized + /// against each other, including inserted libcalls. + SDValue LastCALLSEQ_END; - // Libcall insertion helpers. + /// IsLegalizingCall - This member is used *only* for purposes of providing + /// helpful assertions that a libcall isn't created while another call is + /// being legalized (which could lead to non-serialized call sequences). 
+ bool IsLegalizingCall; + + /// LegalizedNodes - For nodes that are of legal width, and that have more + /// than one use, this map indicates what regularized operand to use. This + /// allows us to avoid legalizing the same thing more than once. + DenseMap LegalizedNodes; + + void AddLegalizedOperand(SDValue From, SDValue To) { + LegalizedNodes.insert(std::make_pair(From, To)); + // If someone requests legalization of the new node, return itself. + if (From != To) + LegalizedNodes.insert(std::make_pair(To, To)); + + // Transfer SDDbgValues. + DAG.TransferDbgValues(From, To); + } public: explicit SelectionDAGLegalize(SelectionDAG &DAG); @@ -65,8 +84,9 @@ public: void LegalizeDAG(); private: - /// LegalizeOp - Legalizes the given operation. - void LegalizeOp(SDNode *Node); + /// LegalizeOp - Return a legal replacement for the given operation, with + /// all legal operands. + SDValue LegalizeOp(SDValue O); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -87,6 +107,9 @@ private: SDValue N1, SDValue N2, SmallVectorImpl &Mask) const; + bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, + SmallPtrSet &NodesLeadingTo); + void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); @@ -127,21 +150,10 @@ private: SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); - SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); - std::pair ExpandAtomic(SDNode *Node); - void ExpandNode(SDNode *Node); - void PromoteNode(SDNode *Node); - - // DAGUpdateListener implementation. 
- virtual void NodeDeleted(SDNode *N, SDNode *E) { - LegalizedNodes.erase(N); - if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) - ++LegalizePosition; - } - - virtual void NodeUpdated(SDNode *N) {} + void ExpandNode(SDNode *Node, SmallVectorImpl &Results); + void PromoteNode(SDNode *Node, SmallVectorImpl &Results); }; } @@ -183,37 +195,145 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) } void SelectionDAGLegalize::LegalizeDAG() { + LastCALLSEQ_END = DAG.getEntryNode(); + IsLegalizingCall = false; + + // The legalize process is inherently a bottom-up recursive process (users + // legalize their uses before themselves). Given infinite stack space, we + // could just start legalizing on the root and traverse the whole graph. In + // practice however, this causes us to run out of stack space on large basic + // blocks. To avoid this problem, compute an ordering of the nodes where each + // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) + LegalizeOp(SDValue(I, 0)); - // Visit all the nodes. We start in topological order, so that we see - // nodes with their original operands intact. Legalization can produce - // new nodes which may themselves need to be legalized. Iterate until all - // nodes have been legalized. - for (;;) { - bool AnyLegalized = false; - for (LegalizePosition = DAG.allnodes_end(); - LegalizePosition != DAG.allnodes_begin(); ) { - --LegalizePosition; - - SDNode *N = LegalizePosition; - if (LegalizedNodes.insert(N)) { - AnyLegalized = true; - LegalizeOp(N); - } + // Finally, it's possible the root changed. Get the new root. + SDValue OldRoot = DAG.getRoot(); + assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); + DAG.setRoot(LegalizedNodes[OldRoot]); + + LegalizedNodes.clear(); + + // Remove dead nodes now. 
+ DAG.RemoveDeadNodes(); +} + + +/// FindCallEndFromCallStart - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_END node that terminates the call sequence. +static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { + // Nested CALLSEQ_START/END constructs aren't yet legal, + // but we can DTRT and handle them correctly here. + if (Node->getOpcode() == ISD::CALLSEQ_START) + depth++; + else if (Node->getOpcode() == ISD::CALLSEQ_END) { + depth--; + if (depth == 0) + return Node; + } + if (Node->use_empty()) + return 0; // No CallSeqEnd + + // The chain is usually at the end. + SDValue TheChain(Node, Node->getNumValues()-1); + if (TheChain.getValueType() != MVT::Other) { + // Sometimes it's at the beginning. + TheChain = SDValue(Node, 0); + if (TheChain.getValueType() != MVT::Other) { + // Otherwise, hunt for it. + for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) + if (Node->getValueType(i) == MVT::Other) { + TheChain = SDValue(Node, i); + break; + } + + // Otherwise, we walked into a node without a chain. + if (TheChain.getValueType() != MVT::Other) + return 0; } - if (!AnyLegalized) + } + + for (SDNode::use_iterator UI = Node->use_begin(), + E = Node->use_end(); UI != E; ++UI) { + + // Make sure to only follow users of our token chain. + SDNode *User = *UI; + for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) + if (User->getOperand(i) == TheChain) + if (SDNode *Result = FindCallEndFromCallStart(User, depth)) + return Result; + } + return 0; +} + +/// FindCallStartFromCallEnd - Given a chained node that is part of a call +/// sequence, find the CALLSEQ_START node that initiates the call sequence. 
+static SDNode *FindCallStartFromCallEnd(SDNode *Node) { + int nested = 0; + assert(Node && "Didn't find callseq_start for a call??"); + while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { + Node = Node->getOperand(0).getNode(); + assert(Node->getOperand(0).getValueType() == MVT::Other && + "Node doesn't have a token chain argument!"); + switch (Node->getOpcode()) { + default: break; + case ISD::CALLSEQ_START: + if (!nested) + return Node; + nested--; + break; + case ISD::CALLSEQ_END: + nested++; + break; + } + } + return 0; +} + +/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to +/// see if any uses can reach Dest. If no dest operands can get to dest, +/// legalize them, legalize ourself, and return false, otherwise, return true. +/// +/// Keep track of the nodes we find that actually do lead to Dest in +/// NodesLeadingTo. This avoids retraversing them an exponential number of times. +/// +bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, + SmallPtrSet &NodesLeadingTo) { + if (N == Dest) return true; // N certainly leads to Dest :) + + // If we've already processed this node and it does lead to Dest, there is no + // need to reprocess it. + if (NodesLeadingTo.count(N)) return true; + // If the first result of this node has been already legalized, then it cannot + // reach N. + if (LegalizedNodes.count(SDValue(N, 0))) return false; + + // Okay, this node has not already been legalized. Check and legalize all + // operands. If none lead to Dest, then we can legalize this node. + bool OperandsLeadToDest = false; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + OperandsLeadToDest |= // If an operand leads to Dest, so do we. + LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, + NodesLeadingTo); + + if (OperandsLeadToDest) { + NodesLeadingTo.insert(N); + return true; } - // Remove dead nodes now. 
- DAG.RemoveDeadNodes(); + // Okay, this node looks safe, legalize it and return false. + LegalizeOp(SDValue(N, 0)); + return false; } /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or /// a load from the constant pool. -SDValue -SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { +static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, + SelectionDAG &DAG, const TargetLowering &TLI) { bool Extend = false; DebugLoc dl = CFP->getDebugLoc(); @@ -249,25 +369,20 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); - if (Extend) { - SDValue Result = - DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); - return Result; - } - SDValue Result = - DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, - Alignment); - return Result; + if (Extend) + return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, + DAG.getEntryNode(), + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); + return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), false, false, + Alignment); } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. -static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, - const TargetLowering &TLI, - SelectionDAG::DAGUpdateListener *DUL) { +static +SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -282,10 +397,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! 
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); - Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), Alignment); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - return; + return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); } // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -345,11 +458,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. - SDValue Result = - DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - return; + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -378,16 +488,13 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); - SDValue Result = - DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. 
-static void -ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, - const TargetLowering &TLI, - SDValue &ValResult, SDValue &ChainResult) { +static +SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI) { SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -405,9 +512,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); - ValResult = Result; - ChainResult = Chain; - return; + SDValue Ops[] = { Result, Chain }; + return DAG.getMergeValues(Ops, 2, dl); } // Copy the value to a (aligned) stack slot using (unaligned) integer @@ -466,9 +572,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, MachinePointerInfo(), LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. - ValResult = Load; - ChainResult = TF; - return; + SDValue Ops[] = { Load, TF }; + return DAG.getMergeValues(Ops, 2, dl); } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -521,8 +626,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - ValResult = Result; - ChainResult = TF; + SDValue Ops[] = { Result, TF }; + return DAG.getMergeValues(Ops, 2, dl); } /// PerformInsertVectorEltInMemory - Some target cannot handle a variable @@ -658,10 +763,11 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. -void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { - if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. - return; +SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { + if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. 
+ return Op; + SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -676,7 +782,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + DenseMap::iterator I = LegalizedNodes.find(Op); + if (I != LegalizedNodes.end()) return I->second; + SDValue Tmp1, Tmp2, Tmp3, Tmp4; + SDValue Result = Op; bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode @@ -770,6 +882,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; + case ISD::BUILD_VECTOR: + // A weird case: legalization for BUILD_VECTOR never legalizes the + // operands! + // FIXME: This really sucks... changing it isn't semantically incorrect, + // but it massively pessimizes the code for floating-point BUILD_VECTORs + // because ConstantFP operands get legalized into constant pool loads + // before the BUILD_VECTOR code can see them. It doesn't usually bite, + // though, because BUILD_VECTORS usually get lowered into other nodes + // which get legalized properly. 
+ SimpleFinishLegalizing = false; + break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -780,11 +903,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } if (SimpleFinishLegalizing) { - SmallVector Ops; + SmallVector Ops, ResultVals; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - Ops.push_back(Node->getOperand(i)); + Ops.push_back(LegalizeOp(Node->getOperand(i))); switch (Node->getOpcode()) { default: break; + case ISD::BR: + case ISD::BRIND: + case ISD::BR_JT: + case ISD::BR_CC: + case ISD::BRCOND: + // Branches tweak the chain to include LastCALLSEQ_END + Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], + LastCALLSEQ_END); + Ops[0] = LegalizeOp(Ops[0]); + LastCALLSEQ_END = DAG.getEntryNode(); + break; case ISD::SHL: case ISD::SRL: case ISD::SRA: @@ -792,66 +926,57 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[1].getValueType().isVector()) { - SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]); - HandleSDNode Handle(SAO); - LegalizeOp(SAO.getNode()); - Ops[1] = Handle.getValue(); - } + if (!Ops[1].getValueType().isVector()) + Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[1])); break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. 
- if (!Ops[2].getValueType().isVector()) { - SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]); - HandleSDNode Handle(SAO); - LegalizeOp(SAO.getNode()); - Ops[2] = Handle.getValue(); - } + if (!Ops[2].getValueType().isVector()) + Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), + Ops[2])); break; } - SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); - if (NewNode != Node) { - DAG.ReplaceAllUsesWith(Node, NewNode, this); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); - DAG.RemoveDeadNode(Node, this); - Node = NewNode; - } + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), + Ops.size()), 0); switch (Action) { case TargetLowering::Legal: - return; + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + ResultVals.push_back(Result.getValue(i)); + break; case TargetLowering::Custom: // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
- Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); + Tmp1 = TLI.LowerOperation(Result, DAG); if (Tmp1.getNode()) { - SmallVector ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) ResultVals.push_back(Tmp1); else ResultVals.push_back(Tmp1.getValue(i)); } - if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { - DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); - DAG.RemoveDeadNode(Node, this); - } - return; + break; } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Node); - return; + ExpandNode(Result.getNode(), ResultVals); + break; case TargetLowering::Promote: - PromoteNode(Node); - return; + PromoteNode(Result.getNode(), ResultVals); + break; + } + if (!ResultVals.empty()) { + for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { + if (ResultVals[i] != SDValue(Node, i)) + ResultVals[i] = LegalizeOp(ResultVals[i]); + AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); + } + return ResultVals[Op.getResNo()]; } } @@ -864,20 +989,155 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #endif assert(0 && "Do not know how to legalize this operator!"); - case ISD::CALLSEQ_START: - case ISD::CALLSEQ_END: + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. 
+ EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Scalars[0], Scalars.size()); + break; + } + + case ISD::BUILD_VECTOR: + switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { + default: assert(0 && "This action is not supported yet!"); + case TargetLowering::Custom: + Tmp3 = TLI.LowerOperation(Result, DAG); + if (Tmp3.getNode()) { + Result = Tmp3; + break; + } + // FALLTHROUGH + case TargetLowering::Expand: + Result = ExpandBUILD_VECTOR(Result.getNode()); + break; + } break; + case ISD::CALLSEQ_START: { + SDNode *CallEnd = FindCallEndFromCallStart(Node); + + // Recursively Legalize all of the inputs of the call end that do not lead + // to this call start. This ensures that any libcalls that need be inserted + // are inserted *before* the CALLSEQ_START. + {SmallPtrSet NodesLeadingTo; + for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) + LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, + NodesLeadingTo); + } + + // Now that we have legalized all of the inputs (which may have inserted + // libcalls), create the new CALLSEQ_START node. + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. + + // Merge in the last call to ensure that this call starts after the last + // call ended. 
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { + Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, + Tmp1, LastCALLSEQ_END); + Tmp1 = LegalizeOp(Tmp1); + } + + // Do not try to legalize the target-specific arguments (#1+). + if (Tmp1 != Node->getOperand(0)) { + SmallVector Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], + Ops.size()), Result.getResNo()); + } + + // Remember that the CALLSEQ_START is legalized. + AddLegalizedOperand(Op.getValue(0), Result); + if (Node->getNumValues() == 2) // If this has a flag result, remember it. + AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); + + // Now that the callseq_start and all of the non-call nodes above this call + // sequence have been legalized, legalize the call itself. During this + // process, no libcalls can/will be inserted, guaranteeing that no calls + // can overlap. + assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); + // Note that we are selecting this call! + LastCALLSEQ_END = SDValue(CallEnd, 0); + IsLegalizingCall = true; + + // Legalize the call, starting from the CALLSEQ_END. + LegalizeOp(LastCALLSEQ_END); + assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); + return Result; + } + case ISD::CALLSEQ_END: + // If the CALLSEQ_START node hasn't been legalized first, legalize it. This + // will cause this node to be legalized as well as handling libcalls right. + if (LastCALLSEQ_END.getNode() != Node) { + LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); + DenseMap::iterator I = LegalizedNodes.find(Op); + assert(I != LegalizedNodes.end() && + "Legalizing the call start should have legalized this node!"); + return I->second; + } + + // Otherwise, the call start has been legalized and everything is going + // according to plan. Just legalize ourselves normally here. + Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. 
+ // Do not try to legalize the target-specific arguments (#1+), except for + // an optional flag input. + if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ + if (Tmp1 != Node->getOperand(0)) { + SmallVector Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); + } + } else { + Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); + if (Tmp1 != Node->getOperand(0) || + Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { + SmallVector Ops(Node->op_begin(), Node->op_end()); + Ops[0] = Tmp1; + Ops.back() = Tmp2; + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); + } + } + assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); + // This finishes up call legalization. + IsLegalizingCall = false; + + // If the CALLSEQ_END node has a flag, remember that we legalized it. + AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); + if (Node->getNumValues() == 2) + AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); + return Result.getValue(Op.getResNo()); case ISD::LOAD: { LoadSDNode *LD = cast(Node); - Tmp1 = LD->getChain(); // Legalize the chain. - Tmp2 = LD->getBasePtr(); // Legalize the base pointer. + Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. 
ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp2, LD->getOffset()); - Tmp3 = SDValue(Node, 0); - Tmp4 = SDValue(Node, 1); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); + Tmp3 = Result.getValue(0); + Tmp4 = Result.getValue(1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: assert(0 && "This action is not supported yet!"); @@ -888,16 +1148,20 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast(Node), - DAG, TLI, Tmp3, Tmp4); + Result = ExpandUnalignedLoad(cast(Result.getNode()), + DAG, TLI); + Tmp3 = Result.getOperand(0); + Tmp4 = Result.getOperand(1); + Tmp3 = LegalizeOp(Tmp3); + Tmp4 = LegalizeOp(Tmp4); } } break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(Tmp3, DAG); if (Tmp1.getNode()) { - Tmp3 = Tmp1; - Tmp4 = Tmp1.getValue(1); + Tmp3 = LegalizeOp(Tmp1); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); } break; case TargetLowering::Promote: { @@ -909,16 +1173,16 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); - Tmp4 = Tmp1.getValue(1); + Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); + Tmp4 = LegalizeOp(Tmp1.getValue(1)); break; } } // Since loads produce two values, make sure to remember that we // legalized both of them. - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); - return; + AddLegalizedOperand(SDValue(Node, 0), Tmp3); + AddLegalizedOperand(SDValue(Node, 1), Tmp4); + return Op.getResNo() ? 
Tmp4 : Tmp3; } EVT SrcVT = LD->getMemoryVT(); @@ -949,10 +1213,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - SDValue Result = - DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -967,8 +1230,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); - Tmp1 = Result; - Tmp2 = Ch; + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); @@ -1011,7 +1274,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. - Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } else { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 @@ -1041,10 +1304,11 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. 
- Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } - Tmp2 = Ch; + Tmp1 = LegalizeOp(Result); + Tmp2 = LegalizeOp(Ch); } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1052,16 +1316,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Node = DAG.UpdateNodeOperands(Node, - Tmp1, Tmp2, LD->getOffset()); - Tmp1 = SDValue(Node, 0); - Tmp2 = SDValue(Node, 1); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); + Tmp1 = Result.getValue(0); + Tmp2 = Result.getValue(1); if (isCustom) { - Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); + Tmp3 = TLI.LowerOperation(Result, DAG); if (Tmp3.getNode()) { - Tmp1 = Tmp3; - Tmp2 = Tmp3.getValue(1); + Tmp1 = LegalizeOp(Tmp3); + Tmp2 = LegalizeOp(Tmp3.getValue(1)); } } else { // If this is an unaligned load and the target doesn't support it, @@ -1072,8 +1337,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - ExpandUnalignedLoad(cast(Node), - DAG, TLI, Tmp1, Tmp2); + Result = ExpandUnalignedLoad(cast(Result.getNode()), + DAG, TLI); + Tmp1 = Result.getOperand(0); + Tmp2 = Result.getOperand(1); + Tmp1 = LegalizeOp(Tmp1); + Tmp2 = LegalizeOp(Tmp2); } } } @@ -1094,8 +1363,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; default: llvm_unreachable("Unexpected extend load type!"); } - Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp2 = Load.getValue(1); + Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp1 = LegalizeOp(Result); // Relegalize new nodes. 
+ Tmp2 = LegalizeOp(Load.getValue(1)); break; } @@ -1110,10 +1380,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. - SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1121,37 +1391,38 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Result, DAG.getValueType(SrcVT)); else ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = ValRes; - Tmp2 = Result.getValue(1); + Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. + Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. break; } } // Since loads produce two values, make sure to remember that we legalized // both of them. - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); - break; + AddLegalizedOperand(SDValue(Node, 0), Tmp1); + AddLegalizedOperand(SDValue(Node, 1), Tmp2); + return Op.getResNo() ? Tmp2 : Tmp1; } case ISD::STORE: { StoreSDNode *ST = cast(Node); - Tmp1 = ST->getChain(); - Tmp2 = ST->getBasePtr(); + Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. + Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. 
unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - DAG.ReplaceAllUsesWith(ST, OptStore, this); + Result = SDValue(OptStore, 0); break; } { - Tmp3 = ST->getValue(); - Node = DAG.UpdateNodeOperands(Node, - Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Tmp3 = LegalizeOp(ST->getValue()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { @@ -1163,31 +1434,27 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast(Node), - DAG, TLI, this); + Result = ExpandUnalignedStore(cast(Result.getNode()), + DAG, TLI); } break; case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Tmp1, this); + Tmp1 = TLI.LowerOperation(Result, DAG); + if (Tmp1.getNode()) Result = Tmp1; break; - case TargetLowering::Promote: { + case TargetLowering::Promote: assert(VT.isVector() && "Unknown legal promote case!"); Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - SDValue Result = - DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); break; } - } break; } } else { - Tmp3 = ST->getValue(); + Tmp3 = LegalizeOp(ST->getValue()); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -1199,10 +1466,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode 
*Node) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - SDValue Result = - DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1256,13 +1521,14 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } // The order of the stores doesn't matter. - SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) - Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1273,13 +1539,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - ExpandUnalignedStore(cast(Node), DAG, TLI, this); + Result = ExpandUnalignedStore(cast(Result.getNode()), + DAG, TLI); } break; case TargetLowering::Custom: - DAG.ReplaceAllUsesWith(SDValue(Node, 0), - TLI.LowerOperation(SDValue(Node, 0), DAG), - this); + Result = TLI.LowerOperation(Result, DAG); break; case TargetLowering::Expand: assert(!StVT.isVector() && @@ -1288,10 +1553,8 @@ void 
SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // TRUNCSTORE:i16 i32 -> STORE i16 assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - SDValue Result = - DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); break; } } @@ -1299,6 +1562,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; } } + assert(Result.getValueType() == Op.getValueType() && + "Bad legalization!"); + + // Make sure that the generated code is itself legal. + if (Result != Op) + Result = LegalizeOp(Result); + + // Note that LegalizeOp may be reentered even from single-use nodes, which + // means that we always must cache transformed nodes. + AddLegalizedOperand(Op, Result); + return Result; } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { @@ -1737,6 +2011,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -1755,6 +2030,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); + // Splice the libcall in wherever FindInputOutputChains tells us to. 
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); // isTailCall may be true since the callee does not reference caller stack @@ -1770,6 +2046,10 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); return CallInfo.first; } @@ -1799,6 +2079,11 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + return CallInfo.first; } @@ -1808,6 +2093,7 @@ std::pair SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -1824,6 +2110,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); + // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, @@ -1831,6 +2118,10 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); + // Legalize the call sequence, starting with the chain. 
This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); return CallInfo; } @@ -1956,14 +2247,20 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); + // Splice the libcall in wherever FindInputOutputChains tells us to. DebugLoc dl = Node->getDebugLoc(); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). + LegalizeOp(CallInfo.second); + // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, + SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, MachinePointerInfo(), false, false, 0); Results.push_back(CallInfo.first); Results.push_back(Rem); @@ -2155,13 +2452,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, MachinePointerInfo::getConstantPool(), false, false, Alignment); else { - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment); - HandleSDNode Handle(Load); - LegalizeOp(Load.getNode()); - FudgeInReg = Handle.getValue(); + FudgeInReg = + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment)); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2485,8 +2780,8 @@ std::pair SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { return ExpandChainLibCall(LC, 
Node, false); } -void SelectionDAGLegalize::ExpandNode(SDNode *Node) { - SmallVector Results; +void SelectionDAGLegalize::ExpandNode(SDNode *Node, + SmallVectorImpl &Results) { DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3, Tmp4; switch (Node->getOpcode()) { @@ -2934,8 +3229,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { ConstantFPSDNode *CFP = cast(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. - if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) - Results.push_back(ExpandConstantFP(CFP, true)); + if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + Results.push_back(SDValue(Node, 0)); + else + Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI)); break; } case ISD::EHSELECTION: { @@ -3181,10 +3478,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); - // Ret is a node with an illegal type. Because such things are not - // generally permitted during this phase of legalization, delete the - // node. The above EXTRACT_ELEMENT nodes should have been folded. - DAG.DeleteNode(Ret.getNode()); } if (isSigned) { @@ -3325,6 +3618,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); + LastCALLSEQ_END = DAG.getEntryNode(); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3334,35 +3628,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(Tmp1); break; } - case ISD::BUILD_VECTOR: - Results.push_back(ExpandBUILD_VECTOR(Node)); - break; - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: { - // Scalarize vector SRA/SRL/SHL. 
- EVT VT = Node->getValueType(0); - assert(VT.isVector() && "Unable to legalize non-vector shift"); - assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); - unsigned NumElem = VT.getVectorNumElements(); - - SmallVector Scalars; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); - SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); - Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, - VT.getScalarType(), Ex, Sh)); - } - SDValue Result = - DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - break; - } case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3373,16 +3638,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + Results.push_back(SDValue(Node, i)); break; } - - // Replace the original node with the legalized result. - if (!Results.empty()) - DAG.ReplaceAllUsesWith(Node, Results.data(), this); } - -void SelectionDAGLegalize::PromoteNode(SDNode *Node) { - SmallVector Results; +void SelectionDAGLegalize::PromoteNode(SDNode *Node, + SmallVectorImpl &Results) { EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || @@ -3510,10 +3772,6 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { break; } } - - // Replace the original node with the legalized result. - if (!Results.empty()) - DAG.ReplaceAllUsesWith(Node, Results.data(), this); } // SelectionDAG::Legalize - This is the entry point for the file. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 7ed1b98..a4bb577 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1084,6 +1084,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); + // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index b8cf998..a1abdb4 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -315,10 +315,8 @@ void ScheduleDAGRRList::Schedule() { IssueCount = 0; MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; - // Allocate slots for each physical register, plus one for a special register - // to track the virtual resource of a calling sequence. - LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); - LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); + LiveRegDefs.resize(TRI->getNumRegs(), NULL); + LiveRegGens.resize(TRI->getNumRegs(), NULL); // Build the scheduling graph. BuildSchedGraph(NULL); @@ -388,90 +386,6 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { } } -/// IsChainDependent - Test if Outer is reachable from Inner through -/// chain dependencies. 
-static bool IsChainDependent(SDNode *Outer, SDNode *Inner) { - SDNode *N = Outer; - for (;;) { - if (N == Inner) - return true; - if (N->getOpcode() == ISD::TokenFactor) { - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (IsChainDependent(N->getOperand(i).getNode(), Inner)) - return true; - return false; - } - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i).getValueType() == MVT::Other) { - N = N->getOperand(i).getNode(); - goto found_chain_operand; - } - return false; - found_chain_operand:; - if (N->getOpcode() == ISD::EntryToken) - return false; - } -} - -/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate -/// the corresponding (lowered) CALLSEQ_BEGIN node. -/// -/// NestLevel and MaxNested are used in recursion to indcate the current level -/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum -/// level seen so far. -/// -/// TODO: It would be better to give CALLSEQ_END an explicit operand to point -/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. -static SDNode * -FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, - const TargetInstrInfo *TII) { - for (;;) { - // For a TokenFactor, examine each operand. There may be multiple ways - // to get to the CALLSEQ_BEGIN, but we need to find the path with the - // most nesting in order to ensure that we find the corresponding match. - if (N->getOpcode() == ISD::TokenFactor) { - SDNode *Best = 0; - unsigned BestMaxNest = MaxNest; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - unsigned MyNestLevel = NestLevel; - unsigned MyMaxNest = MaxNest; - if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(), - MyNestLevel, MyMaxNest, TII)) - if (!Best || (MyMaxNest > BestMaxNest)) { - Best = New; - BestMaxNest = MyMaxNest; - } - } - assert(Best); - MaxNest = BestMaxNest; - return Best; - } - // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. 
- if (N->isMachineOpcode()) { - if (N->getMachineOpcode() == - (unsigned)TII->getCallFrameDestroyOpcode()) { - ++NestLevel; - MaxNest = std::max(MaxNest, NestLevel); - } else if (N->getMachineOpcode() == - (unsigned)TII->getCallFrameSetupOpcode()) { - --NestLevel; - if (NestLevel == 0) - return N; - } - } - // Otherwise, find the chain and continue climbing. - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (N->getOperand(i).getValueType() == MVT::Other) { - N = N->getOperand(i).getNode(); - goto found_chain_operand; - } - return 0; - found_chain_operand:; - if (N->getOpcode() == ISD::EntryToken) - return 0; - } -} - /// Call ReleasePred for each predecessor, then update register live def/gen. /// Always update LiveRegDefs for a register dependence even if the current SU /// also defines the register. This effectively create one large live range @@ -509,25 +423,6 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } } - - // If we're scheduling a lowered CALLSEQ_END, find the corresponding CALLSEQ_BEGIN. - // Inject an artificial physical register dependence between these nodes, to - // prevent other calls from being interscheduled with them. - unsigned CallResource = TRI->getNumRegs(); - if (!LiveRegDefs[CallResource]) - for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) - if (Node->isMachineOpcode() && - Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { - unsigned NestLevel = 0; - unsigned MaxNest = 0; - SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); - - SUnit *Def = &SUnits[N->getNodeId()]; - ++NumLiveRegs; - LiveRegDefs[CallResource] = Def; - LiveRegGens[CallResource] = SU; - break; - } } /// Check to see if any of the pending instructions are ready to issue. If @@ -710,20 +605,6 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = NULL; } } - // Release the special call resource dependence, if this is the beginning - // of a call. 
- unsigned CallResource = TRI->getNumRegs(); - if (LiveRegDefs[CallResource] == SU) - for (const SDNode *SUNode = SU->getNode(); SUNode; - SUNode = SUNode->getGluedNode()) { - if (SUNode->isMachineOpcode() && - SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { - assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); - --NumLiveRegs; - LiveRegDefs[CallResource] = NULL; - LiveRegGens[CallResource] = NULL; - } - } resetVRegCycle(SU); @@ -780,33 +661,6 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { } } - // Reclaim the special call resource dependence, if this is the beginning - // of a call. - unsigned CallResource = TRI->getNumRegs(); - for (const SDNode *SUNode = SU->getNode(); SUNode; - SUNode = SUNode->getGluedNode()) { - if (SUNode->isMachineOpcode() && - SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { - ++NumLiveRegs; - LiveRegDefs[CallResource] = SU; - LiveRegGens[CallResource] = NULL; - } - } - - // Release the special call resource dependence, if this is the end - // of a call. - if (LiveRegGens[CallResource] == SU) - for (const SDNode *SUNode = SU->getNode(); SUNode; - SUNode = SUNode->getGluedNode()) { - if (SUNode->isMachineOpcode() && - SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { - assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); - --NumLiveRegs; - LiveRegDefs[CallResource] = NULL; - LiveRegGens[CallResource] = NULL; - } - } - for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { @@ -1229,21 +1083,6 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { if (!Node->isMachineOpcode()) continue; - // If we're in the middle of scheduling a call, don't begin scheduling - // another call. Also, don't allow any physical registers to be live across - // the call. 
- if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { - // Add one here so that we include the special calling-sequence resource. - for (unsigned i = 0, e = TRI->getNumRegs() + 1; i != e; ++i) - if (LiveRegDefs[i]) { - SDNode *Gen = LiveRegGens[i]->getNode(); - while (SDNode *Glued = Gen->getGluedNode()) - Gen = Glued; - if (!IsChainDependent(Gen, Node) && RegAdded.insert(i)) - LRegs.push_back(i); - } - continue; - } const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 010a740..07d2db6 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5290,10 +5290,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } - - // If we just RAUW'd the root, take note. - if (FromN == getRoot()) - setRoot(To); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5339,10 +5335,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } - - // If we just RAUW'd the root, take note. - if (From == getRoot().getNode()) - setRoot(SDValue(To, getRoot().getResNo())); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5381,10 +5373,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } - - // If we just RAUW'd the root, take note. 
- if (From == getRoot().getNode()) - setRoot(SDValue(To[getRoot().getResNo()])); } /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving @@ -5443,10 +5431,6 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } - - // If we just RAUW'd the root, take note. - if (From == getRoot()) - setRoot(To); } namespace { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index bfc1690..31e522d 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1353,10 +1353,12 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, MVT::i32); + // TODO: Disable AlwaysInline when it becomes possible + // to emit a nested call sequence. MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, - /*AlwaysInline=*/false, + /*AlwaysInline=*/true, MachinePointerInfo(0), MachinePointerInfo(0))); @@ -4348,24 +4350,9 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If this is undef splat, generate it via "just" vdup, if possible. if (Lane == -1) Lane = 0; - // Test if V1 is a SCALAR_TO_VECTOR. if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } - // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR - // (and probably will turn into a SCALAR_TO_VECTOR once legalization - // reaches it). 
- if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && - !isa(V1.getOperand(0))) { - bool IsScalarToVector = true; - for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) - if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { - IsScalarToVector = false; - break; - } - if (IsScalarToVector) - return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); - } return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, MVT::i32)); } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 3d75de0..02b0ff2 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2114,9 +2114,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { HasNoSignedComparisonUses(Node)) // Look past the truncate if CMP is the only use of it. N0 = N0.getOperand(0); - if ((N0.getNode()->getOpcode() == ISD::AND || - (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) && - N0.getNode()->hasOneUse() && + if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && N0.getValueType() != MVT::i8 && X86::isZeroNode(N1)) { ConstantSDNode *C = dyn_cast(N0.getNode()->getOperand(1)); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 927a307..2ec0814 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4220,29 +4220,6 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { return true; } -// Test whether the given value is a vector value which will be legalized -// into a load. -static bool WillBeConstantPoolLoad(SDNode *N) { - if (N->getOpcode() != ISD::BUILD_VECTOR) - return false; - - // Check for any non-constant elements. 
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - switch (N->getOperand(i).getNode()->getOpcode()) { - case ISD::UNDEF: - case ISD::ConstantFP: - case ISD::Constant: - break; - default: - return false; - } - - // Vectors of all-zeros and all-ones are materialized with special - // instructions rather than being loaded. - return !ISD::isBuildVectorAllZeros(N) && - !ISD::isBuildVectorAllOnes(N); -} - /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to /// match movlp{s|d}. The lower half elements should come from lower half of /// V1 (and in order), and the upper half elements should come from the upper @@ -4258,7 +4235,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, return false; // Is V2 is a vector load, don't do this transformation. We will try to use // load folding shufps op. - if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2)) + if (ISD::isNON_EXTLoad(V2)) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -6374,8 +6351,6 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op)) CanFoldLoad = true; - ShuffleVectorSDNode *SVOp = cast(Op); - // Both of them can't be memory operations though. if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) CanFoldLoad = false; @@ -6385,11 +6360,10 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); if (NumElems == 4) - // If we don't care about the second element, procede to use movss. 
- if (SVOp->getMaskElt(1) != -1) - return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); + return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); } + ShuffleVectorSDNode *SVOp = cast(Op); // movl and movlp will both match v2i64, but v2i64 is never matched by // movl earlier because we make it strict to avoid messing with the movlp load // folding logic (see the code above getMOVLP call). Match it here then, @@ -8707,9 +8681,8 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. - unsigned CondOpcode = Cond.getOpcode(); - if (CondOpcode == X86ISD::SETCC || - CondOpcode == X86ISD::SETCC_CARRY) { + if (Cond.getOpcode() == X86ISD::SETCC || + Cond.getOpcode() == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -8726,39 +8699,6 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = Cmp; addTest = false; } - } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || - CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || - ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && - Cond.getOperand(0).getValueType() != MVT::i8)) { - SDValue LHS = Cond.getOperand(0); - SDValue RHS = Cond.getOperand(1); - unsigned X86Opcode; - unsigned X86Cond; - SDVTList VTs; - switch (CondOpcode) { - case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; - case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; - case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; - case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; - case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; - case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; - default: llvm_unreachable("unexpected overflowing operator"); - } - if (CondOpcode == ISD::UMULO) - 
VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), - MVT::i32); - else - VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); - - SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS); - - if (CondOpcode == ISD::UMULO) - Cond = X86Op.getValue(2); - else - Cond = X86Op.getValue(1); - - CC = DAG.getConstant(X86Cond, MVT::i8); - addTest = false; } if (addTest) { @@ -8840,27 +8780,11 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); SDValue CC; - bool Inverted = false; if (Cond.getOpcode() == ISD::SETCC) { - // Check for setcc([su]{add,sub,mul}o == 0). - if (cast(Cond.getOperand(2))->get() == ISD::SETEQ && - isa(Cond.getOperand(1)) && - cast(Cond.getOperand(1))->isNullValue() && - Cond.getOperand(0).getResNo() == 1 && - (Cond.getOperand(0).getOpcode() == ISD::SADDO || - Cond.getOperand(0).getOpcode() == ISD::UADDO || - Cond.getOperand(0).getOpcode() == ISD::SSUBO || - Cond.getOperand(0).getOpcode() == ISD::USUBO || - Cond.getOperand(0).getOpcode() == ISD::SMULO || - Cond.getOperand(0).getOpcode() == ISD::UMULO)) { - Inverted = true; - Cond = Cond.getOperand(0); - } else { - SDValue NewCond = LowerSETCC(Cond, DAG); - if (NewCond.getNode()) - Cond = NewCond; - } + SDValue NewCond = LowerSETCC(Cond, DAG); + if (NewCond.getNode()) + Cond = NewCond; } #if 0 // FIXME: LowerXALUO doesn't handle these!! @@ -8881,9 +8805,8 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. 
- unsigned CondOpcode = Cond.getOpcode(); - if (CondOpcode == X86ISD::SETCC || - CondOpcode == X86ISD::SETCC_CARRY) { + if (Cond.getOpcode() == X86ISD::SETCC || + Cond.getOpcode() == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -8904,43 +8827,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { break; } } - } - CondOpcode = Cond.getOpcode(); - if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || - CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || - ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && - Cond.getOperand(0).getValueType() != MVT::i8)) { - SDValue LHS = Cond.getOperand(0); - SDValue RHS = Cond.getOperand(1); - unsigned X86Opcode; - unsigned X86Cond; - SDVTList VTs; - switch (CondOpcode) { - case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; - case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; - case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; - case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; - case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; - case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; - default: llvm_unreachable("unexpected overflowing operator"); - } - if (Inverted) - X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond); - if (CondOpcode == ISD::UMULO) - VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), - MVT::i32); - else - VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); - - SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS); - - if (CondOpcode == ISD::UMULO) - Cond = X86Op.getValue(2); - else - Cond = X86Op.getValue(1); - - CC = DAG.getConstant(X86Cond, MVT::i8); - addTest = false; } else { unsigned CondOpc; if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) { @@ -9004,66 +8890,6 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { CC = 
DAG.getConstant(CCode, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); addTest = false; - } else if (Cond.getOpcode() == ISD::SETCC && - cast(Cond.getOperand(2))->get() == ISD::SETOEQ) { - // For FCMP_OEQ, we can emit - // two branches instead of an explicit AND instruction with a - // separate test. However, we only do this if this block doesn't - // have a fall-through edge, because this requires an explicit - // jmp when the condition is false. - if (Op.getNode()->hasOneUse()) { - SDNode *User = *Op.getNode()->use_begin(); - // Look for an unconditional branch following this conditional branch. - // We need this because we need to reverse the successors in order - // to implement FCMP_OEQ. - if (User->getOpcode() == ISD::BR) { - SDValue FalseBB = User->getOperand(1); - SDNode *NewBR = - DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); - assert(NewBR == User); - (void)NewBR; - Dest = FalseBB; - - SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, - Cond.getOperand(0), Cond.getOperand(1)); - CC = DAG.getConstant(X86::COND_NE, MVT::i8); - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); - CC = DAG.getConstant(X86::COND_P, MVT::i8); - Cond = Cmp; - addTest = false; - } - } - } else if (Cond.getOpcode() == ISD::SETCC && - cast(Cond.getOperand(2))->get() == ISD::SETUNE) { - // For FCMP_UNE, we can emit - // two branches instead of an explicit AND instruction with a - // separate test. However, we only do this if this block doesn't - // have a fall-through edge, because this requires an explicit - // jmp when the condition is false. - if (Op.getNode()->hasOneUse()) { - SDNode *User = *Op.getNode()->use_begin(); - // Look for an unconditional branch following this conditional branch. - // We need this because we need to reverse the successors in order - // to implement FCMP_UNE. 
- if (User->getOpcode() == ISD::BR) { - SDValue FalseBB = User->getOperand(1); - SDNode *NewBR = - DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); - assert(NewBR == User); - (void)NewBR; - - SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, - Cond.getOperand(0), Cond.getOperand(1)); - CC = DAG.getConstant(X86::COND_NE, MVT::i8); - Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), - Chain, Dest, CC, Cmp); - CC = DAG.getConstant(X86::COND_NP, MVT::i8); - Cond = Cmp; - addTest = false; - Dest = FalseBB; - } - } } } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 4b74f96..2afe0e3 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -386,15 +386,6 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, Offset = off; return true; } - // Check for an aligned global variable. - if (GlobalAddressSDNode *GA = dyn_cast(*Root)) { - const GlobalValue *GV = GA->getGlobal(); - if (GA->getOffset() == 0 && GV->getAlignment() >= 4) { - AlignedBase = Base; - Offset = off; - return true; - } - } return false; } -- cgit v1.1 From 09c3253d3034ac8ed31f04d21181004827224d47 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Sat, 29 Oct 2011 00:56:07 +0000 Subject: Revert r143214; it's breaking a bunch of stuff. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143265 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 74 ++++++++++++++---------------------- 1 file changed, 29 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 83881de..6bef0ae 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -416,55 +416,39 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, } assert(isa(LHS) && "Not comparing with a select instruction!"); SelectInst *SI = cast(LHS); - Value *Cond = SI->getCondition(); - Value *TV = SI->getTrueValue(); - Value *FV = SI->getFalseValue(); // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. // Does "cmp TV, RHS" simplify? - Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, DT, MaxRecurse); - if (!TCmp) { - // It didn't simplify. However if "cmp TV, RHS" is equal to the select - // condition itself then we can replace it with 'true'. - if (match(Cond, m_ICmp(Pred, m_Specific(TV), m_Specific(RHS)))) - TCmp = getTrue(Cond->getType()); - } - if (!TCmp) - return 0; - - // Does "cmp FV, RHS" simplify? - Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, DT, MaxRecurse); - if (!FCmp) { - // It didn't simplify. However if "cmp FV, RHS" is equal to the select - // condition itself then we can replace it with 'false'. - if (match(Cond, m_ICmp(Pred, m_Specific(FV), m_Specific(RHS)))) - FCmp = getFalse(Cond->getType()); + if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT, + MaxRecurse)) { + // It does! Does "cmp FV, RHS" simplify? + if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT, + MaxRecurse)) { + // It does! If they simplified to the same value, then use it as the + // result of the original comparison. 
+ if (TCmp == FCmp) + return TCmp; + Value *Cond = SI->getCondition(); + // If the false value simplified to false, then the result of the compare + // is equal to "Cond && TCmp". This also catches the case when the false + // value simplified to false and the true value to true, returning "Cond". + if (match(FCmp, m_Zero())) + if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse)) + return V; + // If the true value simplified to true, then the result of the compare + // is equal to "Cond || FCmp". + if (match(TCmp, m_One())) + if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse)) + return V; + // Finally, if the false value simplified to true and the true value to + // false, then the result of the compare is equal to "!Cond". + if (match(FCmp, m_One()) && match(TCmp, m_Zero())) + if (Value *V = + SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), + TD, DT, MaxRecurse)) + return V; + } } - if (!FCmp) - return 0; - - // If both sides simplified to the same value, then use it as the result of - // the original comparison. - if (TCmp == FCmp) - return TCmp; - // If the false value simplified to false, then the result of the compare - // is equal to "Cond && TCmp". This also catches the case when the false - // value simplified to false and the true value to true, returning "Cond". - if (match(FCmp, m_Zero())) - if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse)) - return V; - // If the true value simplified to true, then the result of the compare - // is equal to "Cond || FCmp". - if (match(TCmp, m_One())) - if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse)) - return V; - // Finally, if the false value simplified to true and the true value to - // false, then the result of the compare is equal to "!Cond". 
- if (match(FCmp, m_One()) && match(TCmp, m_Zero())) - if (Value *V = - SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), - TD, DT, MaxRecurse)) - return V; return 0; } -- cgit v1.1 From 59e43bde289750a4683879727a9e0e41ab7000a6 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 29 Oct 2011 19:43:31 +0000 Subject: SimplifyLibCalls: Use IRBuilder.CreateGlobalString when creating a string for printf->puts, which correctly sets the unnamed_addr bit on the resulting GlobalVariable. Fixes PR11264. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143289 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SimplifyLibCalls.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp index fbb9465..2a00ae1 100644 --- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -1123,10 +1123,8 @@ struct PrintFOpt : public LibCallOptimization { // Create a string literal with no \n on it. We expect the constant merge // pass to be run after this pass, to merge duplicate strings. FormatStr.erase(FormatStr.end()-1); - Constant *C = ConstantArray::get(*Context, FormatStr, true); - C = new GlobalVariable(*Callee->getParent(), C->getType(), true, - GlobalVariable::InternalLinkage, C, "str"); - EmitPutS(C, B, TD); + Value *GV = B.CreateGlobalString(FormatStr, "str"); + EmitPutS(GV, B, TD); return CI->use_empty() ? (Value*)CI : ConstantInt::get(CI->getType(), FormatStr.size()+1); } -- cgit v1.1 From 95c885d65a0da92f6661fd160d8ce13b30e3892c Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 29 Oct 2011 19:43:38 +0000 Subject: PPC: Disable moves for all CR subregisters. Should fix assertion failures on ppc buildbots. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143290 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCFrameLowering.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 7dead10..0b85fea 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -490,10 +490,8 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF) const { // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just // subregisters of CR2. We just need to emit a move of CR2. - if (Reg == PPC::CR2LT || Reg == PPC::CR2GT || Reg == PPC::CR2EQ) + if (PPC::CRBITRCRegisterClass->contains(Reg)) continue; - if (Reg == PPC::CR2UN) - Reg = PPC::CR2; MachineLocation CSDst(MachineLocation::VirtualFP, Offset); MachineLocation CSSrc(Reg); -- cgit v1.1 From b00418af67b36dcd7d70a268ebba3480c1011348 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sat, 29 Oct 2011 21:23:04 +0000 Subject: Add a new DAGCombine optimization for BUILD_VECTOR. If all of the inputs are zero/any_extended, create a new simple BV which can be further optimized by other BV optimizations. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143297 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 83 ++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b1afbf2..d96ce75 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6936,7 +6936,90 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); + DebugLoc dl = N->getDebugLoc(); EVT VT = N->getValueType(0); + // Check to see if this is a BUILD_VECTOR of a bunch of values + // which come from any_extend or zero_extend nodes. If so, we can create + // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR + // optimizations. + EVT SourceType = MVT::Other; + bool allExtend = true; + bool allAnyExt = true; + for (unsigned i = 0; i < NumInScalars; ++i) { + SDValue In = N->getOperand(i); + // Ignore undef inputs. + if (In.getOpcode() == ISD::UNDEF) continue; + + bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; + bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; + + // Abort non-extend incoming values. + if (!ZeroExt && !AnyExt) { + allExtend = false; + break; + } + + // The input is a ZeroExt or AnyExt. Check the original type. + EVT InTy = In.getOperand(0).getValueType(); + + // Check that all of the widened source types are the same. + if (SourceType == MVT::Other) + SourceType = InTy; + else if (InTy != SourceType) { + // Multiple income types. Abort. + allExtend = false; + break; + } + + // Check if all of the extends are ANY_EXTENDs. + allAnyExt &= AnyExt; + } + + // And we are post type-legalization, + // If all of the values are Ext or undef, + // We have a non undef entry. 
+ if (LegalTypes && allExtend && SourceType != MVT::Other) { + bool isLE = TLI.isLittleEndian(); + EVT InScalarTy = SourceType.getScalarType(); + EVT OutScalarTy = N->getValueType(0).getScalarType(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/InScalarTy.getSizeInBits(); + assert(ElemRatio > 1 && "Invalid element size ratio"); + SDValue Filler = allAnyExt ? DAG.getUNDEF(InScalarTy): + DAG.getConstant(0, InScalarTy); + + unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); + SmallVector Ops(NewBVElems , Filler); + + // Populate the new build_vector + for (unsigned i=0; i < N->getNumOperands(); ++i) { + SDValue Cast = N->getOperand(i); + assert(Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF && "Invalid cast opcode"); + SDValue In; + if (Cast.getOpcode() == ISD::UNDEF) + In = DAG.getUNDEF(InScalarTy); + else + In = Cast->getOperand(0); + unsigned Index = isLE ? (i * ElemRatio) : + (i * ElemRatio + (ElemRatio - 1)); + + assert(Index < Ops.size() && "Invalid index"); + Ops[Index] = In; + } + + // The type of the new BUILD_VECTOR node. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), InScalarTy, NewBVElems); + assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && + "Invalid vector size"); + + // Make the new BUILD_VECTOR. + SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + VecVT, &Ops[0], Ops.size()); + + // Bitcast to the desired type. + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), BV); + } // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT // operations. If so, and if the EXTRACT_VECTOR_ELT vector inputs come from -- cgit v1.1 From 50bf86ea8a757be4059a56a2c38270873dcf9e2e Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 30 Oct 2011 08:39:55 +0000 Subject: Silence compiler warning. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143308 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index d96ce75..9a63799 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6988,14 +6988,14 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { DAG.getConstant(0, InScalarTy); unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); - SmallVector Ops(NewBVElems , Filler); + SmallVector Ops(NewBVElems, Filler); // Populate the new build_vector for (unsigned i=0; i < N->getNumOperands(); ++i) { SDValue Cast = N->getOperand(i); - assert(Cast.getOpcode() == ISD::ANY_EXTEND || - Cast.getOpcode() == ISD::ZERO_EXTEND || - Cast.getOpcode() == ISD::UNDEF && "Invalid cast opcode"); + assert((Cast.getOpcode() == ISD::ANY_EXTEND || + Cast.getOpcode() == ISD::ZERO_EXTEND || + Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); SDValue In; if (Cast.getOpcode() == ISD::UNDEF) In = DAG.getUNDEF(InScalarTy); -- cgit v1.1 From fb0dfbbff7fb3a9cff09301c174a65aec7b98760 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Sun, 30 Oct 2011 13:24:22 +0000 Subject: Fix pr11266. On x86: (shl V, 1) -> add V,V Hardware support for vector-shift is sparse and in many cases we scalarize the result. Additionally, on sandybridge padd is faster than shl. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143311 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2ec0814..1af2449 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -13042,7 +13042,8 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2)) // since the result of setcc_c is all zero's or all ones. - if (N1C && N0.getOpcode() == ISD::AND && + if (VT.isInteger() && !VT.isVector() && + N1C && N0.getOpcode() == ISD::AND && N0.getOperand(1).getOpcode() == ISD::Constant) { SDValue N00 = N0.getOperand(0); if (N00.getOpcode() == X86ISD::SETCC_CARRY || @@ -13058,6 +13059,22 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { } } + + // Hardware support for vector shifts is sparse which makes us scalarize the + // vector operations in many cases. Also, on sandybridge ADD is faster than + // shl. + // (shl V, 1) -> add V,V + if (isSplatVector(N1.getNode())) { + assert(N0.getValueType().isVector() && "Invalid vector shift type"); + ConstantSDNode *N1C = dyn_cast(N1->getOperand(0)); + // We shift all of the values by one. In many cases we do not have + // hardware support for this operation. This is better expressed as an ADD + // of two values. 
+ if (N1C && (1 == N1C->getZExtValue())) { + return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N0); + } + } + return SDValue(); } @@ -13066,9 +13083,10 @@ static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) { static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { EVT VT = N->getValueType(0); - if (!VT.isVector() && VT.isInteger() && - N->getOpcode() == ISD::SHL) - return PerformSHLCombine(N, DAG); + if (N->getOpcode() == ISD::SHL) { + SDValue V = PerformSHLCombine(N, DAG); + if (V.getNode()) return V; + } // On X86 with SSE2 support, we can transform this to a vector shift if // all elements are shifted by the same amount. We can't do this in legalize -- cgit v1.1 From dade3c144818e4888831ed13c1efaef28934ac78 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 30 Oct 2011 17:31:21 +0000 Subject: X86: Emit logical shift by constant splat of <16 x i8> as a <8 x i16> shift and zero out the bits where zeros should've been shifted in. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143315 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 1af2449..2ddb1b7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -9929,6 +9929,19 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { if (ConstantSDNode *C = dyn_cast(SclrAmt)) { uint64_t ShiftAmt = C->getZExtValue(); + if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SHL) { + // Make a large shift. + SDValue SHL = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + // Zero out the rightmost bits. 
+ SmallVector V(16, DAG.getConstant(uint8_t(-1U << ShiftAmt), + MVT::i8)); + return DAG.getNode(ISD::AND, dl, VT, SHL, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16)); + } + if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SHL) return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32), @@ -9944,6 +9957,19 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), R, DAG.getConstant(ShiftAmt, MVT::i32)); + if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRL) { + // Make a large shift. + SDValue SRL = + DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + // Zero out the leftmost bits. + SmallVector V(16, DAG.getConstant(uint8_t(-1U) >> ShiftAmt, + MVT::i8)); + return DAG.getNode(ISD::AND, dl, VT, SRL, + DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16)); + } + if (VT == MVT::v2i64 && Op.getOpcode() == ISD::SRL) return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), -- cgit v1.1 From 8895316d0489e2353c0dce289b49f5cdd41085d7 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Sun, 30 Oct 2011 17:46:34 +0000 Subject: Teach ModuleLinker::getLinkageResult about materialisable functions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143316 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 03a962e..bcc6782 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -446,7 +446,7 @@ bool ModuleLinker::getLinkageResult(GlobalValue *Dest, const GlobalValue *Src, assert(!Src->hasLocalLinkage() && "If Src has internal linkage, Dest shouldn't be set!"); - bool SrcIsDeclaration = Src->isDeclaration(); + bool 
SrcIsDeclaration = Src->isDeclaration() && !Src->isMaterializable(); bool DestIsDeclaration = Dest->isDeclaration(); if (SrcIsDeclaration) { -- cgit v1.1 From 6dc9e2bf7455fa1494078d5d52f1363b05183f20 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Sun, 30 Oct 2011 19:56:36 +0000 Subject: Reapply commit 143214 with a fix: m_ICmp doesn't match conditions with the given predicate, it matches any condition and returns the predicate - d'oh! Original commit message: The expression icmp eq (select (icmp eq x, 0), 1, x), 0 folds to false. Spotted by my super-optimizer in 186.crafty and 450.soplex. We really need a proper infrastructure for handling generalizations of this kind of thing (which occur a lot), however this case is so simple that I decided to go ahead and implement it directly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143318 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 94 +++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 29 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 6bef0ae..c2ddc6d 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -68,6 +68,20 @@ static Constant *getTrue(Type *Ty) { return Constant::getAllOnesValue(Ty); } +/// isSameCompare - Is V equivalent to the comparison "LHS Pred RHS"? +static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS, + Value *RHS) { + CmpInst *Cmp = dyn_cast(V); + if (!Cmp) + return false; + CmpInst::Predicate CPred = Cmp->getPredicate(); + Value *CLHS = Cmp->getOperand(0), *CRHS = Cmp->getOperand(1); + if (CPred == Pred && CLHS == LHS && CRHS == RHS) + return true; + return CPred == CmpInst::getSwappedPredicate(Pred) && CLHS == RHS && + CRHS == LHS; +} + /// ValueDominatesPHI - Does the given value dominate the specified phi node? 
static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) { Instruction *I = dyn_cast(V); @@ -416,40 +430,62 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS, } assert(isa(LHS) && "Not comparing with a select instruction!"); SelectInst *SI = cast(LHS); + Value *Cond = SI->getCondition(); + Value *TV = SI->getTrueValue(); + Value *FV = SI->getFalseValue(); // Now that we have "cmp select(Cond, TV, FV), RHS", analyse it. // Does "cmp TV, RHS" simplify? - if (Value *TCmp = SimplifyCmpInst(Pred, SI->getTrueValue(), RHS, TD, DT, - MaxRecurse)) { - // It does! Does "cmp FV, RHS" simplify? - if (Value *FCmp = SimplifyCmpInst(Pred, SI->getFalseValue(), RHS, TD, DT, - MaxRecurse)) { - // It does! If they simplified to the same value, then use it as the - // result of the original comparison. - if (TCmp == FCmp) - return TCmp; - Value *Cond = SI->getCondition(); - // If the false value simplified to false, then the result of the compare - // is equal to "Cond && TCmp". This also catches the case when the false - // value simplified to false and the true value to true, returning "Cond". - if (match(FCmp, m_Zero())) - if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse)) - return V; - // If the true value simplified to true, then the result of the compare - // is equal to "Cond || FCmp". - if (match(TCmp, m_One())) - if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse)) - return V; - // Finally, if the false value simplified to true and the true value to - // false, then the result of the compare is equal to "!Cond". - if (match(FCmp, m_One()) && match(TCmp, m_Zero())) - if (Value *V = - SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), - TD, DT, MaxRecurse)) - return V; - } + Value *TCmp = SimplifyCmpInst(Pred, TV, RHS, TD, DT, MaxRecurse); + if (TCmp == Cond) { + // It not only simplified, it simplified to the select condition. Replace + // it with 'true'. 
+ TCmp = getTrue(Cond->getType()); + } else if (!TCmp) { + // It didn't simplify. However if "cmp TV, RHS" is equal to the select + // condition then we can replace it with 'true'. Otherwise give up. + if (!isSameCompare(Cond, Pred, TV, RHS)) + return 0; + TCmp = getTrue(Cond->getType()); + } + + // Does "cmp FV, RHS" simplify? + Value *FCmp = SimplifyCmpInst(Pred, FV, RHS, TD, DT, MaxRecurse); + if (FCmp == Cond) { + // It not only simplified, it simplified to the select condition. Replace + // it with 'false'. + FCmp = getFalse(Cond->getType()); + } else if (!FCmp) { + // It didn't simplify. However if "cmp FV, RHS" is equal to the select + // condition then we can replace it with 'false'. Otherwise give up. + if (!isSameCompare(Cond, Pred, FV, RHS)) + return 0; + FCmp = getFalse(Cond->getType()); } + // If both sides simplified to the same value, then use it as the result of + // the original comparison. + if (TCmp == FCmp) + return TCmp; + // If the false value simplified to false, then the result of the compare + // is equal to "Cond && TCmp". This also catches the case when the false + // value simplified to false and the true value to true, returning "Cond". + if (match(FCmp, m_Zero())) + if (Value *V = SimplifyAndInst(Cond, TCmp, TD, DT, MaxRecurse)) + return V; + // If the true value simplified to true, then the result of the compare + // is equal to "Cond || FCmp". + if (match(TCmp, m_One())) + if (Value *V = SimplifyOrInst(Cond, FCmp, TD, DT, MaxRecurse)) + return V; + // Finally, if the false value simplified to true and the true value to + // false, then the result of the compare is equal to "!Cond". 
+ if (match(FCmp, m_One()) && match(TCmp, m_Zero())) + if (Value *V = + SimplifyXorInst(Cond, Constant::getAllOnesValue(Cond->getType()), + TD, DT, MaxRecurse)) + return V; + return 0; } -- cgit v1.1 From e7b05504faa86a5c0b80a62ddb60cbb0cf163d5d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 30 Oct 2011 19:57:21 +0000 Subject: Add intrinsics and feature flag for read/write FS/GS base instructions. Also add AVX2 feature flag. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143319 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86.td | 17 +++++++++++------ lib/Target/X86/X86InstrInfo.td | 2 ++ lib/Target/X86/X86InstrSystem.td | 34 +++++++++++++++++++++------------- lib/Target/X86/X86Subtarget.cpp | 11 +++++++++++ lib/Target/X86/X86Subtarget.h | 8 ++++++++ 5 files changed, 53 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 9861960..62a7016 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -82,6 +82,9 @@ def FeatureSSE4A : SubtargetFeature<"sse4a", "HasSSE4A", "true", def FeatureAVX : SubtargetFeature<"avx", "HasAVX", "true", "Enable AVX instructions">; +def FeatureAVX2 : SubtargetFeature<"avx2", "HasAVX2", "true", + "Enable AVX2 instructions", + [FeatureAVX]>; def FeatureCLMUL : SubtargetFeature<"clmul", "HasCLMUL", "true", "Enable carry-less multiplication instructions">; def FeatureFMA3 : SubtargetFeature<"fma3", "HasFMA3", "true", @@ -99,6 +102,8 @@ def FeatureRDRAND : SubtargetFeature<"rdrand", "HasRDRAND", "true", "Support RDRAND instruction">; def FeatureF16C : SubtargetFeature<"f16c", "HasF16C", "true", "Support 16-bit floating point conversion instructions">; +def FeatureFSGSBase : SubtargetFeature<"fsgsbase", "HasFSGSBase", "true", + "Support FS/GS Base instructions">; def FeatureLZCNT : SubtargetFeature<"lzcnt", "HasLZCNT", "true", "Support LZCNT instruction">; def FeatureBMI : SubtargetFeature<"bmi", "HasBMI", "true", @@ -157,13 +162,14 @@ def : 
Proc<"corei7-avx", [FeatureSSE42, FeatureCMPXCHG16B, // Ivy Bridge def : Proc<"core-avx-i", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES, FeatureCLMUL, - FeatureRDRAND, FeatureF16C]>; + FeatureRDRAND, FeatureF16C, FeatureFSGSBase]>; // Haswell +// FIXME: Disabling AVX/AVX2 for now since it's not ready. def : Proc<"core-avx2", [FeatureSSE42, FeatureCMPXCHG16B, FeatureAES, FeatureCLMUL, FeatureRDRAND, FeatureF16C, - FeatureFMA3, FeatureMOVBE, FeatureLZCNT, - FeatureBMI, FeatureBMI2]>; + FeatureFSGSBase, FeatureFMA3, FeatureMOVBE, + FeatureLZCNT, FeatureBMI, FeatureBMI2]>; def : Proc<"k6", [FeatureMMX]>; def : Proc<"k6-2", [Feature3DNow]>; @@ -194,9 +200,8 @@ def : Proc<"barcelona", [FeatureSSE3, FeatureSSE4A, Feature3DNowA, FeatureCMPXCHG16B, FeatureSlowBTMem]>; def : Proc<"istanbul", [Feature3DNowA, FeatureCMPXCHG16B, - FeatureSSE4A, Feature3DNowA]>; -def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A, - Feature3DNowA]>; + FeatureSSE4A]>; +def : Proc<"shanghai", [Feature3DNowA, FeatureCMPXCHG16B, FeatureSSE4A]>; def : Proc<"winchip-c6", [FeatureMMX]>; def : Proc<"winchip2", [Feature3DNow]>; diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 2a85ba1..79ce509 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -472,6 +472,7 @@ def HasSSE42 : Predicate<"Subtarget->hasSSE42()">; def HasSSE4A : Predicate<"Subtarget->hasSSE4A()">; def HasAVX : Predicate<"Subtarget->hasAVX()">; +def HasAVX2 : Predicate<"Subtarget->hasAVX2()">; def HasXMMInt : Predicate<"Subtarget->hasXMMInt()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; @@ -482,6 +483,7 @@ def HasFMA4 : Predicate<"Subtarget->hasFMA4()">; def HasMOVBE : Predicate<"Subtarget->hasMOVBE()">; def HasRDRAND : Predicate<"Subtarget->hasRDRAND()">; def HasF16C : Predicate<"Subtarget->hasF16C()">; +def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">; def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; def HasBMI : 
Predicate<"Subtarget->hasBMI()">; def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; diff --git a/lib/Target/X86/X86InstrSystem.td b/lib/Target/X86/X86InstrSystem.td index be3500a..1b43838 100644 --- a/lib/Target/X86/X86InstrSystem.td +++ b/lib/Target/X86/X86InstrSystem.td @@ -446,23 +446,31 @@ let Defs = [RAX, RDX, RSI], Uses = [RAX, RSI] in //===----------------------------------------------------------------------===// // FS/GS Base Instructions -let Predicates = [In64BitMode] in { +let Predicates = [HasFSGSBase, In64BitMode] in { def RDFSBASE : I<0xAE, MRM0r, (outs GR32:$dst), (ins), - "rdfsbase{l}\t$dst", []>, TB, XS; + "rdfsbase{l}\t$dst", + [(set GR32:$dst, (int_x86_rdfsbase_32))]>, TB, XS; def RDFSBASE64 : RI<0xAE, MRM0r, (outs GR64:$dst), (ins), - "rdfsbase{q}\t$dst", []>, TB, XS; + "rdfsbase{q}\t$dst", + [(set GR64:$dst, (int_x86_rdfsbase_64))]>, TB, XS; def RDGSBASE : I<0xAE, MRM1r, (outs GR32:$dst), (ins), - "rdgsbase{l}\t$dst", []>, TB, XS; + "rdgsbase{l}\t$dst", + [(set GR32:$dst, (int_x86_rdgsbase_32))]>, TB, XS; def RDGSBASE64 : RI<0xAE, MRM1r, (outs GR64:$dst), (ins), - "rdgsbase{q}\t$dst", []>, TB, XS; - def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$dst), - "wrfsbase{l}\t$dst", []>, TB, XS; - def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$dst), - "wrfsbase{q}\t$dst", []>, TB, XS; - def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$dst), - "wrgsbase{l}\t$dst", []>, TB, XS; - def WRGSBASE64 : RI<0xAE, MRM3r, (outs), (ins GR64:$dst), - "wrgsbase{q}\t$dst", []>, TB, XS; + "rdgsbase{q}\t$dst", + [(set GR64:$dst, (int_x86_rdgsbase_64))]>, TB, XS; + def WRFSBASE : I<0xAE, MRM2r, (outs), (ins GR32:$src), + "wrfsbase{l}\t$src", + [(int_x86_wrfsbase_32 GR32:$src)]>, TB, XS; + def WRFSBASE64 : RI<0xAE, MRM2r, (outs), (ins GR64:$src), + "wrfsbase{q}\t$src", + [(int_x86_wrfsbase_64 GR64:$src)]>, TB, XS; + def WRGSBASE : I<0xAE, MRM3r, (outs), (ins GR32:$src), + "wrgsbase{l}\t$src", + [(int_x86_wrgsbase_32 GR32:$src)]>, TB, XS; + def WRGSBASE64 : RI<0xAE, 
MRM3r, (outs), (ins GR64:$src), + "wrgsbase{q}\t$src", + [(int_x86_wrgsbase_64 GR64:$src)]>, TB, XS; } //===----------------------------------------------------------------------===// diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index fd8ef19..e7bcbf8 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -279,10 +279,19 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { if (IsIntel && MaxLevel >= 7) { if (!X86_MC::GetCpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX)) { + if (EBX & 0x1) { + HasFSGSBase = true; + ToggleFeature(X86::FeatureFSGSBase); + } if ((EBX >> 3) & 0x1) { HasBMI = true; ToggleFeature(X86::FeatureBMI); } + // FIXME: AVX2 codegen support is not ready. + //if ((EBX >> 5) & 0x1) { + // HasAVX2 = true; + // ToggleFeature(X86::FeatureAVX2); + //} if ((EBX >> 8) & 0x1) { HasBMI2 = true; ToggleFeature(X86::FeatureBMI2); @@ -303,6 +312,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasPOPCNT(false) , HasSSE4A(false) , HasAVX(false) + , HasAVX2(false) , HasAES(false) , HasCLMUL(false) , HasFMA3(false) @@ -310,6 +320,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU, , HasMOVBE(false) , HasRDRAND(false) , HasF16C(false) + , HasFSGSBase(false) , HasLZCNT(false) , HasBMI(false) , HasBMI2(false) diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index 5e08f01..763fb43 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -78,6 +78,9 @@ protected: /// HasAVX - Target has AVX instructions bool HasAVX; + /// HasAVX2 - Target has AVX2 instructions + bool HasAVX2; + /// HasAES - Target has AES instructions bool HasAES; @@ -99,6 +102,9 @@ protected: /// HasF16C - Processor has 16-bit floating point conversion instructions. bool HasF16C; + /// HasFSGSBase - Processor has FS/GS base instructions. + bool HasFSGSBase; + /// HasLZCNT - Processor has LZCNT instruction.
bool HasLZCNT; @@ -181,6 +187,7 @@ public: bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasPOPCNT() const { return HasPOPCNT; } bool hasAVX() const { return HasAVX; } + bool hasAVX2() const { return HasAVX2; } bool hasXMM() const { return hasSSE1() || hasAVX(); } bool hasXMMInt() const { return hasSSE2() || hasAVX(); } bool hasAES() const { return HasAES; } @@ -190,6 +197,7 @@ public: bool hasMOVBE() const { return HasMOVBE; } bool hasRDRAND() const { return HasRDRAND; } bool hasF16C() const { return HasF16C; } + bool hasFSGSBase() const { return HasFSGSBase; } bool hasLZCNT() const { return HasLZCNT; } bool hasBMI() const { return HasBMI; } bool hasBMI2() const { return HasBMI2; } -- cgit v1.1 From 4e478fed1b8021150b1f2cec4e670068b6abd135 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 31 Oct 2011 01:06:02 +0000 Subject: Switch new .file directive emission off by default, change llc's flag for it to -enable-dwarf-directory. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143326 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/TargetMachine.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/TargetMachine.cpp b/lib/Target/TargetMachine.cpp index 3f58e84..daac924 100644 --- a/lib/Target/TargetMachine.cpp +++ b/lib/Target/TargetMachine.cpp @@ -198,7 +198,7 @@ TargetMachine::TargetMachine(const Target &T, MCSaveTempLabels(false), MCUseLoc(true), MCUseCFI(true), - MCUseDwarfDirectory(true) { + MCUseDwarfDirectory(false) { // Typically it will be subtargets that will adjust FloatABIType from Default // to Soft or Hard. if (UseSoftFloat) -- cgit v1.1 From 6b1c5fc02a2c68397fd9fb79f4643e9020e829a8 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 31 Oct 2011 02:15:10 +0000 Subject: Begin adding AVX2 instructions. No selection support yet other than intrinsics. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143331 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 517 ++++++++++++++++++++++++++++++------------ 1 file changed, 371 insertions(+), 146 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d3ced23..b5eea45 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3343,64 +3343,68 @@ let Predicates = [HasAVX] in { let ExeDomain = SSEPackedInt in { // SSE integer instructions multiclass PDI_binop_rm_int opc, string OpcodeStr, Intrinsic IntId, - bit IsCommutable = 0, bit Is2Addr = 1> { + RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit IsCommutable = 0, + bit Is2Addr = 1> { let isCommutable = IsCommutable in - def rr : PDI; - def rm : PDI; + def rm : PDI; + [(set RC:$dst, (IntId RC:$src1, (bitconvert (memop_frag addr:$src2))))]>; } multiclass PDI_binop_rmi_int opc, bits<8> opc2, Format ImmForm, string OpcodeStr, Intrinsic IntId, - Intrinsic IntId2, bit Is2Addr = 1> { - def rr : PDI { + // src2 is always 128-bit + def rr : PDI; - def rm : PDI; + def rm : PDI; - def ri : PDIi8; + def ri : PDIi8; + [(set RC:$dst, (IntId2 RC:$src1, (i32 imm:$src2)))]>; } /// PDI_binop_rm - Simple SSE2 binary operator. multiclass PDI_binop_rm opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit IsCommutable = 0, bit Is2Addr = 1> { + ValueType OpVT, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop, bit IsCommutable = 0, + bit Is2Addr = 1> { let isCommutable = IsCommutable in - def rr : PDI; - def rm : PDI; + def rm : PDI; + [(set RC:$dst, (OpVT (OpNode RC:$src1, + (bitconvert (memop_frag addr:$src2)))))]>; } /// PDI_binop_rm_v2i64 - Simple SSE2 binary operator whose type is v2i64. 
@@ -3425,93 +3429,203 @@ multiclass PDI_binop_rm_v2i64 opc, string OpcodeStr, SDNode OpNode, [(set VR128:$dst, (OpNode VR128:$src1, (memopv2i64 addr:$src2)))]>; } +/// PDI_binop_rm_v4i64 - Simple AVX2 binary operator whose type is v4i64. +/// +/// FIXME: we could eliminate this and use PDI_binop_rm instead if tblgen knew +/// to collapse (bitconvert VT to VT) into its operand. +/// +multiclass PDI_binop_rm_v4i64 opc, string OpcodeStr, SDNode OpNode, + bit IsCommutable = 0> { + let isCommutable = IsCommutable in + def rr : PDI; + def rm : PDI; +} + } // ExeDomain = SSEPackedInt // 128-bit Integer Arithmetic let Predicates = [HasAVX] in { -defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, 1, 0 /*3addr*/>, VEX_4V; -defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, 1, 0>, VEX_4V; -defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, 1, 0>, VEX_4V; +defm VPADDB : PDI_binop_rm<0xFC, "vpaddb", add, v16i8, VR128, memopv2i64, + i128mem, 1, 0 /*3addr*/>, VEX_4V; +defm VPADDW : PDI_binop_rm<0xFD, "vpaddw", add, v8i16, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; +defm VPADDD : PDI_binop_rm<0xFE, "vpaddd", add, v4i32, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; defm VPADDQ : PDI_binop_rm_v2i64<0xD4, "vpaddq", add, 1, 0>, VEX_4V; -defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, 1, 0>, VEX_4V; -defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, 0, 0>, VEX_4V; -defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, 0, 0>, VEX_4V; -defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, 0, 0>, VEX_4V; +defm VPMULLW : PDI_binop_rm<0xD5, "vpmullw", mul, v8i16, VR128, memopv2i64, + i128mem, 1, 0>, VEX_4V; +defm VPSUBB : PDI_binop_rm<0xF8, "vpsubb", sub, v16i8, VR128, memopv2i64, + i128mem, 0, 0>, VEX_4V; +defm VPSUBW : PDI_binop_rm<0xF9, "vpsubw", sub, v8i16, VR128, memopv2i64, + i128mem, 0, 0>, VEX_4V; +defm VPSUBD : PDI_binop_rm<0xFA, "vpsubd", sub, v4i32, VR128, memopv2i64, + i128mem, 0, 0>, VEX_4V; defm VPSUBQ : PDI_binop_rm_v2i64<0xFB, "vpsubq", sub, 0, 
0>, VEX_4V; // Intrinsic forms -defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, 0, 0>, - VEX_4V; -defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, 0, 0>, - VEX_4V; -defm VPSUBUSB : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, 0, 0>, - VEX_4V; -defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, 0, 0>, - VEX_4V; -defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, 1, 0>, - VEX_4V; -defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, 1, 0>, - VEX_4V; -defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, 1, 0>, - VEX_4V; -defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, 1, 0>, - VEX_4V; -defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, 1, 0>, - VEX_4V; -defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, 1, 0>, - VEX_4V; -defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, 1, 0>, - VEX_4V; -defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, 1, 0>, - VEX_4V; -defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, 1, 0>, - VEX_4V; -defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, 1, 0>, - VEX_4V; -defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, 1, 0>, - VEX_4V; -defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, 1, 0>, - VEX_4V; -defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, 1, 0>, - VEX_4V; -defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, 1, 0>, - VEX_4V; -defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, 1, 0>, - VEX_4V; +defm VPSUBSB : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_sse2_psubs_b, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; +defm VPSUBSW : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_sse2_psubs_w, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; +defm VPSUBUSB : 
PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_sse2_psubus_b, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; +defm VPSUBUSW : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_sse2_psubus_w, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; +defm VPADDSB : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_sse2_padds_b, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPADDSW : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_sse2_padds_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPADDUSB : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_sse2_paddus_b, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPADDUSW : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_sse2_paddus_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMULHUW : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_sse2_pmulhu_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMULHW : PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_sse2_pmulh_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMULUDQ : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_sse2_pmulu_dq, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMADDWD : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_sse2_pmadd_wd, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPAVGB : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_sse2_pavg_b, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPAVGW : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_sse2_pavg_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMINUB : PDI_binop_rm_int<0xDA, "vpminub", int_x86_sse2_pminu_b, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMINSW : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_sse2_pmins_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMAXUB : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_sse2_pmaxu_b, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPMAXSW : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_sse2_pmaxs_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +defm VPSADBW : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_sse2_psad_bw, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; +} + +let 
Predicates = [HasAVX2] in { +defm VPADDBY : PDI_binop_rm<0xFC, "vpaddb", add, v32i8, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPADDWY : PDI_binop_rm<0xFD, "vpaddw", add, v16i16, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPADDDY : PDI_binop_rm<0xFE, "vpaddd", add, v8i32, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPADDQY : PDI_binop_rm_v4i64<0xD4, "vpaddq", add, 1>, VEX_4V; +defm VPMULLWY : PDI_binop_rm<0xD5, "vpmullw", mul, v16i16, VR256, memopv4i64, + i256mem, 1, 0>, VEX_4V; +defm VPSUBBY : PDI_binop_rm<0xF8, "vpsubb", sub, v32i8, VR256, memopv4i64, + i256mem, 0, 0>, VEX_4V; +defm VPSUBWY : PDI_binop_rm<0xF9, "vpsubw", sub, v16i16,VR256, memopv4i64, + i256mem, 0, 0>, VEX_4V; +defm VPSUBDY : PDI_binop_rm<0xFA, "vpsubd", sub, v8i32, VR256, memopv4i64, + i256mem, 0, 0>, VEX_4V; +defm VPSUBQY : PDI_binop_rm_v4i64<0xFB, "vpsubq", sub, 0>, VEX_4V; + +// Intrinsic forms +defm VPSUBSBY : PDI_binop_rm_int<0xE8, "vpsubsb" , int_x86_avx2_psubs_b, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; +defm VPSUBSWY : PDI_binop_rm_int<0xE9, "vpsubsw" , int_x86_avx2_psubs_w, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; +defm VPSUBUSBY : PDI_binop_rm_int<0xD8, "vpsubusb", int_x86_avx2_psubus_b, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; +defm VPSUBUSWY : PDI_binop_rm_int<0xD9, "vpsubusw", int_x86_avx2_psubus_w, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; +defm VPADDSBY : PDI_binop_rm_int<0xEC, "vpaddsb" , int_x86_avx2_padds_b, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPADDSWY : PDI_binop_rm_int<0xED, "vpaddsw" , int_x86_avx2_padds_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPADDUSBY : PDI_binop_rm_int<0xDC, "vpaddusb", int_x86_avx2_paddus_b, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPADDUSWY : PDI_binop_rm_int<0xDD, "vpaddusw", int_x86_avx2_paddus_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMULHUWY : PDI_binop_rm_int<0xE4, "vpmulhuw", int_x86_avx2_pmulhu_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMULHWY 
: PDI_binop_rm_int<0xE5, "vpmulhw" , int_x86_avx2_pmulh_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMULUDQY : PDI_binop_rm_int<0xF4, "vpmuludq", int_x86_avx2_pmulu_dq, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMADDWDY : PDI_binop_rm_int<0xF5, "vpmaddwd", int_x86_avx2_pmadd_wd, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPAVGBY : PDI_binop_rm_int<0xE0, "vpavgb", int_x86_avx2_pavg_b, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPAVGWY : PDI_binop_rm_int<0xE3, "vpavgw", int_x86_avx2_pavg_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMINUBY : PDI_binop_rm_int<0xDA, "vpminub", int_x86_avx2_pminu_b, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMINSWY : PDI_binop_rm_int<0xEA, "vpminsw", int_x86_avx2_pmins_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMAXUBY : PDI_binop_rm_int<0xDE, "vpmaxub", int_x86_avx2_pmaxu_b, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPMAXSWY : PDI_binop_rm_int<0xEE, "vpmaxsw", int_x86_avx2_pmaxs_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; +defm VPSADBWY : PDI_binop_rm_int<0xF6, "vpsadbw", int_x86_avx2_psad_bw, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { -defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, 1>; -defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, 1>; -defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, 1>; +defm PADDB : PDI_binop_rm<0xFC, "paddb", add, v16i8, VR128, memopv2i64, + i128mem, 1>; +defm PADDW : PDI_binop_rm<0xFD, "paddw", add, v8i16, VR128, memopv2i64, + i128mem, 1>; +defm PADDD : PDI_binop_rm<0xFE, "paddd", add, v4i32, VR128, memopv2i64, + i128mem, 1>; defm PADDQ : PDI_binop_rm_v2i64<0xD4, "paddq", add, 1>; -defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, 1>; -defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8>; -defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16>; -defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32>; +defm PMULLW : PDI_binop_rm<0xD5, "pmullw", mul, v8i16, VR128, memopv2i64, 
+ i128mem, 1>; +defm PSUBB : PDI_binop_rm<0xF8, "psubb", sub, v16i8, VR128, memopv2i64, + i128mem>; +defm PSUBW : PDI_binop_rm<0xF9, "psubw", sub, v8i16, VR128, memopv2i64, + i128mem>; +defm PSUBD : PDI_binop_rm<0xFA, "psubd", sub, v4i32, VR128, memopv2i64, + i128mem>; defm PSUBQ : PDI_binop_rm_v2i64<0xFB, "psubq", sub>; // Intrinsic forms -defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b>; -defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w>; -defm PSUBUSB : PDI_binop_rm_int<0xD8, "psubusb", int_x86_sse2_psubus_b>; -defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w>; -defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, 1>; -defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, 1>; -defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, 1>; -defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, 1>; -defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, 1>; -defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, 1>; -defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, 1>; -defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, 1>; -defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, 1>; -defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, 1>; -defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, 1>; -defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, 1>; -defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, 1>; -defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, 1>; -defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>; +defm PSUBSB : PDI_binop_rm_int<0xE8, "psubsb" , int_x86_sse2_psubs_b, + VR128, memopv2i64, i128mem>; +defm PSUBSW : PDI_binop_rm_int<0xE9, "psubsw" , int_x86_sse2_psubs_w, + VR128, memopv2i64, i128mem>; +defm PSUBUSB : PDI_binop_rm_int<0xD8, 
"psubusb", int_x86_sse2_psubus_b, + VR128, memopv2i64, i128mem>; +defm PSUBUSW : PDI_binop_rm_int<0xD9, "psubusw", int_x86_sse2_psubus_w, + VR128, memopv2i64, i128mem>; +defm PADDSB : PDI_binop_rm_int<0xEC, "paddsb" , int_x86_sse2_padds_b, + VR128, memopv2i64, i128mem, 1>; +defm PADDSW : PDI_binop_rm_int<0xED, "paddsw" , int_x86_sse2_padds_w, + VR128, memopv2i64, i128mem, 1>; +defm PADDUSB : PDI_binop_rm_int<0xDC, "paddusb", int_x86_sse2_paddus_b, + VR128, memopv2i64, i128mem, 1>; +defm PADDUSW : PDI_binop_rm_int<0xDD, "paddusw", int_x86_sse2_paddus_w, + VR128, memopv2i64, i128mem, 1>; +defm PMULHUW : PDI_binop_rm_int<0xE4, "pmulhuw", int_x86_sse2_pmulhu_w, + VR128, memopv2i64, i128mem, 1>; +defm PMULHW : PDI_binop_rm_int<0xE5, "pmulhw" , int_x86_sse2_pmulh_w, + VR128, memopv2i64, i128mem, 1>; +defm PMULUDQ : PDI_binop_rm_int<0xF4, "pmuludq", int_x86_sse2_pmulu_dq, + VR128, memopv2i64, i128mem, 1>; +defm PMADDWD : PDI_binop_rm_int<0xF5, "pmaddwd", int_x86_sse2_pmadd_wd, + VR128, memopv2i64, i128mem, 1>; +defm PAVGB : PDI_binop_rm_int<0xE0, "pavgb", int_x86_sse2_pavg_b, + VR128, memopv2i64, i128mem, 1>; +defm PAVGW : PDI_binop_rm_int<0xE3, "pavgw", int_x86_sse2_pavg_w, + VR128, memopv2i64, i128mem, 1>; +defm PMINUB : PDI_binop_rm_int<0xDA, "pminub", int_x86_sse2_pminu_b, + VR128, memopv2i64, i128mem, 1>; +defm PMINSW : PDI_binop_rm_int<0xEA, "pminsw", int_x86_sse2_pmins_w, + VR128, memopv2i64, i128mem, 1>; +defm PMAXUB : PDI_binop_rm_int<0xDE, "pmaxub", int_x86_sse2_pmaxu_b, + VR128, memopv2i64, i128mem, 1>; +defm PMAXSW : PDI_binop_rm_int<0xEE, "pmaxsw", int_x86_sse2_pmaxs_w, + VR128, memopv2i64, i128mem, 1>; +defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, + VR128, memopv2i64, i128mem, 1>; } // Constraints = "$src1 = $dst" @@ -3521,31 +3635,31 @@ defm PSADBW : PDI_binop_rm_int<0xF6, "psadbw", int_x86_sse2_psad_bw, 1>; let Predicates = [HasAVX] in { defm VPSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", - int_x86_sse2_psll_w, 
int_x86_sse2_pslli_w, 0>, - VEX_4V; + int_x86_sse2_psll_w, int_x86_sse2_pslli_w, + VR128, 0>, VEX_4V; defm VPSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", - int_x86_sse2_psll_d, int_x86_sse2_pslli_d, 0>, - VEX_4V; + int_x86_sse2_psll_d, int_x86_sse2_pslli_d, + VR128, 0>, VEX_4V; defm VPSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", - int_x86_sse2_psll_q, int_x86_sse2_pslli_q, 0>, - VEX_4V; + int_x86_sse2_psll_q, int_x86_sse2_pslli_q, + VR128, 0>, VEX_4V; defm VPSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", - int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, 0>, - VEX_4V; + int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, + VR128, 0>, VEX_4V; defm VPSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", - int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, 0>, - VEX_4V; + int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, + VR128, 0>, VEX_4V; defm VPSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", - int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, 0>, - VEX_4V; + int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, + VR128, 0>, VEX_4V; defm VPSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", - int_x86_sse2_psra_w, int_x86_sse2_psrai_w, 0>, - VEX_4V; + int_x86_sse2_psra_w, int_x86_sse2_psrai_w, + VR128, 0>, VEX_4V; defm VPSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", - int_x86_sse2_psra_d, int_x86_sse2_psrai_d, 0>, - VEX_4V; + int_x86_sse2_psra_d, int_x86_sse2_psrai_d, + VR128, 0>, VEX_4V; defm VPAND : PDI_binop_rm_v2i64<0xDB, "vpand", and, 1, 0>, VEX_4V; defm VPOR : PDI_binop_rm_v2i64<0xEB, "vpor" , or, 1, 0>, VEX_4V; @@ -3578,25 +3692,92 @@ let ExeDomain = SSEPackedInt in { } } +let Predicates = [HasAVX2] in { +defm VPSLLWY : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "vpsllw", + int_x86_avx2_psll_w, int_x86_avx2_pslli_w, + VR256, 0>, VEX_4V; +defm VPSLLDY : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "vpslld", + int_x86_avx2_psll_d, int_x86_avx2_pslli_d, + VR256, 0>, VEX_4V; +defm VPSLLQY : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "vpsllq", + int_x86_avx2_psll_q, int_x86_avx2_pslli_q, + 
VR256, 0>, VEX_4V; + +defm VPSRLWY : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "vpsrlw", + int_x86_avx2_psrl_w, int_x86_avx2_psrli_w, + VR256, 0>, VEX_4V; +defm VPSRLDY : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "vpsrld", + int_x86_avx2_psrl_d, int_x86_avx2_psrli_d, + VR256, 0>, VEX_4V; +defm VPSRLQY : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "vpsrlq", + int_x86_avx2_psrl_q, int_x86_avx2_psrli_q, + VR256, 0>, VEX_4V; + +defm VPSRAWY : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "vpsraw", + int_x86_avx2_psra_w, int_x86_avx2_psrai_w, + VR256, 0>, VEX_4V; +defm VPSRADY : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "vpsrad", + int_x86_avx2_psra_d, int_x86_avx2_psrai_d, + VR256, 0>, VEX_4V; + +defm VPANDY : PDI_binop_rm_v4i64<0xDB, "vpand", and, 1>, VEX_4V; +defm VPORY : PDI_binop_rm_v4i64<0xEB, "vpor" , or, 1>, VEX_4V; +defm VPXORY : PDI_binop_rm_v4i64<0xEF, "vpxor", xor, 1>, VEX_4V; + +let ExeDomain = SSEPackedInt in { + let neverHasSideEffects = 1 in { + // 128-bit logical shifts. + def VPSLLDQYri : PDIi8<0x73, MRM7r, + (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), + "vpslldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V; + def VPSRLDQYri : PDIi8<0x73, MRM3r, + (outs VR256:$dst), (ins VR256:$src1, i32i8imm:$src2), + "vpsrldq\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + VEX_4V; + // PSRADQYri doesn't exist in SSE[1-3]. 
+ } + def VPANDNYrr : PDI<0xDF, MRMSrcReg, + (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), + "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, + (v4i64 (X86andnp VR256:$src1, VR256:$src2)))]>,VEX_4V; + + def VPANDNYrm : PDI<0xDF, MRMSrcMem, + (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), + "vpandn\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, (X86andnp VR256:$src1, + (memopv4i64 addr:$src2)))]>, VEX_4V; +} +} + let Constraints = "$src1 = $dst" in { defm PSLLW : PDI_binop_rmi_int<0xF1, 0x71, MRM6r, "psllw", - int_x86_sse2_psll_w, int_x86_sse2_pslli_w>; + int_x86_sse2_psll_w, int_x86_sse2_pslli_w, + VR128>; defm PSLLD : PDI_binop_rmi_int<0xF2, 0x72, MRM6r, "pslld", - int_x86_sse2_psll_d, int_x86_sse2_pslli_d>; + int_x86_sse2_psll_d, int_x86_sse2_pslli_d, + VR128>; defm PSLLQ : PDI_binop_rmi_int<0xF3, 0x73, MRM6r, "psllq", - int_x86_sse2_psll_q, int_x86_sse2_pslli_q>; + int_x86_sse2_psll_q, int_x86_sse2_pslli_q, + VR128>; defm PSRLW : PDI_binop_rmi_int<0xD1, 0x71, MRM2r, "psrlw", - int_x86_sse2_psrl_w, int_x86_sse2_psrli_w>; + int_x86_sse2_psrl_w, int_x86_sse2_psrli_w, + VR128>; defm PSRLD : PDI_binop_rmi_int<0xD2, 0x72, MRM2r, "psrld", - int_x86_sse2_psrl_d, int_x86_sse2_psrli_d>; + int_x86_sse2_psrl_d, int_x86_sse2_psrli_d, + VR128>; defm PSRLQ : PDI_binop_rmi_int<0xD3, 0x73, MRM2r, "psrlq", - int_x86_sse2_psrl_q, int_x86_sse2_psrli_q>; + int_x86_sse2_psrl_q, int_x86_sse2_psrli_q, + VR128>; defm PSRAW : PDI_binop_rmi_int<0xE1, 0x71, MRM4r, "psraw", - int_x86_sse2_psra_w, int_x86_sse2_psrai_w>; + int_x86_sse2_psra_w, int_x86_sse2_psrai_w, + VR128>; defm PSRAD : PDI_binop_rmi_int<0xE2, 0x72, MRM4r, "psrad", - int_x86_sse2_psra_d, int_x86_sse2_psrai_d>; + int_x86_sse2_psra_d, int_x86_sse2_psrai_d, + VR128>; defm PAND : PDI_binop_rm_v2i64<0xDB, "pand", and, 1>; defm POR : PDI_binop_rm_v2i64<0xEB, "por" , or, 1>; @@ -3642,6 +3823,17 @@ let Predicates = [HasAVX] in { (v2i64 (VPSRLDQri VR128:$src, (BYTE_imm imm:$amt)))>; } 
+let Predicates = [HasAVX2] in { + def : Pat<(int_x86_avx2_psll_dq VR256:$src1, imm:$src2), + (v4i64 (VPSLLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>; + def : Pat<(int_x86_avx2_psrl_dq VR256:$src1, imm:$src2), + (v4i64 (VPSRLDQYri VR256:$src1, (BYTE_imm imm:$src2)))>; + def : Pat<(int_x86_avx2_psll_dq_bs VR256:$src1, imm:$src2), + (v4i64 (VPSLLDQYri VR256:$src1, imm:$src2))>; + def : Pat<(int_x86_avx2_psrl_dq_bs VR256:$src1, imm:$src2), + (v4i64 (VPSRLDQYri VR256:$src1, imm:$src2))>; +} + let Predicates = [HasSSE2] in { def : Pat<(int_x86_sse2_psll_dq VR128:$src1, imm:$src2), (v2i64 (PSLLDQri VR128:$src1, (BYTE_imm imm:$src2)))>; @@ -3666,18 +3858,18 @@ let Predicates = [HasSSE2] in { //===---------------------------------------------------------------------===// let Predicates = [HasAVX] in { - defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, 1, - 0>, VEX_4V; - defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, 1, - 0>, VEX_4V; - defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, 1, - 0>, VEX_4V; - defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, 0, - 0>, VEX_4V; - defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, 0, - 0>, VEX_4V; - defm VPCMPGTD : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, 0, - 0>, VEX_4V; + defm VPCMPEQB : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_sse2_pcmpeq_b, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; + defm VPCMPEQW : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_sse2_pcmpeq_w, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; + defm VPCMPEQD : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_sse2_pcmpeq_d, + VR128, memopv2i64, i128mem, 1, 0>, VEX_4V; + defm VPCMPGTB : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_sse2_pcmpgt_b, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; + defm VPCMPGTW : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_sse2_pcmpgt_w, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; + defm VPCMPGTD : 
PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_sse2_pcmpgt_d, + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; def : Pat<(v16i8 (X86pcmpeqb VR128:$src1, VR128:$src2)), (VPCMPEQBrr VR128:$src1, VR128:$src2)>; @@ -3706,13 +3898,34 @@ let Predicates = [HasAVX] in { (VPCMPGTDrm VR128:$src1, addr:$src2)>; } +let Predicates = [HasAVX2] in { + defm VPCMPEQBY : PDI_binop_rm_int<0x74, "vpcmpeqb", int_x86_avx2_pcmpeq_b, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; + defm VPCMPEQWY : PDI_binop_rm_int<0x75, "vpcmpeqw", int_x86_avx2_pcmpeq_w, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; + defm VPCMPEQDY : PDI_binop_rm_int<0x76, "vpcmpeqd", int_x86_avx2_pcmpeq_d, + VR256, memopv4i64, i256mem, 1, 0>, VEX_4V; + defm VPCMPGTBY : PDI_binop_rm_int<0x64, "vpcmpgtb", int_x86_avx2_pcmpgt_b, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; + defm VPCMPGTWY : PDI_binop_rm_int<0x65, "vpcmpgtw", int_x86_avx2_pcmpgt_w, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; + defm VPCMPGTDY : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_avx2_pcmpgt_d, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; +} + let Constraints = "$src1 = $dst" in { - defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, 1>; - defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, 1>; - defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, 1>; - defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b>; - defm PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w>; - defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d>; + defm PCMPEQB : PDI_binop_rm_int<0x74, "pcmpeqb", int_x86_sse2_pcmpeq_b, + VR128, memopv2i64, i128mem, 1>; + defm PCMPEQW : PDI_binop_rm_int<0x75, "pcmpeqw", int_x86_sse2_pcmpeq_w, + VR128, memopv2i64, i128mem, 1>; + defm PCMPEQD : PDI_binop_rm_int<0x76, "pcmpeqd", int_x86_sse2_pcmpeq_d, + VR128, memopv2i64, i128mem, 1>; + defm PCMPGTB : PDI_binop_rm_int<0x64, "pcmpgtb", int_x86_sse2_pcmpgt_b, + VR128, memopv2i64, i128mem>; + defm 
PCMPGTW : PDI_binop_rm_int<0x65, "pcmpgtw", int_x86_sse2_pcmpgt_w, + VR128, memopv2i64, i128mem>; + defm PCMPGTD : PDI_binop_rm_int<0x66, "pcmpgtd", int_x86_sse2_pcmpgt_d, + VR128, memopv2i64, i128mem>; } // Constraints = "$src1 = $dst" let Predicates = [HasSSE2] in { @@ -3749,17 +3962,29 @@ let Predicates = [HasSSE2] in { let Predicates = [HasAVX] in { defm VPACKSSWB : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_sse2_packsswb_128, - 0, 0>, VEX_4V; + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; defm VPACKSSDW : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_sse2_packssdw_128, - 0, 0>, VEX_4V; + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; defm VPACKUSWB : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_sse2_packuswb_128, - 0, 0>, VEX_4V; + VR128, memopv2i64, i128mem, 0, 0>, VEX_4V; +} + +let Predicates = [HasAVX2] in { +defm VPACKSSWBY : PDI_binop_rm_int<0x63, "vpacksswb", int_x86_avx2_packsswb, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; +defm VPACKSSDWY : PDI_binop_rm_int<0x6B, "vpackssdw", int_x86_avx2_packssdw, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; +defm VPACKUSWBY : PDI_binop_rm_int<0x67, "vpackuswb", int_x86_avx2_packuswb, + VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; } let Constraints = "$src1 = $dst" in { -defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128>; -defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128>; -defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128>; +defm PACKSSWB : PDI_binop_rm_int<0x63, "packsswb", int_x86_sse2_packsswb_128, + VR128, memopv2i64, i128mem>; +defm PACKSSDW : PDI_binop_rm_int<0x6B, "packssdw", int_x86_sse2_packssdw_128, + VR128, memopv2i64, i128mem>; +defm PACKUSWB : PDI_binop_rm_int<0x67, "packuswb", int_x86_sse2_packuswb_128, + VR128, memopv2i64, i128mem>; } // Constraints = "$src1 = $dst" //===---------------------------------------------------------------------===// -- cgit v1.1 From fb6ab2b30e822d292c557bda32f7eb0acd1004e2 Mon Sep 17 00:00:00 2001 
From: Owen Anderson Date: Mon, 31 Oct 2011 17:17:32 +0000 Subject: More not-crashing NEON disassembly updates for the vld refactoring. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143351 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 92a4cdc..8870024 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2142,6 +2142,10 @@ static DecodeStatus DecodeVLDInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VLD1d16Twb_fixed: case ARM::VLD1d32Twb_fixed: case ARM::VLD1d64Twb_fixed: + case ARM::VLD1d8Qwb_fixed: + case ARM::VLD1d16Qwb_fixed: + case ARM::VLD1d32Qwb_fixed: + case ARM::VLD1d64Qwb_fixed: case ARM::VLD1d8wb_register: case ARM::VLD1d16wb_register: case ARM::VLD1d32wb_register: -- cgit v1.1 From f9f5a765adf8465530fe1aced6455ca9438bb29a Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 31 Oct 2011 19:11:23 +0000 Subject: ARM writeback vs. stride operands for VST/VLD. The _fixed variants have a writeback operand, but not a stride operand. Split the conditional flag to distinguish the cases. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143356 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 479 ++++++++++++++++---------------- 1 file changed, 240 insertions(+), 239 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index d1ee635..0c1b047 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -102,7 +102,8 @@ namespace { unsigned PseudoOpc; unsigned RealOpc; bool IsLoad; - bool HasWritebackOperand; + bool isUpdating; + bool hasWritebackOperand; NEONRegSpacing RegSpacing; unsigned char NumRegs; // D registers loaded or stored unsigned char RegElts; // elements per D register; used for lane ops @@ -128,238 +129,238 @@ namespace { } static const NEONLdStTableEntry NEONLdStTable[] = { -{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, SingleSpc, 2, 4,true}, -{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, SingleSpc, 2, 4,true}, -{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, SingleSpc, 2, 2,true}, -{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, SingleSpc, 2, 2,true}, -{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, SingleSpc, 2, 8,true}, -{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, SingleSpc, 2, 8,true}, - -{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, EvenDblSpc, 1, 4 ,true}, -{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, EvenDblSpc, 1, 4 ,true}, -{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, EvenDblSpc, 1, 2 ,true}, -{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, EvenDblSpc, 1, 2 ,true}, -{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, EvenDblSpc, 1, 8 ,true}, -{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, EvenDblSpc, 1, 8 ,true}, - -{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, SingleSpc, 4, 1 ,false}, -{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, SingleSpc, 3, 
1 ,false}, -{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q16PseudoWB_fixed, ARM::VLD1q16wb_fixed,true,false,SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q16PseudoWB_register, ARM::VLD1q16wb_register, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q32PseudoWB_fixed, ARM::VLD1q32wb_fixed,true,false,SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q32PseudoWB_register, ARM::VLD1q32wb_register, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, SingleSpc, 2, 1 ,false}, -{ ARM::VLD1q64PseudoWB_fixed, ARM::VLD1q64wb_fixed,true,false,SingleSpc, 2, 2 ,false}, -{ ARM::VLD1q64PseudoWB_register, ARM::VLD1q64wb_register, true, true, SingleSpc, 2, 1 ,false}, -{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD1q8PseudoWB_fixed, ARM::VLD1q8wb_fixed,true,false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD1q8PseudoWB_register, ARM::VLD1q8wb_register,true,true,SingleSpc,2,8,false}, - -{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, SingleSpc, 2, 4,true}, -{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, SingleSpc, 2, 4,true}, -{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, SingleSpc, 2, 2,true}, -{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, SingleSpc, 2, 2,true}, -{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, SingleSpc, 2, 8,true}, -{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, SingleSpc, 2, 8,true}, - -{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, SingleSpc, 2, 4 ,true}, -{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, SingleSpc, 2, 4 ,true}, -{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, SingleSpc, 2, 2 ,true}, -{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, SingleSpc, 2, 2 ,true}, -{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, SingleSpc, 2, 8 ,true}, -{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, 
SingleSpc, 2, 8 ,true}, -{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, EvenDblSpc, 2, 4 ,true}, -{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, EvenDblSpc, 2, 4 ,true}, -{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, EvenDblSpc, 2, 2 ,true}, -{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, EvenDblSpc, 2, 2 ,true}, - -{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, SingleSpc, 2, 8 ,false}, -{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, SingleSpc, 2, 8 ,false}, - -{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, SingleSpc, 4, 4 ,false}, -{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, SingleSpc, 4, 4 ,false}, -{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, SingleSpc, 4, 2 ,false}, -{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, SingleSpc, 4, 2 ,false}, -{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, SingleSpc, 4, 8 ,false}, -{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, SingleSpc, 4, 8 ,false}, - -{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, SingleSpc, 3, 4,true}, -{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, SingleSpc, 3, 4,true}, -{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, SingleSpc, 3, 2,true}, -{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, SingleSpc, 3, 2,true}, -{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, SingleSpc, 3, 8,true}, -{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, SingleSpc, 3, 8,true}, - -{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, SingleSpc, 3, 4 ,true}, -{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, SingleSpc, 3, 4 ,true}, -{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, 
false, SingleSpc, 3, 2 ,true}, -{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, SingleSpc, 3, 2 ,true}, -{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, SingleSpc, 3, 8 ,true}, -{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, SingleSpc, 3, 8 ,true}, -{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, EvenDblSpc, 3, 4 ,true}, -{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, EvenDblSpc, 3, 4 ,true}, -{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, EvenDblSpc, 3, 2 ,true}, -{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, EvenDblSpc, 3, 2 ,true}, - -{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, SingleSpc, 3, 4 ,true}, -{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, SingleSpc, 3, 4 ,true}, -{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, SingleSpc, 3, 2 ,true}, -{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, SingleSpc, 3, 2 ,true}, -{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, SingleSpc, 3, 8 ,true}, -{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, SingleSpc, 3, 8 ,true}, - -{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, EvenDblSpc, 3, 4 ,true}, -{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, OddDblSpc, 3, 4 ,true}, -{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, OddDblSpc, 3, 4 ,true}, -{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, EvenDblSpc, 3, 2 ,true}, -{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, OddDblSpc, 3, 2 ,true}, -{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, OddDblSpc, 3, 2 ,true}, -{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, EvenDblSpc, 3, 8 ,true}, -{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, OddDblSpc, 3, 8 ,true}, -{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, OddDblSpc, 3, 8 ,true}, - -{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, SingleSpc, 4, 4,true}, -{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, SingleSpc, 4, 4,true}, -{ ARM::VLD4DUPd32Pseudo, 
ARM::VLD4DUPd32, true, false, SingleSpc, 4, 2,true}, -{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, SingleSpc, 4, 2,true}, -{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, SingleSpc, 4, 8,true}, -{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, SingleSpc, 4, 8,true}, - -{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, SingleSpc, 4, 4 ,true}, -{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, SingleSpc, 4, 4 ,true}, -{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, SingleSpc, 4, 2 ,true}, -{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, SingleSpc, 4, 2 ,true}, -{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, SingleSpc, 4, 8 ,true}, -{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, SingleSpc, 4, 8 ,true}, -{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, EvenDblSpc, 4, 4 ,true}, -{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, EvenDblSpc, 4, 4 ,true}, -{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, EvenDblSpc, 4, 2 ,true}, -{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, EvenDblSpc, 4, 2 ,true}, - -{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, SingleSpc, 4, 4 ,true}, -{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, SingleSpc, 4, 4 ,true}, -{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, SingleSpc, 4, 2 ,true}, -{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, SingleSpc, 4, 2 ,true}, -{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, SingleSpc, 4, 8 ,true}, -{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, SingleSpc, 4, 8 ,true}, - -{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, EvenDblSpc, 4, 4 ,true}, -{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, OddDblSpc, 4, 4 ,true}, -{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, OddDblSpc, 4, 4 ,true}, -{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, EvenDblSpc, 4, 2 ,true}, -{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, OddDblSpc, 4, 2 ,true}, -{ 
ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, OddDblSpc, 4, 2 ,true}, -{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, EvenDblSpc, 4, 8 ,true}, -{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, OddDblSpc, 4, 8 ,true}, -{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, OddDblSpc, 4, 8 ,true}, - -{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, EvenDblSpc, 1, 4 ,true}, -{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD,false, true, EvenDblSpc, 1, 4 ,true}, -{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, EvenDblSpc, 1, 2 ,true}, -{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD,false, true, EvenDblSpc, 1, 2 ,true}, -{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, EvenDblSpc, 1, 8 ,true}, -{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, EvenDblSpc, 1, 8 ,true}, - -{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, SingleSpc, 4, 1 ,true}, -{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, SingleSpc, 4, 1 ,true}, -{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, SingleSpc, 3, 1 ,true}, -{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, SingleSpc, 3, 1 ,true}, - -{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, SingleSpc, 2, 4 ,true}, -{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, SingleSpc, 2, 4 ,true}, -{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, SingleSpc, 2, 2 ,true}, -{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, SingleSpc, 2, 2 ,true}, -{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, SingleSpc, 2, 1 ,true}, -{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, SingleSpc, 2, 1 ,true}, -{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, SingleSpc, 2, 8 ,true}, -{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, SingleSpc, 2, 8 ,true}, - -{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, SingleSpc, 2, 4 ,true}, -{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, SingleSpc, 2, 4 ,true}, -{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, SingleSpc, 
2, 2 ,true}, -{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, SingleSpc, 2, 2 ,true}, -{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, SingleSpc, 2, 8 ,true}, -{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, SingleSpc, 2, 8 ,true}, -{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, EvenDblSpc, 2, 4,true}, -{ ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, EvenDblSpc, 2, 4,true}, -{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, EvenDblSpc, 2, 2,true}, -{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, EvenDblSpc, 2, 2,true}, - -{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, SingleSpc, 2, 4 ,true}, -{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, SingleSpc, 2, 4 ,true}, -{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, SingleSpc, 2, 2 ,true}, -{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, SingleSpc, 2, 2 ,true}, -{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, SingleSpc, 2, 8 ,true}, -{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, SingleSpc, 2, 8 ,true}, - -{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, SingleSpc, 4, 4 ,true}, -{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, SingleSpc, 4, 4 ,true}, -{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, SingleSpc, 4, 2 ,true}, -{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, SingleSpc, 4, 2 ,true}, -{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, SingleSpc, 4, 8 ,true}, -{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, SingleSpc, 4, 8 ,true}, - -{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, SingleSpc, 3, 4 ,true}, -{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, SingleSpc, 3, 4 ,true}, -{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, SingleSpc, 3, 2 ,true}, -{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, SingleSpc, 3, 2 ,true}, -{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, SingleSpc, 3, 8 ,true}, -{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, 
SingleSpc, 3, 8 ,true}, -{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, EvenDblSpc, 3, 4,true}, -{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, EvenDblSpc, 3, 4,true}, -{ ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, EvenDblSpc, 3, 2,true}, -{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, EvenDblSpc, 3, 2,true}, - -{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, SingleSpc, 3, 4 ,true}, -{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, SingleSpc, 3, 4 ,true}, -{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, SingleSpc, 3, 2 ,true}, -{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, SingleSpc, 3, 2 ,true}, -{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, SingleSpc, 3, 8 ,true}, -{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, SingleSpc, 3, 8 ,true}, - -{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, EvenDblSpc, 3, 4 ,true}, -{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, OddDblSpc, 3, 4 ,true}, -{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, OddDblSpc, 3, 4 ,true}, -{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, EvenDblSpc, 3, 2 ,true}, -{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, OddDblSpc, 3, 2 ,true}, -{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, OddDblSpc, 3, 2 ,true}, -{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, EvenDblSpc, 3, 8 ,true}, -{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, OddDblSpc, 3, 8 ,true}, -{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, OddDblSpc, 3, 8 ,true}, - -{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, SingleSpc, 4, 4 ,true}, -{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, SingleSpc, 4, 4 ,true}, -{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, SingleSpc, 4, 2 ,true}, -{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, SingleSpc, 4, 2 ,true}, -{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, SingleSpc, 4, 8 ,true}, -{ ARM::VST4LNd8Pseudo_UPD, 
ARM::VST4LNd8_UPD, false, true, SingleSpc, 4, 8 ,true}, -{ ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, EvenDblSpc, 4, 4,true}, -{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, EvenDblSpc, 4, 4,true}, -{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, EvenDblSpc, 4, 2,true}, -{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, EvenDblSpc, 4, 2,true}, - -{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, SingleSpc, 4, 4 ,true}, -{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, SingleSpc, 4, 4 ,true}, -{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, SingleSpc, 4, 2 ,true}, -{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, SingleSpc, 4, 2 ,true}, -{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, SingleSpc, 4, 8 ,true}, -{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, SingleSpc, 4, 8 ,true}, - -{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, EvenDblSpc, 4, 4 ,true}, -{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, OddDblSpc, 4, 4 ,true}, -{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, OddDblSpc, 4, 4 ,true}, -{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, EvenDblSpc, 4, 2 ,true}, -{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, OddDblSpc, 4, 2 ,true}, -{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, OddDblSpc, 4, 2 ,true}, -{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, EvenDblSpc, 4, 8 ,true}, -{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, OddDblSpc, 4, 8 ,true}, -{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, OddDblSpc, 4, 8 ,true} +{ ARM::VLD1DUPq16Pseudo, ARM::VLD1DUPq16, true, false, false, SingleSpc, 2, 4,true}, +{ ARM::VLD1DUPq16Pseudo_UPD, ARM::VLD1DUPq16_UPD, true, true, true, SingleSpc, 2, 4,true}, +{ ARM::VLD1DUPq32Pseudo, ARM::VLD1DUPq32, true, false, false, SingleSpc, 2, 2,true}, +{ ARM::VLD1DUPq32Pseudo_UPD, ARM::VLD1DUPq32_UPD, true, true, true, SingleSpc, 2, 2,true}, +{ ARM::VLD1DUPq8Pseudo, ARM::VLD1DUPq8, true, false, 
false, SingleSpc, 2, 8,true}, +{ ARM::VLD1DUPq8Pseudo_UPD, ARM::VLD1DUPq8_UPD, true, true, true, SingleSpc, 2, 8,true}, + +{ ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, +{ ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, +{ ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true}, +{ ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true}, +{ ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true}, +{ ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true}, + +{ ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, +{ ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, +{ ARM::VLD1q16Pseudo, ARM::VLD1q16, true, false, false, SingleSpc, 2, 4 ,false}, +{ ARM::VLD1q16PseudoWB_fixed, ARM::VLD1q16wb_fixed,true,false,false,SingleSpc, 2, 4 ,false}, +{ ARM::VLD1q16PseudoWB_register, ARM::VLD1q16wb_register, true, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VLD1q32Pseudo, ARM::VLD1q32, true, false, false, SingleSpc, 2, 2 ,false}, +{ ARM::VLD1q32PseudoWB_fixed, ARM::VLD1q32wb_fixed,true,false, false,SingleSpc, 2, 2 ,false}, +{ ARM::VLD1q32PseudoWB_register, ARM::VLD1q32wb_register, true, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VLD1q64Pseudo, ARM::VLD1q64, true, false, false, SingleSpc, 2, 1 ,false}, +{ ARM::VLD1q64PseudoWB_fixed, ARM::VLD1q64wb_fixed,true,false, false,SingleSpc, 2, 2 ,false}, +{ ARM::VLD1q64PseudoWB_register, ARM::VLD1q64wb_register, true, true, true, SingleSpc, 2, 1 ,false}, +{ ARM::VLD1q8Pseudo, ARM::VLD1q8, true, false, false, SingleSpc, 2, 8 ,false}, +{ ARM::VLD1q8PseudoWB_fixed, ARM::VLD1q8wb_fixed,true,false, false, SingleSpc, 2, 8 ,false}, +{ ARM::VLD1q8PseudoWB_register, ARM::VLD1q8wb_register,true,true, true,SingleSpc,2,8,false}, + +{ ARM::VLD2DUPd16Pseudo, ARM::VLD2DUPd16, true, false, false, 
SingleSpc, 2, 4,true}, +{ ARM::VLD2DUPd16Pseudo_UPD, ARM::VLD2DUPd16_UPD, true, true, true, SingleSpc, 2, 4,true}, +{ ARM::VLD2DUPd32Pseudo, ARM::VLD2DUPd32, true, false, false, SingleSpc, 2, 2,true}, +{ ARM::VLD2DUPd32Pseudo_UPD, ARM::VLD2DUPd32_UPD, true, true, true, SingleSpc, 2, 2,true}, +{ ARM::VLD2DUPd8Pseudo, ARM::VLD2DUPd8, true, false, false, SingleSpc, 2, 8,true}, +{ ARM::VLD2DUPd8Pseudo_UPD, ARM::VLD2DUPd8_UPD, true, true, true, SingleSpc, 2, 8,true}, + +{ ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, +{ ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, +{ ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true}, +{ ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true}, +{ ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true}, +{ ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true}, +{ ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true}, +{ ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true}, +{ ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true}, +{ ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, + +{ ARM::VLD2d16Pseudo, ARM::VLD2d16, true, false, false, SingleSpc, 2, 4 ,false}, +{ ARM::VLD2d16Pseudo_UPD, ARM::VLD2d16_UPD, true, true, true, SingleSpc, 2, 4 ,false}, +{ ARM::VLD2d32Pseudo, ARM::VLD2d32, true, false, false, SingleSpc, 2, 2 ,false}, +{ ARM::VLD2d32Pseudo_UPD, ARM::VLD2d32_UPD, true, true, true, SingleSpc, 2, 2 ,false}, +{ ARM::VLD2d8Pseudo, ARM::VLD2d8, true, false, false, SingleSpc, 2, 8 ,false}, +{ ARM::VLD2d8Pseudo_UPD, ARM::VLD2d8_UPD, true, true, true, SingleSpc, 2, 8 ,false}, + +{ ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q16Pseudo_UPD, ARM::VLD2q16_UPD, true, true, 
true, SingleSpc, 4, 4 ,false}, +{ ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q32Pseudo_UPD, ARM::VLD2q32_UPD, true, true, true, SingleSpc, 4, 2 ,false}, +{ ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false}, +{ ARM::VLD2q8Pseudo_UPD, ARM::VLD2q8_UPD, true, true, true, SingleSpc, 4, 8 ,false}, + +{ ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true}, +{ ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true}, +{ ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true}, +{ ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true}, +{ ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true}, +{ ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true}, + +{ ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true}, +{ ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, +{ ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true}, +{ ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, +{ ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true}, +{ ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, + +{ ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, +{ ARM::VLD3d32Pseudo, ARM::VLD3d32, true, 
false, false, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, +{ ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true}, +{ ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, + +{ ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, +{ ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true}, +{ ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true}, +{ ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, +{ ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true}, +{ ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true}, +{ ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true}, +{ ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true}, +{ ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true}, + +{ ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true}, +{ ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true}, +{ ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true}, +{ ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true}, +{ ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true}, +{ ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true}, + +{ ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true}, +{ ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, +{ ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true}, +{ ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, +{ ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, 
true, false, false, SingleSpc, 4, 8 ,true}, +{ ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, +{ ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true}, +{ ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, +{ ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true}, +{ ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, + +{ ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true}, +{ ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, +{ ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true}, +{ ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, +{ ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true}, +{ ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, + +{ ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, +{ ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true}, +{ ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true}, +{ ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, +{ ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true}, +{ ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true}, +{ ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true}, +{ ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true}, +{ ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true}, + +{ ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true}, +{ ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true}, +{ ARM::VST1LNq32Pseudo, ARM::VST1LNd32, 
false, false, false, EvenDblSpc, 1, 2 ,true}, +{ ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true}, +{ ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true}, +{ ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true}, + +{ ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,true}, +{ ARM::VST1d64QPseudo_UPD, ARM::VST1d64Q_UPD, false, true, true, SingleSpc, 4, 1 ,true}, +{ ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,true}, +{ ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, true, SingleSpc, 3, 1 ,true}, + +{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,true}, +{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, +{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, false, SingleSpc, 2, 2 ,true}, +{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, +{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, false, SingleSpc, 2, 1 ,true}, +{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, true, SingleSpc, 2, 1 ,true}, +{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, false, SingleSpc, 2, 8 ,true}, +{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, + +{ ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true}, +{ ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, +{ ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true}, +{ ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, +{ ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true}, +{ ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, +{ ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true}, +{ ARM::VST2LNq16Pseudo_UPD, 
ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true}, +{ ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, +{ ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, + +{ ARM::VST2d16Pseudo, ARM::VST2d16, false, false, false, SingleSpc, 2, 4 ,true}, +{ ARM::VST2d16Pseudo_UPD, ARM::VST2d16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, +{ ARM::VST2d32Pseudo, ARM::VST2d32, false, false, false, SingleSpc, 2, 2 ,true}, +{ ARM::VST2d32Pseudo_UPD, ARM::VST2d32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, +{ ARM::VST2d8Pseudo, ARM::VST2d8, false, false, false, SingleSpc, 2, 8 ,true}, +{ ARM::VST2d8Pseudo_UPD, ARM::VST2d8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, + +{ ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,true}, +{ ARM::VST2q16Pseudo_UPD, ARM::VST2q16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, +{ ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,true}, +{ ARM::VST2q32Pseudo_UPD, ARM::VST2q32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, +{ ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,true}, +{ ARM::VST2q8Pseudo_UPD, ARM::VST2q8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, + +{ ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true}, +{ ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, +{ ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true}, +{ ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, +{ ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true}, +{ ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, +{ ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true}, +{ ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true}, +{ ARM::VST3LNq32Pseudo, 
ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true}, +{ ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true}, + +{ ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true}, +{ ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, +{ ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true}, +{ ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, +{ ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true}, +{ ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, + +{ ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true}, +{ ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true}, +{ ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true}, +{ ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true}, +{ ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true}, +{ ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true}, +{ ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true}, +{ ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true}, +{ ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true}, + +{ ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true}, +{ ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, +{ ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true}, +{ ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, +{ ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true}, +{ ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, +{ 
ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true}, +{ ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true}, +{ ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true}, +{ ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true}, + +{ ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true}, +{ ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, +{ ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true}, +{ ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, +{ ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true}, +{ ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, + +{ ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true}, +{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true}, +{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true}, +{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true}, +{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true}, +{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true}, +{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true}, +{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true}, +{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true} }; /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON @@ -437,14 +438,14 @@ void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { if (NumRegs > 3 && TableEntry->copyAllListRegs) MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); - if (TableEntry->HasWritebackOperand) + if (TableEntry->isUpdating) 
MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the am6offset operand. - if (TableEntry->HasWritebackOperand) + if (TableEntry->hasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); // For an instruction writing double-spaced subregs, the pseudo instruction @@ -489,14 +490,14 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(TableEntry->RealOpc)); unsigned OpIdx = 0; - if (TableEntry->HasWritebackOperand) + if (TableEntry->isUpdating) MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the am6offset operand. - if (TableEntry->HasWritebackOperand) + if (TableEntry->hasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); bool SrcIsKill = MI.getOperand(OpIdx).isKill(); @@ -566,14 +567,14 @@ void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); } - if (TableEntry->HasWritebackOperand) + if (TableEntry->isUpdating) MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the addrmode6 operands. MIB.addOperand(MI.getOperand(OpIdx++)); MIB.addOperand(MI.getOperand(OpIdx++)); // Copy the am6offset operand. - if (TableEntry->HasWritebackOperand) + if (TableEntry->hasWritebackOperand) MIB.addOperand(MI.getOperand(OpIdx++)); // Grab the super-register source. -- cgit v1.1 From f47368bbbe1ebcf77efd202362bfd1125a2126c7 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Mon, 31 Oct 2011 20:08:25 +0000 Subject: Cleanup. Document. Make sure that this build_vector optimization only runs before the op legalizer and that the used type is legal. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143358 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 45 ++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9a63799..0a142bd 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -6941,9 +6941,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // Check to see if this is a BUILD_VECTOR of a bunch of values // which come from any_extend or zero_extend nodes. If so, we can create // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR - // optimizations. + // optimizations. We do not handle sign-extend because we can't fill the sign + // using shuffles. EVT SourceType = MVT::Other; - bool allExtend = true; bool allAnyExt = true; for (unsigned i = 0; i < NumInScalars; ++i) { SDValue In = N->getOperand(i); @@ -6953,9 +6953,9 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { bool AnyExt = In.getOpcode() == ISD::ANY_EXTEND; bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND; - // Abort non-extend incoming values. + // Abort if the element is not an extension. if (!ZeroExt && !AnyExt) { - allExtend = false; + SourceType = MVT::Other; break; } @@ -6964,10 +6964,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { // Check that all of the widened source types are the same. if (SourceType == MVT::Other) + // First time. SourceType = InTy; else if (InTy != SourceType) { // Multiple income types. Abort. - allExtend = false; + SourceType = MVT::Other; break; } @@ -6975,17 +6976,27 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { allAnyExt &= AnyExt; } - // And we are post type-legalization, - // If all of the values are Ext or undef, - // We have a non undef entry. 
- if (LegalTypes && allExtend && SourceType != MVT::Other) { + + // In order to have valid types, all of the inputs must be extended from the + // same source type and all of the inputs must be any or zero extend. + // Scalar sizes must be a power of two. + EVT OutScalarTy = N->getValueType(0).getScalarType(); + bool validTypes = SourceType != MVT::Other && + isPowerOf2_32(OutScalarTy.getSizeInBits()) && + isPowerOf2_32(SourceType.getSizeInBits()); + + // We perform this optimization post type-legalization because + // the type-legalizer often scalarizes integer-promoted vectors. + // Performing this optimization before may create bit-casts which + // will be type-legalized to complex code sequences. + // We perform this optimization only before the operation legalizer because we + // may introduce illegal operations. + if (LegalTypes && !LegalOperations && validTypes) { bool isLE = TLI.isLittleEndian(); - EVT InScalarTy = SourceType.getScalarType(); - EVT OutScalarTy = N->getValueType(0).getScalarType(); - unsigned ElemRatio = OutScalarTy.getSizeInBits()/InScalarTy.getSizeInBits(); + unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits(); assert(ElemRatio > 1 && "Invalid element size ratio"); - SDValue Filler = allAnyExt ? DAG.getUNDEF(InScalarTy): - DAG.getConstant(0, InScalarTy); + SDValue Filler = allAnyExt ? DAG.getUNDEF(SourceType): + DAG.getConstant(0, SourceType); unsigned NewBVElems = ElemRatio * N->getValueType(0).getVectorNumElements(); SmallVector Ops(NewBVElems, Filler); @@ -6998,7 +7009,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode"); SDValue In; if (Cast.getOpcode() == ISD::UNDEF) - In = DAG.getUNDEF(InScalarTy); + In = DAG.getUNDEF(SourceType); else In = Cast->getOperand(0); unsigned Index = isLE ? (i * ElemRatio) : @@ -7009,9 +7020,11 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { } // The type of the new BUILD_VECTOR node. 
- EVT VecVT = EVT::getVectorVT(*DAG.getContext(), InScalarTy, NewBVElems); + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems); assert(VecVT.getSizeInBits() == N->getValueType(0).getSizeInBits() && "Invalid vector size"); + // Check if the new vector type is legal. + if (!isTypeLegal(VecVT)) return SDValue(); // Make the new BUILD_VECTOR. SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), -- cgit v1.1 From 4334e032525d6c9038605f3871b945e8cbe6fab7 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 31 Oct 2011 21:50:31 +0000 Subject: ARM VST1 w/ writeback assembly parsing and encoding. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143369 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 32 ++++++--- lib/Target/ARM/ARMISelDAGToDAG.cpp | 42 +++++++++--- lib/Target/ARM/ARMInstrNEON.td | 91 +++++++++++++++++-------- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 34 +++++++++ lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 28 ++++---- 5 files changed, 168 insertions(+), 59 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 0c1b047..5f7b8b2 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -273,13 +273,17 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, true, SingleSpc, 3, 1 ,true}, { ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,true}, -{ ARM::VST1q16Pseudo_UPD, ARM::VST1q16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, +{ ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 ,false}, +{ ARM::VST1q16PseudoWB_register, ARM::VST1q16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, { ARM::VST1q32Pseudo, ARM::VST1q32, false, false, false, SingleSpc, 2, 2 ,true}, -{ ARM::VST1q32Pseudo_UPD, ARM::VST1q32_UPD, false, true, true, SingleSpc, 
2, 2 ,true}, +{ ARM::VST1q32PseudoWB_fixed, ARM::VST1q32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false}, +{ ARM::VST1q32PseudoWB_register, ARM::VST1q32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, { ARM::VST1q64Pseudo, ARM::VST1q64, false, false, false, SingleSpc, 2, 1 ,true}, -{ ARM::VST1q64Pseudo_UPD, ARM::VST1q64_UPD, false, true, true, SingleSpc, 2, 1 ,true}, +{ ARM::VST1q64PseudoWB_fixed, ARM::VST1q64wb_fixed, false, true, false, SingleSpc, 2, 1 ,false}, +{ ARM::VST1q64PseudoWB_register, ARM::VST1q64wb_register, false, true, true, SingleSpc, 2, 1 ,false}, { ARM::VST1q8Pseudo, ARM::VST1q8, false, false, false, SingleSpc, 2, 8 ,true}, -{ ARM::VST1q8Pseudo_UPD, ARM::VST1q8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, +{ ARM::VST1q8PseudoWB_fixed, ARM::VST1q8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, +{ ARM::VST1q8PseudoWB_register, ARM::VST1q8wb_register, false, true, true, SingleSpc, 2, 8 ,false}, { ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true}, { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, @@ -504,10 +508,12 @@ void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { unsigned SrcReg = MI.getOperand(OpIdx++).getReg(); unsigned D0, D1, D2, D3; GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); - MIB.addReg(D0).addReg(D1); - if (NumRegs > 2) + MIB.addReg(D0); + if (NumRegs > 1 && TableEntry->copyAllListRegs) + MIB.addReg(D1); + if (NumRegs > 2 && TableEntry->copyAllListRegs) MIB.addReg(D2); - if (NumRegs > 3) + if (NumRegs > 3 && TableEntry->copyAllListRegs) MIB.addReg(D3); // Copy the predicate operands. 
@@ -1153,10 +1159,14 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::VST1q16Pseudo: case ARM::VST1q32Pseudo: case ARM::VST1q64Pseudo: - case ARM::VST1q8Pseudo_UPD: - case ARM::VST1q16Pseudo_UPD: - case ARM::VST1q32Pseudo_UPD: - case ARM::VST1q64Pseudo_UPD: + case ARM::VST1q8PseudoWB_fixed: + case ARM::VST1q16PseudoWB_fixed: + case ARM::VST1q32PseudoWB_fixed: + case ARM::VST1q64PseudoWB_fixed: + case ARM::VST1q8PseudoWB_register: + case ARM::VST1q16PseudoWB_register: + case ARM::VST1q32PseudoWB_register: + case ARM::VST1q64PseudoWB_register: case ARM::VST2d8Pseudo: case ARM::VST2d16Pseudo: case ARM::VST2d32Pseudo: diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index 7c67e0a..bc8588f 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -1566,6 +1566,19 @@ static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { case ARM::VLD1q16PseudoWB_fixed: return ARM::VLD1q16PseudoWB_register; case ARM::VLD1q32PseudoWB_fixed: return ARM::VLD1q32PseudoWB_register; case ARM::VLD1q64PseudoWB_fixed: return ARM::VLD1q64PseudoWB_register; + + case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; + case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; + case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; + case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; + case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; + case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; + case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; + case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; + case ARM::VST1q8PseudoWB_fixed: return ARM::VST1q8PseudoWB_register; + case ARM::VST1q16PseudoWB_fixed: return ARM::VST1q16PseudoWB_register; + case ARM::VST1q32PseudoWB_fixed: return ARM::VST1q32PseudoWB_register; + case ARM::VST1q64PseudoWB_fixed: return ARM::VST1q64PseudoWB_register; } return Opc; // If not one we handle, return it unchanged. 
} @@ -1635,11 +1648,12 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, SDValue Inc = N->getOperand(AddrOpIdx + 1); // FIXME: VLD1 fixed increment doesn't need Reg0. Remove the reg0 // case entirely when the rest are updated to that form, too. - // Do that before committing this change. Likewise, the opcode - // update call will become unconditional. if (NumVecs == 1 && !isa(Inc.getNode())) Opc = getVLDSTRegisterUpdateOpcode(Opc); - if (NumVecs != 1 || !isa(Inc.getNode())) + // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so + // check for that explicitly too. Horribly hacky, but temporary. + if ((NumVecs != 1 && Opc != ARM::VLD1q64PseudoWB_fixed) || + !isa(Inc.getNode())) Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); } Ops.push_back(Pred); @@ -1782,7 +1796,15 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, Ops.push_back(Align); if (isUpdating) { SDValue Inc = N->getOperand(AddrOpIdx + 1); - Ops.push_back(isa(Inc.getNode()) ? Reg0 : Inc); + // FIXME: VST1 fixed increment doesn't need Reg0. Remove the reg0 + // case entirely when the rest are updated to that form, too. + if (NumVecs == 1 && !isa(Inc.getNode())) + Opc = getVLDSTRegisterUpdateOpcode(Opc); + // We use a VST1 for v1i64 even if the pseudo says vld2/3/4, so + // check for that explicitly too. Horribly hacky, but temporary. + if ((NumVecs != 1 && Opc != ARM::VST1q64PseudoWB_fixed) || + !isa(Inc.getNode())) + Ops.push_back(isa(Inc.getNode()) ? 
Reg0 : Inc); } Ops.push_back(SrcReg); Ops.push_back(Pred); @@ -2844,16 +2866,18 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VST1_UPD: { - unsigned DOpcodes[] = { ARM::VST1d8_UPD, ARM::VST1d16_UPD, - ARM::VST1d32_UPD, ARM::VST1d64_UPD }; - unsigned QOpcodes[] = { ARM::VST1q8Pseudo_UPD, ARM::VST1q16Pseudo_UPD, - ARM::VST1q32Pseudo_UPD, ARM::VST1q64Pseudo_UPD }; + unsigned DOpcodes[] = { ARM::VST1d8wb_fixed, ARM::VST1d16wb_fixed, + ARM::VST1d32wb_fixed, ARM::VST1d64wb_fixed }; + unsigned QOpcodes[] = { ARM::VST1q8PseudoWB_fixed, + ARM::VST1q16PseudoWB_fixed, + ARM::VST1q32PseudoWB_fixed, + ARM::VST1q64PseudoWB_fixed }; return SelectVST(N, true, 1, DOpcodes, QOpcodes, 0); } case ARMISD::VST2_UPD: { unsigned DOpcodes[] = { ARM::VST2d8Pseudo_UPD, ARM::VST2d16Pseudo_UPD, - ARM::VST2d32Pseudo_UPD, ARM::VST1q64Pseudo_UPD }; + ARM::VST2d32Pseudo_UPD, ARM::VST1q64PseudoWB_fixed}; unsigned QOpcodes[] = { ARM::VST2q8Pseudo_UPD, ARM::VST2q16Pseudo_UPD, ARM::VST2q32Pseudo_UPD }; return SelectVST(N, true, 2, DOpcodes, QOpcodes, 0); diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3023a34..d3c4486b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1208,6 +1208,14 @@ class VSTQWBPseudo : PseudoNLdSt<(outs GPR:$wb), (ins addrmode6:$addr, am6offset:$offset, QPR:$src), itin, "$addr.addr = $wb">; +class VSTQWBfixedPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, QPR:$src), itin, + "$addr.addr = $wb">; +class VSTQWBregisterPseudo + : PseudoNLdSt<(outs GPR:$wb), + (ins addrmode6:$addr, rGPR:$offset, QPR:$src), itin, + "$addr.addr = $wb">; class VSTQQPseudo : PseudoNLdSt<(outs), (ins addrmode6:$addr, QQPR:$src), itin, "">; class VSTQQWBPseudo @@ -1254,36 +1262,65 @@ def VST1q32Pseudo : VSTQPseudo; def VST1q64Pseudo : VSTQPseudo; // ...with address register writeback: -class VST1DWB op7_4, string Dt> - : NLdSt<0, 0b00, 0b0111, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, 
DPR:$Vd), IIC_VST1u, - "vst1", Dt, "\\{$Vd\\}, $Rn$Rm", "$Rn.addr = $wb", []> { - let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1DWB op7_4, string Dt> { + def _fixed : NLdSt<0,0b00, 0b0111,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListOneD:$Vd), IIC_VLD1u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), + IIC_VLD1u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{4} = Rn{4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -class VST1QWB op7_4, string Dt> - : NLdSt<0, 0b00, 0b1010, op7_4, (outs GPR:$wb), - (ins addrmode6:$Rn, am6offset:$Rm, DPR:$Vd, DPR:$src2), - IIC_VST1x2u, "vst1", Dt, "\\{$Vd, $src2\\}, $Rn$Rm", - "$Rn.addr = $wb", []> { - let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; +multiclass VST1QWB op7_4, string Dt> { + def _fixed : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, VecListTwoD:$Vd), IIC_VLD1x2u, + "vst1", Dt, "$Vd, $Rn!", + "$Rn.addr = $wb", []> { + let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
+ let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbFixed"; + } + def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), + (ins addrmode6:$Rn, rGPR:$Rm, VecListTwoD:$Vd), + IIC_VLD1x2u, + "vst1", Dt, "$Vd, $Rn, $Rm", + "$Rn.addr = $wb", []> { + let Inst{5-4} = Rn{5-4}; + let DecoderMethod = "DecodeVSTInstruction"; + let AsmMatchConverter = "cvtVSTwbRegister"; + } } -def VST1d8_UPD : VST1DWB<{0,0,0,?}, "8">; -def VST1d16_UPD : VST1DWB<{0,1,0,?}, "16">; -def VST1d32_UPD : VST1DWB<{1,0,0,?}, "32">; -def VST1d64_UPD : VST1DWB<{1,1,0,?}, "64">; - -def VST1q8_UPD : VST1QWB<{0,0,?,?}, "8">; -def VST1q16_UPD : VST1QWB<{0,1,?,?}, "16">; -def VST1q32_UPD : VST1QWB<{1,0,?,?}, "32">; -def VST1q64_UPD : VST1QWB<{1,1,?,?}, "64">; - -def VST1q8Pseudo_UPD : VSTQWBPseudo; -def VST1q16Pseudo_UPD : VSTQWBPseudo; -def VST1q32Pseudo_UPD : VSTQWBPseudo; -def VST1q64Pseudo_UPD : VSTQWBPseudo; +defm VST1d8wb : VST1DWB<{0,0,0,?}, "8">; +defm VST1d16wb : VST1DWB<{0,1,0,?}, "16">; +defm VST1d32wb : VST1DWB<{1,0,0,?}, "32">; +defm VST1d64wb : VST1DWB<{1,1,0,?}, "64">; + +defm VST1q8wb : VST1QWB<{0,0,?,?}, "8">; +defm VST1q16wb : VST1QWB<{0,1,?,?}, "16">; +defm VST1q32wb : VST1QWB<{1,0,?,?}, "32">; +defm VST1q64wb : VST1QWB<{1,1,?,?}, "64">; + +def VST1q8PseudoWB_fixed : VSTQWBfixedPseudo; +def VST1q16PseudoWB_fixed : VSTQWBfixedPseudo; +def VST1q32PseudoWB_fixed : VSTQWBfixedPseudo; +def VST1q64PseudoWB_fixed : VSTQWBfixedPseudo; +def VST1q8PseudoWB_register : VSTQWBregisterPseudo; +def VST1q16PseudoWB_register : VSTQWBregisterPseudo; +def VST1q32PseudoWB_register : VSTQWBregisterPseudo; +def VST1q64PseudoWB_register : VSTQWBregisterPseudo; // ...with 3 registers class VST1D3 op7_4, string Dt> diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 03fba5a..0732060 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -202,6 +202,10 
@@ class ARMAsmParser : public MCTargetAsmParser { const SmallVectorImpl &); bool cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, const SmallVectorImpl &); + bool cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &); + bool cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &); bool validateInstruction(MCInst &Inst, const SmallVectorImpl &Ops); @@ -3429,6 +3433,36 @@ cvtVLDwbRegister(MCInst &Inst, unsigned Opcode, return true; } +bool ARMAsmParser:: +cvtVSTwbFixed(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // Vn + ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); + // Vt + ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + +bool ARMAsmParser:: +cvtVSTwbRegister(MCInst &Inst, unsigned Opcode, + const SmallVectorImpl &Operands) { + // Create a writeback register dummy placeholder. + Inst.addOperand(MCOperand::CreateImm(0)); + // Vn + ((ARMOperand*)Operands[4])->addAlignedMemoryOperands(Inst, 2); + // Vm + ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); + // Vt + ((ARMOperand*)Operands[3])->addVecListTwoDOperands(Inst, 1); + // pred + ((ARMOperand*)Operands[1])->addCondCodeOperands(Inst, 2); + return true; +} + /// Parse an ARM memory expression, return false if successful else return true /// or an error. The first token must be a '[' when called. 
bool ARMAsmParser:: diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 8870024..e81cc76 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2183,14 +2183,22 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Writeback Operand switch (Inst.getOpcode()) { - case ARM::VST1d8_UPD: - case ARM::VST1d16_UPD: - case ARM::VST1d32_UPD: - case ARM::VST1d64_UPD: - case ARM::VST1q8_UPD: - case ARM::VST1q16_UPD: - case ARM::VST1q32_UPD: - case ARM::VST1q64_UPD: + case ARM::VST1d8wb_fixed: + case ARM::VST1d16wb_fixed: + case ARM::VST1d32wb_fixed: + case ARM::VST1d64wb_fixed: + case ARM::VST1d8wb_register: + case ARM::VST1d16wb_register: + case ARM::VST1d32wb_register: + case ARM::VST1d64wb_register: + case ARM::VST1q8wb_fixed: + case ARM::VST1q16wb_fixed: + case ARM::VST1q32wb_fixed: + case ARM::VST1q64wb_fixed: + case ARM::VST1q8wb_register: + case ARM::VST1q16wb_register: + case ARM::VST1q32wb_register: + case ARM::VST1q64wb_register: case ARM::VST1d8T_UPD: case ARM::VST1d16T_UPD: case ARM::VST1d32T_UPD: @@ -2249,10 +2257,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, case ARM::VST1q16: case ARM::VST1q32: case ARM::VST1q64: - case ARM::VST1q8_UPD: - case ARM::VST1q16_UPD: - case ARM::VST1q32_UPD: - case ARM::VST1q64_UPD: case ARM::VST1d8T: case ARM::VST1d16T: case ARM::VST1d32T: -- cgit v1.1 From 2ad3f93b5f1d6e22f682efd6fd10f7f3ed9fa992 Mon Sep 17 00:00:00 2001 From: Devang Patel Date: Mon, 31 Oct 2011 23:58:51 +0000 Subject: Add utility to append a function to the list of global constructors. Patch by Kostya Serebryany. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143405 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/CMakeLists.txt | 1 + lib/Transforms/Utils/ModuleUtils.cpp | 55 ++++++++++++++++++++++++++++++++++++ 2 files changed, 56 insertions(+) create mode 100644 lib/Transforms/Utils/ModuleUtils.cpp (limited to 'lib') diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt index 7adc5f1..6d5432d 100644 --- a/lib/Transforms/Utils/CMakeLists.txt +++ b/lib/Transforms/Utils/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_library(LLVMTransformUtils LowerInvoke.cpp LowerSwitch.cpp Mem2Reg.cpp + ModuleUtils.cpp PromoteMemoryToRegister.cpp SSAUpdater.cpp SimplifyCFG.cpp diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp new file mode 100644 index 0000000..db81de7 --- /dev/null +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -0,0 +1,55 @@ +//===-- ModuleUtils.cpp - Functions to manipulate Modules -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This family of functions perform manipulations on Modules. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/ModuleUtils.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/Module.h" +#include "llvm/Support/IRBuilder.h" +using namespace llvm; + +void llvm::appendToGlobalCtors(Module &M, Function *F, int Priority) { + IRBuilder<> IRB(M.getContext()); + FunctionType *FnTy = FunctionType::get(IRB.getVoidTy(), false); + StructType *Ty = StructType::get( + IRB.getInt32Ty(), PointerType::getUnqual(FnTy), NULL); + + Constant *RuntimeCtorInit = ConstantStruct::get( + Ty, IRB.getInt32(Priority), F, NULL); + + // Get the current set of static global constructors and add the new ctor + // to the list. + SmallVector CurrentCtors; + if (GlobalVariable * GVCtor = M.getNamedGlobal("llvm.global_ctors")) { + if (Constant *Init = GVCtor->getInitializer()) { + unsigned n = Init->getNumOperands(); + CurrentCtors.reserve(n + 1); + for (unsigned i = 0; i != n; ++i) + CurrentCtors.push_back(cast(Init->getOperand(i))); + } + GVCtor->eraseFromParent(); + } + + CurrentCtors.push_back(RuntimeCtorInit); + + // Create a new initializer. + ArrayType *AT = ArrayType::get(RuntimeCtorInit->getType(), + CurrentCtors.size()); + Constant *NewInit = ConstantArray::get(AT, CurrentCtors); + + // Create the new global variable and replace all uses of + // the old global variable with the new one. + (void)new GlobalVariable(M, NewInit->getType(), false, + GlobalValue::AppendingLinkage, NewInit, + "llvm.global_ctors"); +} -- cgit v1.1 From a7dd4dfccab3ab5b2a7f187baf6522a93d9acab2 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 31 Oct 2011 23:59:22 +0000 Subject: Add support for new atomics to cpp backend. Misc other fixes while I'm here. PR11268. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143406 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CppBackend/CPPBackend.cpp | 99 +++++++++++++++++++++++++++++++++++- 1 file changed, 97 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index ae0e3c4..17ca23a 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1016,6 +1016,27 @@ std::string CppWriter::getOpName(const Value* V) { return result; } +static StringRef ConvertAtomicOrdering(AtomicOrdering Ordering) { + switch (Ordering) { + case NotAtomic: return "NotAtomic"; + case Unordered: return "Unordered"; + case Monotonic: return "Monotonic"; + case Acquire: return "Acquire"; + case Release: return "Release"; + case AcquireRelease: return "AcquireRelease"; + case SequentiallyConsistent: return "SequentiallyConsistent"; + } + llvm_unreachable("Unknown ordering"); +} + +static StringRef ConvertAtomicSynchScope(SynchronizationScope SynchScope) { + switch (SynchScope) { + case SingleThread: return "SingleThread"; + case CrossThread: return "CrossThread"; + } + llvm_unreachable("Unknown synch scope"); +} + // printInstruction - This member is called for each Instruction in a function. void CppWriter::printInstruction(const Instruction *I, const std::string& bbname) { @@ -1237,15 +1258,33 @@ void CppWriter::printInstruction(const Instruction *I, printEscapedString(load->getName()); Out << "\", " << (load->isVolatile() ? 
"true" : "false" ) << ", " << bbname << ");"; + if (load->getAlignment()) + nl(Out) << iName << "->setAlignment(" + << load->getAlignment() << ");"; + if (load->isAtomic()) { + StringRef Ordering = ConvertAtomicOrdering(load->getOrdering()); + StringRef CrossThread = ConvertAtomicSynchScope(load->getSynchScope()); + nl(Out) << iName << "->setAtomic(" + << Ordering << ", " << CrossThread << ");"; + } break; } case Instruction::Store: { const StoreInst* store = cast(I); - Out << " new StoreInst(" + Out << "StoreInst* " << iName << " = new StoreInst(" << opNames[0] << ", " << opNames[1] << ", " << (store->isVolatile() ? "true" : "false") << ", " << bbname << ");"; + if (store->getAlignment()) + nl(Out) << iName << "->setAlignment(" + << store->getAlignment() << ");"; + if (store->isAtomic()) { + StringRef Ordering = ConvertAtomicOrdering(store->getOrdering()); + StringRef CrossThread = ConvertAtomicSynchScope(store->getSynchScope()); + nl(Out) << iName << "->setAtomic(" + << Ordering << ", " << CrossThread << ");"; + } break; } case Instruction::GetElementPtr: { @@ -1447,6 +1486,60 @@ void CppWriter::printInstruction(const Instruction *I, Out << "\", " << bbname << ");"; break; } + case Instruction::Fence: { + const FenceInst *fi = cast(I); + StringRef Ordering = ConvertAtomicOrdering(fi->getOrdering()); + StringRef CrossThread = ConvertAtomicSynchScope(fi->getSynchScope()); + Out << "FenceInst* " << iName + << " = new FenceInst(mod->getContext(), " + << Ordering << ", " << CrossThread + << ");"; + break; + } + case Instruction::AtomicCmpXchg: { + const AtomicCmpXchgInst *cxi = cast(I); + StringRef Ordering = ConvertAtomicOrdering(cxi->getOrdering()); + StringRef CrossThread = ConvertAtomicSynchScope(cxi->getSynchScope()); + Out << "AtomicCmpXchgInst* " << iName + << " = new AtomicCmpXchgInst(" + << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", " + << Ordering << ", " << CrossThread + << ");"; + nl(Out) << iName << "->setName(\""; + 
printEscapedString(cxi->getName()); + Out << "\");"; + break; + } + case Instruction::AtomicRMW: { + const AtomicRMWInst *rmwi = cast(I); + StringRef Ordering = ConvertAtomicOrdering(rmwi->getOrdering()); + StringRef CrossThread = ConvertAtomicSynchScope(rmwi->getSynchScope()); + StringRef Operation; + switch (rmwi->getOperation()) { + case AtomicRMWInst::Xchg: Operation = "AtomicRMWInst::Xchg"; break; + case AtomicRMWInst::Add: Operation = "AtomicRMWInst::Add"; break; + case AtomicRMWInst::Sub: Operation = "AtomicRMWInst::Sub"; break; + case AtomicRMWInst::And: Operation = "AtomicRMWInst::And"; break; + case AtomicRMWInst::Nand: Operation = "AtomicRMWInst::Nand"; break; + case AtomicRMWInst::Or: Operation = "AtomicRMWInst::Or"; break; + case AtomicRMWInst::Xor: Operation = "AtomicRMWInst::Xor"; break; + case AtomicRMWInst::Max: Operation = "AtomicRMWInst::Max"; break; + case AtomicRMWInst::Min: Operation = "AtomicRMWInst::Min"; break; + case AtomicRMWInst::UMax: Operation = "AtomicRMWInst::UMax"; break; + case AtomicRMWInst::UMin: Operation = "AtomicRMWInst::UMin"; break; + case AtomicRMWInst::BAD_BINOP: llvm_unreachable("Bad atomic operation"); + } + Out << "AtomicRMWInst* " << iName + << " = new AtomicRMWInst(" + << Operation << ", " + << opNames[0] << ", " << opNames[1] << ", " + << Ordering << ", " << CrossThread + << ");"; + nl(Out) << iName << "->setName(\""; + printEscapedString(rmwi->getName()); + Out << "\");"; + break; + } } DefinedValues.insert(I); nl(Out); @@ -1623,7 +1716,9 @@ void CppWriter::printFunctionBody(const Function *F) { Out << "Value* " << getCppName(AI) << " = args++;"; nl(Out); if (AI->hasName()) { - Out << getCppName(AI) << "->setName(\"" << AI->getName() << "\");"; + Out << getCppName(AI) << "->setName(\""; + printEscapedString(AI->getName()); + Out << "\");"; nl(Out); } } -- cgit v1.1 From 7bdf0060a00f04ad03d3c6f294d8db6f4951dbc2 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 1 Nov 2011 00:02:31 +0000 Subject: Update 
split candidate correctly when interference cache is full. No test case, spotted by inspection. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143407 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegAllocGreedy.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index f54a2c8..71b7f4f 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -1118,6 +1118,8 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, } --NumCands; GlobalCand[Worst] = GlobalCand[NumCands]; + if (BestCand == NumCands) + BestCand = Worst; } if (GlobalCand.size() <= NumCands) -- cgit v1.1 From 681460f954e9c13ffd2f02f27bba048ccf90abaf Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 1 Nov 2011 01:24:45 +0000 Subject: ARM VLD/VST assembly parsing for symbolic address operands. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143413 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 20 ++++++++++++++++++++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 16 +++++++++++++++- lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 2 +- 3 files changed, 36 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 43f1194..f9969b9 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -290,6 +290,26 @@ class InstThumb + : InstTemplate { + let OutOperandList = (ops); + let InOperandList = iops; + let Pattern = []; + let isCodeGenOnly = 0; // So we get asm matcher for it. 
+ let isPseudo = 1; +} + +class ARMAsmPseudo : AsmPseudoInst, Requires<[IsARM]>; +class tAsmPseudo : AsmPseudoInst, Requires<[IsThumb]>; +class t2AsmPseudo : AsmPseudoInst, Requires<[IsThumb2]>; +class VFP2AsmPseudo : AsmPseudoInst, Requires<[HasVFP2]>; +class NEONAsmPseudo : AsmPseudoInst, Requires<[HasNEON]>; + +// Pseudo instructions for the code generator. class PseudoInst pattern> : InstTemplate { diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 0732060..46ea29f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -761,6 +761,11 @@ public: return (Val > -256 && Val < 256) || Val == INT32_MIN; } bool isAddrMode5() const { + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (Kind == k_Immediate && !isa(getImm())) + return true; if (!isMemory() || Memory.Alignment != 0) return false; // Check for register offset. if (Memory.OffsetRegNum) return false; @@ -768,7 +773,7 @@ public: if (!Memory.OffsetImm) return true; int64_t Val = Memory.OffsetImm->getValue(); return (Val >= -1020 && Val <= 1020 && ((Val & 3) == 0)) || - Val == INT32_MIN; + Val == INT32_MIN; } bool isMemTBB() const { if (!isMemory() || !Memory.OffsetRegNum || Memory.isNegative || @@ -1375,6 +1380,15 @@ public: void addAddrMode5Operands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); + // If we have an immediate that's not a constant, treat it as a label + // reference needing a fixup. If it is a constant, it's something else + // and we reject it. + if (isImm()) { + Inst.addOperand(MCOperand::CreateExpr(getImm())); + Inst.addOperand(MCOperand::CreateImm(0)); + return; + } + // The lower two bits are always zero and as such are not encoded. int32_t Val = Memory.OffsetImm ? 
Memory.OffsetImm->getValue() / 4 : 0; ARM_AM::AddrOpc AddSub = Val < 0 ? ARM_AM::sub : ARM_AM::add; diff --git a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp index c31c5e6..1bc585b 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp @@ -63,7 +63,7 @@ public: { "fixup_arm_ldst_pcrel_12", 1, 24, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_ldst_pcrel_12", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, -{ "fixup_arm_pcrel_10", 1, 24, MCFixupKindInfo::FKF_IsPCRel }, +{ "fixup_arm_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, { "fixup_t2_pcrel_10", 0, 32, MCFixupKindInfo::FKF_IsPCRel | MCFixupKindInfo::FKF_IsAlignedDownTo32Bits}, { "fixup_thumb_adr_pcrel_10",0, 8, MCFixupKindInfo::FKF_IsPCRel | -- cgit v1.1 From 049260d9e2f72d650d97167e1ab451384e32b014 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 1 Nov 2011 04:49:29 +0000 Subject: Make sure we use the right insertion point when instcombine replaces a PHI with another instruction. (Specifically, don't insert an arbitrary instruction before a PHI.) Fixes PR11275. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143437 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstructionCombining.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 288fe68..0cc969b 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -2028,9 +2028,10 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) { BasicBlock *InstParent = I->getParent(); BasicBlock::iterator InsertPos = I; - if (!isa(Result)) // If combining a PHI, don't insert - while (isa(InsertPos)) // middle of a block of PHIs. 
- ++InsertPos; + // If we replace a PHI with something that isn't a PHI, fix up the + // insertion point. + if (!isa(Result) && isa(InsertPos)) + InsertPos = InstParent->getFirstInsertionPt(); InstParent->getInstList().insert(InsertPos, Result); -- cgit v1.1 From c94792507ddac393a0a9f4cdf1bf258a09a47268 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 1 Nov 2011 05:11:01 +0000 Subject: Remove a couple unused methods. PR11201. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143452 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Path.cpp | 10 ---------- lib/Support/Unix/Path.inc | 5 ----- lib/Support/Windows/Path.inc | 8 -------- 3 files changed, 23 deletions(-) (limited to 'lib') diff --git a/lib/Support/Path.cpp b/lib/Support/Path.cpp index e5b7cd3..a4d49dc 100644 --- a/lib/Support/Path.cpp +++ b/lib/Support/Path.cpp @@ -38,16 +38,6 @@ bool Path::operator<(const Path& that) const { return path < that.path; } -Path -Path::GetLLVMConfigDir() { - Path result; -#ifdef LLVM_ETCDIR - if (result.set(LLVM_ETCDIR)) - return result; -#endif - return GetLLVMDefaultConfigDir(); -} - LLVMFileType sys::IdentifyFileType(const char *magic, unsigned length) { assert(magic && "Invalid magic number string"); diff --git a/lib/Support/Unix/Path.inc b/lib/Support/Unix/Path.inc index 85c7c40..418dc07 100644 --- a/lib/Support/Unix/Path.inc +++ b/lib/Support/Unix/Path.inc @@ -235,11 +235,6 @@ Path::GetBitcodeLibraryPaths(std::vector& Paths) { } Path -Path::GetLLVMDefaultConfigDir() { - return Path("/etc/llvm/"); -} - -Path Path::GetUserHomeDirectory() { const char* home = getenv("HOME"); Path result; diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc index 8a5edcc..d8dc522 100644 --- a/lib/Support/Windows/Path.inc +++ b/lib/Support/Windows/Path.inc @@ -277,14 +277,6 @@ Path::GetBitcodeLibraryPaths(std::vector& Paths) { } Path -Path::GetLLVMDefaultConfigDir() { - Path ret = GetUserHomeDirectory(); - if (!ret.appendComponent(".llvm")) - assert(0 && 
"Failed to append .llvm"); - return ret; -} - -Path Path::GetUserHomeDirectory() { char buff[MAX_PATH]; HRESULT res = SHGetFolderPathA(NULL, -- cgit v1.1 From 0e6c1c536bc39f26a01fb1e207f65a351b2b4269 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Tue, 1 Nov 2011 11:31:53 +0000 Subject: Don't fold negative offsets into cp / dp accesses to avoid relocation errors. This can happen if the address + addend is less than the start of the cp / dp. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143459 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/XCore/XCoreISelDAGToDAG.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp index 4dac1ce..8d746ae 100644 --- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp +++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp @@ -120,7 +120,7 @@ bool XCoreDAGToDAGISel::SelectADDRdpii(SDValue Addr, SDValue &Base, ConstantSDNode *CN = 0; if ((Addr.getOperand(0).getOpcode() == XCoreISD::DPRelativeWrapper) && (CN = dyn_cast(Addr.getOperand(1))) - && (CN->getSExtValue() % 4 == 0)) { + && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) { // Constant word offset from a object in the data region Base = Addr.getOperand(0).getOperand(0); Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32); @@ -141,7 +141,7 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Addr, SDValue &Base, ConstantSDNode *CN = 0; if ((Addr.getOperand(0).getOpcode() == XCoreISD::CPRelativeWrapper) && (CN = dyn_cast(Addr.getOperand(1))) - && (CN->getSExtValue() % 4 == 0)) { + && (CN->getSExtValue() % 4 == 0 && CN->getSExtValue() >= 0)) { // Constant word offset from a object in the data region Base = Addr.getOperand(0).getOperand(0); Offset = CurDAG->getTargetConstant(CN->getSExtValue(), MVT::i32); -- cgit v1.1 From fadfd7b9776c723357894af00442a35d0a4d0acf Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 1 Nov 2011 18:10:23 +0000 
Subject: Ignore MachO symbol flags in the upper nibble of n_desc. They don't impact the MCJIT rtdyld, so just mask them off for now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143472 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 623e9b2..f5a68c8 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -176,7 +176,8 @@ loadSegment32(const MachOObject *Obj, continue; // Flags == 0x8 marks a thumb function for ARM, which is fine as it // doesn't require any special handling here. - if (STE->Flags != 0x0 && STE->Flags != 0x8) + // Flags in the upper nibble we don't care about. + if ((STE->Flags & 0xf) != 0x0 && STE->Flags != 0x8) continue; // Remember the symbol. @@ -313,7 +314,8 @@ loadSegment64(const MachOObject *Obj, // FIXME: Check the symbol type and flags. if (STE->Type != 0xF) // external, defined in this section. continue; - if (STE->Flags != 0x0) + // Flags in the upper nibble we don't care about. + if ((STE->Flags & 0xf) != 0x0) continue; // Remember the symbol. -- cgit v1.1 From f6aa6b12f132b41b1337fef14110696458a0f323 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 1 Nov 2011 21:18:39 +0000 Subject: Teach the x86 backend a couple tricks for dealing with v16i8 sra by a constant splat value. Fixes PR11289. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143498 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2ddb1b7..c9b6422 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -966,6 +966,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SRA, MVT::v4i32, Custom); setOperationAction(ISD::SRA, MVT::v8i16, Custom); + setOperationAction(ISD::SRA, MVT::v16i8, Custom); } if (Subtarget->hasSSE42() || Subtarget->hasAVX()) @@ -9994,6 +9995,23 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32), R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v16i8 && Op.getOpcode() == ISD::SRA) { + if (ShiftAmt == 7) { + // R s>> 7 === R s< 0 + SDValue Zeros = getZeroVector(VT, true /* HasXMMInt */, DAG, dl); + return DAG.getNode(X86ISD::PCMPGTB, dl, VT, Zeros, R); + } + + // R s>> a === ((R u>> a) ^ m) - m + SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); + SmallVector V(16, DAG.getConstant(128 >> ShiftAmt, + MVT::i8)); + SDValue Mask = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &V[0], 16); + Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); + Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); + return Res; + } } } -- cgit v1.1 From de2e0b5e6d5aa7eccf62df17b86c31b39732bf86 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Tue, 1 Nov 2011 21:31:44 +0000 Subject: rename LLVM_HOSTTRIPLE into LLVM_DEFAULT_TARGET_TRIPLE git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143501 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Unix/Host.inc | 5 +---- lib/Support/Windows/Host.inc | 3 +-- 2 files changed, 2 insertions(+), 6 deletions(-) (limited to 'lib') diff --git 
a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc index dda3ce2..dc16417 100644 --- a/lib/Support/Unix/Host.inc +++ b/lib/Support/Unix/Host.inc @@ -36,10 +36,7 @@ static std::string getOSVersion() { } std::string sys::getHostTriple() { - // FIXME: Derive directly instead of relying on the autoconf generated - // variable. - - StringRef HostTripleString(LLVM_HOSTTRIPLE); + StringRef HostTripleString(LLVM_DEFAULT_TARGET_TRIPLE); std::pair ArchSplit = HostTripleString.split('-'); // Normalize the arch, since the host triple may not actually match the host. diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc index 733830e..5bbc74e 100644 --- a/lib/Support/Windows/Host.inc +++ b/lib/Support/Windows/Host.inc @@ -18,6 +18,5 @@ using namespace llvm; std::string sys::getHostTriple() { - // FIXME: Adapt to running version. - return LLVM_HOSTTRIPLE; + return LLVM_DEFAULT_TARGET_TRIPLE; } -- cgit v1.1 From 0173864d8a87d9243d304fbf91b556e20b5a32fc Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Tue, 1 Nov 2011 21:32:20 +0000 Subject: rename getHostTriple into getDefaultTargetTriple git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143502 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/TargetSelect.cpp | 2 +- lib/Support/CommandLine.cpp | 2 +- lib/Support/TargetRegistry.cpp | 2 +- lib/Support/Unix/Host.inc | 10 +++++----- lib/Support/Windows/Host.inc | 2 +- lib/Target/CBackend/CBackend.cpp | 4 ++-- 6 files changed, 11 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp index 004b865..45480a6 100644 --- a/lib/ExecutionEngine/TargetSelect.cpp +++ b/lib/ExecutionEngine/TargetSelect.cpp @@ -35,7 +35,7 @@ TargetMachine *EngineBuilder::selectTarget(Module *Mod, std::string *ErrorStr) { Triple TheTriple(Mod->getTargetTriple()); if (TheTriple.getTriple().empty()) - TheTriple.setTriple(sys::getHostTriple()); + 
TheTriple.setTriple(sys::getDefaultTargetTriple()); // Adjust the triple to match what the user requested. const Target *TheTarget = 0; diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp index 238adcc..4b43ae9 100644 --- a/lib/Support/CommandLine.cpp +++ b/lib/Support/CommandLine.cpp @@ -1369,7 +1369,7 @@ public: #if (ENABLE_TIMESTAMPS == 1) << " Built " << __DATE__ << " (" << __TIME__ << ").\n" #endif - << " Host: " << sys::getHostTriple() << '\n' + << " Default target: " << sys::getDefaultTargetTriple() << '\n' << " Host CPU: " << CPU << '\n'; } void operator=(bool OptionWasSpecified) { diff --git a/lib/Support/TargetRegistry.cpp b/lib/Support/TargetRegistry.cpp index 7497bfe..53c8d84 100644 --- a/lib/Support/TargetRegistry.cpp +++ b/lib/Support/TargetRegistry.cpp @@ -84,7 +84,7 @@ void TargetRegistry::RegisterTarget(Target &T, } const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) { - const Target *TheTarget = lookupTarget(sys::getHostTriple(), Error); + const Target *TheTarget = lookupTarget(sys::getDefaultTargetTriple(), Error); if (TheTarget && !TheTarget->hasJIT()) { Error = "No JIT compatible target available for this host"; diff --git a/lib/Support/Unix/Host.inc b/lib/Support/Unix/Host.inc index dc16417..726e2fb 100644 --- a/lib/Support/Unix/Host.inc +++ b/lib/Support/Unix/Host.inc @@ -35,11 +35,11 @@ static std::string getOSVersion() { return info.release; } -std::string sys::getHostTriple() { - StringRef HostTripleString(LLVM_DEFAULT_TARGET_TRIPLE); - std::pair ArchSplit = HostTripleString.split('-'); +std::string sys::getDefaultTargetTriple() { + StringRef TargetTripleString(LLVM_DEFAULT_TARGET_TRIPLE); + std::pair ArchSplit = TargetTripleString.split('-'); - // Normalize the arch, since the host triple may not actually match the host. + // Normalize the arch, since the target triple may not actually match the target. 
std::string Arch = ArchSplit.first; std::string Triple(Arch); @@ -52,7 +52,7 @@ std::string sys::getHostTriple() { Triple[1] = '3'; // On darwin, we want to update the version to match that of the - // host. + // target. std::string::size_type DarwinDashIdx = Triple.find("-darwin"); if (DarwinDashIdx != std::string::npos) { Triple.resize(DarwinDashIdx + strlen("-darwin")); diff --git a/lib/Support/Windows/Host.inc b/lib/Support/Windows/Host.inc index 5bbc74e..2e6d6f1 100644 --- a/lib/Support/Windows/Host.inc +++ b/lib/Support/Windows/Host.inc @@ -17,6 +17,6 @@ using namespace llvm; -std::string sys::getHostTriple() { +std::string sys::getDefaultTargetTriple() { return LLVM_DEFAULT_TARGET_TRIPLE; } diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp index 69d8c46..06e812b 100644 --- a/lib/Target/CBackend/CBackend.cpp +++ b/lib/Target/CBackend/CBackend.cpp @@ -1660,7 +1660,7 @@ bool CWriter::doInitialization(Module &M) { #if 0 std::string Triple = TheModule->getTargetTriple(); if (Triple.empty()) - Triple = llvm::sys::getHostTriple(); + Triple = llvm::sys::getDefaultTargetTriple(); std::string E; if (const Target *Match = TargetRegistry::lookupTarget(Triple, E)) @@ -3167,7 +3167,7 @@ std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) { const MCAsmInfo *TargetAsm; std::string Triple = TheModule->getTargetTriple(); if (Triple.empty()) - Triple = llvm::sys::getHostTriple(); + Triple = llvm::sys::getDefaultTargetTriple(); std::string E; if (const Target *Match = TargetRegistry::lookupTarget(Triple, E)) -- cgit v1.1 From 60cb643f7561e5be7a3b5fe705535e96de72cbf5 Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Tue, 1 Nov 2011 22:18:13 +0000 Subject: Fix disassembly of some VST1 instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143507 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index e81cc76..6927d2d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2240,13 +2240,27 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, return MCDisassembler::Fail; // AddrMode6 Offset (register) - if (Rm == 0xD) - Inst.addOperand(MCOperand::CreateReg(0)); - else if (Rm != 0xF) { - if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) - return MCDisassembler::Fail; + switch (Inst.getOpcode()) { + default: + if (Rm == 0xD) + Inst.addOperand(MCOperand::CreateReg(0)); + else if (Rm != 0xF) { + if (!Check(S, DecodeGPRRegisterClass(Inst, Rm, Address, Decoder))) + return MCDisassembler::Fail; + } + break; + case ARM::VST1d8wb_fixed: + case ARM::VST1d16wb_fixed: + case ARM::VST1d32wb_fixed: + case ARM::VST1d64wb_fixed: + case ARM::VST1q8wb_fixed: + case ARM::VST1q16wb_fixed: + case ARM::VST1q32wb_fixed: + case ARM::VST1q64wb_fixed: + break; } + // First input register if (!Check(S, DecodeDPRRegisterClass(Inst, Rd, Address, Decoder))) return MCDisassembler::Fail; -- cgit v1.1 From 613b7576896fbd03fe495f4ee27b404f81386774 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 1 Nov 2011 22:27:22 +0000 Subject: First part of support for generating dwarf for assembly source files with the -g flag. In this part we generate the .file for the source being assembled and the .loc's for the assembled instructions. The next part will be to generate the dwarf Compile Unit DIE and a dwarf subprogram DIE for each non-temporary label. Once the next part is done test cases will be added. 
rdar://9275556 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143509 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCContext.cpp | 2 ++ lib/MC/MCParser/AsmParser.cpp | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+) (limited to 'lib') diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 9e28b8f..814726e 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -43,6 +43,8 @@ MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri, SecureLogUsed = false; DwarfLocSeen = false; + GenDwarfForAssembly = false; + GenDwarfFileNumber = 0; } MCContext::~MCContext() { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index d7ee1c4..990fd17 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -464,6 +464,14 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { HadError = false; AsmCond StartingCondState = TheCondState; + // If we are generating dwarf for assembly source files save the initial text + // section and generate a .file directive. + if (getContext().getGenDwarfForAssembly()) { + getContext().setGenDwarfSection(getStreamer().getCurrentSection()); + getStreamer().EmitDwarfFileDirective(getContext().nextGenDwarfFileNumber(), + StringRef(), SrcMgr.getMemoryBuffer(CurBuffer)->getBufferIdentifier()); + } + // While we have input, parse each statement. while (Lexer.isNot(AsmToken::Eof)) { if (!ParseStatement()) continue; @@ -1211,6 +1219,18 @@ bool AsmParser::ParseStatement() { PrintMessage(IDLoc, SourceMgr::DK_Note, OS.str()); } + // If we are generating dwarf for assembly source files and the current + // section is the initial text section then generate a .loc directive for + // the instruction. 
+ if (!HadError && getContext().getGenDwarfForAssembly() && + getContext().getGenDwarfSection() == getStreamer().getCurrentSection() ) { + getStreamer().EmitDwarfLocDirective(getContext().getGenDwarfFileNumber(), + SrcMgr.FindLineNumber(IDLoc, CurBuffer), + 0, DWARF2_LINE_DEFAULT_IS_STMT ? + DWARF2_FLAG_IS_STMT : 0, 0, 0, + StringRef()); + } + // If parsing succeeded, match the instruction. if (!HadError) HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, ParsedOperands, @@ -2342,6 +2362,10 @@ bool GenericAsmParser::ParseDirectiveFile(StringRef, SMLoc DirectiveLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.file' directive"); + if (getContext().getGenDwarfForAssembly() == true) + Error(DirectiveLoc, "input can't have .file dwarf directives when -g is " + "used to generate dwarf debug info for assembly code"); + if (FileNumber == -1) getStreamer().EmitFileDirective(Filename); else { -- cgit v1.1 From ed6a0c5243f4dc13169edc8e342c679f1bfc201c Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 1 Nov 2011 22:37:37 +0000 Subject: ARM label operands can have an optional '#' before them. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143510 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 46ea29f..757eccb 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -3835,13 +3835,11 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl &Operands, if (getParser().ParseExpression(ImmVal)) return true; const MCConstantExpr *CE = dyn_cast(ImmVal); - if (!CE) { - Error(S, "constant expression expected"); - return MatchOperand_ParseFail; + if (CE) { + int32_t Val = CE->getValue(); + if (isNegative && Val == 0) + ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); } - int32_t Val = CE->getValue(); - if (isNegative && Val == 0) - ImmVal = MCConstantExpr::Create(INT32_MIN, getContext()); E = SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); Operands.push_back(ARMOperand::CreateImm(ImmVal, S, E)); return false; -- cgit v1.1 From 6284afc293c8f6e84dffab8731aa9e679d437745 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 1 Nov 2011 22:38:31 +0000 Subject: ARM label operands can be quoted. For example, labels from Objective-C sources. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143511 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 757eccb..0abfabe 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -3810,6 +3810,7 @@ bool ARMAsmParser::parseOperand(SmallVectorImpl &Operands, } case AsmToken::LParen: // parenthesized expressions like (_strcmp-4) case AsmToken::Integer: // things like 1f and 2b as a branch targets + case AsmToken::String: // quoted label names. case AsmToken::Dot: { // . as a branch target // This was not a register so parse other operands that start with an // identifier (like labels) as expressions and create them as immediates. -- cgit v1.1 From 064e48a3dce1fd29a35b4b1b01a8c4b67e29c74a Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Tue, 1 Nov 2011 23:39:05 +0000 Subject: Fixed a bug in the code to create a dwarf file and directory table entries when it is separating the directory part from the basename of the FileName. Noticed that this: .file 1 "dir/foo" when assembled got the two parts switched. Using the Mac OS X dwarfdump tool it can be seen easily: % dwarfdump -a a.out include_directories[ 1] = 'foo' Dir Mod Time File Len File Name ---- ---------- ---------- --------------------------- file_names[ 1] 1 0x00000000 0x00000000 dir ... Which should be: ... 
include_directories[ 1] = 'dir' Dir Mod Time File Len File Name ---- ---------- ---------- --------------------------- file_names[ 1] 1 0x00000000 0x00000000 foo git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143521 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCContext.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp index 814726e..a1a01e3 100644 --- a/lib/MC/MCContext.cpp +++ b/lib/MC/MCContext.cpp @@ -273,8 +273,10 @@ unsigned MCContext::GetDwarfFile(StringRef Directory, StringRef FileName, // Separate the directory part from the basename of the FileName. std::pair Slash = FileName.rsplit('/'); Directory = Slash.second; - if (!Directory.empty()) - FileName = Slash.first; + if (!Directory.empty()) { + Directory = Slash.first; + FileName = Slash.second; + } } // Find or make a entry in the MCDwarfDirs vector for this Directory. -- cgit v1.1 From 3129da8d1a15a263a9af10b618478344db9eeddf Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Wed, 2 Nov 2011 00:02:45 +0000 Subject: Broaden an assert to handle enable-iv-rewrite=true following r143183. Narrowest possible fix for PR11279. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143522 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/IndVarSimplify.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index dce7f87..b2ccb22 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1588,7 +1588,7 @@ LinearFunctionTestReplace(Loop *L, assert(SE->isLoopInvariant(IVLimit, L) && "Computed iteration count is not loop invariant!"); - assert( !IVLimit->getType()->isPointerTy() && + assert( EnableIVRewrite || !IVLimit->getType()->isPointerTy() && "Should not expand pointer types" ); Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI); -- cgit v1.1 From 0d7b231c9b0acf2ea6bb99f75672751f64c6c6db Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 2 Nov 2011 00:18:48 +0000 Subject: Factor out a SelectTrunc function. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143523 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 45 ++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 6aff834..4f883b7 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -167,7 +167,8 @@ class ARMFastISel : public FastISel { bool SelectCall(const Instruction *I); bool SelectSelect(const Instruction *I); bool SelectRet(const Instruction *I); - bool SelectIntCast(const Instruction *I); + bool SelectTrunc(const Instruction *I); + bool SelectIntExt(const Instruction *I); // Utility routines. 
private: @@ -1963,7 +1964,30 @@ bool ARMFastISel::SelectCall(const Instruction *I) { } -bool ARMFastISel::SelectIntCast(const Instruction *I) { +bool ARMFastISel::SelectTrunc(const Instruction *I) { + // The high bits for a type smaller than the register size are assumed to be + // undefined. + Value *Op = I->getOperand(0); + + EVT SrcVT, DestVT; + SrcVT = TLI.getValueType(Op->getType(), true); + DestVT = TLI.getValueType(I->getType(), true); + + if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) + return false; + if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) + return false; + + unsigned SrcReg = getRegForValue(Op); + if (!SrcReg) return false; + + // Because the high bits are undefined, a truncate doesn't generate + // any code. + UpdateValueMap(I, SrcReg); + return true; +} + +bool ARMFastISel::SelectIntExt(const Instruction *I) { // On ARM, in general, integer casts don't involve legal types; this code // handles promotable integers. The high bits for a type smaller than // the register size are assumed to be undefined. @@ -1975,20 +1999,6 @@ bool ARMFastISel::SelectIntCast(const Instruction *I) { SrcVT = TLI.getValueType(SrcTy, true); DestVT = TLI.getValueType(DestTy, true); - if (isa(I)) { - if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) - return false; - if (DestVT != MVT::i16 && DestVT != MVT::i8 && DestVT != MVT::i1) - return false; - - unsigned SrcReg = getRegForValue(Op); - if (!SrcReg) return false; - - // Because the high bits are undefined, a truncate doesn't generate - // any code. 
- UpdateValueMap(I, SrcReg); - return true; - } if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) return false; @@ -2078,9 +2088,10 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::Ret: return SelectRet(I); case Instruction::Trunc: + return SelectTrunc(I); case Instruction::ZExt: case Instruction::SExt: - return SelectIntCast(I); + return SelectIntExt(I); default: break; } return false; -- cgit v1.1 From 9af37a3de8c8688adae383471379f0216287ce28 Mon Sep 17 00:00:00 2001 From: Tanya Lattner Date: Wed, 2 Nov 2011 00:24:56 +0000 Subject: Add support to the linker to lazily link in functions. This change only links functions marked with specific linkage (internal, private, linker_private, linker_private_weak, linker_private_weak_def_auto, linkonce, linkonce_odr, and available_externally) if they have uses in the destination module. Instead of automatically linking, these functions are placed onto a worklist to be processed in the final stage of linking. We iterate over the list and if any functions on the list have uses in the destination module, we link them in and repeat the process until no changes in the state (uses) has changed. This means that any functions in the LazilyLink worklist that have a use in the destination module will be linked in and none that don't. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143524 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Linker/LinkModules.cpp | 58 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'lib') diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index bcc6782..ab099bb 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -341,6 +341,9 @@ namespace { // Set of items not to link in from source. SmallPtrSet DoNotLinkFromSource; + // Vector of functions to lazily link in. 
+ std::vector LazilyLinkFunctions; + public: std::string ErrorMsg; @@ -708,6 +711,13 @@ bool ModuleLinker::linkFunctionProto(Function *SF) { // Any uses of DF need to change to NewDF, with cast. DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType())); DGV->eraseFromParent(); + } else { + // Internal, LO_ODR, or LO linkage - stick in set to ignore and lazily link. + if (SF->hasLocalLinkage() || SF->hasLinkOnceLinkage() || + SF->hasAvailableExternallyLinkage()) { + DoNotLinkFromSource.insert(SF); + LazilyLinkFunctions.push_back(SF); + } } ValueMap[SF] = NewDF; @@ -974,6 +984,54 @@ bool ModuleLinker::run() { // are properly remapped. linkNamedMDNodes(); + // Process vector of lazily linked in functions. + bool LinkedInAnyFunctions; + do { + LinkedInAnyFunctions = false; + + for(std::vector::iterator I = LazilyLinkFunctions.begin(), + E = LazilyLinkFunctions.end(); I != E; ++I) { + if (!*I) + continue; + + Function *SF = *I; + Function *DF = cast(ValueMap[SF]); + + if (!DF->use_empty()) { + + // Materialize if necessary. + if (SF->isDeclaration()) { + if (!SF->isMaterializable()) + continue; + if (SF->Materialize(&ErrorMsg)) + return true; + } + + // Link in function body. + linkFunctionBody(DF, SF); + + // "Remove" from vector by setting the element to 0. + *I = 0; + + // Set flag to indicate we may have more functions to lazily link in + // since we linked in a function. + LinkedInAnyFunctions = true; + } + } + } while (LinkedInAnyFunctions); + + // Remove any prototypes of functions that were not actually linked in. + for(std::vector::iterator I = LazilyLinkFunctions.begin(), + E = LazilyLinkFunctions.end(); I != E; ++I) { + if (!*I) + continue; + + Function *SF = *I; + Function *DF = cast(ValueMap[SF]); + if (DF->use_empty()) + DF->eraseFromParent(); + } + // Now that all of the types from the source are used, resolve any structs // copied over to the dest that didn't exist there. 
TypeMap.linkDefinedTypeBodies(); -- cgit v1.1 From 3f2b2c218faf9c08975f24629e01c28cf88f5feb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 2 Nov 2011 04:42:13 +0000 Subject: Add a bunch more X86 AVX2 instructions and their corresponding intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143529 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFragmentsSIMD.td | 5 +- lib/Target/X86/X86InstrSSE.td | 253 +++++++++++++++++++++++++++++++- 2 files changed, 256 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td index af919fb..6fd2efd 100644 --- a/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -276,11 +276,12 @@ def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop node:$ptr))>; def memopv16i8 : PatFrag<(ops node:$ptr), (v16i8 (memop node:$ptr))>; // 256-bit memop pattern fragments -def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>; def memopv8f32 : PatFrag<(ops node:$ptr), (v8f32 (memop node:$ptr))>; def memopv4f64 : PatFrag<(ops node:$ptr), (v4f64 (memop node:$ptr))>; def memopv4i64 : PatFrag<(ops node:$ptr), (v4i64 (memop node:$ptr))>; def memopv8i32 : PatFrag<(ops node:$ptr), (v8i32 (memop node:$ptr))>; +def memopv16i16 : PatFrag<(ops node:$ptr), (v16i16 (memop node:$ptr))>; +def memopv32i8 : PatFrag<(ops node:$ptr), (v32i8 (memop node:$ptr))>; // SSSE3 uses MMX registers for some instructions. They aren't aligned on a // 16-byte boundary. 
@@ -326,6 +327,8 @@ def bc_v4i32 : PatFrag<(ops node:$in), (v4i32 (bitconvert node:$in))>; def bc_v2i64 : PatFrag<(ops node:$in), (v2i64 (bitconvert node:$in))>; // 256-bit bitconvert pattern fragments +def bc_v32i8 : PatFrag<(ops node:$in), (v32i8 (bitconvert node:$in))>; +def bc_v16i16 : PatFrag<(ops node:$in), (v16i16 (bitconvert node:$in))>; def bc_v8i32 : PatFrag<(ops node:$in), (v8i32 (bitconvert node:$in))>; def bc_v4i64 : PatFrag<(ops node:$in), (v4i64 (bitconvert node:$in))>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b5eea45..f30a0c4 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -4008,6 +4008,23 @@ def mi : Ii8<0x70, MRMSrcMem, (bc_frag (memopv2i64 addr:$src1)), (undef))))]>; } + +multiclass sse2_pshuffle_y { +def Yri : Ii8<0x70, MRMSrcReg, + (outs VR256:$dst), (ins VR256:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (vt (pshuf_frag:$src2 VR256:$src1, + (undef))))]>; +def Ymi : Ii8<0x70, MRMSrcMem, + (outs VR256:$dst), (ins i256mem:$src1, i8imm:$src2), + !strconcat(OpcodeStr, + "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (vt (pshuf_frag:$src2 + (bc_frag (memopv4i64 addr:$src1)), + (undef))))]>; +} } // ExeDomain = SSEPackedInt let Predicates = [HasAVX] in { @@ -4052,6 +4069,20 @@ let Predicates = [HasAVX] in { (VPSHUFLWmi addr:$src, imm:$imm)>; } +let Predicates = [HasAVX2] in { + let AddedComplexity = 5 in + defm VPSHUFD : sse2_pshuffle_y<"vpshufd", v8i32, pshufd, bc_v8i32>, TB, + OpSize, VEX; + + // SSE2 with ImmT == Imm8 and XS prefix. + defm VPSHUFHW : sse2_pshuffle_y<"vpshufhw", v16i16, pshufhw, bc_v16i16>, XS, + VEX; + + // SSE2 with ImmT == Imm8 and XD prefix. 
+ defm VPSHUFLW : sse2_pshuffle_y<"vpshuflw", v16i16, pshuflw, bc_v16i16>, XD, + VEX; +} + let Predicates = [HasSSE2] in { let AddedComplexity = 5 in defm PSHUFD : sse2_pshuffle<"pshufd", v4i32, pshufd, bc_v4i32>, TB, OpSize; @@ -4114,6 +4145,19 @@ multiclass sse2_unpack opc, string OpcodeStr, ValueType vt, addr:$src2))))]>; } +multiclass sse2_unpack_y opc, string OpcodeStr, ValueType vt, + SDNode OpNode, PatFrag bc_frag> { + def Yrr : PDI; + def Yrm : PDI; +} + let Predicates = [HasAVX] in { defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Punpcklbw, bc_v16i8, 0>, VEX_4V; @@ -4156,6 +4200,48 @@ let Predicates = [HasAVX] in { (memopv2i64 addr:$src2))))]>, VEX_4V; } +let Predicates = [HasAVX2] in { + defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Punpcklbw, + bc_v32i8>, VEX_4V; + defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Punpcklwd, + bc_v16i16>, VEX_4V; + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Punpckldq, + bc_v8i32>, VEX_4V; + + /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. 
+ def VPUNPCKLQDQYrr : PDI<0x6C, MRMSrcReg, + (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, + VR256:$src2)))]>, VEX_4V; + def VPUNPCKLQDQYrm : PDI<0x6C, MRMSrcMem, + (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), + "vpunpcklqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, (v4i64 (X86Punpcklqdq VR256:$src1, + (memopv4i64 addr:$src2))))]>, VEX_4V; + + defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Punpckhbw, + bc_v32i8>, VEX_4V; + defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Punpckhwd, + bc_v16i16>, VEX_4V; + defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Punpckhdq, + bc_v8i32>, VEX_4V; + + /// FIXME: we could eliminate this and use sse2_unpack_y instead if tblgen + /// knew to collapse (bitconvert VT to VT) into its operand. + def VPUNPCKHQDQYrr : PDI<0x6D, MRMSrcReg, + (outs VR256:$dst), (ins VR256:$src1, VR256:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, + VR256:$src2)))]>, VEX_4V; + def VPUNPCKHQDQYrm : PDI<0x6D, MRMSrcMem, + (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2), + "vpunpckhqdq\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR256:$dst, (v4i64 (X86Punpckhqdq VR256:$src1, + (memopv4i64 addr:$src2))))]>, VEX_4V; +} + let Constraints = "$src1 = $dst" in { defm PUNPCKLBW : sse2_unpack<0x60, "punpcklbw", v16i8, X86Punpcklbw, bc_v16i8>; defm PUNPCKLWD : sse2_unpack<0x61, "punpcklwd", v8i16, X86Punpcklwd, bc_v8i16>; @@ -4266,6 +4352,15 @@ def VPMOVMSKBrr : VPDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>, VEX; def VPMOVMSKBr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX; + +let Predicates = [HasAVX2] in { +def VPMOVMSKBYrr : VPDI<0xD7, MRMSrcReg, (outs 
GR32:$dst), (ins VR256:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", + [(set GR32:$dst, (int_x86_avx2_pmovmskb VR256:$src))]>, VEX; +def VPMOVMSKBYr64r : VPDI<0xD7, MRMSrcReg, (outs GR64:$dst), (ins VR256:$src), + "pmovmskb\t{$src, $dst|$dst, $src}", []>, VEX; +} + def PMOVMSKBrr : PDI<0xD7, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src), "pmovmskb\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (int_x86_sse2_pmovmskb_128 VR128:$src))]>; @@ -5016,6 +5111,23 @@ multiclass SS3I_unop_rm_int opc, string OpcodeStr, (bitconvert (mem_frag128 addr:$src))))]>, OpSize; } +/// SS3I_unop_rm_int_y - Simple SSSE3 unary op whose type can be v*{i8,i16,i32}. +multiclass SS3I_unop_rm_int_y opc, string OpcodeStr, + PatFrag mem_frag256, Intrinsic IntId256> { + def rr256 : SS38I, + OpSize; + + def rm256 : SS38I, OpSize; +} + let Predicates = [HasAVX] in { defm VPABSB : SS3I_unop_rm_int<0x1C, "vpabsb", memopv16i8, int_x86_ssse3_pabs_b_128>, VEX; @@ -5025,6 +5137,15 @@ let Predicates = [HasAVX] in { int_x86_ssse3_pabs_d_128>, VEX; } +let Predicates = [HasAVX2] in { + defm VPABSB : SS3I_unop_rm_int_y<0x1C, "vpabsb", memopv32i8, + int_x86_avx2_pabs_b>, VEX; + defm VPABSW : SS3I_unop_rm_int_y<0x1D, "vpabsw", memopv16i16, + int_x86_avx2_pabs_w>, VEX; + defm VPABSD : SS3I_unop_rm_int_y<0x1E, "vpabsd", memopv8i32, + int_x86_avx2_pabs_d>, VEX; +} + defm PABSB : SS3I_unop_rm_int<0x1C, "pabsb", memopv16i8, int_x86_ssse3_pabs_b_128>; defm PABSW : SS3I_unop_rm_int<0x1D, "pabsw", memopv8i16, @@ -5055,7 +5176,23 @@ multiclass SS3I_binop_rm_int opc, string OpcodeStr, !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}")), [(set VR128:$dst, (IntId128 VR128:$src1, - (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; + (bitconvert (mem_frag128 addr:$src2))))]>, OpSize; +} + +multiclass SS3I_binop_rm_int_y opc, string OpcodeStr, + PatFrag mem_frag256, Intrinsic IntId256> { + let isCommutable = 1 in + def rr256 : SS38I, + OpSize; + def rm256 : SS38I, OpSize; } let ImmT = NoImm, Predicates = [HasAVX] 
in { @@ -5087,6 +5224,35 @@ defm VPMULHRSW : SS3I_binop_rm_int<0x0B, "vpmulhrsw", memopv8i16, int_x86_ssse3_pmul_hr_sw_128, 0>, VEX_4V; } +let ImmT = NoImm, Predicates = [HasAVX2] in { +let isCommutable = 0 in { + defm VPHADDW : SS3I_binop_rm_int_y<0x01, "vphaddw", memopv16i16, + int_x86_avx2_phadd_w>, VEX_4V; + defm VPHADDD : SS3I_binop_rm_int_y<0x02, "vphaddd", memopv8i32, + int_x86_avx2_phadd_d>, VEX_4V; + defm VPHADDSW : SS3I_binop_rm_int_y<0x03, "vphaddsw", memopv16i16, + int_x86_avx2_phadd_sw>, VEX_4V; + defm VPHSUBW : SS3I_binop_rm_int_y<0x05, "vphsubw", memopv16i16, + int_x86_avx2_phsub_w>, VEX_4V; + defm VPHSUBD : SS3I_binop_rm_int_y<0x06, "vphsubd", memopv8i32, + int_x86_avx2_phsub_d>, VEX_4V; + defm VPHSUBSW : SS3I_binop_rm_int_y<0x07, "vphsubsw", memopv16i16, + int_x86_avx2_phsub_sw>, VEX_4V; + defm VPMADDUBSW : SS3I_binop_rm_int_y<0x04, "vpmaddubsw", memopv32i8, + int_x86_avx2_pmadd_ub_sw>, VEX_4V; + defm VPSHUFB : SS3I_binop_rm_int_y<0x00, "vpshufb", memopv32i8, + int_x86_avx2_pshuf_b>, VEX_4V; + defm VPSIGNB : SS3I_binop_rm_int_y<0x08, "vpsignb", memopv16i8, + int_x86_avx2_psign_b>, VEX_4V; + defm VPSIGNW : SS3I_binop_rm_int_y<0x09, "vpsignw", memopv8i16, + int_x86_avx2_psign_w>, VEX_4V; + defm VPSIGND : SS3I_binop_rm_int_y<0x0A, "vpsignd", memopv4i32, + int_x86_avx2_psign_d>, VEX_4V; +} +defm VPMULHRSW : SS3I_binop_rm_int_y<0x0B, "vpmulhrsw", memopv16i16, + int_x86_avx2_pmul_hr_sw>, VEX_4V; +} + // None of these have i8 immediate fields. 
let ImmT = NoImm, Constraints = "$src1 = $dst" in { let isCommutable = 0 in { @@ -5166,8 +5332,23 @@ multiclass ssse3_palign { []>, OpSize; } +multiclass ssse3_palign_y { + def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, i8imm:$src3), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; + def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, i256mem:$src2, i8imm:$src3), + !strconcat(asm, + "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), + []>, OpSize; +} + let Predicates = [HasAVX] in defm VPALIGN : ssse3_palign<"vpalignr", 0>, VEX_4V; +let Predicates = [HasAVX2] in + defm VPALIGN : ssse3_palign_y<"vpalignr", 0>, VEX_4V; let Constraints = "$src1 = $dst", Predicates = [HasSSSE3] in defm PALIGN : ssse3_palign<"palignr">; @@ -5235,6 +5416,17 @@ multiclass SS41I_binop_rm_int8 opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +multiclass SS41I_binop_rm_int16_y opc, string OpcodeStr, + Intrinsic IntId> { + def Yrr : SS48I, OpSize; + + def Yrm : SS48I, OpSize; +} + let Predicates = [HasAVX] in { defm VPMOVSXBW : SS41I_binop_rm_int8<0x20, "vpmovsxbw", int_x86_sse41_pmovsxbw>, VEX; @@ -5250,6 +5442,21 @@ defm VPMOVZXDQ : SS41I_binop_rm_int8<0x35, "vpmovzxdq", int_x86_sse41_pmovzxdq>, VEX; } +let Predicates = [HasAVX2] in { +defm VPMOVSXBW : SS41I_binop_rm_int16_y<0x20, "vpmovsxbw", + int_x86_avx2_pmovsxbw>, VEX; +defm VPMOVSXWD : SS41I_binop_rm_int16_y<0x23, "vpmovsxwd", + int_x86_avx2_pmovsxwd>, VEX; +defm VPMOVSXDQ : SS41I_binop_rm_int16_y<0x25, "vpmovsxdq", + int_x86_avx2_pmovsxdq>, VEX; +defm VPMOVZXBW : SS41I_binop_rm_int16_y<0x30, "vpmovzxbw", + int_x86_avx2_pmovzxbw>, VEX; +defm VPMOVZXWD : SS41I_binop_rm_int16_y<0x33, "vpmovzxwd", + int_x86_avx2_pmovzxwd>, VEX; +defm VPMOVZXDQ : SS41I_binop_rm_int16_y<0x35, "vpmovzxdq", + int_x86_avx2_pmovzxdq>, VEX; +} + defm PMOVSXBW : SS41I_binop_rm_int8<0x20, "pmovsxbw", int_x86_sse41_pmovsxbw>; defm PMOVSXWD : 
SS41I_binop_rm_int8<0x23, "pmovsxwd", int_x86_sse41_pmovsxwd>; defm PMOVSXDQ : SS41I_binop_rm_int8<0x25, "pmovsxdq", int_x86_sse41_pmovsxdq>; @@ -5336,6 +5543,19 @@ multiclass SS41I_binop_rm_int4 opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +multiclass SS41I_binop_rm_int8_y opc, string OpcodeStr, + Intrinsic IntId> { + def Yrr : SS48I, OpSize; + + def Yrm : SS48I, + OpSize; +} + let Predicates = [HasAVX] in { defm VPMOVSXBD : SS41I_binop_rm_int4<0x21, "vpmovsxbd", int_x86_sse41_pmovsxbd>, VEX; @@ -5347,6 +5567,17 @@ defm VPMOVZXWQ : SS41I_binop_rm_int4<0x34, "vpmovzxwq", int_x86_sse41_pmovzxwq>, VEX; } +let Predicates = [HasAVX2] in { +defm VPMOVSXBD : SS41I_binop_rm_int8_y<0x21, "vpmovsxbd", + int_x86_avx2_pmovsxbd>, VEX; +defm VPMOVSXWQ : SS41I_binop_rm_int8_y<0x24, "vpmovsxwq", + int_x86_avx2_pmovsxwq>, VEX; +defm VPMOVZXBD : SS41I_binop_rm_int8_y<0x31, "vpmovzxbd", + int_x86_avx2_pmovzxbd>, VEX; +defm VPMOVZXWQ : SS41I_binop_rm_int8_y<0x34, "vpmovzxwq", + int_x86_avx2_pmovzxwq>, VEX; +} + defm PMOVSXBD : SS41I_binop_rm_int4<0x21, "pmovsxbd", int_x86_sse41_pmovsxbd>; defm PMOVSXWQ : SS41I_binop_rm_int4<0x24, "pmovsxwq", int_x86_sse41_pmovsxwq>; defm PMOVZXBD : SS41I_binop_rm_int4<0x31, "pmovzxbd", int_x86_sse41_pmovzxbd>; @@ -5391,12 +5622,32 @@ multiclass SS41I_binop_rm_int2 opc, string OpcodeStr, Intrinsic IntId> { OpSize; } +multiclass SS41I_binop_rm_int4_y opc, string OpcodeStr, + Intrinsic IntId> { + def Yrr : SS48I, OpSize; + + // Expecting a i16 load any extended to i32 value. 
+ def Yrm : SS48I, + OpSize; +} + let Predicates = [HasAVX] in { defm VPMOVSXBQ : SS41I_binop_rm_int2<0x22, "vpmovsxbq", int_x86_sse41_pmovsxbq>, VEX; defm VPMOVZXBQ : SS41I_binop_rm_int2<0x32, "vpmovzxbq", int_x86_sse41_pmovzxbq>, VEX; } +let Predicates = [HasAVX2] in { +defm VPMOVSXBQ : SS41I_binop_rm_int4_y<0x22, "vpmovsxbq", + int_x86_avx2_pmovsxbq>, VEX; +defm VPMOVZXBQ : SS41I_binop_rm_int4_y<0x32, "vpmovzxbq", + int_x86_avx2_pmovzxbq>, VEX; +} defm PMOVSXBQ : SS41I_binop_rm_int2<0x22, "pmovsxbq", int_x86_sse41_pmovsxbq>; defm PMOVZXBQ : SS41I_binop_rm_int2<0x32, "pmovzxbq", int_x86_sse41_pmovzxbq>; -- cgit v1.1 From 94dffd2bb013ef7d2052ef56eac75cdf5d89d43b Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 2 Nov 2011 05:43:44 +0000 Subject: Add parentheses to disambiguate the precedence of these operations and silence -Wparentheses. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143534 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/IndVarSimplify.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index b2ccb22..0ba327a 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1588,8 +1588,8 @@ LinearFunctionTestReplace(Loop *L, assert(SE->isLoopInvariant(IVLimit, L) && "Computed iteration count is not loop invariant!"); - assert( EnableIVRewrite || !IVLimit->getType()->isPointerTy() && - "Should not expand pointer types" ); + assert((EnableIVRewrite || !IVLimit->getType()->isPointerTy()) && + "Should not expand pointer types" ); Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI); // Create a gep for IVInit + IVLimit from on an existing pointer base. -- cgit v1.1 From 205e3378fda530628ac63c26a9b7b0920a9b49bd Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 2 Nov 2011 06:54:17 +0000 Subject: More AVX2 instructions and intrinsics. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143536 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 111 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 108 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index f30a0c4..77a9031 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6248,6 +6248,22 @@ multiclass SS41I_binop_rm_int opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } +/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator +multiclass SS41I_binop_rm_int_y opc, string OpcodeStr, + Intrinsic IntId256> { + let isCommutable = 1 in + def Yrr : SS48I, OpSize; + def Yrm : SS48I, OpSize; +} + let Predicates = [HasAVX] in { let isCommutable = 0 in defm VPACKUSDW : SS41I_binop_rm_int<0x2B, "vpackusdw", int_x86_sse41_packusdw, @@ -6279,6 +6295,32 @@ let Predicates = [HasAVX] in { (VPCMPEQQrm VR128:$src1, addr:$src2)>; } +let Predicates = [HasAVX2] in { + let isCommutable = 0 in + defm VPACKUSDW : SS41I_binop_rm_int_y<0x2B, "vpackusdw", + int_x86_avx2_packusdw>, VEX_4V; + defm VPCMPEQQ : SS41I_binop_rm_int_y<0x29, "vpcmpeqq", + int_x86_avx2_pcmpeq_q>, VEX_4V; + defm VPMINSB : SS41I_binop_rm_int_y<0x38, "vpminsb", + int_x86_avx2_pmins_b>, VEX_4V; + defm VPMINSD : SS41I_binop_rm_int_y<0x39, "vpminsd", + int_x86_avx2_pmins_d>, VEX_4V; + defm VPMINUD : SS41I_binop_rm_int_y<0x3B, "vpminud", + int_x86_avx2_pminu_d>, VEX_4V; + defm VPMINUW : SS41I_binop_rm_int_y<0x3A, "vpminuw", + int_x86_avx2_pminu_w>, VEX_4V; + defm VPMAXSB : SS41I_binop_rm_int_y<0x3C, "vpmaxsb", + int_x86_avx2_pmaxs_b>, VEX_4V; + defm VPMAXSD : SS41I_binop_rm_int_y<0x3D, "vpmaxsd", + int_x86_avx2_pmaxs_d>, VEX_4V; + defm VPMAXUD : SS41I_binop_rm_int_y<0x3F, "vpmaxud", + int_x86_avx2_pmaxu_d>, VEX_4V; + defm VPMAXUW : SS41I_binop_rm_int_y<0x3E, "vpmaxuw", + int_x86_avx2_pmaxu_w>, VEX_4V; + defm VPMULDQ : SS41I_binop_rm_int_y<0x28, 
"vpmuldq", + int_x86_avx2_pmul_dq>, VEX_4V; +} + let Constraints = "$src1 = $dst" in { let isCommutable = 0 in defm PACKUSDW : SS41I_binop_rm_int<0x2B, "packusdw", int_x86_sse41_packusdw>; @@ -6301,7 +6343,7 @@ def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))), /// SS48I_binop_rm - Simple SSE41 binary operator. multiclass SS48I_binop_rm opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, bit Is2Addr = 1> { + ValueType OpVT, bit Is2Addr = 1> { let isCommutable = 1 in def rr : SS48I opc, string OpcodeStr, SDNode OpNode, OpSize; } +/// SS48I_binop_rm - Simple SSE41 binary operator. +multiclass SS48I_binop_rm_y opc, string OpcodeStr, SDNode OpNode, + ValueType OpVT> { + let isCommutable = 1 in + def Yrr : SS48I, + OpSize; + def Yrm : SS48I, + OpSize; +} + let Predicates = [HasAVX] in defm VPMULLD : SS48I_binop_rm<0x40, "vpmulld", mul, v4i32, 0>, VEX_4V; +let Predicates = [HasAVX2] in + defm VPMULLD : SS48I_binop_rm_y<0x40, "vpmulld", mul, v8i32>, VEX_4V; let Constraints = "$src1 = $dst" in defm PMULLD : SS48I_binop_rm<0x40, "pmulld", mul, v4i32>; @@ -6375,6 +6436,15 @@ let Predicates = [HasAVX] in { VR256, memopv32i8, i256mem, 0>, VEX_4V; } +let Predicates = [HasAVX2] in { + let isCommutable = 0 in { + defm VPBLENDWY : SS41I_binop_rmi_int<0x0E, "vpblendw", int_x86_avx2_pblendw, + VR256, memopv32i8, i256mem, 0>, VEX_4V; + defm VMPSADBWY : SS41I_binop_rmi_int<0x42, "vmpsadbw", int_x86_avx2_mpsadbw, + VR256, memopv32i8, i256mem, 0>, VEX_4V; + } +} + let Constraints = "$src1 = $dst" in { let isCommutable = 0 in { defm BLENDPS : SS41I_binop_rmi_int<0x0C, "blendps", int_x86_sse41_blendps, @@ -6393,7 +6463,6 @@ let Constraints = "$src1 = $dst" in { } /// SS41I_quaternary_int_avx - AVX SSE 4.1 with 4 operators -let Predicates = [HasAVX] in { multiclass SS41I_quaternary_int_avx opc, string OpcodeStr, RegisterClass RC, X86MemOperand x86memop, PatFrag mem_frag, Intrinsic IntId> { @@ -6413,8 +6482,8 @@ multiclass SS41I_quaternary_int_avx opc, string OpcodeStr, 
RC:$src3))], SSEPackedInt>, OpSize, TA, VEX_4V, VEX_I8IMM; } -} +let Predicates = [HasAVX] in { defm VBLENDVPD : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR128, i128mem, memopv16i8, int_x86_sse41_blendvpd>; defm VBLENDVPS : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR128, i128mem, @@ -6425,6 +6494,12 @@ defm VBLENDVPDY : SS41I_quaternary_int_avx<0x4B, "vblendvpd", VR256, i256mem, memopv32i8, int_x86_avx_blendv_pd_256>; defm VBLENDVPSY : SS41I_quaternary_int_avx<0x4A, "vblendvps", VR256, i256mem, memopv32i8, int_x86_avx_blendv_ps_256>; +} + +let Predicates = [HasAVX2] in { +defm VPBLENDVBY : SS41I_quaternary_int_avx<0x4C, "vpblendvb", VR256, i256mem, + memopv32i8, int_x86_avx2_pblendvb>; +} let Predicates = [HasAVX] in { def : Pat<(v16i8 (vselect (v16i8 VR128:$mask), (v16i8 VR128:$src1), @@ -6503,6 +6578,11 @@ def VMOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "vmovntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, OpSize, VEX; +let Predicates = [HasAVX2] in +def VMOVNTDQAYrm : SS48I<0x2A, MRMSrcMem, (outs VR256:$dst), (ins i256mem:$src), + "vmovntdqa\t{$src, $dst|$dst, $src}", + [(set VR256:$dst, (int_x86_avx2_movntdqa addr:$src))]>, + OpSize, VEX; def MOVNTDQArm : SS48I<0x2A, MRMSrcMem, (outs VR128:$dst), (ins i128mem:$src), "movntdqa\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (int_x86_sse41_movntdqa addr:$src))]>, @@ -6532,6 +6612,22 @@ multiclass SS42I_binop_rm_int opc, string OpcodeStr, (bitconvert (memopv16i8 addr:$src2))))]>, OpSize; } +/// SS42I_binop_rm_int - Simple SSE 4.2 binary operator +multiclass SS42I_binop_rm_int_y opc, string OpcodeStr, + Intrinsic IntId256> { + def Yrr : SS428I, + OpSize; + def Yrm : SS428I, OpSize; +} + let Predicates = [HasAVX] in { defm VPCMPGTQ : SS42I_binop_rm_int<0x37, "vpcmpgtq", int_x86_sse42_pcmpgtq, 0>, VEX_4V; @@ -6542,6 +6638,11 @@ let Predicates = [HasAVX] in { (VPCMPGTQrm VR128:$src1, addr:$src2)>; } +let Predicates = [HasAVX2] in { + defm 
VPCMPGTQ : SS42I_binop_rm_int_y<0x37, "vpcmpgtq", int_x86_avx2_pcmpgt_q>, + VEX_4V; +} + let Constraints = "$src1 = $dst" in defm PCMPGTQ : SS42I_binop_rm_int<0x37, "pcmpgtq", int_x86_sse42_pcmpgtq>; @@ -6991,6 +7092,10 @@ def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, int_x86_avx_vbroadcastf128_pd_256>; +let Predicates = [HasAVX2] in +def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, + int_x86_avx2_vbroadcasti128>; + def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; -- cgit v1.1 From 37efc9fe42a4867c81526cac7fca9fe0ea04a484 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Wed, 2 Nov 2011 07:17:12 +0000 Subject: Begin collecting some of the statistics for block placement discussed on the mailing list. Suggestions for other statistics to collect would be awesome. =] Currently these are implemented as a separate pass guarded by a separate flag. I'm not thrilled by that, but I wanted to be able to collect the statistics for the old code placement as well as the new in order to have a point of comparison. I'm planning on folding them into the single pass if / when there is only one pass of interest. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143537 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CodeGen.cpp | 1 + lib/CodeGen/LLVMTargetMachine.cpp | 8 ++++ lib/CodeGen/MachineBlockPlacement.cpp | 83 +++++++++++++++++++++++++++++++++++ 3 files changed, 92 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index a911534..899baad 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -29,6 +29,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeLiveVariablesPass(Registry); initializeMachineBlockFrequencyInfoPass(Registry); initializeMachineBlockPlacementPass(Registry); + initializeMachineBlockPlacementStatsPass(Registry); initializeMachineCSEPass(Registry); initializeMachineDominatorTreePass(Registry); initializeMachineLICMPass(Registry); diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 268584c..3e69069 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -55,6 +55,8 @@ static cl::opt DisableEarlyTailDup("disable-early-taildup", cl::Hidden, cl::desc("Disable pre-register allocation tail duplication")); static cl::opt EnableBlockPlacement("enable-block-placement", cl::Hidden, cl::desc("Enable probability-driven block placement")); +static cl::opt EnableBlockPlacementStats("enable-block-placement-stats", + cl::Hidden, cl::desc("Collect probability-driven block placement stats")); static cl::opt DisableCodePlace("disable-code-place", cl::Hidden, cl::desc("Disable code placement")); static cl::opt DisableSSC("disable-ssc", cl::Hidden, @@ -499,6 +501,12 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, PM.add(createCodePlacementOptPass()); printNoVerify(PM, "After CodePlacementOpt"); } + + // Run a separate pass to collect block placement statistics. 
+ if (EnableBlockPlacementStats) { + PM.add(createMachineBlockPlacementStatsPass()); + printNoVerify(PM, "After MachineBlockPlacementStats"); + } } if (addPreEmitPass(PM, OptLevel)) diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 21582b9..53a8779 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -48,6 +48,13 @@ #include using namespace llvm; +STATISTIC(NumCondBranches, "Number of conditional branches"); +STATISTIC(NumUncondBranches, "Number of uncondittional branches"); +STATISTIC(CondBranchTakenFreq, + "Potential frequency of taking conditional branches"); +STATISTIC(UncondBranchTakenFreq, + "Potential frequency of taking unconditional branches"); + namespace { /// \brief A structure for storing a weighted edge. /// @@ -481,3 +488,79 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { // differs from the original order. return true; } + +namespace { +/// \brief A pass to compute block placement statistics. +/// +/// A separate pass to compute interesting statistics for evaluating block +/// placement. This is separate from the actual placement pass so that they can +/// be computed in the absence of any placement transformations or when using +/// alternative placement strategies. +class MachineBlockPlacementStats : public MachineFunctionPass { + /// \brief A handle to the branch probability pass. + const MachineBranchProbabilityInfo *MBPI; + + /// \brief A handle to the function-wide block frequency pass. 
+ const MachineBlockFrequencyInfo *MBFI; + +public: + static char ID; // Pass identification, replacement for typeid + MachineBlockPlacementStats() : MachineFunctionPass(ID) { + initializeMachineBlockPlacementStatsPass(*PassRegistry::getPassRegistry()); + } + + bool runOnMachineFunction(MachineFunction &F); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + const char *getPassName() const { return "Block Placement Stats"; } +}; +} + +char MachineBlockPlacementStats::ID = 0; +INITIALIZE_PASS_BEGIN(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) +INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo) +INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo) +INITIALIZE_PASS_END(MachineBlockPlacementStats, "block-placement-stats", + "Basic Block Placement Stats", false, false) + +FunctionPass *llvm::createMachineBlockPlacementStatsPass() { + return new MachineBlockPlacementStats(); +} + +bool MachineBlockPlacementStats::runOnMachineFunction(MachineFunction &F) { + // Check for single-block functions and skip them. + if (llvm::next(F.begin()) == F.end()) + return false; + + MBPI = &getAnalysis(); + MBFI = &getAnalysis(); + + for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) { + BlockFrequency BlockFreq = MBFI->getBlockFreq(I); + Statistic &NumBranches = (I->succ_size() > 1) ? NumCondBranches + : NumUncondBranches; + Statistic &BranchTakenFreq = (I->succ_size() > 1) ? CondBranchTakenFreq + : UncondBranchTakenFreq; + for (MachineBasicBlock::succ_iterator SI = I->succ_begin(), + SE = I->succ_end(); + SI != SE; ++SI) { + // Skip if this successor is a fallthrough. 
+ if (I->isLayoutSuccessor(*SI)) + continue; + + BlockFrequency EdgeFreq = BlockFreq * MBPI->getEdgeProbability(I, *SI); + ++NumBranches; + BranchTakenFreq += EdgeFreq.getFrequency(); + } + } + + return false; +} + -- cgit v1.1 From d3714b60b5adf15376a8803e6622c74694884b28 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Wed, 2 Nov 2011 17:19:57 +0000 Subject: Rewrite LinearFunctionTestReplace to handle pointer-type IVs. We've been hitting asserts in this code due to the many supported combinations of modes (iv-rewrite/no-iv-rewrite) and IV types. This second rewrite of the code attempts to deal with these cases systematically. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143546 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/IndVarSimplify.cpp | 189 +++++++++++++++++++------------ 1 file changed, 116 insertions(+), 73 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 0ba327a..1f21108 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1278,6 +1278,16 @@ static bool isHighCostExpansion(const SCEV *S, BranchInst *BI, /// canExpandBackedgeTakenCount - Return true if this loop's backedge taken /// count expression can be safely and cheaply expanded into an instruction /// sequence that can be used by LinearFunctionTestReplace. +/// +/// TODO: This fails for pointer-type loop counters with greater than one byte +/// strides, consequently preventing LFTR from running. For the purpose of LFTR +/// we could skip this check in the case that the LFTR loop counter (chosen by +/// FindLoopCounter) is also pointer type. Instead, we could directly convert +/// the loop test to an inequality test by checking the target data's alignment +/// of element types (given that the initial pointer value originates from or is +/// used by ABI constrained operation, as opposed to inttoptr/ptrtoint). 
+/// However, we don't yet have a strong motivation for converting loop tests +/// into inequality tests. static bool canExpandBackedgeTakenCount(Loop *L, ScalarEvolution *SE) { const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L); if (isa(BackedgeTakenCount) || @@ -1429,6 +1439,10 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { /// FindLoopCounter - Find an affine IV in canonical form. /// +/// BECount may be an i8* pointer type. The pointer difference is already +/// valid count without scaling the address stride, so it remains a pointer +/// expression as far as SCEV is concerned. +/// /// FIXME: Accept -1 stride and set IVLimit = IVInit - BECount /// /// FIXME: Accept non-unit stride as long as SCEV can reduce BECount * Stride. @@ -1437,11 +1451,6 @@ static bool AlmostDeadIV(PHINode *Phi, BasicBlock *LatchBlock, Value *Cond) { static PHINode * FindLoopCounter(Loop *L, const SCEV *BECount, ScalarEvolution *SE, DominatorTree *DT, const TargetData *TD) { - // I'm not sure how BECount could be a pointer type, but we definitely don't - // want to LFTR that. - if (BECount->getType()->isPointerTy()) - return 0; - uint64_t BCWidth = SE->getTypeSizeInBits(BECount->getType()); Value *Cond = @@ -1458,6 +1467,10 @@ FindLoopCounter(Loop *L, const SCEV *BECount, if (!SE->isSCEVable(Phi->getType())) continue; + // Avoid comparing an integer IV against a pointer Limit. + if (BECount->getType()->isPointerTy() && !Phi->getType()->isPointerTy()) + continue; + const SCEVAddRecExpr *AR = dyn_cast(SE->getSCEV(Phi)); if (!AR || AR->getLoop() != L || !AR->isAffine()) continue; @@ -1503,6 +1516,82 @@ FindLoopCounter(Loop *L, const SCEV *BECount, return BestPhi; } +/// genLoopLimit - Help LinearFunctionTestReplace by generating a value that +/// holds the RHS of the new loop test. 
+static Value *genLoopLimit(PHINode *IndVar, const SCEV *IVCount, Loop *L, + SCEVExpander &Rewriter, ScalarEvolution *SE) { + const SCEVAddRecExpr *AR = dyn_cast(SE->getSCEV(IndVar)); + assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter"); + const SCEV *IVInit = AR->getStart(); + + // IVInit may be a pointer while IVCount is an integer when FindLoopCounter + // finds a valid pointer IV. Sign extend BECount in order to materialize a + // GEP. Avoid running SCEVExpander on a new pointer value, instead reusing + // the existing GEPs whenever possible. + if (IndVar->getType()->isPointerTy() + && !IVCount->getType()->isPointerTy()) { + + Type *OfsTy = SE->getEffectiveSCEVType(IVInit->getType()); + const SCEV *IVOffset = SE->getTruncateOrSignExtend(IVCount, OfsTy); + + // Expand the code for the iteration count. + assert(SE->isLoopInvariant(IVOffset, L) && + "Computed iteration count is not loop invariant!"); + BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); + Value *GEPOffset = Rewriter.expandCodeFor(IVOffset, OfsTy, BI); + + Value *GEPBase = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); + assert(AR->getStart() == SE->getSCEV(GEPBase) && "bad loop counter"); + // We could handle pointer IVs other than i8*, but we need to compensate for + // gep index scaling. See canExpandBackedgeTakenCount comments. + assert(SE->getSizeOfExpr( + cast(GEPBase->getType())->getElementType())->isOne() + && "unit stride pointer IV must be i8*"); + + IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); + return Builder.CreateGEP(GEPBase, GEPOffset, "lftr.limit"); + } + else { + // In any other case, convert both IVInit and IVCount to integers before + // comparing. This may result in SCEV expansion of pointers, but in practice + // SCEV will fold the pointer arithmetic away as such: + // BECount = (IVEnd - IVInit - 1) => IVLimit = IVInit (postinc). 
+ // + // Valid Cases: (1) both integers is most common; (2) both may be pointers + // for simple memset-style loops; (3) IVInit is an integer and IVCount is a + // pointer may occur when enable-iv-rewrite generates a canonical IV on top + // of case #2. + + const SCEV *IVLimit = 0; + // For unit stride, IVCount = Start + BECount with 2's complement overflow. + // For non-zero Start, compute IVCount here. + if (AR->getStart()->isZero()) + IVLimit = IVCount; + else { + assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); + const SCEV *IVInit = AR->getStart(); + + // For integer IVs, truncate the IV before computing IVInit + BECount. + if (SE->getTypeSizeInBits(IVInit->getType()) + > SE->getTypeSizeInBits(IVCount->getType())) + IVInit = SE->getTruncateExpr(IVInit, IVCount->getType()); + + IVLimit = SE->getAddExpr(IVInit, IVCount); + } + // Expand the code for the iteration count. + BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); + IRBuilder<> Builder(BI); + assert(SE->isLoopInvariant(IVLimit, L) && + "Computed iteration count is not loop invariant!"); + // Ensure that we generate the same type as IndVar, or a smaller integer + // type. In the presence of null pointer values, we have an integer type + // SCEV expression (IVInit) for a pointer type IV value (IndVar). + Type *LimitTy = IVCount->getType()->isPointerTy() ? + IndVar->getType() : IVCount->getType(); + return Rewriter.expandCodeFor(IVLimit, LimitTy, BI); + } +} + /// LinearFunctionTestReplace - This method rewrites the exit condition of the /// loop to be a canonical != comparison against the incremented loop induction /// variable. 
This pass is able to rewrite the exit tests of any loop where the @@ -1514,37 +1603,36 @@ LinearFunctionTestReplace(Loop *L, PHINode *IndVar, SCEVExpander &Rewriter) { assert(canExpandBackedgeTakenCount(L, SE) && "precondition"); - BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); // LFTR can ignore IV overflow and truncate to the width of // BECount. This avoids materializing the add(zext(add)) expression. Type *CntTy = !EnableIVRewrite ? BackedgeTakenCount->getType() : IndVar->getType(); - const SCEV *IVLimit = BackedgeTakenCount; + const SCEV *IVCount = BackedgeTakenCount; - // If the exiting block is not the same as the backedge block, we must compare - // against the preincremented value, otherwise we prefer to compare against - // the post-incremented value. + // If the exiting block is the same as the backedge block, we prefer to + // compare against the post-incremented value, otherwise we must compare + // against the preincremented value. Value *CmpIndVar; if (L->getExitingBlock() == L->getLoopLatch()) { // Add one to the "backedge-taken" count to get the trip count. // If this addition may overflow, we have to be more pessimistic and // cast the induction variable before doing the add. const SCEV *N = - SE->getAddExpr(IVLimit, SE->getConstant(IVLimit->getType(), 1)); - if (CntTy == IVLimit->getType()) - IVLimit = N; + SE->getAddExpr(IVCount, SE->getConstant(IVCount->getType(), 1)); + if (CntTy == IVCount->getType()) + IVCount = N; else { - const SCEV *Zero = SE->getConstant(IVLimit->getType(), 0); + const SCEV *Zero = SE->getConstant(IVCount->getType(), 0); if ((isa(N) && !N->isZero()) || SE->isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, N, Zero)) { // No overflow. Cast the sum. - IVLimit = SE->getTruncateOrZeroExtend(N, CntTy); + IVCount = SE->getTruncateOrZeroExtend(N, CntTy); } else { // Potential overflow. Cast before doing the add. 
- IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy); - IVLimit = SE->getAddExpr(IVLimit, SE->getConstant(CntTy, 1)); + IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy); + IVCount = SE->getAddExpr(IVCount, SE->getConstant(CntTy, 1)); } } // The BackedgeTaken expression contains the number of times that the @@ -1552,64 +1640,17 @@ LinearFunctionTestReplace(Loop *L, // number of times the loop executes, so use the incremented indvar. CmpIndVar = IndVar->getIncomingValueForBlock(L->getExitingBlock()); } else { - // We have to use the preincremented value... - IVLimit = SE->getTruncateOrZeroExtend(IVLimit, CntTy); + // We must use the preincremented value... + IVCount = SE->getTruncateOrZeroExtend(IVCount, CntTy); CmpIndVar = IndVar; } - // For unit stride, IVLimit = Start + BECount with 2's complement overflow. - // So for non-zero start compute the IVLimit here. - Type *CmpTy = CntTy; - const SCEVAddRecExpr *AR = dyn_cast(SE->getSCEV(IndVar)); - assert(AR && AR->getLoop() == L && AR->isAffine() && "bad loop counter"); - if (!AR->getStart()->isZero()) { - assert(AR->getStepRecurrence(*SE)->isOne() && "only handles unit stride"); - const SCEV *IVInit = AR->getStart(); - - // For pointer types, sign extend BECount in order to materialize a GEP. - // Note that for without EnableIVRewrite, we never run SCEVExpander on a - // pointer type, because we must preserve the existing GEPs. Instead we - // directly generate a GEP later. - if (CmpIndVar->getType()->isPointerTy()) { - CmpTy = SE->getEffectiveSCEVType(IVInit->getType()); - IVLimit = SE->getTruncateOrSignExtend(IVLimit, CmpTy); - } - // For integer types, truncate the IV before computing IVInit + BECount. - else { - if (SE->getTypeSizeInBits(IVInit->getType()) - > SE->getTypeSizeInBits(CmpTy)) - IVInit = SE->getTruncateExpr(IVInit, CmpTy); - - IVLimit = SE->getAddExpr(IVInit, IVLimit); - } - } - // Expand the code for the iteration count. 
- IRBuilder<> Builder(BI); - - assert(SE->isLoopInvariant(IVLimit, L) && - "Computed iteration count is not loop invariant!"); - assert((EnableIVRewrite || !IVLimit->getType()->isPointerTy()) && - "Should not expand pointer types" ); - Value *ExitCnt = Rewriter.expandCodeFor(IVLimit, CmpTy, BI); - - // Create a gep for IVInit + IVLimit from on an existing pointer base. - // - // In the presence of null pointer values, the SCEV expression may be an - // integer type while the IV is a pointer type. Ensure that the compare - // operands are always the same type by checking the IV type here. - if (CmpIndVar->getType()->isPointerTy()) { - Value *IVStart = IndVar->getIncomingValueForBlock(L->getLoopPreheader()); - assert(AR->getStart() == SE->getSCEV(IVStart) && "bad loop counter"); - assert(SE->getSizeOfExpr( - cast(IVStart->getType())->getElementType())->isOne() - && "unit stride pointer IV must be i8*"); - - Builder.SetInsertPoint(L->getLoopPreheader()->getTerminator()); - ExitCnt = Builder.CreateGEP(IVStart, ExitCnt, "lftr.limit"); - Builder.SetInsertPoint(BI); - } + Value *ExitCnt = genLoopLimit(IndVar, IVCount, L, Rewriter, SE); + assert(ExitCnt->getType()->isPointerTy() == IndVar->getType()->isPointerTy() + && "genLoopLimit missed a cast"); // Insert a new icmp_ne or icmp_eq instruction before the branch. + BranchInst *BI = cast(L->getExitingBlock()->getTerminator()); ICmpInst::Predicate P; if (L->contains(BI->getSuccessor(0))) P = ICmpInst::ICMP_NE; @@ -1621,11 +1662,13 @@ LinearFunctionTestReplace(Loop *L, << " op:\t" << (P == ICmpInst::ICMP_NE ? 
"!=" : "==") << "\n" << " RHS:\t" << *ExitCnt << "\n" - << " Expr:\t" << *IVLimit << "\n"); + << " IVCount:\t" << *IVCount << "\n"); + IRBuilder<> Builder(BI); if (SE->getTypeSizeInBits(CmpIndVar->getType()) - > SE->getTypeSizeInBits(CmpTy)) { - CmpIndVar = Builder.CreateTrunc(CmpIndVar, CmpTy, "lftr.wideiv"); + > SE->getTypeSizeInBits(ExitCnt->getType())) { + CmpIndVar = Builder.CreateTrunc(CmpIndVar, ExitCnt->getType(), + "lftr.wideiv"); } Value *Cond = Builder.CreateICmp(P, CmpIndVar, ExitCnt, "exitcond"); -- cgit v1.1 From 87633026d65acf8253e953bdcfd20bc351631f61 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 2 Nov 2011 17:20:24 +0000 Subject: Factor out an EmitIntExt function. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143547 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 68 +++++++++++++++++++++++------------------- 1 file changed, 37 insertions(+), 31 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4f883b7..97d813d 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -179,6 +179,7 @@ class ARMFastISel : public FastISel { bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); bool ARMComputeAddress(const Value *Obj, Address &Addr); void ARMSimplifyAddress(Address &Addr, EVT VT); + unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt); unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); unsigned ARMMaterializeInt(const Constant *C, EVT VT); unsigned ARMMaterializeGV(const GlobalValue *GV, EVT VT); @@ -1987,66 +1988,71 @@ bool ARMFastISel::SelectTrunc(const Instruction *I) { return true; } -bool ARMFastISel::SelectIntExt(const Instruction *I) { - // On ARM, in general, integer casts don't involve legal types; this code - // handles promotable integers. The high bits for a type smaller than - // the register size are assumed to be undefined. 
- Type *DestTy = I->getType(); - Value *Op = I->getOperand(0); - Type *SrcTy = Op->getType(); - - EVT SrcVT, DestVT; - SrcVT = TLI.getValueType(SrcTy, true); - DestVT = TLI.getValueType(DestTy, true); - +unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, + bool isZExt) { if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) - return false; + return 0; unsigned Opc; - bool isZext = isa(I); bool isBoolZext = false; - if (!SrcVT.isSimple()) - return false; + if (!SrcVT.isSimple()) return 0; switch (SrcVT.getSimpleVT().SimpleTy) { - default: return false; + default: return 0; case MVT::i16: - if (!Subtarget->hasV6Ops()) return false; - if (isZext) + if (!Subtarget->hasV6Ops()) return 0; + if (isZExt) Opc = isThumb ? ARM::t2UXTH : ARM::UXTH; else Opc = isThumb ? ARM::t2SXTH : ARM::SXTH; break; case MVT::i8: - if (!Subtarget->hasV6Ops()) return false; - if (isZext) + if (!Subtarget->hasV6Ops()) return 0; + if (isZExt) Opc = isThumb ? ARM::t2UXTB : ARM::UXTB; else Opc = isThumb ? ARM::t2SXTB : ARM::SXTB; break; case MVT::i1: - if (isZext) { + if (isZExt) { Opc = isThumb ? ARM::t2ANDri : ARM::ANDri; isBoolZext = true; break; } - return false; + return 0; } - // FIXME: We could save an instruction in many cases by special-casing - // load instructions. 
- unsigned SrcReg = getRegForValue(Op); - if (!SrcReg) return false; - - unsigned DestReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(SrcReg); if (isBoolZext) MIB.addImm(1); else MIB.addImm(0); AddOptionalDefs(MIB); - UpdateValueMap(I, DestReg); + return ResultReg; +} + +bool ARMFastISel::SelectIntExt(const Instruction *I) { + // On ARM, in general, integer casts don't involve legal types; this code + // handles promotable integers. + // FIXME: We could save an instruction in many cases by special-casing + // load instructions. + Type *DestTy = I->getType(); + Value *Src = I->getOperand(0); + Type *SrcTy = Src->getType(); + + EVT SrcVT, DestVT; + SrcVT = TLI.getValueType(SrcTy, true); + DestVT = TLI.getValueType(DestTy, true); + + bool isZExt = isa(I); + unsigned SrcReg = getRegForValue(Src); + if (!SrcReg) return false; + + unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, isZExt); + if (ResultReg == 0) return false; + UpdateValueMap(I, ResultReg); return true; } -- cgit v1.1 From 5a83264fa26eb573ef25b2db0cafbeef3eeb54c8 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 2 Nov 2011 17:24:36 +0000 Subject: Don't print two 0x prefixes when printing an address. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143549 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/VMCore/AsmWriter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp index 18308f2..d7863f5 100644 --- a/lib/VMCore/AsmWriter.cpp +++ b/lib/VMCore/AsmWriter.cpp @@ -231,7 +231,7 @@ void TypePrinting::print(Type *Ty, raw_ostream &OS) { if (I != NumberedTypes.end()) OS << '%' << I->second; else // Not enumerated, print the hex address. 
- OS << "%\"type 0x" << STy << '\"'; + OS << "%\"type " << STy << '\"'; return; } case Type::PointerTyID: { -- cgit v1.1 From e31b42a6f5598691498808673648211916bf4d0f Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 2 Nov 2011 17:41:23 +0000 Subject: Register list operands are not allowed to contain only a single register. Alternate encodings are used in that case. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143552 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 6927d2d..8dab153 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1111,7 +1111,11 @@ static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, } // Empty register lists are not allowed. - if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail; + uint32_t popcnt = CountPopulation_32(Val); + if (popcnt == 0) return MCDisassembler::Fail; + // and one-register lists are unpredictable. + else if (popcnt == 1) Check(S, MCDisassembler::SoftFail); + for (unsigned i = 0; i < 16; ++i) { if (Val & (1 << i)) { if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder))) -- cgit v1.1 From 244006db5c4d48878dc5fdc86976acdaff96cfaa Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 2 Nov 2011 17:46:18 +0000 Subject: The rules disallowing single-register reglist operands only apply to the POP alias, not to LDM/STM instructions. Revert r143552. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143553 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 8dab153..6927d2d 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -1111,11 +1111,7 @@ static DecodeStatus DecodeRegListOperand(llvm::MCInst &Inst, unsigned Val, } // Empty register lists are not allowed. - uint32_t popcnt = CountPopulation_32(Val); - if (popcnt == 0) return MCDisassembler::Fail; - // and one-register lists are unpredictable. - else if (popcnt == 1) Check(S, MCDisassembler::SoftFail); - + if (CountPopulation_32(Val) == 0) return MCDisassembler::Fail; for (unsigned i = 0; i < 16; ++i) { if (Val & (1 << i)) { if (!Check(S, DecodeGPRRegisterClass(Inst, i, Address, Decoder))) -- cgit v1.1 From dba9a17f9a69d16264ab30cfd878f2e74ba36d26 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Wed, 2 Nov 2011 17:56:38 +0000 Subject: Replace tabs I added in this new line of code with spaces. Thanks to Nick for spotting this! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143556 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCParser/AsmParser.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 990fd17..7883893 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -1227,7 +1227,7 @@ bool AsmParser::ParseStatement() { getStreamer().EmitDwarfLocDirective(getContext().getGenDwarfFileNumber(), SrcMgr.FindLineNumber(IDLoc, CurBuffer), 0, DWARF2_LINE_DEFAULT_IS_STMT ? 
- DWARF2_FLAG_IS_STMT : 0, 0, 0, + DWARF2_FLAG_IS_STMT : 0, 0, 0, StringRef()); } -- cgit v1.1 From 81550dc0a866e27a1efbc5de616fb366ebb547cd Mon Sep 17 00:00:00 2001 From: Owen Anderson Date: Wed, 2 Nov 2011 18:03:14 +0000 Subject: Fix the issue that r143552 was trying to address the _right_ way. One-register lists are legal on LDM/STM instructions, but we should not print the PUSH/POP aliases when they appear. This fixes round tripping on this instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143557 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index e4a56be..844e3ab 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -101,7 +101,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // A8.6.123 PUSH if ((Opcode == ARM::STMDB_UPD || Opcode == ARM::t2STMDB_UPD) && - MI->getOperand(0).getReg() == ARM::SP) { + MI->getOperand(0).getReg() == ARM::SP && + MI->getNumOperands() > 5) { + // Should only print PUSH if there are at least two registers in the list. O << '\t' << "push"; printPredicateOperand(MI, 2, O); if (Opcode == ARM::t2STMDB_UPD) @@ -122,7 +124,9 @@ void ARMInstPrinter::printInst(const MCInst *MI, raw_ostream &O, // A8.6.122 POP if ((Opcode == ARM::LDMIA_UPD || Opcode == ARM::t2LDMIA_UPD) && - MI->getOperand(0).getReg() == ARM::SP) { + MI->getOperand(0).getReg() == ARM::SP && + MI->getNumOperands() > 5) { + // Should only print POP if there are at least two registers in the list. 
O << '\t' << "pop"; printPredicateOperand(MI, 2, O); if (Opcode == ARM::t2LDMIA_UPD) -- cgit v1.1 From e07cd5e40ac06fabfb9d33ea7c79542f138f45ce Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 2 Nov 2011 18:08:25 +0000 Subject: Add support for comparing integer non-legal types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143559 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 49 ++++++++++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 97d813d..78bda6c 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -174,7 +174,8 @@ class ARMFastISel : public FastISel { private: bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); - bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value); + bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, + bool isZExt); bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr); bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); bool ARMComputeAddress(const Value *Obj, Address &Addr); @@ -1119,7 +1120,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the compare. - if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1))) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; unsigned BrOpc = isThumb ? 
ARM::t2Bcc : ARM::Bcc; @@ -1189,19 +1190,19 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { return true; } -bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value) { - MVT VT; +bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, + bool isZExt) { Type *Ty = Src1Value->getType(); - if (!isTypeLegal(Ty, VT)) - return false; + EVT SrcVT = TLI.getValueType(Ty, true); + if (!SrcVT.isSimple()) return false; bool isFloat = (Ty->isFloatTy() || Ty->isDoubleTy()); if (isFloat && !Subtarget->hasVFP2()) return false; unsigned CmpOpc; - switch (VT.SimpleTy) { - // TODO: Add support for non-legal types (i.e., i1, i8, i16). + bool needsExt = false; + switch (SrcVT.getSimpleVT().SimpleTy) { default: return false; // TODO: Verify compares. case MVT::f32: @@ -1210,19 +1211,36 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value) { case MVT::f64: CmpOpc = ARM::VCMPED; break; + case MVT::i1: + case MVT::i8: + case MVT::i16: + needsExt = true; + // Intentional fall-through. case MVT::i32: CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; break; } - unsigned Src1 = getRegForValue(Src1Value); - if (Src1 == 0) return false; - - unsigned Src2 = getRegForValue(Src2Value); - if (Src2 == 0) return false; + unsigned SrcReg1 = getRegForValue(Src1Value); + if (SrcReg1 == 0) return false; + + unsigned SrcReg2 = getRegForValue(Src2Value); + if (SrcReg2 == 0) return false; + + // We have i1, i8, or i16, we need to either zero extend or sign extend. 
+ if (needsExt) { + unsigned ResultReg; + EVT DestVT = MVT::i32; + ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, DestVT, isZExt); + if (ResultReg == 0) return false; + SrcReg1 = ResultReg; + ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, DestVT, isZExt); + if (ResultReg == 0) return false; + SrcReg2 = ResultReg; + } AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(Src1).addReg(Src2)); + .addReg(SrcReg1).addReg(SrcReg2)); // For floating point we need to move the result to a comparison register // that we can then use for branches. @@ -1243,7 +1261,7 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { if (ARMPred == ARMCC::AL) return false; // Emit the compare. - if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1))) + if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; // Now set a register based on the comparison. Explicitly set the predicates @@ -1962,7 +1980,6 @@ bool ARMFastISel::SelectCall(const Instruction *I) { static_cast(MIB)->setPhysRegsDeadExcept(UsedRegs, TRI); return true; - } bool ARMFastISel::SelectTrunc(const Instruction *I) { -- cgit v1.1 From c8a55a660e93bb7a4854969b4c5814bf7bb0101b Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Wed, 2 Nov 2011 19:33:12 +0000 Subject: Object/Archive: Add symbol table iteration. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143561 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Archive.cpp | 80 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 71 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index f1c4b2d..968ddcd 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -13,6 +13,7 @@ #include "llvm/Object/Archive.h" #include "llvm/ADT/APInt.h" +#include "llvm/Support/Endian.h" #include "llvm/Support/MemoryBuffer.h" using namespace llvm; @@ -171,8 +172,7 @@ error_code Archive::Child::getAsBinary(OwningPtr &Result) const { } Archive::Archive(MemoryBuffer *source, error_code &ec) - : Binary(Binary::isArchive, source) - , StringTable(Child(this, StringRef(0, 0))) { + : Binary(Binary::isArchive, source) { // Check for sufficient magic. if (!source || source->getBufferSize() < (8 + sizeof(ArchiveMemberHeader) + 2) // Smallest archive. @@ -181,15 +181,18 @@ Archive::Archive(MemoryBuffer *source, error_code &ec) return; } - // Get the string table. It's the 3rd member. - child_iterator StrTable = begin_children(false); + // Get the special members. + child_iterator i = begin_children(false); child_iterator e = end_children(); - for (int i = 0; StrTable != e && i < 2; ++StrTable, ++i) {} - // Check to see if there were 3 members, or the 3rd member wasn't named "//". - StringRef name; - if (StrTable != e && !StrTable->getName(name) && name == "//") - StringTable = StrTable; + if (i != e) ++i; // Nobody cares about the first member. 
+ if (i != e) { + SymbolTable = i; + ++i; + } + if (i != e) { + StringTable = i; + } ec = object_error::success; } @@ -208,3 +211,62 @@ Archive::child_iterator Archive::begin_children(bool skip_internal) const { Archive::child_iterator Archive::end_children() const { return Child(this, StringRef(0, 0)); } + +error_code Archive::Symbol::getName(StringRef &Result) const { + Result = + StringRef(Parent->SymbolTable->getBuffer()->getBufferStart() + StringIndex); + return object_error::success; +} + +error_code Archive::Symbol::getMember(child_iterator &Result) const { + const char *buf = Parent->SymbolTable->getBuffer()->getBufferStart(); + uint32_t member_count = *reinterpret_cast(buf); + const char *offsets = buf + 4; + buf += 4 + (member_count * 4); // Skip offsets. + uint32_t symbol_count = *reinterpret_cast(buf); + const char *indicies = buf + 4; + + uint16_t offsetindex = + *(reinterpret_cast(indicies) + + SymbolIndex); + + uint32_t offset = *(reinterpret_cast(offsets) + + (offsetindex - 1)); + + const char *Loc = Parent->getData().begin() + offset; + size_t Size = sizeof(ArchiveMemberHeader) + + ToHeader(Loc)->getSize(); + Result = Child(Parent, StringRef(Loc, Size)); + + return object_error::success; +} + +Archive::Symbol Archive::Symbol::getNext() const { + Symbol t(*this); + const char *buf = Parent->SymbolTable->getBuffer()->getBufferStart(); + buf += t.StringIndex; + while (*buf++); // Go to one past next null. + t.StringIndex = buf - Parent->SymbolTable->getBuffer()->getBufferStart(); + ++t.SymbolIndex; + return t; +} + +Archive::symbol_iterator Archive::begin_symbols() const { + const char *buf = SymbolTable->getBuffer()->getBufferStart(); + uint32_t member_count = *reinterpret_cast(buf); + buf += 4 + (member_count * 4); // Skip offsets. + uint32_t symbol_count = *reinterpret_cast(buf); + buf += 4 + (symbol_count * 2); // Skip indices. 
+ uint32_t string_start_offset = + buf - SymbolTable->getBuffer()->getBufferStart(); + return symbol_iterator(Symbol(this, 0, string_start_offset)); +} + +Archive::symbol_iterator Archive::end_symbols() const { + const char *buf = SymbolTable->getBuffer()->getBufferStart(); + uint32_t member_count = *reinterpret_cast(buf); + buf += 4 + (member_count * 4); // Skip offsets. + uint32_t symbol_count = *reinterpret_cast(buf); + return symbol_iterator( + Symbol(this, symbol_count, 0)); +} -- cgit v1.1 From 783d88793afe4b4e42460c139ab742fbd26f39f7 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Wed, 2 Nov 2011 19:33:26 +0000 Subject: object/COFF: Properly initalize uses of DataRefImpl. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143562 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/COFFObjectFile.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index f19836b..7de51a4 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -284,6 +284,7 @@ error_code COFFObjectFile::getSymbolSection(DataRefImpl Symb, const coff_section *sec; if (error_code ec = getSection(symb->SectionNumber, sec)) return ec; DataRefImpl Sec; + std::memset(&Sec, 0, sizeof(Sec)); Sec.p = reinterpret_cast(sec); Result = section_iterator(SectionRef(Sec, this)); } @@ -618,6 +619,7 @@ error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const { const coff_relocation* R = toRel(Rel); DataRefImpl Symb; + std::memset(&Symb, 0, sizeof(Symb)); Symb.p = reinterpret_cast(SymbolTable + R->SymbolTableIndex); Res = SymbolRef(Symb, this); return object_error::success; -- cgit v1.1 From 6c1a703e5418b2ba9d7b47b0c9345e9928ea5a68 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 2 Nov 2011 20:55:33 +0000 Subject: Don't emit a directory entry for the value in DW_AT_comp_dir, that is always implied by directory index zero. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143570 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 12 ++++++++---- lib/CodeGen/AsmPrinter/DwarfDebug.h | 4 ++++ 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1d5e05d..0f4ea05 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -442,6 +442,10 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, if (FileName.empty()) return GetOrCreateSourceID("", StringRef()); + // TODO: this might not belong here. See if we can factor this better. + if (DirName == CompilationDir) + DirName = ""; + unsigned SrcId = SourceIdMap.size()+1; std::pair SourceName = std::make_pair(FileName, DirName); @@ -466,8 +470,8 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); - StringRef Dir = DIUnit.getDirectory(); - unsigned ID = GetOrCreateSourceID(FN, Dir); + CompilationDir = DIUnit.getDirectory(); + unsigned ID = GetOrCreateSourceID(FN, CompilationDir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); CompileUnit *NewCU = new CompileUnit(ID, Die, Asm, this); @@ -486,8 +490,8 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { else NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); - if (!Dir.empty()) - NewCU->addString(Die, dwarf::DW_AT_comp_dir, Dir); + if (!CompilationDir.empty()) + NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_optimized, dwarf::DW_FORM_flag, 1); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index b280fbb..d5ce696 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -302,6 
+302,10 @@ class DwarfDebug { MCSymbol *DwarfDebugLocSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; + // As an optimization, there is no need to emit an entry in the directory + // table for the same directory as DW_AT_comp_dir. + StringRef CompilationDir; + private: /// assignAbbrevNumber - Define a unique number for the abbreviation. -- cgit v1.1 From 1a1d1fcc0b955420cdbe0b94bd01c46d4e96b429 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 2 Nov 2011 22:52:45 +0000 Subject: Try to lower memset/memcpy/memmove to vector instructions on ARM where the alignment permits. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143582 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 28 ++++++++++++++++++++++++++++ lib/Target/ARM/ARMISelLowering.h | 7 ++++++- 2 files changed, 34 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 31e522d..222a399 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -8127,6 +8127,34 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const { } } +static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, + unsigned AlignCheck) { + return ((SrcAlign == 0 || SrcAlign % AlignCheck == 0) && + (DstAlign == 0 || DstAlign % AlignCheck == 0)); +} + +EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, + bool MemcpyStrSrc, + MachineFunction &MF) const { + const Function *F = MF.getFunction(); + + // See if we can use NEON instructions for this... + if (NonScalarIntSafe && + !F->hasFnAttr(Attribute::NoImplicitFloat) && + Subtarget->hasNEON()) { + if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { + return MVT::v4i32; + } else if (memOpAlign(SrcAlign, DstAlign, 8) && Size >= 8) { + return MVT::v2i32; + } + } + + // Let the target-independent logic figure it out.
+ return MVT::Other; +} + static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 5da9b27..43e43dd 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -266,9 +266,14 @@ namespace llvm { /// allowsUnalignedMemoryAccesses - Returns true if the target allows /// unaligned memory accesses. of the specified type. - /// FIXME: Add getOptimalMemOpType to implement memcpy with NEON? virtual bool allowsUnalignedMemoryAccesses(EVT VT) const; + virtual EVT getOptimalMemOpType(uint64_t Size, + unsigned DstAlign, unsigned SrcAlign, + bool NonScalarIntSafe, + bool MemcpyStrSrc, + MachineFunction &MF) const; + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; -- cgit v1.1 From 9c674bb012c23622bd31d5e82cb0baeda9aac3a5 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 2 Nov 2011 23:20:58 +0000 Subject: An array of chars of length 8 will also cause the stack protector to be inserted into the function. Reflect that here so that the array will be placed next to the SP. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143590 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index b052740..90d35cc 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -92,7 +92,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // candidate. I.e., it would trigger the creation of a stack protector. 
bool MayNeedSP = (AI->isArrayAllocation() || - (TySize > 8 && isa(Ty) && + (TySize >= 8 && isa(Ty) && cast(Ty)->getElementType()->isIntegerTy(8))); StaticAllocaMap[AI] = MF->getFrameInfo()->CreateStackObject(TySize, Align, false, MayNeedSP); -- cgit v1.1 From eefb0de6dc383faa2d95536ac5e9deb71958d953 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 2 Nov 2011 23:21:55 +0000 Subject: Removed unused variable. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143591 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Archive.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index 968ddcd..a1bd8b6 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -223,7 +223,6 @@ error_code Archive::Symbol::getMember(child_iterator &Result) const { uint32_t member_count = *reinterpret_cast(buf); const char *offsets = buf + 4; buf += 4 + (member_count * 4); // Skip offsets. - uint32_t symbol_count = *reinterpret_cast(buf); const char *indicies = buf + 4; uint16_t offsetindex = -- cgit v1.1 From a1e78888d95375194e7513bef3e18d9f1b7d45bf Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Wed, 2 Nov 2011 23:37:04 +0000 Subject: Fixed parameter name. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143594 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 4 ++-- lib/Target/ARM/ARMISelLowering.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 222a399..336f730 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -8135,13 +8135,13 @@ static bool memOpAlign(unsigned DstAlign, unsigned SrcAlign, EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, + bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const { const Function *F = MF.getFunction(); // See if we can use NEON instructions for this... - if (NonScalarIntSafe && + if (IsZeroVal && !F->hasFnAttr(Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { if (memOpAlign(SrcAlign, DstAlign, 16) && Size >= 16) { diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 43e43dd..be6a530 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -270,7 +270,7 @@ namespace llvm { virtual EVT getOptimalMemOpType(uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool NonScalarIntSafe, + bool IsZeroVal, bool MemcpyStrSrc, MachineFunction &MF) const; -- cgit v1.1 From cde546497067bf2ed40b9473582212df4ccd8141 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 3 Nov 2011 00:14:01 +0000 Subject: Remove some cruft from the BitcodeWriter, while still maintaining backward compatibility in the BitcodeReader. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143598 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Bitcode/Reader/BitcodeReader.cpp | 40 ++++++++++++++++++++++++++++++++++-- lib/Bitcode/Writer/BitcodeWriter.cpp | 4 +--- 2 files changed, 39 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 46565f3..6ecdbae 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -615,7 +615,7 @@ bool BitcodeReader::ParseTypeTableBody() { ResultTy = PointerType::get(ResultTy, AddressSpace); break; } - case bitc::TYPE_CODE_FUNCTION: { + case bitc::TYPE_CODE_FUNCTION_OLD: { // FIXME: attrid is dead, remove it in LLVM 3.0 // FUNCTION: [vararg, attrid, retty, paramty x N] if (Record.size() < 3) @@ -635,6 +635,25 @@ bool BitcodeReader::ParseTypeTableBody() { ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; } + case bitc::TYPE_CODE_FUNCTION: { + // FUNCTION: [vararg, retty, paramty x N] + if (Record.size() < 2) + return Error("Invalid FUNCTION type record"); + std::vector ArgTys; + for (unsigned i = 2, e = Record.size(); i != e; ++i) { + if (Type *T = getTypeByID(Record[i])) + ArgTys.push_back(T); + else + break; + } + + ResultTy = getTypeByID(Record[1]); + if (ResultTy == 0 || ArgTys.size() < Record.size()-2) + return Error("invalid type in function type"); + + ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); + break; + } case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) return Error("Invalid STRUCT type record"); @@ -871,7 +890,7 @@ RestartScan: ResultTy = PointerType::get(ResultTy, AddressSpace); break; } - case bitc::TYPE_CODE_FUNCTION: { + case bitc::TYPE_CODE_FUNCTION_OLD: { // FIXME: attrid is dead, remove it in LLVM 3.0 // FUNCTION: [vararg, attrid, retty, paramty x N] if (Record.size() < 3) @@ -889,6 +908,23 @@ RestartScan: ResultTy = FunctionType::get(ResultTy, ArgTys, 
Record[0]); break; } + case bitc::TYPE_CODE_FUNCTION: { + // FUNCTION: [vararg, retty, paramty x N] + if (Record.size() < 2) + return Error("Invalid FUNCTION type record"); + std::vector ArgTys; + for (unsigned i = 2, e = Record.size(); i != e; ++i) { + if (Type *Elt = getTypeByIDOrNull(Record[i])) + ArgTys.push_back(Elt); + else + break; + } + if (ArgTys.size()+2 != Record.size()) + break; // Something was null. + if ((ResultTy = getTypeByIDOrNull(Record[1]))) + ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); + break; + } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) return Error("Invalid ARRAY type record"); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 5b3d969..e758f94 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -206,7 +206,6 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_FUNCTION)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 1)); // isvararg - Abbv->Add(BitCodeAbbrevOp(0)); // FIXME: DEAD value, remove in LLVM 3.0 Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array)); Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, Log2_32_Ceil(VE.getTypes().size()+1))); @@ -284,10 +283,9 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { } case Type::FunctionTyID: { FunctionType *FT = cast(T); - // FUNCTION: [isvararg, attrid, retty, paramty x N] + // FUNCTION: [isvararg, retty, paramty x N] Code = bitc::TYPE_CODE_FUNCTION; TypeVals.push_back(FT->isVarArg()); - TypeVals.push_back(0); // FIXME: DEAD: remove in llvm 3.0 TypeVals.push_back(VE.getTypeID(FT->getReturnType())); for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) TypeVals.push_back(VE.getTypeID(FT->getParamType(i))); -- cgit v1.1 From d1ffc739c1f88352c79a63ff17b828b3a529777e Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: 
Thu, 3 Nov 2011 00:56:36 +0000 Subject: Treat objc selector reference globals as invariant so that MachineLICM can hoist them out of loops. Fixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143600 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineInstr.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index a240667..a2fd3c4 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1232,6 +1232,19 @@ bool MachineInstr::hasVolatileMemoryRef() const { return false; } +/// pointsToRuntimeConstantMemory - Return true if this value points to data +/// which does never changes once the program starts running +static bool pointsToRuntimeConstantMemory(const Value *V) { + if (const GlobalVariable *GV = dyn_cast(V)) { + StringRef Name = GV->getName(); + // These special values are known to be constant at runtime + // TODO: a new linkage type for these would be far better than this check + if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_")) + return true; + } + return false; +} + /// isInvariantLoad - Return true if this instruction is loading from a /// location whose value is invariant across the function. For example, /// loading a value from the constant pool or from the argument area @@ -1259,6 +1272,8 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { if (const PseudoSourceValue *PSV = dyn_cast(V)) if (PSV->isConstant(MFI)) continue; + if (pointsToRuntimeConstantMemory(V)) + continue; // If we have an AliasAnalysis, ask it whether the memory is constant. if (AA && AA->pointsToConstantMemory( AliasAnalysis::Location(V, (*I)->getSize(), -- cgit v1.1 From 463fe24f1dd5132607abb3548a2acb1849e9aa99 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 3 Nov 2011 02:04:59 +0000 Subject: Add support for sign-extending non-legal types in SelectSIToFP(). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143603 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 78bda6c..d1f2c7f 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1329,16 +1329,25 @@ bool ARMFastISel::SelectSIToFP(const Instruction *I) { if (!isTypeLegal(Ty, DstVT)) return false; - // FIXME: Handle sign-extension where necessary. - if (!I->getOperand(0)->getType()->isIntegerTy(32)) + Value *Src = I->getOperand(0); + EVT SrcVT = TLI.getValueType(Src->getType(), true); + if (SrcVT != MVT::i32 && SrcVT != MVT::i16 && SrcVT != MVT::i8) return false; - unsigned Op = getRegForValue(I->getOperand(0)); - if (Op == 0) return false; + unsigned SrcReg = getRegForValue(Src); + if (SrcReg == 0) return false; + + // Handle sign-extension. + if (SrcVT == MVT::i16 || SrcVT == MVT::i8) { + EVT DestVT = MVT::i32; + unsigned ResultReg = ARMEmitIntExt(SrcVT, SrcReg, DestVT, /*isZExt*/ false); + if (ResultReg == 0) return false; + SrcReg = ResultReg; + } // The conversion routine works on fp-reg to fp-reg and the operand above // was an integer, move it to the fp registers if possible. - unsigned FP = ARMMoveToFPReg(MVT::f32, Op); + unsigned FP = ARMMoveToFPReg(MVT::f32, SrcReg); if (FP == 0) return false; unsigned Opc; -- cgit v1.1 From 98e0b9c86d1385d9cf04aef92651f0cdf8a46a9e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 3 Nov 2011 07:35:53 +0000 Subject: Add new X86 AVX2 VBROADCAST instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143612 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 39 ++++++++++++++++++++++++++------------- 1 file changed, 26 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 77a9031..8f7e27b 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7083,35 +7083,48 @@ class avx_broadcast opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (Int addr:$src))]>, VEX; -def VBROADCASTSS : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, - int_x86_avx_vbroadcastss>; -def VBROADCASTSSY : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, - int_x86_avx_vbroadcastss_256>; -def VBROADCASTSD : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, - int_x86_avx_vbroadcast_sd_256>; +class avx_broadcast_reg opc, string OpcodeStr, RegisterClass RC, + Intrinsic Int> : + AVX8I, VEX; + +def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, + int_x86_avx_vbroadcast_ss>; +def VBROADCASTSSYrm : avx_broadcast<0x18, "vbroadcastss", VR256, f32mem, + int_x86_avx_vbroadcast_ss_256>; +def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, + int_x86_avx_vbroadcast_sd_256>; def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, int_x86_avx_vbroadcastf128_pd_256>; -let Predicates = [HasAVX2] in +let Predicates = [HasAVX2] in { def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, int_x86_avx2_vbroadcasti128>; +def VBROADCASTSSrr : avx_broadcast_reg<0x18, "vbroadcastss", VR128, + int_x86_avx2_vbroadcast_ss_ps>; +def VBROADCASTSSYrr : avx_broadcast_reg<0x18, "vbroadcastss", VR256, + int_x86_avx2_vbroadcast_ss_ps_256>; +def VBROADCASTSDrr : avx_broadcast_reg<0x19, "vbroadcastsd", VR256, + int_x86_avx2_vbroadcast_sd_pd_256>; +} def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), 
(VBROADCASTF128 addr:$src)>; def : Pat<(v8i32 (X86VBroadcast (loadi32 addr:$src))), - (VBROADCASTSSY addr:$src)>; + (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4i64 (X86VBroadcast (loadi64 addr:$src))), - (VBROADCASTSD addr:$src)>; + (VBROADCASTSDrm addr:$src)>; def : Pat<(v8f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSSY addr:$src)>; + (VBROADCASTSSYrm addr:$src)>; def : Pat<(v4f64 (X86VBroadcast (loadf64 addr:$src))), - (VBROADCASTSD addr:$src)>; + (VBROADCASTSDrm addr:$src)>; def : Pat<(v4f32 (X86VBroadcast (loadf32 addr:$src))), - (VBROADCASTSS addr:$src)>; + (VBROADCASTSSrm addr:$src)>; def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), - (VBROADCASTSS addr:$src)>; + (VBROADCASTSSrm addr:$src)>; //===----------------------------------------------------------------------===// // VINSERTF128 - Insert packed floating-point values -- cgit v1.1 From 2d32b86543aa857a07371a93766652510f1cc0d6 Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Thu, 3 Nov 2011 18:01:56 +0000 Subject: DeadStoreElimination can now trim the size of a store if the end of it is dead. Only currently done if the later store is writing to a power of 2 address or has the same alignment as the earlier store as then its likely to not break up large stores into smaller ones Fixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143630 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/DeadStoreElimination.cpp | 142 +++++++++++++++++++------ 1 file changed, 109 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index c0738a9..921bec8 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -239,6 +239,24 @@ static bool isRemovable(Instruction *I) { } } + +/// isShortenable - Returns true if this instruction can be safely shortened in +/// length. 
+static bool isShortenable(Instruction *I) { + // Don't shorten stores for now + if (isa(I)) + return false; + + IntrinsicInst *II = cast(I); + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::memset: + case Intrinsic::memcpy: + // Do shorten memory intrinsics. + return true; + } +} + /// getStoredPointerOperand - Return the pointer that is being written to. static Value *getStoredPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast(I)) @@ -293,11 +311,24 @@ static bool isObjectPointerWithTrustworthySize(const Value *V) { return false; } -/// isCompleteOverwrite - Return true if a store to the 'Later' location +namespace { + enum OverwriteResult + { + OverwriteComplete, + OverwriteEnd, + OverwriteUnknown + }; +} + +/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location /// completely overwrites a store to the 'Earlier' location. -static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, - const AliasAnalysis::Location &Earlier, - AliasAnalysis &AA) { +/// 'OverwriteEnd' if the end of the 'Earlier' location is completely +/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined +static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, + const AliasAnalysis::Location &Earlier, + AliasAnalysis &AA, + int64_t& EarlierOff, + int64_t& LaterOff) { const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); @@ -311,23 +342,24 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, // If we have no TargetData information around, then the size of the store // is inferrable from the pointee type. If they are the same type, then // we know that the store is safe. 
- if (AA.getTargetData() == 0) - return Later.Ptr->getType() == Earlier.Ptr->getType(); - return false; + if (AA.getTargetData() == 0 && + Later.Ptr->getType() == Earlier.Ptr->getType()) + return OverwriteComplete; + + return OverwriteUnknown; } // Make sure that the Later size is >= the Earlier size. - if (Later.Size < Earlier.Size) - return false; - return true; + if (Later.Size >= Earlier.Size) + return OverwriteComplete; } // Otherwise, we have to have size information, and the later store has to be // larger than the earlier one. if (Later.Size == AliasAnalysis::UnknownSize || Earlier.Size == AliasAnalysis::UnknownSize || - Later.Size <= Earlier.Size || AA.getTargetData() == 0) - return false; + AA.getTargetData() == 0) + return OverwriteUnknown; // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval argument). If so, then it clearly overwrites any @@ -340,26 +372,27 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. if (UO1 != UO2) - return false; + return OverwriteUnknown; // If the "Later" store is to a recognizable object, get its size. if (isObjectPointerWithTrustworthySize(UO2)) { uint64_t ObjectSize = TD.getTypeAllocSize(cast(UO2->getType())->getElementType()); if (ObjectSize == Later.Size) - return true; + return OverwriteComplete; } // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - int64_t EarlierOff = 0, LaterOff = 0; + EarlierOff = 0; + LaterOff = 0; const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); // If the base pointers still differ, we have two completely different stores. 
if (BP1 != BP2) - return false; + return OverwriteUnknown; // The later store completely overlaps the earlier store if: // @@ -377,11 +410,24 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, // // We have to be careful here as *Off is signed while *.Size is unsigned. if (EarlierOff >= LaterOff && + Later.Size > Earlier.Size && uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) - return true; + return OverwriteComplete; + + // The other interesting case is if the later store overwrites the end of + // the earlier store + // + // |--earlier--| + // |-- later --| + // + // In this case we may want to trim the size of earlier to avoid generating + // writes to addresses which will definitely be overwritten later + if (LaterOff > EarlierOff && + LaterOff + Later.Size >= EarlierOff + Earlier.Size) + return OverwriteEnd; // Otherwise, they don't completely overlap. - return false; + return OverwriteUnknown; } /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a @@ -505,22 +551,52 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // If we find a write that is a) removable (i.e., non-volatile), b) is // completely obliterated by the store to 'Loc', and c) which we know that // 'Inst' doesn't load from, then we can remove it. - if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) && + if (isRemovable(DepWrite) && !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { - DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " - << *DepWrite << "\n KILLER: " << *Inst << '\n'); - - // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD); - ++NumFastStores; - MadeChange = true; - - // DeleteDeadInstruction can delete the current instruction in loop - // cases, reset BBI. 
- BBI = Inst; - if (BBI != BB.begin()) - --BBI; - break; + int64_t InstWriteOffset, DepWriteOffset; + OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA, + DepWriteOffset, InstWriteOffset); + if (OR == OverwriteComplete) { + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " + << *DepWrite << "\n KILLER: " << *Inst << '\n'); + + // Delete the store and now-dead instructions that feed it. + DeleteDeadInstruction(DepWrite, *MD); + ++NumFastStores; + MadeChange = true; + + // DeleteDeadInstruction can delete the current instruction in loop + // cases, reset BBI. + BBI = Inst; + if (BBI != BB.begin()) + --BBI; + break; + } else if (OR == OverwriteEnd && isShortenable(DepWrite)) { + // TODO: base this on the target vector size so that if the earlier + // store was too small to get vector writes anyway then its likely + // a good idea to shorten it + // Power of 2 vector writes are probably always a bad idea to optimize + // as any store/memset/memcpy is likely using vector instructions so + // shortening it to not vector size is likely to be slower + MemIntrinsic* DepIntrinsic = cast(DepWrite); + unsigned DepWriteAlign = DepIntrinsic->getAlignment(); + if (llvm::isPowerOf2_64(InstWriteOffset) || + ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { + + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " + << *DepWrite << "\n KILLER (offset " + << InstWriteOffset << ", " + << DepLoc.Size << ")" + << *Inst << '\n'); + + Value* DepWriteLength = DepIntrinsic->getLength(); + Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(), + InstWriteOffset - + DepWriteOffset); + DepIntrinsic->setLength(TrimmedLength); + MadeChange = true; + } + } } // If this is a may-aliased store that is clobbering the store value, we -- cgit v1.1 From a3a2dfd4a2a8265a9a0c962cb776e2e6ba123956 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Thu, 3 Nov 2011 18:53:17 +0000 Subject: build: Add initial cut at LLVMBuild.txt files. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143634 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/IPA/LLVMBuild.txt | 24 ++++++++++++++++ lib/Analysis/LLVMBuild.txt | 23 +++++++++++++++ lib/Archive/LLVMBuild.txt | 23 +++++++++++++++ lib/AsmParser/LLVMBuild.txt | 23 +++++++++++++++ lib/Bitcode/LLVMBuild.txt | 22 ++++++++++++++ lib/Bitcode/Reader/LLVMBuild.txt | 23 +++++++++++++++ lib/Bitcode/Writer/LLVMBuild.txt | 23 +++++++++++++++ lib/CodeGen/AsmPrinter/LLVMBuild.txt | 23 +++++++++++++++ lib/CodeGen/LLVMBuild.txt | 23 +++++++++++++++ lib/CodeGen/SelectionDAG/LLVMBuild.txt | 23 +++++++++++++++ lib/DebugInfo/LLVMBuild.txt | 23 +++++++++++++++ lib/ExecutionEngine/Interpreter/LLVMBuild.txt | 23 +++++++++++++++ lib/ExecutionEngine/JIT/LLVMBuild.txt | 23 +++++++++++++++ lib/ExecutionEngine/LLVMBuild.txt | 23 +++++++++++++++ lib/ExecutionEngine/MCJIT/LLVMBuild.txt | 23 +++++++++++++++ lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt | 23 +++++++++++++++ lib/LLVMBuild.txt | 22 ++++++++++++++ lib/Linker/LLVMBuild.txt | 23 +++++++++++++++ lib/MC/LLVMBuild.txt | 23 +++++++++++++++ lib/MC/MCDisassembler/LLVMBuild.txt | 23 +++++++++++++++ lib/MC/MCParser/LLVMBuild.txt | 23 +++++++++++++++ lib/Object/LLVMBuild.txt | 23 +++++++++++++++ lib/Support/LLVMBuild.txt | 22 ++++++++++++++ lib/TableGen/LLVMBuild.txt | 23 +++++++++++++++ lib/Target/ARM/AsmParser/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/ARM/Disassembler/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/ARM/InstPrinter/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/ARM/LLVMBuild.txt | 30 +++++++++++++++++++ lib/Target/ARM/MCTargetDesc/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/ARM/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/CBackend/LLVMBuild.txt | 31 ++++++++++++++++++++ lib/Target/CBackend/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/CellSPU/LLVMBuild.txt | 30 +++++++++++++++++++ lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt | 24 ++++++++++++++++ 
lib/Target/CellSPU/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/CppBackend/LLVMBuild.txt | 31 ++++++++++++++++++++ lib/Target/CppBackend/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/LLVMBuild.txt | 40 ++++++++++++++++++++++++++ lib/Target/MBlaze/AsmParser/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/MBlaze/Disassembler/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/MBlaze/InstPrinter/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/MBlaze/LLVMBuild.txt | 30 +++++++++++++++++++ lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/MBlaze/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/MSP430/InstPrinter/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/MSP430/LLVMBuild.txt | 30 +++++++++++++++++++ lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/MSP430/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/Mips/InstPrinter/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/Mips/LLVMBuild.txt | 30 +++++++++++++++++++ lib/Target/Mips/MCTargetDesc/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/Mips/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/PTX/InstPrinter/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/PTX/LLVMBuild.txt | 30 +++++++++++++++++++ lib/Target/PTX/MCTargetDesc/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/PTX/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/PowerPC/InstPrinter/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/PowerPC/LLVMBuild.txt | 30 +++++++++++++++++++ lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/PowerPC/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/Sparc/LLVMBuild.txt | 30 +++++++++++++++++++ lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/Sparc/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/X86/AsmParser/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/X86/Disassembler/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/X86/InstPrinter/LLVMBuild.txt | 
24 ++++++++++++++++ lib/Target/X86/LLVMBuild.txt | 30 +++++++++++++++++++ lib/Target/X86/MCTargetDesc/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/X86/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/X86/Utils/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/XCore/LLVMBuild.txt | 30 +++++++++++++++++++ lib/Target/XCore/MCTargetDesc/LLVMBuild.txt | 24 ++++++++++++++++ lib/Target/XCore/TargetInfo/LLVMBuild.txt | 24 ++++++++++++++++ lib/Transforms/IPO/LLVMBuild.txt | 24 ++++++++++++++++ lib/Transforms/InstCombine/LLVMBuild.txt | 23 +++++++++++++++ lib/Transforms/Instrumentation/LLVMBuild.txt | 23 +++++++++++++++ lib/Transforms/LLVMBuild.txt | 22 ++++++++++++++ lib/Transforms/Scalar/LLVMBuild.txt | 24 ++++++++++++++++ lib/Transforms/Utils/LLVMBuild.txt | 23 +++++++++++++++ lib/VMCore/LLVMBuild.txt | 23 +++++++++++++++ 80 files changed, 1978 insertions(+) create mode 100644 lib/Analysis/IPA/LLVMBuild.txt create mode 100644 lib/Analysis/LLVMBuild.txt create mode 100644 lib/Archive/LLVMBuild.txt create mode 100644 lib/AsmParser/LLVMBuild.txt create mode 100644 lib/Bitcode/LLVMBuild.txt create mode 100644 lib/Bitcode/Reader/LLVMBuild.txt create mode 100644 lib/Bitcode/Writer/LLVMBuild.txt create mode 100644 lib/CodeGen/AsmPrinter/LLVMBuild.txt create mode 100644 lib/CodeGen/LLVMBuild.txt create mode 100644 lib/CodeGen/SelectionDAG/LLVMBuild.txt create mode 100644 lib/DebugInfo/LLVMBuild.txt create mode 100644 lib/ExecutionEngine/Interpreter/LLVMBuild.txt create mode 100644 lib/ExecutionEngine/JIT/LLVMBuild.txt create mode 100644 lib/ExecutionEngine/LLVMBuild.txt create mode 100644 lib/ExecutionEngine/MCJIT/LLVMBuild.txt create mode 100644 lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt create mode 100644 lib/LLVMBuild.txt create mode 100644 lib/Linker/LLVMBuild.txt create mode 100644 lib/MC/LLVMBuild.txt create mode 100644 lib/MC/MCDisassembler/LLVMBuild.txt create mode 100644 lib/MC/MCParser/LLVMBuild.txt create mode 100644 lib/Object/LLVMBuild.txt create mode 
100644 lib/Support/LLVMBuild.txt create mode 100644 lib/TableGen/LLVMBuild.txt create mode 100644 lib/Target/ARM/AsmParser/LLVMBuild.txt create mode 100644 lib/Target/ARM/Disassembler/LLVMBuild.txt create mode 100644 lib/Target/ARM/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/ARM/LLVMBuild.txt create mode 100644 lib/Target/ARM/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/ARM/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/CBackend/LLVMBuild.txt create mode 100644 lib/Target/CBackend/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/CellSPU/LLVMBuild.txt create mode 100644 lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/CellSPU/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/CppBackend/LLVMBuild.txt create mode 100644 lib/Target/CppBackend/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/LLVMBuild.txt create mode 100644 lib/Target/MBlaze/AsmParser/LLVMBuild.txt create mode 100644 lib/Target/MBlaze/Disassembler/LLVMBuild.txt create mode 100644 lib/Target/MBlaze/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/MBlaze/LLVMBuild.txt create mode 100644 lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/MBlaze/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/MSP430/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/MSP430/LLVMBuild.txt create mode 100644 lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/MSP430/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/Mips/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/Mips/LLVMBuild.txt create mode 100644 lib/Target/Mips/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/Mips/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/PTX/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/PTX/LLVMBuild.txt create mode 100644 lib/Target/PTX/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/PTX/TargetInfo/LLVMBuild.txt create mode 100644 
lib/Target/PowerPC/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/PowerPC/LLVMBuild.txt create mode 100644 lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/PowerPC/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/Sparc/LLVMBuild.txt create mode 100644 lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/Sparc/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/X86/AsmParser/LLVMBuild.txt create mode 100644 lib/Target/X86/Disassembler/LLVMBuild.txt create mode 100644 lib/Target/X86/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/X86/LLVMBuild.txt create mode 100644 lib/Target/X86/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/X86/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/X86/Utils/LLVMBuild.txt create mode 100644 lib/Target/XCore/LLVMBuild.txt create mode 100644 lib/Target/XCore/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/XCore/TargetInfo/LLVMBuild.txt create mode 100644 lib/Transforms/IPO/LLVMBuild.txt create mode 100644 lib/Transforms/InstCombine/LLVMBuild.txt create mode 100644 lib/Transforms/Instrumentation/LLVMBuild.txt create mode 100644 lib/Transforms/LLVMBuild.txt create mode 100644 lib/Transforms/Scalar/LLVMBuild.txt create mode 100644 lib/Transforms/Utils/LLVMBuild.txt create mode 100644 lib/VMCore/LLVMBuild.txt (limited to 'lib') diff --git a/lib/Analysis/IPA/LLVMBuild.txt b/lib/Analysis/IPA/LLVMBuild.txt new file mode 100644 index 0000000..fb16278 --- /dev/null +++ b/lib/Analysis/IPA/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Analysis/IPA/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = IPA +parent = Libraries +library_name = ipa +required_libraries = Analysis Core Support + diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt new file mode 100644 index 0000000..92f199b --- /dev/null +++ b/lib/Analysis/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Analysis/LLVMBuild.txt -----------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Analysis +parent = Libraries +required_libraries = Core Support Target + diff --git a/lib/Archive/LLVMBuild.txt b/lib/Archive/LLVMBuild.txt new file mode 100644 index 0000000..26b7c8e --- /dev/null +++ b/lib/Archive/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Archive/LLVMBuild.txt ------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Archive +parent = Libraries +required_libraries = BitReader Core Support + diff --git a/lib/AsmParser/LLVMBuild.txt b/lib/AsmParser/LLVMBuild.txt new file mode 100644 index 0000000..ad56d4c --- /dev/null +++ b/lib/AsmParser/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/AsmParser/LLVMBuild.txt ----------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AsmParser +parent = Libraries +required_libraries = Core Support + diff --git a/lib/Bitcode/LLVMBuild.txt b/lib/Bitcode/LLVMBuild.txt new file mode 100644 index 0000000..696440d --- /dev/null +++ b/lib/Bitcode/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Bitcode/LLVMBuild.txt ------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Group +name = Bitcode +parent = Libraries + diff --git a/lib/Bitcode/Reader/LLVMBuild.txt b/lib/Bitcode/Reader/LLVMBuild.txt new file mode 100644 index 0000000..948b335 --- /dev/null +++ b/lib/Bitcode/Reader/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Bitcode/Reader/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = BitReader +parent = Bitcode +required_libraries = Core Support + diff --git a/lib/Bitcode/Writer/LLVMBuild.txt b/lib/Bitcode/Writer/LLVMBuild.txt new file mode 100644 index 0000000..39ff04e --- /dev/null +++ b/lib/Bitcode/Writer/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Bitcode/Writer/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = BitWriter +parent = Bitcode +required_libraries = Core Support + diff --git a/lib/CodeGen/AsmPrinter/LLVMBuild.txt b/lib/CodeGen/AsmPrinter/LLVMBuild.txt new file mode 100644 index 0000000..0f2059f --- /dev/null +++ b/lib/CodeGen/AsmPrinter/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/CodeGen/AsmPrinter/LLVMBuild.txt -------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = AsmPrinter +parent = Libraries +required_libraries = Analysis CodeGen Core MC MCParser Support Target + diff --git a/lib/CodeGen/LLVMBuild.txt b/lib/CodeGen/LLVMBuild.txt new file mode 100644 index 0000000..2eebb08 --- /dev/null +++ b/lib/CodeGen/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/CodeGen/LLVMBuild.txt ------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = CodeGen +parent = Libraries +required_libraries = Analysis Core MC Scalar Support Target TransformUtils + diff --git a/lib/CodeGen/SelectionDAG/LLVMBuild.txt b/lib/CodeGen/SelectionDAG/LLVMBuild.txt new file mode 100644 index 0000000..10a849f --- /dev/null +++ b/lib/CodeGen/SelectionDAG/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/CodeGen/SelectionDAG/LLVMBuild.txt -----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = SelectionDAG +parent = CodeGen +required_libraries = Analysis CodeGen Core MC Support Target TransformUtils + diff --git a/lib/DebugInfo/LLVMBuild.txt b/lib/DebugInfo/LLVMBuild.txt new file mode 100644 index 0000000..b46d3d2 --- /dev/null +++ b/lib/DebugInfo/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/DebugInfo/LLVMBuild.txt ----------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = DebugInfo +parent = Libraries +required_libraries = Support + diff --git a/lib/ExecutionEngine/Interpreter/LLVMBuild.txt b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt new file mode 100644 index 0000000..459426d --- /dev/null +++ b/lib/ExecutionEngine/Interpreter/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/ExecutionEngine/Interpreter/LLVMBuild.txt ----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Interpreter +parent = ExecutionEngine +required_libraries = CodeGen Core ExecutionEngine Support Target + diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/ExecutionEngine/JIT/LLVMBuild.txt new file mode 100644 index 0000000..b974713 --- /dev/null +++ b/lib/ExecutionEngine/JIT/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/ExecutionEngine/JIT/LLVMBuild.txt ------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = JIT +parent = ExecutionEngine +required_libraries = CodeGen Core ExecutionEngine MC Support Target + diff --git a/lib/ExecutionEngine/LLVMBuild.txt b/lib/ExecutionEngine/LLVMBuild.txt new file mode 100644 index 0000000..1ef6a44 --- /dev/null +++ b/lib/ExecutionEngine/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/ExecutionEngine/LLVMBuild.txt ----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ExecutionEngine +parent = Libraries +required_libraries = Core MC Support Target + diff --git a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt new file mode 100644 index 0000000..9b08d3b --- /dev/null +++ b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/ExecutionEngine/MCJIT/LLVMBuild.txt ----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MCJIT +parent = ExecutionEngine +required_libraries = Core ExecutionEngine RuntimeDyld Support Target + diff --git a/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt new file mode 100644 index 0000000..5e39814 --- /dev/null +++ b/lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/ExecutionEngine/RuntimeDyld/LLVMBuild.txt ----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = RuntimeDyld +parent = ExecutionEngine +required_libraries = Object Support + diff --git a/lib/LLVMBuild.txt b/lib/LLVMBuild.txt new file mode 100644 index 0000000..c3fa1ff --- /dev/null +++ b/lib/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/LLVMBuild.txt --------------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Group +name = Libraries +parent = $ROOT + diff --git a/lib/Linker/LLVMBuild.txt b/lib/Linker/LLVMBuild.txt new file mode 100644 index 0000000..69f2ac4 --- /dev/null +++ b/lib/Linker/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Linker/LLVMBuild.txt -------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Linker +parent = Libraries +required_libraries = Archive BitReader Core Support TransformUtils + diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt new file mode 100644 index 0000000..acc8dff --- /dev/null +++ b/lib/MC/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/MC/LLVMBuild.txt -----------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MC +parent = Libraries +required_libraries = Support + diff --git a/lib/MC/MCDisassembler/LLVMBuild.txt b/lib/MC/MCDisassembler/LLVMBuild.txt new file mode 100644 index 0000000..33f95af --- /dev/null +++ b/lib/MC/MCDisassembler/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/MC/MCDisassembler/LLVMBuild.txt --------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MCDisassembler +parent = MC +required_libraries = ARMAsmParser ARMDesc ARMDisassembler ARMInfo CBackendInfo CellSPUDesc CellSPUInfo CppBackendInfo MBlazeAsmParser MBlazeDesc MBlazeDisassembler MBlazeInfo MC MCParser MSP430Desc MSP430Info MipsDesc MipsInfo PTXDesc PTXInfo PowerPCDesc PowerPCInfo SparcDesc SparcInfo Support X86AsmParser X86Desc X86Disassembler X86Info XCoreDesc XCoreInfo + diff --git a/lib/MC/MCParser/LLVMBuild.txt b/lib/MC/MCParser/LLVMBuild.txt new file mode 100644 index 0000000..83146a9 --- /dev/null +++ b/lib/MC/MCParser/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/MC/MCParser/LLVMBuild.txt --------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MCParser +parent = MC +required_libraries = MC Support + diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt new file mode 100644 index 0000000..44c0083 --- /dev/null +++ b/lib/Object/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Object/LLVMBuild.txt -------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Object +parent = Libraries +required_libraries = Support + diff --git a/lib/Support/LLVMBuild.txt b/lib/Support/LLVMBuild.txt new file mode 100644 index 0000000..f32ef8f --- /dev/null +++ b/lib/Support/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Support/LLVMBuild.txt ------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Support +parent = Libraries + diff --git a/lib/TableGen/LLVMBuild.txt b/lib/TableGen/LLVMBuild.txt new file mode 100644 index 0000000..4e24c37 --- /dev/null +++ b/lib/TableGen/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/TableGen/LLVMBuild.txt -----------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = TableGen +parent = Libraries +required_libraries = Support + diff --git a/lib/Target/ARM/AsmParser/LLVMBuild.txt b/lib/Target/ARM/AsmParser/LLVMBuild.txt new file mode 100644 index 0000000..cbf9b4b --- /dev/null +++ b/lib/Target/ARM/AsmParser/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/ARM/AsmParser/LLVMBuild.txt -----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ARMAsmParser +parent = ARM +required_libraries = ARMDesc ARMInfo MC MCParser Support +add_to_library_groups = ARM + diff --git a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt new file mode 100644 index 0000000..dff57b4 --- /dev/null +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/ARM/Disassembler/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ARMDisassembler +parent = ARM +required_libraries = ARMDesc ARMInfo MC Support +add_to_library_groups = ARM + diff --git a/lib/Target/ARM/InstPrinter/LLVMBuild.txt b/lib/Target/ARM/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..b34aab4 --- /dev/null +++ b/lib/Target/ARM/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/ARM/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ARMAsmPrinter +parent = ARM +required_libraries = MC Support +add_to_library_groups = ARM + diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt new file mode 100644 index 0000000..a7f209c --- /dev/null +++ b/lib/Target/ARM/LLVMBuild.txt @@ -0,0 +1,30 @@ +;===- ./lib/Target/ARM/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = ARM +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = ARMCodeGen +parent = ARM +required_libraries = ARMAsmPrinter ARMDesc ARMInfo Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target +add_to_library_groups = ARM + diff --git a/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..46b11c7 --- /dev/null +++ b/lib/Target/ARM/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/ARM/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ARMDesc +parent = ARM +required_libraries = ARMAsmPrinter ARMInfo MC Support +add_to_library_groups = ARM + diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..7d7504f --- /dev/null +++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/ARM/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = ARMInfo +parent = ARM +required_libraries = MC Support +add_to_library_groups = ARM + diff --git a/lib/Target/CBackend/LLVMBuild.txt b/lib/Target/CBackend/LLVMBuild.txt new file mode 100644 index 0000000..1bc5ea1 --- /dev/null +++ b/lib/Target/CBackend/LLVMBuild.txt @@ -0,0 +1,31 @@ +;===- ./lib/Target/CBackend/LLVMBuild.txt ----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = CBackend +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = CBackendCodeGen +parent = CBackend +library_name = CBackend +required_libraries = Analysis CBackendInfo CodeGen Core MC Scalar Support Target TransformUtils +add_to_library_groups = CBackend + diff --git a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..943fe2d --- /dev/null +++ b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/CBackend/TargetInfo/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = CBackendInfo +parent = CBackend +required_libraries = MC Support +add_to_library_groups = CBackend + diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt new file mode 100644 index 0000000..03e592c --- /dev/null +++ b/lib/Target/CellSPU/LLVMBuild.txt @@ -0,0 +1,30 @@ +;===- ./lib/Target/CellSPU/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = CellSPU +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = CellSPUCodeGen +parent = CellSPU +required_libraries = AsmPrinter CellSPUDesc CellSPUInfo CodeGen Core MC SelectionDAG Support Target +add_to_library_groups = CellSPU + diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..b5147ae --- /dev/null +++ b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = CellSPUDesc +parent = CellSPU +required_libraries = CellSPUInfo MC Support +add_to_library_groups = CellSPU + diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..7525359 --- /dev/null +++ b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/CellSPU/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = CellSPUInfo +parent = CellSPU +required_libraries = MC Support +add_to_library_groups = CellSPU + diff --git a/lib/Target/CppBackend/LLVMBuild.txt b/lib/Target/CppBackend/LLVMBuild.txt new file mode 100644 index 0000000..9602f57 --- /dev/null +++ b/lib/Target/CppBackend/LLVMBuild.txt @@ -0,0 +1,31 @@ +;===- ./lib/Target/CppBackend/LLVMBuild.txt --------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = CppBackend +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = CppBackendCodeGen +parent = CppBackend +library_name = CppBackend +required_libraries = Core CppBackendInfo Support Target +add_to_library_groups = CppBackend + diff --git a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..b130fee --- /dev/null +++ b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/CppBackend/TargetInfo/LLVMBuild.txt ---------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = CppBackendInfo +parent = CppBackend +required_libraries = MC Support +add_to_library_groups = CppBackend + diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt new file mode 100644 index 0000000..073a76a --- /dev/null +++ b/lib/Target/LLVMBuild.txt @@ -0,0 +1,40 @@ +;===- ./lib/Target/LLVMBuild.txt -------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Target +parent = Libraries +required_libraries = Core MC Support + +[component_1] +type = LibraryGroup +name = all-targets +parent = Libraries + +[component_2] +type = LibraryGroup +name = native +parent = Libraries +required_libraries = X86 + +[component_3] +type = LibraryGroup +name = nativecodegen +parent = Libraries +required_libraries = X86CodeGen + diff --git a/lib/Target/MBlaze/AsmParser/LLVMBuild.txt b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt new file mode 100644 index 0000000..2c61a7f --- /dev/null +++ b/lib/Target/MBlaze/AsmParser/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/MBlaze/AsmParser/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MBlazeAsmParser +parent = MBlaze +required_libraries = MBlazeInfo MC MCParser Support +add_to_library_groups = MBlaze + diff --git a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt new file mode 100644 index 0000000..c5c4f80 --- /dev/null +++ b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/MBlaze/Disassembler/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MBlazeDisassembler +parent = MBlaze +required_libraries = MBlazeDesc MBlazeInfo MC Support +add_to_library_groups = MBlaze + diff --git a/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..7a21f1e --- /dev/null +++ b/lib/Target/MBlaze/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/MBlaze/InstPrinter/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MBlazeAsmPrinter +parent = MBlaze +required_libraries = MC Support +add_to_library_groups = MBlaze + diff --git a/lib/Target/MBlaze/LLVMBuild.txt b/lib/Target/MBlaze/LLVMBuild.txt new file mode 100644 index 0000000..7bea268 --- /dev/null +++ b/lib/Target/MBlaze/LLVMBuild.txt @@ -0,0 +1,30 @@ +;===- ./lib/Target/MBlaze/LLVMBuild.txt ------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = MBlaze +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = MBlazeCodeGen +parent = MBlaze +required_libraries = AsmPrinter CodeGen Core MBlazeAsmPrinter MBlazeDesc MBlazeInfo MC SelectionDAG Support Target +add_to_library_groups = MBlaze + diff --git a/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..e89811b --- /dev/null +++ b/lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/MBlaze/MCTargetDesc/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MBlazeDesc +parent = MBlaze +required_libraries = MBlazeAsmPrinter MBlazeInfo MC Support +add_to_library_groups = MBlaze + diff --git a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..488c2c7 --- /dev/null +++ b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/MBlaze/TargetInfo/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MBlazeInfo +parent = MBlaze +required_libraries = MC Support +add_to_library_groups = MBlaze + diff --git a/lib/Target/MSP430/InstPrinter/LLVMBuild.txt b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..aeb863a --- /dev/null +++ b/lib/Target/MSP430/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/MSP430/InstPrinter/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MSP430AsmPrinter +parent = MSP430 +required_libraries = MC Support +add_to_library_groups = MSP430 + diff --git a/lib/Target/MSP430/LLVMBuild.txt b/lib/Target/MSP430/LLVMBuild.txt new file mode 100644 index 0000000..3081146 --- /dev/null +++ b/lib/Target/MSP430/LLVMBuild.txt @@ -0,0 +1,30 @@ +;===- ./lib/Target/MSP430/LLVMBuild.txt ------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = MSP430 +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = MSP430CodeGen +parent = MSP430 +required_libraries = AsmPrinter CodeGen Core MC MSP430AsmPrinter MSP430Desc MSP430Info SelectionDAG Support Target +add_to_library_groups = MSP430 + diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..5d41082 --- /dev/null +++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MSP430Desc +parent = MSP430 +required_libraries = MC MSP430AsmPrinter MSP430Info Support +add_to_library_groups = MSP430 + diff --git a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..3bcc826 --- /dev/null +++ b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/MSP430/TargetInfo/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MSP430Info +parent = MSP430 +required_libraries = MC Support +add_to_library_groups = MSP430 + diff --git a/lib/Target/Mips/InstPrinter/LLVMBuild.txt b/lib/Target/Mips/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..d953a61 --- /dev/null +++ b/lib/Target/Mips/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/Mips/InstPrinter/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MipsAsmPrinter +parent = Mips +required_libraries = MC Support +add_to_library_groups = Mips + diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt new file mode 100644 index 0000000..b9d9abf --- /dev/null +++ b/lib/Target/Mips/LLVMBuild.txt @@ -0,0 +1,30 @@ +;===- ./lib/Target/Mips/LLVMBuild.txt --------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = Mips +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = MipsCodeGen +parent = Mips +required_libraries = AsmPrinter CodeGen Core MC MipsAsmPrinter MipsDesc MipsInfo SelectionDAG Support Target +add_to_library_groups = Mips + diff --git a/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..d6f5dd2 --- /dev/null +++ b/lib/Target/Mips/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/Mips/MCTargetDesc/LLVMBuild.txt -------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MipsDesc +parent = Mips +required_libraries = MC MipsAsmPrinter MipsInfo Support +add_to_library_groups = Mips + diff --git a/lib/Target/Mips/TargetInfo/LLVMBuild.txt b/lib/Target/Mips/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..e8035af --- /dev/null +++ b/lib/Target/Mips/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/Mips/TargetInfo/LLVMBuild.txt ---------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = MipsInfo +parent = Mips +required_libraries = MC Support +add_to_library_groups = Mips + diff --git a/lib/Target/PTX/InstPrinter/LLVMBuild.txt b/lib/Target/PTX/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..be89c10 --- /dev/null +++ b/lib/Target/PTX/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/PTX/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = PTXAsmPrinter +parent = PTX +required_libraries = MC Support +add_to_library_groups = PTX + diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt new file mode 100644 index 0000000..2d24524 --- /dev/null +++ b/lib/Target/PTX/LLVMBuild.txt @@ -0,0 +1,30 @@ +;===- ./lib/Target/PTX/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = PTX +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = PTXCodeGen +parent = PTX +required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo Scalar SelectionDAG Support Target TransformUtils +add_to_library_groups = PTX + diff --git a/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..fff21c1 --- /dev/null +++ b/lib/Target/PTX/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/PTX/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = PTXDesc +parent = PTX +required_libraries = MC PTXAsmPrinter PTXInfo Support +add_to_library_groups = PTX + diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..f35c237 --- /dev/null +++ b/lib/Target/PTX/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/PTX/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = PTXInfo +parent = PTX +required_libraries = MC Support +add_to_library_groups = PTX + diff --git a/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..afbb2b1 --- /dev/null +++ b/lib/Target/PowerPC/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/PowerPC/InstPrinter/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = PowerPCAsmPrinter +parent = PowerPC +required_libraries = MC Support +add_to_library_groups = PowerPC + diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt new file mode 100644 index 0000000..4a93587 --- /dev/null +++ b/lib/Target/PowerPC/LLVMBuild.txt @@ -0,0 +1,30 @@ +;===- ./lib/Target/PowerPC/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = PowerPC +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = PowerPCCodeGen +parent = PowerPC +required_libraries = Analysis AsmPrinter CodeGen Core MC PowerPCAsmPrinter PowerPCDesc PowerPCInfo SelectionDAG Support Target +add_to_library_groups = PowerPC + diff --git a/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..fc2da83 --- /dev/null +++ b/lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/PowerPC/MCTargetDesc/LLVMBuild.txt ----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = PowerPCDesc +parent = PowerPC +required_libraries = MC PowerPCAsmPrinter PowerPCInfo Support +add_to_library_groups = PowerPC + diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..1f5d3e7 --- /dev/null +++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/PowerPC/TargetInfo/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = PowerPCInfo +parent = PowerPC +required_libraries = MC Support +add_to_library_groups = PowerPC + diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt new file mode 100644 index 0000000..21cd91f --- /dev/null +++ b/lib/Target/Sparc/LLVMBuild.txt @@ -0,0 +1,30 @@ +;===- ./lib/Target/Sparc/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = Sparc +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = SparcCodeGen +parent = Sparc +required_libraries = AsmPrinter CodeGen Core MC SelectionDAG SparcDesc SparcInfo Support Target +add_to_library_groups = Sparc + diff --git a/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..a339cec --- /dev/null +++ b/lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/Sparc/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = SparcDesc +parent = Sparc +required_libraries = MC SparcInfo Support +add_to_library_groups = Sparc + diff --git a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..22f4e1f --- /dev/null +++ b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/Sparc/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = SparcInfo +parent = Sparc +required_libraries = MC Support +add_to_library_groups = Sparc + diff --git a/lib/Target/X86/AsmParser/LLVMBuild.txt b/lib/Target/X86/AsmParser/LLVMBuild.txt new file mode 100644 index 0000000..6c2405a --- /dev/null +++ b/lib/Target/X86/AsmParser/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/X86/AsmParser/LLVMBuild.txt -----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = X86AsmParser +parent = X86 +required_libraries = MC MCParser Support X86Desc X86Info +add_to_library_groups = X86 + diff --git a/lib/Target/X86/Disassembler/LLVMBuild.txt b/lib/Target/X86/Disassembler/LLVMBuild.txt new file mode 100644 index 0000000..cd748cf --- /dev/null +++ b/lib/Target/X86/Disassembler/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/X86/Disassembler/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = X86Disassembler +parent = X86 +required_libraries = MC Support X86Info +add_to_library_groups = X86 + diff --git a/lib/Target/X86/InstPrinter/LLVMBuild.txt b/lib/Target/X86/InstPrinter/LLVMBuild.txt new file mode 100644 index 0000000..fb01323 --- /dev/null +++ b/lib/Target/X86/InstPrinter/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/X86/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = X86AsmPrinter +parent = X86 +required_libraries = MC Support X86Utils +add_to_library_groups = X86 + diff --git a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt new file mode 100644 index 0000000..b0958f3 --- /dev/null +++ b/lib/Target/X86/LLVMBuild.txt @@ -0,0 +1,30 @@ +;===- ./lib/Target/X86/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = X86 +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = X86CodeGen +parent = X86 +required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support Target X86AsmPrinter X86Desc X86Info X86Utils +add_to_library_groups = X86 + diff --git a/lib/Target/X86/MCTargetDesc/LLVMBuild.txt b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..3d09301 --- /dev/null +++ b/lib/Target/X86/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/X86/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = X86Desc +parent = X86 +required_libraries = MC Support X86AsmPrinter X86Info +add_to_library_groups = X86 + diff --git a/lib/Target/X86/TargetInfo/LLVMBuild.txt b/lib/Target/X86/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..6b2635b --- /dev/null +++ b/lib/Target/X86/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/X86/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = X86Info +parent = X86 +required_libraries = MC Support +add_to_library_groups = X86 + diff --git a/lib/Target/X86/Utils/LLVMBuild.txt b/lib/Target/X86/Utils/LLVMBuild.txt new file mode 100644 index 0000000..3ee441e --- /dev/null +++ b/lib/Target/X86/Utils/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/X86/Utils/LLVMBuild.txt ---------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = X86Utils +parent = X86 +required_libraries = Core Support +add_to_library_groups = X86 + diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt new file mode 100644 index 0000000..321f25f --- /dev/null +++ b/lib/Target/XCore/LLVMBuild.txt @@ -0,0 +1,30 @@ +;===- ./lib/Target/XCore/LLVMBuild.txt -------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = LibraryGroup +name = XCore +parent = Target +add_to_library_groups = all-targets + +[component_1] +type = Library +name = XCoreCodeGen +parent = XCore +required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support Target XCoreDesc XCoreInfo +add_to_library_groups = XCore + diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt new file mode 100644 index 0000000..7f4a433 --- /dev/null +++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/XCore/MCTargetDesc/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = XCoreDesc +parent = XCore +required_libraries = MC Support XCoreInfo +add_to_library_groups = XCore + diff --git a/lib/Target/XCore/TargetInfo/LLVMBuild.txt b/lib/Target/XCore/TargetInfo/LLVMBuild.txt new file mode 100644 index 0000000..1d1b722 --- /dev/null +++ b/lib/Target/XCore/TargetInfo/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Target/XCore/TargetInfo/LLVMBuild.txt --------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = XCoreInfo +parent = XCore +required_libraries = MC Support +add_to_library_groups = XCore + diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt new file mode 100644 index 0000000..bf5d5f4 --- /dev/null +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Transforms/IPO/LLVMBuild.txt -----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = IPO +parent = Transforms +library_name = ipo +required_libraries = Analysis Core InstCombine Scalar Support Target TransformUtils IPA + diff --git a/lib/Transforms/InstCombine/LLVMBuild.txt b/lib/Transforms/InstCombine/LLVMBuild.txt new file mode 100644 index 0000000..b73c303 --- /dev/null +++ b/lib/Transforms/InstCombine/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Transforms/InstCombine/LLVMBuild.txt ---------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = InstCombine +parent = Transforms +required_libraries = Analysis Core Support Target TransformUtils + diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt new file mode 100644 index 0000000..f302d03 --- /dev/null +++ b/lib/Transforms/Instrumentation/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Transforms/Instrumentation/LLVMBuild.txt -----------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Instrumentation +parent = Transforms +required_libraries = Analysis Core Support TransformUtils + diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt new file mode 100644 index 0000000..d36b898 --- /dev/null +++ b/lib/Transforms/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Transforms/LLVMBuild.txt ---------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Group +name = Transforms +parent = Libraries + diff --git a/lib/Transforms/Scalar/LLVMBuild.txt b/lib/Transforms/Scalar/LLVMBuild.txt new file mode 100644 index 0000000..027634d --- /dev/null +++ b/lib/Transforms/Scalar/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Transforms/Scalar/LLVMBuild.txt --------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Scalar +parent = Transforms +library_name = ScalarOpts +required_libraries = Analysis Core InstCombine Support Target TransformUtils + diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt new file mode 100644 index 0000000..6ba983c --- /dev/null +++ b/lib/Transforms/Utils/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Transforms/Utils/LLVMBuild.txt ---------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = TransformUtils +parent = Transforms +required_libraries = Analysis Core Support Target IPA + diff --git a/lib/VMCore/LLVMBuild.txt b/lib/VMCore/LLVMBuild.txt new file mode 100644 index 0000000..45f528e --- /dev/null +++ b/lib/VMCore/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/VMCore/LLVMBuild.txt -------------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. 
+; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = Core +parent = Libraries +required_libraries = Support + -- cgit v1.1 From e1f38f2ce1221176cc4934a73283cb151e1f940d Mon Sep 17 00:00:00 2001 From: Dan Bailey Date: Thu, 3 Nov 2011 19:24:46 +0000 Subject: fixed global array handling for ptx to use the correct bit widths git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143640 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PTX/PTXAsmPrinter.cpp | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index b2aa7b2..e329d5d 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -318,11 +318,7 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { if (PointerType::classof(gv->getType())) { PointerType* pointerTy = dyn_cast(gv->getType()); Type* elementTy = pointerTy->getElementType(); - - decl += ".b8 "; - decl += gvsym->getName(); - decl += "["; - + if (elementTy->isArrayTy()) { assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); @@ -343,15 +339,24 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { // FIXME: isPrimitiveType() == false for i16? assert(elementTy->isSingleValueType() && "Non-primitive types are not handled"); + + // Find the size of the element in bits + unsigned elementSize = elementTy->getPrimitiveSizeInBits(); - // Compute the size of the array, in bytes. 
- uint64_t arraySize = (elementTy->getPrimitiveSizeInBits() >> 3) - * numElements; - - decl += utostr(arraySize); + decl += ".b"; + decl += utostr(elementSize); + decl += " "; + decl += gvsym->getName(); + decl += "["; + decl += utostr(numElements); + decl += "]"; + } + else + { + decl += ".b8 "; + decl += gvsym->getName(); + decl += "[]"; } - - decl += "]"; // handle string constants (assume ConstantArray means string) -- cgit v1.1 From 71fccadbed49b555d9582198ee58301494109410 Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Thu, 3 Nov 2011 20:47:50 +0000 Subject: Reverted r143600 - selector reference change git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143646 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineInstr.cpp | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index a2fd3c4..a240667 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1232,19 +1232,6 @@ bool MachineInstr::hasVolatileMemoryRef() const { return false; } -/// pointsToRuntimeConstantMemory - Return true if this value points to data -/// which does never changes once the program starts running -static bool pointsToRuntimeConstantMemory(const Value *V) { - if (const GlobalVariable *GV = dyn_cast(V)) { - StringRef Name = GV->getName(); - // These special values are known to be constant at runtime - // TODO: a new linkage type for these would be far better than this check - if (Name.startswith("\01L_OBJC_SELECTOR_REFERENCES_")) - return true; - } - return false; -} - /// isInvariantLoad - Return true if this instruction is loading from a /// location whose value is invariant across the function. 
For example, /// loading a value from the constant pool or from the argument area @@ -1272,8 +1259,6 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { if (const PseudoSourceValue *PSV = dyn_cast(V)) if (PSV->isConstant(MFI)) continue; - if (pointsToRuntimeConstantMemory(V)) - continue; // If we have an AliasAnalysis, ask it whether the memory is constant. if (AA && AA->pointsToConstantMemory( AliasAnalysis::Location(V, (*I)->getSize(), -- cgit v1.1 From 65fd6564b8aedd053845c81ede1ac594acb470e4 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Thu, 3 Nov 2011 21:49:52 +0000 Subject: Reapply r143206, with fixes. Disallow physical register lifetimes across calls, and only check for nested dependences on the special call-sequence-resource register. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143660 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 772 ++++++++----------------- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 1 - lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 183 +++++- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 16 + lib/Target/ARM/ARMISelLowering.cpp | 19 +- lib/Target/X86/X86ISelDAGToDAG.cpp | 4 +- lib/Target/X86/X86ISelLowering.cpp | 194 ++++++- lib/Target/XCore/XCoreISelLowering.cpp | 9 + 8 files changed, 666 insertions(+), 532 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index a8bccda..7425669 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -46,37 +46,18 @@ using namespace llvm; /// will attempt merge setcc and brc instructions into brcc's. /// namespace { -class SelectionDAGLegalize { +class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener { const TargetMachine &TM; const TargetLowering &TLI; SelectionDAG &DAG; - // Libcall insertion helpers. - - /// LastCALLSEQ_END - This keeps track of the CALLSEQ_END node that has been - /// legalized. 
We use this to ensure that calls are properly serialized - /// against each other, including inserted libcalls. - SDValue LastCALLSEQ_END; - - /// IsLegalizingCall - This member is used *only* for purposes of providing - /// helpful assertions that a libcall isn't created while another call is - /// being legalized (which could lead to non-serialized call sequences). - bool IsLegalizingCall; + /// LegalizePosition - The iterator for walking through the node list. + SelectionDAG::allnodes_iterator LegalizePosition; - /// LegalizedNodes - For nodes that are of legal width, and that have more - /// than one use, this map indicates what regularized operand to use. This - /// allows us to avoid legalizing the same thing more than once. - DenseMap LegalizedNodes; + /// LegalizedNodes - The set of nodes which have already been legalized. + SmallPtrSet LegalizedNodes; - void AddLegalizedOperand(SDValue From, SDValue To) { - LegalizedNodes.insert(std::make_pair(From, To)); - // If someone requests legalization of the new node, return itself. - if (From != To) - LegalizedNodes.insert(std::make_pair(To, To)); - - // Transfer SDDbgValues. - DAG.TransferDbgValues(From, To); - } + // Libcall insertion helpers. public: explicit SelectionDAGLegalize(SelectionDAG &DAG); @@ -84,9 +65,8 @@ public: void LegalizeDAG(); private: - /// LegalizeOp - Return a legal replacement for the given operation, with - /// all legal operands. - SDValue LegalizeOp(SDValue O); + /// LegalizeOp - Legalizes the given operation. 
+ void LegalizeOp(SDNode *Node); SDValue OptimizeFloatStore(StoreSDNode *ST); @@ -107,9 +87,6 @@ private: SDValue N1, SDValue N2, SmallVectorImpl &Mask) const; - bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet &NodesLeadingTo); - void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl); @@ -150,10 +127,21 @@ private: SDValue ExpandInsertToVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); + SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + std::pair ExpandAtomic(SDNode *Node); - void ExpandNode(SDNode *Node, SmallVectorImpl &Results); - void PromoteNode(SDNode *Node, SmallVectorImpl &Results); + void ExpandNode(SDNode *Node); + void PromoteNode(SDNode *Node); + + // DAGUpdateListener implementation. + virtual void NodeDeleted(SDNode *N, SDNode *E) { + LegalizedNodes.erase(N); + if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) + ++LegalizePosition; + } + + virtual void NodeUpdated(SDNode *N) {} }; } @@ -195,145 +183,37 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag) } void SelectionDAGLegalize::LegalizeDAG() { - LastCALLSEQ_END = DAG.getEntryNode(); - IsLegalizingCall = false; - - // The legalize process is inherently a bottom-up recursive process (users - // legalize their uses before themselves). Given infinite stack space, we - // could just start legalizing on the root and traverse the whole graph. In - // practice however, this causes us to run out of stack space on large basic - // blocks. To avoid this problem, compute an ordering of the nodes where each - // node is only legalized after all of its operands are legalized. DAG.AssignTopologicalOrder(); - for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), - E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) - LegalizeOp(SDValue(I, 0)); - - // Finally, it's possible the root changed. Get the new root. 
- SDValue OldRoot = DAG.getRoot(); - assert(LegalizedNodes.count(OldRoot) && "Root didn't get legalized?"); - DAG.setRoot(LegalizedNodes[OldRoot]); - - LegalizedNodes.clear(); - - // Remove dead nodes now. - DAG.RemoveDeadNodes(); -} - -/// FindCallEndFromCallStart - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_END node that terminates the call sequence. -static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) { - // Nested CALLSEQ_START/END constructs aren't yet legal, - // but we can DTRT and handle them correctly here. - if (Node->getOpcode() == ISD::CALLSEQ_START) - depth++; - else if (Node->getOpcode() == ISD::CALLSEQ_END) { - depth--; - if (depth == 0) - return Node; - } - if (Node->use_empty()) - return 0; // No CallSeqEnd - - // The chain is usually at the end. - SDValue TheChain(Node, Node->getNumValues()-1); - if (TheChain.getValueType() != MVT::Other) { - // Sometimes it's at the beginning. - TheChain = SDValue(Node, 0); - if (TheChain.getValueType() != MVT::Other) { - // Otherwise, hunt for it. - for (unsigned i = 1, e = Node->getNumValues(); i != e; ++i) - if (Node->getValueType(i) == MVT::Other) { - TheChain = SDValue(Node, i); - break; - } - - // Otherwise, we walked into a node without a chain. - if (TheChain.getValueType() != MVT::Other) - return 0; + // Visit all the nodes. We start in topological order, so that we see + // nodes with their original operands intact. Legalization can produce + // new nodes which may themselves need to be legalized. Iterate until all + // nodes have been legalized. 
+ for (;;) { + bool AnyLegalized = false; + for (LegalizePosition = DAG.allnodes_end(); + LegalizePosition != DAG.allnodes_begin(); ) { + --LegalizePosition; + + SDNode *N = LegalizePosition; + if (LegalizedNodes.insert(N)) { + AnyLegalized = true; + LegalizeOp(N); + } } - } - - for (SDNode::use_iterator UI = Node->use_begin(), - E = Node->use_end(); UI != E; ++UI) { - - // Make sure to only follow users of our token chain. - SDNode *User = *UI; - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) - if (User->getOperand(i) == TheChain) - if (SDNode *Result = FindCallEndFromCallStart(User, depth)) - return Result; - } - return 0; -} - -/// FindCallStartFromCallEnd - Given a chained node that is part of a call -/// sequence, find the CALLSEQ_START node that initiates the call sequence. -static SDNode *FindCallStartFromCallEnd(SDNode *Node) { - int nested = 0; - assert(Node && "Didn't find callseq_start for a call??"); - while (Node->getOpcode() != ISD::CALLSEQ_START || nested) { - Node = Node->getOperand(0).getNode(); - assert(Node->getOperand(0).getValueType() == MVT::Other && - "Node doesn't have a token chain argument!"); - switch (Node->getOpcode()) { - default: + if (!AnyLegalized) break; - case ISD::CALLSEQ_START: - if (!nested) - return Node; - nested--; - break; - case ISD::CALLSEQ_END: - nested++; - break; - } - } - return 0; -} - -/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to -/// see if any uses can reach Dest. If no dest operands can get to dest, -/// legalize them, legalize ourself, and return false, otherwise, return true. -/// -/// Keep track of the nodes we fine that actually do lead to Dest in -/// NodesLeadingTo. This avoids retraversing them exponential number of times. 
-/// -bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, - SmallPtrSet &NodesLeadingTo) { - if (N == Dest) return true; // N certainly leads to Dest :) - - // If we've already processed this node and it does lead to Dest, there is no - // need to reprocess it. - if (NodesLeadingTo.count(N)) return true; - // If the first result of this node has been already legalized, then it cannot - // reach N. - if (LegalizedNodes.count(SDValue(N, 0))) return false; - - // Okay, this node has not already been legalized. Check and legalize all - // operands. If none lead to Dest, then we can legalize this node. - bool OperandsLeadToDest = false; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - OperandsLeadToDest |= // If an operand leads to Dest, so do we. - LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, - NodesLeadingTo); - - if (OperandsLeadToDest) { - NodesLeadingTo.insert(N); - return true; } - // Okay, this node looks safe, legalize it and return false. - LegalizeOp(SDValue(N, 0)); - return false; + // Remove dead nodes now. + DAG.RemoveDeadNodes(); } /// ExpandConstantFP - Expands the ConstantFP node to an integer constant or /// a load from the constant pool. 
-static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, - SelectionDAG &DAG, const TargetLowering &TLI) { +SDValue +SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { bool Extend = false; DebugLoc dl = CFP->getDebugLoc(); @@ -369,20 +249,25 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); - if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, - DAG.getEntryNode(), - CPIdx, MachinePointerInfo::getConstantPool(), - VT, false, false, Alignment); - return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, - Alignment); + if (Extend) { + SDValue Result = + DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT, + DAG.getEntryNode(), + CPIdx, MachinePointerInfo::getConstantPool(), + VT, false, false, Alignment); + return Result; + } + SDValue Result = + DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), false, false, + Alignment); + return Result; } /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. -static -SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, + const TargetLowering &TLI, + SelectionDAG::DAGUpdateListener *DUL) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -397,8 +282,10 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // same size, then a (misaligned) int store. // FIXME: Does not handle truncating floating point stores! 
SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); - return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), Alignment); + Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), + ST->isVolatile(), ST->isNonTemporal(), Alignment); + DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + return; } // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. @@ -458,8 +345,11 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, ST->isNonTemporal(), MinAlign(ST->getAlignment(), Offset))); // The order of the stores doesn't matter - say it with a TokenFactor. - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], - Stores.size()); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], + Stores.size()); + DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + return; } assert(ST->getMemoryVT().isInteger() && !ST->getMemoryVT().isVector() && @@ -488,13 +378,16 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, NewStoredVT, ST->isVolatile(), ST->isNonTemporal(), Alignment); - return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + SDValue Result = + DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); + DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. 
-static -SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, - const TargetLowering &TLI) { +static void +ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, + const TargetLowering &TLI, + SDValue &ValResult, SDValue &ChainResult) { SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); EVT VT = LD->getValueType(0); @@ -512,8 +405,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); - SDValue Ops[] = { Result, Chain }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = Chain; + return; } // Copy the value to a (aligned) stack slot using (unaligned) integer @@ -572,8 +466,9 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, MachinePointerInfo(), LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. - SDValue Ops[] = { Load, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Load; + ChainResult = TF; + return; } assert(LoadedVT.isInteger() && !LoadedVT.isVector() && "Unaligned load of unsupported type."); @@ -626,8 +521,8 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); - SDValue Ops[] = { Result, TF }; - return DAG.getMergeValues(Ops, 2, dl); + ValResult = Result; + ChainResult = TF; } /// PerformInsertVectorEltInMemory - Some target cannot handle a variable @@ -763,11 +658,10 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) { /// LegalizeOp - Return a legal replacement for the given operation, with /// all legal operands. -SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { - if (Op.getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. - return Op; +void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { + if (Node->getOpcode() == ISD::TargetConstant) // Allow illegal target nodes. 
+ return; - SDNode *Node = Op.getNode(); DebugLoc dl = Node->getDebugLoc(); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -782,13 +676,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - DenseMap::iterator I = LegalizedNodes.find(Op); - if (I != LegalizedNodes.end()) return I->second; - SDValue Tmp1, Tmp2, Tmp3, Tmp4; - SDValue Result = Op; bool isCustom = false; // Figure out the correct action; the way to query this varies by opcode @@ -882,17 +770,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Action == TargetLowering::Legal) Action = TargetLowering::Custom; break; - case ISD::BUILD_VECTOR: - // A weird case: legalization for BUILD_VECTOR never legalizes the - // operands! - // FIXME: This really sucks... changing it isn't semantically incorrect, - // but it massively pessimizes the code for floating-point BUILD_VECTORs - // because ConstantFP operands get legalized into constant pool loads - // before the BUILD_VECTOR code can see them. It doesn't usually bite, - // though, because BUILD_VECTORS usually get lowered into other nodes - // which get legalized properly. 
- SimpleFinishLegalizing = false; - break; default: if (Node->getOpcode() >= ISD::BUILTIN_OP_END) { Action = TargetLowering::Legal; @@ -903,22 +780,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } if (SimpleFinishLegalizing) { - SmallVector Ops, ResultVals; + SmallVector Ops; for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - Ops.push_back(LegalizeOp(Node->getOperand(i))); + Ops.push_back(Node->getOperand(i)); switch (Node->getOpcode()) { default: break; - case ISD::BR: - case ISD::BRIND: - case ISD::BR_JT: - case ISD::BR_CC: - case ISD::BRCOND: - // Branches tweak the chain to include LastCALLSEQ_END - Ops[0] = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ops[0], - LastCALLSEQ_END); - Ops[0] = LegalizeOp(Ops[0]); - LastCALLSEQ_END = DAG.getEntryNode(); - break; case ISD::SHL: case ISD::SRL: case ISD::SRA: @@ -926,57 +792,66 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ROTR: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. - if (!Ops[1].getValueType().isVector()) - Ops[1] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[1])); + if (!Ops[1].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[1]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[1] = Handle.getValue(); + } break; case ISD::SRL_PARTS: case ISD::SRA_PARTS: case ISD::SHL_PARTS: // Legalizing shifts/rotates requires adjusting the shift amount // to the appropriate width. 
- if (!Ops[2].getValueType().isVector()) - Ops[2] = LegalizeOp(DAG.getShiftAmountOperand(Ops[0].getValueType(), - Ops[2])); + if (!Ops[2].getValueType().isVector()) { + SDValue SAO = DAG.getShiftAmountOperand(Ops[0].getValueType(), Ops[2]); + HandleSDNode Handle(SAO); + LegalizeOp(SAO.getNode()); + Ops[2] = Handle.getValue(); + } break; } - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), - Ops.size()), 0); + SDNode *NewNode = DAG.UpdateNodeOperands(Node, Ops.data(), Ops.size()); + if (NewNode != Node) { + DAG.ReplaceAllUsesWith(Node, NewNode, this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); + DAG.RemoveDeadNode(Node, this); + Node = NewNode; + } switch (Action) { case TargetLowering::Legal: - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - ResultVals.push_back(Result.getValue(i)); - break; + return; case TargetLowering::Custom: // FIXME: The handling for custom lowering with multiple results is // a complete mess. 
- Tmp1 = TLI.LowerOperation(Result, DAG); + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp1.getNode()) { + SmallVector ResultVals; for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) { if (e == 1) ResultVals.push_back(Tmp1); else ResultVals.push_back(Tmp1.getValue(i)); } - break; + if (Tmp1.getNode() != Node || Tmp1.getResNo() != 0) { + DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); + for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) + DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); + DAG.RemoveDeadNode(Node, this); + } + return; } // FALL THROUGH case TargetLowering::Expand: - ExpandNode(Result.getNode(), ResultVals); - break; + ExpandNode(Node); + return; case TargetLowering::Promote: - PromoteNode(Result.getNode(), ResultVals); - break; - } - if (!ResultVals.empty()) { - for (unsigned i = 0, e = ResultVals.size(); i != e; ++i) { - if (ResultVals[i] != SDValue(Node, i)) - ResultVals[i] = LegalizeOp(ResultVals[i]); - AddLegalizedOperand(SDValue(Node, i), ResultVals[i]); - } - return ResultVals[Op.getResNo()]; + PromoteNode(Node); + return; } } @@ -989,155 +864,20 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { #endif assert(0 && "Do not know how to legalize this operator!"); - case ISD::SRA: - case ISD::SRL: - case ISD::SHL: { - // Scalarize vector SRA/SRL/SHL. 
- EVT VT = Node->getValueType(0); - assert(VT.isVector() && "Unable to legalize non-vector shift"); - assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); - unsigned NumElem = VT.getVectorNumElements(); - - SmallVector Scalars; - for (unsigned Idx = 0; Idx < NumElem; Idx++) { - SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(0), DAG.getIntPtrConstant(Idx)); - SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - VT.getScalarType(), - Node->getOperand(1), DAG.getIntPtrConstant(Idx)); - Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, - VT.getScalarType(), Ex, Sh)); - } - Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), - &Scalars[0], Scalars.size()); - break; - } - - case ISD::BUILD_VECTOR: - switch (TLI.getOperationAction(ISD::BUILD_VECTOR, Node->getValueType(0))) { - default: assert(0 && "This action is not supported yet!"); - case TargetLowering::Custom: - Tmp3 = TLI.LowerOperation(Result, DAG); - if (Tmp3.getNode()) { - Result = Tmp3; - break; - } - // FALLTHROUGH - case TargetLowering::Expand: - Result = ExpandBUILD_VECTOR(Result.getNode()); - break; - } - break; - case ISD::CALLSEQ_START: { - SDNode *CallEnd = FindCallEndFromCallStart(Node); - - // Recursively Legalize all of the inputs of the call end that do not lead - // to this call start. This ensures that any libcalls that need be inserted - // are inserted *before* the CALLSEQ_START. - {SmallPtrSet NodesLeadingTo; - for (unsigned i = 0, e = CallEnd->getNumOperands(); i != e; ++i) - LegalizeAllNodesNotLeadingTo(CallEnd->getOperand(i).getNode(), Node, - NodesLeadingTo); - } - - // Now that we have legalized all of the inputs (which may have inserted - // libcalls), create the new CALLSEQ_START node. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - - // Merge in the last call to ensure that this call starts after the last - // call ended. 
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { - Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, - Tmp1, LastCALLSEQ_END); - Tmp1 = LegalizeOp(Tmp1); - } - - // Do not try to legalize the target-specific arguments (#1+). - if (Tmp1 != Node->getOperand(0)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], - Ops.size()), Result.getResNo()); - } - - // Remember that the CALLSEQ_START is legalized. - AddLegalizedOperand(Op.getValue(0), Result); - if (Node->getNumValues() == 2) // If this has a flag result, remember it. - AddLegalizedOperand(Op.getValue(1), Result.getValue(1)); - - // Now that the callseq_start and all of the non-call nodes above this call - // sequence have been legalized, legalize the call itself. During this - // process, no libcalls can/will be inserted, guaranteeing that no calls - // can overlap. - assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!"); - // Note that we are selecting this call! - LastCALLSEQ_END = SDValue(CallEnd, 0); - IsLegalizingCall = true; - - // Legalize the call, starting from the CALLSEQ_END. - LegalizeOp(LastCALLSEQ_END); - assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!"); - return Result; - } + case ISD::CALLSEQ_START: case ISD::CALLSEQ_END: - // If the CALLSEQ_START node hasn't been legalized first, legalize it. This - // will cause this node to be legalized as well as handling libcalls right. - if (LastCALLSEQ_END.getNode() != Node) { - LegalizeOp(SDValue(FindCallStartFromCallEnd(Node), 0)); - DenseMap::iterator I = LegalizedNodes.find(Op); - assert(I != LegalizedNodes.end() && - "Legalizing the call start should have legalized this node!"); - return I->second; - } - - // Otherwise, the call start has been legalized and everything is going - // according to plan. Just legalize ourselves normally here. - Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. 
- // Do not try to legalize the target-specific arguments (#1+), except for - // an optional flag input. - if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){ - if (Tmp1 != Node->getOperand(0)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } else { - Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); - if (Tmp1 != Node->getOperand(0) || - Tmp2 != Node->getOperand(Node->getNumOperands()-1)) { - SmallVector Ops(Node->op_begin(), Node->op_end()); - Ops[0] = Tmp1; - Ops.back() = Tmp2; - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - &Ops[0], Ops.size()), - Result.getResNo()); - } - } - assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); - // This finishes up call legalization. - IsLegalizingCall = false; - - // If the CALLSEQ_END node has a flag, remember that we legalized it. - AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0)); - if (Node->getNumValues() == 2) - AddLegalizedOperand(SDValue(Node, 1), Result.getValue(1)); - return Result.getValue(Op.getResNo()); + break; case ISD::LOAD: { LoadSDNode *LD = cast(Node); - Tmp1 = LegalizeOp(LD->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(LD->getBasePtr()); // Legalize the base pointer. + Tmp1 = LD->getChain(); // Legalize the chain. + Tmp2 = LD->getBasePtr(); // Legalize the base pointer. 
ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp3 = Result.getValue(0); - Tmp4 = Result.getValue(1); + Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp2, LD->getOffset()); + Tmp3 = SDValue(Node, 0); + Tmp4 = SDValue(Node, 1); switch (TLI.getOperationAction(Node->getOpcode(), VT)) { default: assert(0 && "This action is not supported yet!"); @@ -1148,20 +888,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), - DAG, TLI); - Tmp3 = Result.getOperand(0); - Tmp4 = Result.getOperand(1); - Tmp3 = LegalizeOp(Tmp3); - Tmp4 = LegalizeOp(Tmp4); + ExpandUnalignedLoad(cast(Node), + DAG, TLI, Tmp3, Tmp4); } } break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(Tmp3, DAG); if (Tmp1.getNode()) { - Tmp3 = LegalizeOp(Tmp1); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + Tmp3 = Tmp1; + Tmp4 = Tmp1.getValue(1); } break; case TargetLowering::Promote: { @@ -1173,16 +909,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); - Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1)); - Tmp4 = LegalizeOp(Tmp1.getValue(1)); + Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); + Tmp4 = Tmp1.getValue(1); break; } } // Since loads produce two values, make sure to remember that we // legalized both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp3); - AddLegalizedOperand(SDValue(Node, 1), Tmp4); - return Op.getResNo() ? 
Tmp4 : Tmp3; + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); + return; } EVT SrcVT = LD->getMemoryVT(); @@ -1213,9 +949,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD; - Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); Ch = Result.getValue(1); // The chain. @@ -1230,8 +967,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result.getValueType(), Result, DAG.getValueType(SrcVT)); - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp1 = Result; + Tmp2 = Ch; } else if (SrcWidth & (SrcWidth - 1)) { // If not loading a power-of-2 number of bits, expand as two loads. assert(!SrcVT.isVector() && "Unsupported extload!"); @@ -1274,7 +1011,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. - Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } else { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 @@ -1304,11 +1041,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { TLI.getShiftAmountTy(Hi.getValueType()))); // Join the hi and lo parts. 
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); + Tmp1 = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi); } - Tmp1 = LegalizeOp(Result); - Tmp2 = LegalizeOp(Ch); + Tmp2 = Ch; } else { switch (TLI.getLoadExtAction(ExtType, SrcVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1316,17 +1052,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp2, LD->getOffset()), - Result.getResNo()); - Tmp1 = Result.getValue(0); - Tmp2 = Result.getValue(1); + Node = DAG.UpdateNodeOperands(Node, + Tmp1, Tmp2, LD->getOffset()); + Tmp1 = SDValue(Node, 0); + Tmp2 = SDValue(Node, 1); if (isCustom) { - Tmp3 = TLI.LowerOperation(Result, DAG); + Tmp3 = TLI.LowerOperation(SDValue(Node, 0), DAG); if (Tmp3.getNode()) { - Tmp1 = LegalizeOp(Tmp3); - Tmp2 = LegalizeOp(Tmp3.getValue(1)); + Tmp1 = Tmp3; + Tmp2 = Tmp3.getValue(1); } } else { // If this is an unaligned load and the target doesn't support it, @@ -1337,12 +1072,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), - DAG, TLI); - Tmp1 = Result.getOperand(0); - Tmp2 = Result.getOperand(1); - Tmp1 = LegalizeOp(Tmp1); - Tmp2 = LegalizeOp(Tmp2); + ExpandUnalignedLoad(cast(Node), + DAG, TLI, Tmp1, Tmp2); } } } @@ -1363,9 +1094,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break; default: llvm_unreachable("Unexpected extend load type!"); } - Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); - Tmp1 = LegalizeOp(Result); // Relegalize new nodes. 
- Tmp2 = LegalizeOp(Load.getValue(1)); + Tmp1 = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load); + Tmp2 = Load.getValue(1); break; } @@ -1380,10 +1110,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), - Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Tmp1, Tmp2, LD->getPointerInfo(), SrcVT, + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); SDValue ValRes; if (ExtType == ISD::SEXTLOAD) ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, @@ -1391,38 +1121,37 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Result, DAG.getValueType(SrcVT)); else ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType()); - Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes. - Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes. + Tmp1 = ValRes; + Tmp2 = Result.getValue(1); break; } } // Since loads produce two values, make sure to remember that we legalized // both of them. - AddLegalizedOperand(SDValue(Node, 0), Tmp1); - AddLegalizedOperand(SDValue(Node, 1), Tmp2); - return Op.getResNo() ? Tmp2 : Tmp1; + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); + break; } case ISD::STORE: { StoreSDNode *ST = cast(Node); - Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain. - Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer. 
+ Tmp1 = ST->getChain(); + Tmp2 = ST->getBasePtr(); unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - Result = SDValue(OptStore, 0); + DAG.ReplaceAllUsesWith(ST, OptStore, this); break; } { - Tmp3 = LegalizeOp(ST->getValue()); - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); + Tmp3 = ST->getValue(); + Node = DAG.UpdateNodeOperands(Node, + Tmp1, Tmp3, Tmp2, + ST->getOffset()); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { @@ -1434,27 +1163,31 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast(Node), + DAG, TLI, this); } break; case TargetLowering::Custom: - Tmp1 = TLI.LowerOperation(Result, DAG); - if (Tmp1.getNode()) Result = Tmp1; + Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); + if (Tmp1.getNode()) + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Tmp1, this); break; - case TargetLowering::Promote: + case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); Tmp3 = DAG.getNode(ISD::BITCAST, dl, TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, - ST->getPointerInfo(), isVolatile, - isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, + ST->getPointerInfo(), isVolatile, + isNonTemporal, Alignment); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); break; } + } break; } } else { - Tmp3 = LegalizeOp(ST->getValue()); + Tmp3 = ST->getValue(); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -1466,8 
+1199,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { EVT NVT = EVT::getIntegerVT(*DAG.getContext(), StVT.getStoreSizeInBits()); Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT); - Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - NVT, isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + NVT, isVolatile, isNonTemporal, Alignment); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1521,14 +1256,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } // The order of the stores doesn't matter. - Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) - Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), - Tmp1, Tmp3, Tmp2, - ST->getOffset()), - Result.getResNo()); + Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp3, Tmp2, + ST->getOffset()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1539,12 +1273,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) - Result = ExpandUnalignedStore(cast(Result.getNode()), - DAG, TLI); + ExpandUnalignedStore(cast(Node), DAG, TLI, this); } break; case TargetLowering::Custom: - Result = TLI.LowerOperation(Result, DAG); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), + TLI.LowerOperation(SDValue(Node, 0), DAG), + this); break; case TargetLowering::Expand: 
assert(!StVT.isVector() && @@ -1553,8 +1288,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // TRUNCSTORE:i16 i32 -> STORE i16 assert(TLI.isTypeLegal(StVT) && "Do not know how to expand this store!"); Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3); - Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + SDValue Result = + DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), + isVolatile, isNonTemporal, Alignment); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); break; } } @@ -1562,17 +1299,6 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } } - assert(Result.getValueType() == Op.getValueType() && - "Bad legalization!"); - - // Make sure that the generated code is itself legal. - if (Result != Op) - Result = LegalizeOp(Result); - - // Note that LegalizeOp may be reentered even from single-use nodes, which - // means that we always must cache transformed nodes. - AddLegalizedOperand(Op, Result); - return Result; } SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { @@ -2011,7 +1737,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { // and leave the Hi part unset. SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); // The input chain to this libcall is the entry node of the function. // Legalizing the call will automatically add the previous call to the // dependence. @@ -2030,7 +1755,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. 
Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); // isTailCall may be true since the callee does not reference caller stack @@ -2046,10 +1770,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // It's a tailcall, return the chain (which is the DAG root). return DAG.getRoot(); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo.first; } @@ -2079,11 +1799,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); - return CallInfo.first; } @@ -2093,7 +1808,6 @@ std::pair SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned) { - assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); SDValue InChain = Node->getOperand(0); TargetLowering::ArgListTy Args; @@ -2110,7 +1824,6 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, @@ -2118,10 +1831,6 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, /*isReturnValueUsed=*/true, Callee, Args, DAG, Node->getDebugLoc()); - // Legalize the call sequence, starting with the chain. 
This will advance - // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); return CallInfo; } @@ -2247,20 +1956,14 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. DebugLoc dl = Node->getDebugLoc(); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*isReturnValueUsed=*/true, Callee, Args, DAG, dl); - // Legalize the call sequence, starting with the chain. This will advance - // the LastCALLSEQ to the legalized version of the CALLSEQ_END node that - // was added by LowerCallTo (guaranteeing proper serialization of calls). - LegalizeOp(CallInfo.second); - // Remainder is loaded back from the stack frame. - SDValue Rem = DAG.getLoad(RetVT, dl, LastCALLSEQ_END, FIPtr, + SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, MachinePointerInfo(), false, false, 0); Results.push_back(CallInfo.first); Results.push_back(Rem); @@ -2452,11 +2155,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, MachinePointerInfo::getConstantPool(), false, false, Alignment); else { - FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, - DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), - MVT::f32, false, false, Alignment)); + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(), + MVT::f32, false, false, Alignment); + HandleSDNode Handle(Load); + LegalizeOp(Load.getNode()); + FudgeInReg = Handle.getValue(); } return DAG.getNode(ISD::FADD, dl, DestVT, Tmp1, FudgeInReg); @@ -2780,8 +2485,8 @@ std::pair SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { return ExpandChainLibCall(LC, 
Node, false); } -void SelectionDAGLegalize::ExpandNode(SDNode *Node, - SmallVectorImpl &Results) { +void SelectionDAGLegalize::ExpandNode(SDNode *Node) { + SmallVector Results; DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3, Tmp4; switch (Node->getOpcode()) { @@ -3229,10 +2934,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, ConstantFPSDNode *CFP = cast(Node); // Check to see if this FP immediate is already legal. // If this is a legal constant, turn it into a TargetConstantFP node. - if (TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) - Results.push_back(SDValue(Node, 0)); - else - Results.push_back(ExpandConstantFP(CFP, true, DAG, TLI)); + if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0))) + Results.push_back(ExpandConstantFP(CFP, true)); break; } case ISD::EHSELECTION: { @@ -3478,6 +3181,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, DAG.getIntPtrConstant(0)); TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Ret, DAG.getIntPtrConstant(1)); + // Ret is a node with an illegal type. Because such things are not + // generally permitted during this phase of legalization, delete the + // node. The above EXTRACT_ELEMENT nodes should have been folded. + DAG.DeleteNode(Ret.getNode()); } if (isSigned) { @@ -3618,7 +3325,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, dl); - LastCALLSEQ_END = DAG.getEntryNode(); assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!"); Tmp3 = DAG.getConstant(0, Tmp2.getValueType()); @@ -3628,6 +3334,35 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, Results.push_back(Tmp1); break; } + case ISD::BUILD_VECTOR: + Results.push_back(ExpandBUILD_VECTOR(Node)); + break; + case ISD::SRA: + case ISD::SRL: + case ISD::SHL: { + // Scalarize vector SRA/SRL/SHL. 
+ EVT VT = Node->getValueType(0); + assert(VT.isVector() && "Unable to legalize non-vector shift"); + assert(TLI.isTypeLegal(VT.getScalarType())&& "Element type must be legal"); + unsigned NumElem = VT.getVectorNumElements(); + + SmallVector Scalars; + for (unsigned Idx = 0; Idx < NumElem; Idx++) { + SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(0), DAG.getIntPtrConstant(Idx)); + SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, + VT.getScalarType(), + Node->getOperand(1), DAG.getIntPtrConstant(Idx)); + Scalars.push_back(DAG.getNode(Node->getOpcode(), dl, + VT.getScalarType(), Ex, Sh)); + } + SDValue Result = + DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), + &Scalars[0], Scalars.size()); + DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + break; + } case ISD::GLOBAL_OFFSET_TABLE: case ISD::GlobalAddress: case ISD::GlobalTLSAddress: @@ -3638,13 +3373,16 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::INTRINSIC_WO_CHAIN: case ISD::INTRINSIC_VOID: // FIXME: Custom lowering for these operations shouldn't return null! - for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - Results.push_back(SDValue(Node, i)); break; } + + // Replace the original node with the legalized result. + if (!Results.empty()) + DAG.ReplaceAllUsesWith(Node, Results.data(), this); } -void SelectionDAGLegalize::PromoteNode(SDNode *Node, - SmallVectorImpl &Results) { + +void SelectionDAGLegalize::PromoteNode(SDNode *Node) { + SmallVector Results; EVT OVT = Node->getValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || @@ -3772,6 +3510,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node, break; } } + + // Replace the original node with the legalized result. + if (!Results.empty()) + DAG.ReplaceAllUsesWith(Node, Results.data(), this); } // SelectionDAG::Legalize - This is the entry point for the file. 
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index a4bb577..7ed1b98 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -1084,7 +1084,6 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), TLI.getPointerTy()); - // Splice the libcall in wherever FindInputOutputChains tells us to. Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); std::pair CallInfo = TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index a1abdb4..cab303d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -315,8 +315,10 @@ void ScheduleDAGRRList::Schedule() { IssueCount = 0; MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; - LiveRegDefs.resize(TRI->getNumRegs(), NULL); - LiveRegGens.resize(TRI->getNumRegs(), NULL); + // Allocate slots for each physical register, plus one for a special register + // to track the virtual resource of a calling sequence. + LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); + LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); // Build the scheduling graph. BuildSchedGraph(NULL); @@ -386,6 +388,109 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) { } } +/// IsChainDependent - Test if Outer is reachable from Inner through +/// chain dependencies. +static bool IsChainDependent(SDNode *Outer, SDNode *Inner, + unsigned NestLevel, + const TargetInstrInfo *TII) { + SDNode *N = Outer; + for (;;) { + if (N == Inner) + return true; + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match. 
+ if (N->getOpcode() == ISD::TokenFactor) { + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (IsChainDependent(N->getOperand(i).getNode(), Inner, NestLevel, TII)) + return true; + return false; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. + if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + if (NestLevel == 0) + return false; + --NestLevel; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return false; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return false; + } +} + +/// FindCallSeqStart - Starting from the (lowered) CALLSEQ_END node, locate +/// the corresponding (lowered) CALLSEQ_BEGIN node. +/// +/// NestLevel and MaxNest are used in recursion to indicate the current level +/// of nesting of CALLSEQ_BEGIN and CALLSEQ_END pairs, as well as the maximum +/// level seen so far. +/// +/// TODO: It would be better to give CALLSEQ_END an explicit operand to point +/// to the corresponding CALLSEQ_BEGIN to avoid needing to search for it. +static SDNode * +FindCallSeqStart(SDNode *N, unsigned &NestLevel, unsigned &MaxNest, + const TargetInstrInfo *TII) { + for (;;) { + // For a TokenFactor, examine each operand. There may be multiple ways + // to get to the CALLSEQ_BEGIN, but we need to find the path with the + // most nesting in order to ensure that we find the corresponding match.
+ if (N->getOpcode() == ISD::TokenFactor) { + SDNode *Best = 0; + unsigned BestMaxNest = MaxNest; + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { + unsigned MyNestLevel = NestLevel; + unsigned MyMaxNest = MaxNest; + if (SDNode *New = FindCallSeqStart(N->getOperand(i).getNode(), + MyNestLevel, MyMaxNest, TII)) + if (!Best || (MyMaxNest > BestMaxNest)) { + Best = New; + BestMaxNest = MyMaxNest; + } + } + assert(Best); + MaxNest = BestMaxNest; + return Best; + } + // Check for a lowered CALLSEQ_BEGIN or CALLSEQ_END. + if (N->isMachineOpcode()) { + if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameDestroyOpcode()) { + ++NestLevel; + MaxNest = std::max(MaxNest, NestLevel); + } else if (N->getMachineOpcode() == + (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NestLevel != 0); + --NestLevel; + if (NestLevel == 0) + return N; + } + } + // Otherwise, find the chain and continue climbing. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + if (N->getOperand(i).getValueType() == MVT::Other) { + N = N->getOperand(i).getNode(); + goto found_chain_operand; + } + return 0; + found_chain_operand:; + if (N->getOpcode() == ISD::EntryToken) + return 0; + } +} + /// Call ReleasePred for each predecessor, then update register live def/gen. /// Always update LiveRegDefs for a register dependence even if the current SU /// also defines the register. This effectively create one large live range @@ -423,6 +528,25 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { } } } + + // If we're scheduling a lowered CALLSEQ_END, find the corresponding + // CALLSEQ_BEGIN. Inject an artificial physical register dependence between + // these nodes, to prevent other calls from being interscheduled with them. 
+ unsigned CallResource = TRI->getNumRegs(); + if (!LiveRegDefs[CallResource]) + for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + unsigned NestLevel = 0; + unsigned MaxNest = 0; + SDNode *N = FindCallSeqStart(Node, NestLevel, MaxNest, TII); + + SUnit *Def = &SUnits[N->getNodeId()]; + ++NumLiveRegs; + LiveRegDefs[CallResource] = Def; + LiveRegGens[CallResource] = SU; + break; + } } /// Check to see if any of the pending instructions are ready to issue. If @@ -605,6 +729,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { LiveRegGens[I->getReg()] = NULL; } } + // Release the special call resource dependence, if this is the beginning + // of a call. + unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } resetVRegCycle(SU); @@ -661,6 +799,33 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { } } + // Reclaim the special call resource dependence, if this is the beginning + // of a call. + unsigned CallResource = TRI->getNumRegs(); + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) { + ++NumLiveRegs; + LiveRegDefs[CallResource] = SU; + LiveRegGens[CallResource] = NULL; + } + } + + // Release the special call resource dependence, if this is the end + // of a call. 
+ if (LiveRegGens[CallResource] == SU) + for (const SDNode *SUNode = SU->getNode(); SUNode; + SUNode = SUNode->getGluedNode()) { + if (SUNode->isMachineOpcode() && + SUNode->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); + --NumLiveRegs; + LiveRegDefs[CallResource] = NULL; + LiveRegGens[CallResource] = NULL; + } + } + for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { if (I->isAssignedRegDep()) { @@ -1083,6 +1248,20 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { if (!Node->isMachineOpcode()) continue; + // If we're in the middle of scheduling a call, don't begin scheduling + // another call. Also, don't allow any physical registers to be live across + // the call. + if (Node->getMachineOpcode() == (unsigned)TII->getCallFrameDestroyOpcode()) { + // Check the special calling-sequence resource. + unsigned CallResource = TRI->getNumRegs(); + if (LiveRegDefs[CallResource]) { + SDNode *Gen = LiveRegGens[CallResource]->getNode(); + while (SDNode *Glued = Gen->getGluedNode()) + Gen = Glued; + if (!IsChainDependent(Gen, Node, 0, TII) && RegAdded.insert(CallResource)) + LRegs.push_back(CallResource); + } + } const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 07d2db6..010a740 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5290,6 +5290,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (FromN == getRoot()) + setRoot(To); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. 
@@ -5335,6 +5339,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, SDNode *To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To, getRoot().getResNo())); } /// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead. @@ -5373,6 +5381,10 @@ void SelectionDAG::ReplaceAllUsesWith(SDNode *From, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot().getNode()) + setRoot(SDValue(To[getRoot().getResNo()])); } /// ReplaceAllUsesOfValueWith - Replace any uses of From with To, leaving @@ -5431,6 +5443,10 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To, // already exists there, recursively merge the results together. AddModifiedNodeToCSEMaps(User, &Listener); } + + // If we just RAUW'd the root, take note. + if (From == getRoot()) + setRoot(To); } namespace { diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 336f730..2f533c2 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1353,12 +1353,10 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue Src = DAG.getNode(ISD::ADD, dl, getPointerTy(), Arg, SrcOffset); SDValue SizeNode = DAG.getConstant(Flags.getByValSize() - 4*offset, MVT::i32); - // TODO: Disable AlwaysInline when it becomes possible - // to emit a nested call sequence. MemOpChains.push_back(DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode, Flags.getByValAlign(), /*isVolatile=*/false, - /*AlwaysInline=*/true, + /*AlwaysInline=*/false, MachinePointerInfo(0), MachinePointerInfo(0))); @@ -4350,9 +4348,24 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { // If this is undef splat, generate it via "just" vdup, if possible. 
if (Lane == -1) Lane = 0; + // Test if V1 is a SCALAR_TO_VECTOR. if (Lane == 0 && V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); } + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR + // (and probably will turn into a SCALAR_TO_VECTOR once legalization + // reaches it). + if (Lane == 0 && V1.getOpcode() == ISD::BUILD_VECTOR && + !isa(V1.getOperand(0))) { + bool IsScalarToVector = true; + for (unsigned i = 1, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return DAG.getNode(ARMISD::VDUP, dl, VT, V1.getOperand(0)); + } return DAG.getNode(ARMISD::VDUPLANE, dl, VT, V1, DAG.getConstant(Lane, MVT::i32)); } diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 02b0ff2..3d75de0 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -2114,7 +2114,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) { HasNoSignedComparisonUses(Node)) // Look past the truncate if CMP is the only use of it. N0 = N0.getOperand(0); - if (N0.getNode()->getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + if ((N0.getNode()->getOpcode() == ISD::AND || + (N0.getResNo() == 0 && N0.getNode()->getOpcode() == X86ISD::AND)) && + N0.getNode()->hasOneUse() && N0.getValueType() != MVT::i8 && X86::isZeroNode(N1)) { ConstantSDNode *C = dyn_cast(N0.getNode()->getOperand(1)); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c9b6422..b15dfac 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -4221,6 +4221,29 @@ static bool isScalarLoadToVector(SDNode *N, LoadSDNode **LD = NULL) { return true; } +// Test whether the given value is a vector value which will be legalized +// into a load. 
+static bool WillBeConstantPoolLoad(SDNode *N) { + if (N->getOpcode() != ISD::BUILD_VECTOR) + return false; + + // Check for any non-constant elements. + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + switch (N->getOperand(i).getNode()->getOpcode()) { + case ISD::UNDEF: + case ISD::ConstantFP: + case ISD::Constant: + break; + default: + return false; + } + + // Vectors of all-zeros and all-ones are materialized with special + // instructions rather than being loaded. + return !ISD::isBuildVectorAllZeros(N) && + !ISD::isBuildVectorAllOnes(N); +} + /// ShouldXformToMOVLP{S|D} - Return true if the node should be transformed to /// match movlp{s|d}. The lower half elements should come from lower half of /// V1 (and in order), and the upper half elements should come from the upper @@ -4236,7 +4259,7 @@ static bool ShouldXformToMOVLP(SDNode *V1, SDNode *V2, return false; // Is V2 is a vector load, don't do this transformation. We will try to use // load folding shufps op. - if (ISD::isNON_EXTLoad(V2)) + if (ISD::isNON_EXTLoad(V2) || WillBeConstantPoolLoad(V2)) return false; unsigned NumElems = VT.getVectorNumElements(); @@ -6352,6 +6375,8 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op)) CanFoldLoad = true; + ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); + // Both of them can't be memory operations though. if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) CanFoldLoad = false; @@ -6361,10 +6386,11 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); if (NumElems == 4) - return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); + // If we don't care about the second element, proceed to use movss.
+ if (SVOp->getMaskElt(1) != -1) + return getTargetShuffleNode(X86ISD::MOVLPS, dl, VT, V1, V2, DAG); } - ShuffleVectorSDNode *SVOp = cast(Op); // movl and movlp will both match v2i64, but v2i64 is never matched by // movl earlier because we make it strict to avoid messing with the movlp load // folding logic (see the code above getMOVLP call). Match it here then, @@ -8682,8 +8708,9 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. - if (Cond.getOpcode() == X86ISD::SETCC || - Cond.getOpcode() == X86ISD::SETCC_CARRY) { + unsigned CondOpcode = Cond.getOpcode(); + if (CondOpcode == X86ISD::SETCC || + CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -8700,6 +8727,39 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { Cond = Cmp; addTest = false; } + } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || + CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || + ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && + Cond.getOperand(0).getValueType() != MVT::i8)) { + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + unsigned X86Opcode; + unsigned X86Cond; + SDVTList VTs; + switch (CondOpcode) { + case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; + case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; + case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; + case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; + case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; + case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; + default: llvm_unreachable("unexpected overflowing operator"); + } + if (CondOpcode == ISD::UMULO) + VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), + 
MVT::i32); + else + VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + + SDValue X86Op = DAG.getNode(X86Opcode, DL, VTs, LHS, RHS); + + if (CondOpcode == ISD::UMULO) + Cond = X86Op.getValue(2); + else + Cond = X86Op.getValue(1); + + CC = DAG.getConstant(X86Cond, MVT::i8); + addTest = false; } if (addTest) { @@ -8781,11 +8841,27 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { SDValue Dest = Op.getOperand(2); DebugLoc dl = Op.getDebugLoc(); SDValue CC; + bool Inverted = false; if (Cond.getOpcode() == ISD::SETCC) { - SDValue NewCond = LowerSETCC(Cond, DAG); - if (NewCond.getNode()) - Cond = NewCond; + // Check for setcc([su]{add,sub,mul}o == 0). + if (cast(Cond.getOperand(2))->get() == ISD::SETEQ && + isa(Cond.getOperand(1)) && + cast(Cond.getOperand(1))->isNullValue() && + Cond.getOperand(0).getResNo() == 1 && + (Cond.getOperand(0).getOpcode() == ISD::SADDO || + Cond.getOperand(0).getOpcode() == ISD::UADDO || + Cond.getOperand(0).getOpcode() == ISD::SSUBO || + Cond.getOperand(0).getOpcode() == ISD::USUBO || + Cond.getOperand(0).getOpcode() == ISD::SMULO || + Cond.getOperand(0).getOpcode() == ISD::UMULO)) { + Inverted = true; + Cond = Cond.getOperand(0); + } else { + SDValue NewCond = LowerSETCC(Cond, DAG); + if (NewCond.getNode()) + Cond = NewCond; + } } #if 0 // FIXME: LowerXALUO doesn't handle these!! @@ -8806,8 +8882,9 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // If condition flag is set by a X86ISD::CMP, then use it as the condition // setting operand in place of the X86ISD::SETCC. 
- if (Cond.getOpcode() == X86ISD::SETCC || - Cond.getOpcode() == X86ISD::SETCC_CARRY) { + unsigned CondOpcode = Cond.getOpcode(); + if (CondOpcode == X86ISD::SETCC || + CondOpcode == X86ISD::SETCC_CARRY) { CC = Cond.getOperand(0); SDValue Cmp = Cond.getOperand(1); @@ -8828,6 +8905,43 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { break; } } + } + CondOpcode = Cond.getOpcode(); + if (CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || + CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || + ((CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) && + Cond.getOperand(0).getValueType() != MVT::i8)) { + SDValue LHS = Cond.getOperand(0); + SDValue RHS = Cond.getOperand(1); + unsigned X86Opcode; + unsigned X86Cond; + SDVTList VTs; + switch (CondOpcode) { + case ISD::UADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_B; break; + case ISD::SADDO: X86Opcode = X86ISD::ADD; X86Cond = X86::COND_O; break; + case ISD::USUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_B; break; + case ISD::SSUBO: X86Opcode = X86ISD::SUB; X86Cond = X86::COND_O; break; + case ISD::UMULO: X86Opcode = X86ISD::UMUL; X86Cond = X86::COND_O; break; + case ISD::SMULO: X86Opcode = X86ISD::SMUL; X86Cond = X86::COND_O; break; + default: llvm_unreachable("unexpected overflowing operator"); + } + if (Inverted) + X86Cond = X86::GetOppositeBranchCondition((X86::CondCode)X86Cond); + if (CondOpcode == ISD::UMULO) + VTs = DAG.getVTList(LHS.getValueType(), LHS.getValueType(), + MVT::i32); + else + VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); + + SDValue X86Op = DAG.getNode(X86Opcode, dl, VTs, LHS, RHS); + + if (CondOpcode == ISD::UMULO) + Cond = X86Op.getValue(2); + else + Cond = X86Op.getValue(1); + + CC = DAG.getConstant(X86Cond, MVT::i8); + addTest = false; } else { unsigned CondOpc; if (Cond.hasOneUse() && isAndOrOfSetCCs(Cond, CondOpc)) { @@ -8891,6 +9005,66 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { CC = 
DAG.getConstant(CCode, MVT::i8); Cond = Cond.getOperand(0).getOperand(1); addTest = false; + } else if (Cond.getOpcode() == ISD::SETCC && + cast(Cond.getOperand(2))->get() == ISD::SETOEQ) { + // For FCMP_OEQ, we can emit + // two branches instead of an explicit AND instruction with a + // separate test. However, we only do this if this block doesn't + // have a fall-through edge, because this requires an explicit + // jmp when the condition is false. + if (Op.getNode()->hasOneUse()) { + SDNode *User = *Op.getNode()->use_begin(); + // Look for an unconditional branch following this conditional branch. + // We need this because we need to reverse the successors in order + // to implement FCMP_OEQ. + if (User->getOpcode() == ISD::BR) { + SDValue FalseBB = User->getOperand(1); + SDNode *NewBR = + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); + assert(NewBR == User); + (void)NewBR; + Dest = FalseBB; + + SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, + Cond.getOperand(0), Cond.getOperand(1)); + CC = DAG.getConstant(X86::COND_NE, MVT::i8); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), + Chain, Dest, CC, Cmp); + CC = DAG.getConstant(X86::COND_P, MVT::i8); + Cond = Cmp; + addTest = false; + } + } + } else if (Cond.getOpcode() == ISD::SETCC && + cast(Cond.getOperand(2))->get() == ISD::SETUNE) { + // For FCMP_UNE, we can emit + // two branches instead of an explicit AND instruction with a + // separate test. However, we only do this if this block doesn't + // have a fall-through edge, because this requires an explicit + // jmp when the condition is false. + if (Op.getNode()->hasOneUse()) { + SDNode *User = *Op.getNode()->use_begin(); + // Look for an unconditional branch following this conditional branch. + // We need this because we need to reverse the successors in order + // to implement FCMP_UNE. 
+ if (User->getOpcode() == ISD::BR) { + SDValue FalseBB = User->getOperand(1); + SDNode *NewBR = + DAG.UpdateNodeOperands(User, User->getOperand(0), Dest); + assert(NewBR == User); + (void)NewBR; + + SDValue Cmp = DAG.getNode(X86ISD::CMP, dl, MVT::i32, + Cond.getOperand(0), Cond.getOperand(1)); + CC = DAG.getConstant(X86::COND_NE, MVT::i8); + Chain = DAG.getNode(X86ISD::BRCOND, dl, Op.getValueType(), + Chain, Dest, CC, Cmp); + CC = DAG.getConstant(X86::COND_NP, MVT::i8); + Cond = Cmp; + addTest = false; + Dest = FalseBB; + } + } } } diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 2afe0e3..4b74f96 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -386,6 +386,15 @@ IsWordAlignedBasePlusConstantOffset(SDValue Addr, SDValue &AlignedBase, Offset = off; return true; } + // Check for an aligned global variable. + if (GlobalAddressSDNode *GA = dyn_cast(*Root)) { + const GlobalValue *GV = GA->getGlobal(); + if (GA->getOffset() == 0 && GV->getAlignment() >= 4) { + AlignedBase = Base; + Offset = off; + return true; + } + } return false; } -- cgit v1.1 From 28eb1c5217416aa60b06b8b569a5de8047f75514 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 4 Nov 2011 00:48:26 +0000 Subject: Speculatively revert "DeadStoreElimination can now trim the size of a store if the end of it is dead.", which appears to break bootstrapping LLVM. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143668 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/DeadStoreElimination.cpp | 142 ++++++------------------- 1 file changed, 33 insertions(+), 109 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 921bec8..c0738a9 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -239,24 +239,6 @@ static bool isRemovable(Instruction *I) { } } - -/// isShortenable - Returns true if this instruction can be safely shortened in -/// length. -static bool isShortenable(Instruction *I) { - // Don't shorten stores for now - if (isa(I)) - return false; - - IntrinsicInst *II = cast(I); - switch (II->getIntrinsicID()) { - default: return false; - case Intrinsic::memset: - case Intrinsic::memcpy: - // Do shorten memory intrinsics. - return true; - } -} - /// getStoredPointerOperand - Return the pointer that is being written to. static Value *getStoredPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast(I)) @@ -311,24 +293,11 @@ static bool isObjectPointerWithTrustworthySize(const Value *V) { return false; } -namespace { - enum OverwriteResult - { - OverwriteComplete, - OverwriteEnd, - OverwriteUnknown - }; -} - -/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location +/// isCompleteOverwrite - Return true if a store to the 'Later' location /// completely overwrites a store to the 'Earlier' location. 
-/// 'OverwriteEnd' if the end of the 'Earlier' location is completely -/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined -static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, - const AliasAnalysis::Location &Earlier, - AliasAnalysis &AA, - int64_t& EarlierOff, - int64_t& LaterOff) { +static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, + const AliasAnalysis::Location &Earlier, + AliasAnalysis &AA) { const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); @@ -342,24 +311,23 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // If we have no TargetData information around, then the size of the store // is inferrable from the pointee type. If they are the same type, then // we know that the store is safe. - if (AA.getTargetData() == 0 && - Later.Ptr->getType() == Earlier.Ptr->getType()) - return OverwriteComplete; - - return OverwriteUnknown; + if (AA.getTargetData() == 0) + return Later.Ptr->getType() == Earlier.Ptr->getType(); + return false; } // Make sure that the Later size is >= the Earlier size. - if (Later.Size >= Earlier.Size) - return OverwriteComplete; + if (Later.Size < Earlier.Size) + return false; + return true; } // Otherwise, we have to have size information, and the later store has to be // larger than the earlier one. if (Later.Size == AliasAnalysis::UnknownSize || Earlier.Size == AliasAnalysis::UnknownSize || - AA.getTargetData() == 0) - return OverwriteUnknown; + Later.Size <= Earlier.Size || AA.getTargetData() == 0) + return false; // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval argument). If so, then it clearly overwrites any @@ -372,27 +340,26 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. 
if (UO1 != UO2) - return OverwriteUnknown; + return false; // If the "Later" store is to a recognizable object, get its size. if (isObjectPointerWithTrustworthySize(UO2)) { uint64_t ObjectSize = TD.getTypeAllocSize(cast(UO2->getType())->getElementType()); if (ObjectSize == Later.Size) - return OverwriteComplete; + return true; } // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - EarlierOff = 0; - LaterOff = 0; + int64_t EarlierOff = 0, LaterOff = 0; const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); // If the base pointers still differ, we have two completely different stores. if (BP1 != BP2) - return OverwriteUnknown; + return false; // The later store completely overlaps the earlier store if: // @@ -410,24 +377,11 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, // // We have to be careful here as *Off is signed while *.Size is unsigned. if (EarlierOff >= LaterOff && - Later.Size > Earlier.Size && uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) - return OverwriteComplete; - - // The other interesting case is if the later store overwrites the end of - // the earlier store - // - // |--earlier--| - // |-- later --| - // - // In this case we may want to trim the size of earlier to avoid generating - // writes to addresses which will definitely be overwritten later - if (LaterOff > EarlierOff && - LaterOff + Later.Size >= EarlierOff + Earlier.Size) - return OverwriteEnd; + return true; // Otherwise, they don't completely overlap. - return OverwriteUnknown; + return false; } /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. 
a noop copy of a @@ -551,52 +505,22 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // If we find a write that is a) removable (i.e., non-volatile), b) is // completely obliterated by the store to 'Loc', and c) which we know that // 'Inst' doesn't load from, then we can remove it. - if (isRemovable(DepWrite) && + if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) && !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { - int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA, - DepWriteOffset, InstWriteOffset); - if (OR == OverwriteComplete) { - DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " - << *DepWrite << "\n KILLER: " << *Inst << '\n'); - - // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD); - ++NumFastStores; - MadeChange = true; - - // DeleteDeadInstruction can delete the current instruction in loop - // cases, reset BBI. - BBI = Inst; - if (BBI != BB.begin()) - --BBI; - break; - } else if (OR == OverwriteEnd && isShortenable(DepWrite)) { - // TODO: base this on the target vector size so that if the earlier - // store was too small to get vector writes anyway then its likely - // a good idea to shorten it - // Power of 2 vector writes are probably always a bad idea to optimize - // as any store/memset/memcpy is likely using vector instructions so - // shortening it to not vector size is likely to be slower - MemIntrinsic* DepIntrinsic = cast(DepWrite); - unsigned DepWriteAlign = DepIntrinsic->getAlignment(); - if (llvm::isPowerOf2_64(InstWriteOffset) || - ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { - - DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " - << *DepWrite << "\n KILLER (offset " - << InstWriteOffset << ", " - << DepLoc.Size << ")" - << *Inst << '\n'); - - Value* DepWriteLength = DepIntrinsic->getLength(); - Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(), - InstWriteOffset - - DepWriteOffset); - 
DepIntrinsic->setLength(TrimmedLength); - MadeChange = true; - } - } + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " + << *DepWrite << "\n KILLER: " << *Inst << '\n'); + + // Delete the store and now-dead instructions that feed it. + DeleteDeadInstruction(DepWrite, *MD); + ++NumFastStores; + MadeChange = true; + + // DeleteDeadInstruction can delete the current instruction in loop + // cases, reset BBI. + BBI = Inst; + if (BBI != BB.begin()) + --BBI; + break; } // If this is a may-aliased store that is clobbering the store value, we -- cgit v1.1 From f470cbbad204caa85275873004151b92fba24375 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 4 Nov 2011 00:50:21 +0000 Subject: Add fast-isel support for returning i1, i8, and i16. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143669 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index d1f2c7f..ab5caa3 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1751,19 +1751,32 @@ bool ARMFastISel::SelectRet(const Instruction *I) { CCValAssign &VA = ValLocs[0]; // Don't bother handling odd stuff for now. - // FIXME: Should be able to handle i1, i8, and/or i16 return types. if (VA.getLocInfo() != CCValAssign::Full) return false; // Only handle register returns for now. if (!VA.isRegLoc()) return false; - // TODO: For now, don't try to handle cases where getLocInfo() - // says Full but the types don't match. - if (TLI.getValueType(RV->getType()) != VA.getValVT()) - return false; - // Make the copy. unsigned SrcReg = Reg + VA.getValNo(); + EVT RVVT = TLI.getValueType(RV->getType()); + EVT DestVT = VA.getValVT(); + // Special handling for extended integers. 
+ if (RVVT != DestVT) { + if (RVVT != MVT::i1 && RVVT != MVT::i8 && RVVT != MVT::i16) + return false; + + if (!Outs[0].Flags.isZExt() && !Outs[0].Flags.isSExt()) + return false; + + assert(DestVT == MVT::i32 && "ARM should always ext to i32"); + + bool isZExt = Outs[0].Flags.isZExt(); + unsigned ResultReg = ARMEmitIntExt(RVVT, SrcReg, DestVT, isZExt); + if (ResultReg == 0) return false; + SrcReg = ResultReg; + } + + // Make the copy. unsigned DstReg = VA.getLocReg(); const TargetRegisterClass* SrcRC = MRI.getRegClass(SrcReg); // Avoid a cross-class copy. This is very unlikely. -- cgit v1.1 From 8e4a2e4f730e691e116a4b2cee3a2c760a54ac09 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 4 Nov 2011 00:58:10 +0000 Subject: Indentation. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143670 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index ab5caa3..0290c9d 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1947,7 +1947,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) { if (CS.paramHasAttr(AttrInd, Attribute::ZExt)) Flags.setZExt(); - // FIXME: Only handle *easy* calls for now. + // FIXME: Only handle *easy* calls for now. if (CS.paramHasAttr(AttrInd, Attribute::InReg) || CS.paramHasAttr(AttrInd, Attribute::StructRet) || CS.paramHasAttr(AttrInd, Attribute::Nest) || -- cgit v1.1 From c39916b166dffba53d19e6189a4154d649e572b6 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 4 Nov 2011 01:48:58 +0000 Subject: Fix some minor scheduling itinerary bug. It's not expected to actually affect codegen. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143675 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index c08c363..70316fd 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -2191,9 +2191,10 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), } // Indexed loads -multiclass AI2_ldridx { +multiclass AI2_ldridx { def _PRE_IMM : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins addrmode_imm12:$addr), IndexModePre, LdFrm, itin, + (ins addrmode_imm12:$addr), IndexModePre, LdFrm, iii, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<17> addr; let Inst{25} = 0; @@ -2205,7 +2206,7 @@ multiclass AI2_ldridx { } def _PRE_REG : AI2ldstidx<1, isByte, 1, (outs GPR:$Rt, GPR:$Rn_wb), - (ins ldst_so_reg:$addr), IndexModePre, LdFrm, itin, + (ins ldst_so_reg:$addr), IndexModePre, LdFrm, iir, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<17> addr; let Inst{25} = 1; @@ -2219,7 +2220,7 @@ multiclass AI2_ldridx { def _POST_REG : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$addr, am2offset_reg:$offset), - IndexModePost, LdFrm, itin, + IndexModePost, LdFrm, iir, opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { // {12} isAdd @@ -2236,7 +2237,7 @@ multiclass AI2_ldridx { def _POST_IMM : AI2ldstidx<1, isByte, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$addr, am2offset_imm:$offset), - IndexModePost, LdFrm, itin, + IndexModePost, LdFrm, iii, opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { // {12} isAdd @@ -2254,8 +2255,10 @@ multiclass AI2_ldridx { } let mayLoad = 1, neverHasSideEffects = 1 in { -defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_ru>; -defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_ru>; +// FIXME: for LDR_PRE_REG etc. 
the itinerary should be either IIC_iLoad_ru or +// IIC_iLoad_siu depending on whether the offset register is shifted. +defm LDR : AI2_ldridx<0, "ldr", IIC_iLoad_iu, IIC_iLoad_ru>; +defm LDRB : AI2_ldridx<1, "ldrb", IIC_iLoad_bh_iu, IIC_iLoad_bh_ru>; } multiclass AI3_ldridx op, string opc, InstrItinClass itin> { @@ -2445,10 +2448,11 @@ def STRD : AI3str<0b1111, (outs), (ins GPR:$Rt, GPR:$src2, addrmode3:$addr), } // Indexed stores -multiclass AI2_stridx { +multiclass AI2_stridx { def _PRE_IMM : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, addrmode_imm12:$addr), IndexModePre, - StFrm, itin, + StFrm, iii, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<17> addr; let Inst{25} = 0; @@ -2461,7 +2465,7 @@ multiclass AI2_stridx { def _PRE_REG : AI2ldstidx<0, isByte, 1, (outs GPR:$Rn_wb), (ins GPR:$Rt, ldst_so_reg:$addr), - IndexModePre, StFrm, itin, + IndexModePre, StFrm, iir, opc, "\t$Rt, $addr!", "$addr.base = $Rn_wb", []> { bits<17> addr; let Inst{25} = 1; @@ -2474,7 +2478,7 @@ multiclass AI2_stridx { } def _POST_REG : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addr_offset_none:$addr, am2offset_reg:$offset), - IndexModePost, StFrm, itin, + IndexModePost, StFrm, iir, opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { // {12} isAdd @@ -2491,7 +2495,7 @@ multiclass AI2_stridx { def _POST_IMM : AI2ldstidx<0, isByte, 0, (outs GPR:$Rn_wb), (ins GPR:$Rt, addr_offset_none:$addr, am2offset_imm:$offset), - IndexModePost, StFrm, itin, + IndexModePost, StFrm, iii, opc, "\t$Rt, $addr, $offset", "$addr.base = $Rn_wb", []> { // {12} isAdd @@ -2508,8 +2512,10 @@ multiclass AI2_stridx { } let mayStore = 1, neverHasSideEffects = 1 in { -defm STR : AI2_stridx<0, "str", IIC_iStore_ru>; -defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_ru>; +// FIXME: for STR_PRE_REG etc. the itinerary should be either IIC_iStore_ru or +// IIC_iStore_siu depending on whether the offset register is shifted.
+defm STR : AI2_stridx<0, "str", IIC_iStore_iu, IIC_iStore_ru>; +defm STRB : AI2_stridx<1, "strb", IIC_iStore_bh_iu, IIC_iStore_bh_ru>; } def : ARMPat<(post_store GPR:$Rt, addr_offset_none:$addr, -- cgit v1.1 From 018262768f8df65f47ab43d8c0731c68d0a5b172 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 4 Nov 2011 06:59:49 +0000 Subject: Add intrinsics for X86 vcvtps2ph and vcvtph2ps instructions git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143683 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 53 +++++++++++++++++++++++-------------------- 1 file changed, 29 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8f7e27b..d2291a2 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7365,29 +7365,34 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, //===----------------------------------------------------------------------===// // Half precision conversion instructions // +multiclass f16c_ph2ps { let Predicates = [HasAVX, HasF16C] in { - def VCVTPH2PSrm : I<0x13, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; - def VCVTPH2PSrr : I<0x13, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; - def VCVTPH2PSYrm : I<0x13, MRMSrcMem, (outs VR256:$dst), (ins f128mem:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; - def VCVTPH2PSYrr : I<0x13, MRMSrcReg, (outs VR256:$dst), (ins VR128:$src), - "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; - def VCVTPS2PHmr : Ii8<0x1D, MRMDestMem, (outs f64mem:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - TA, OpSize, VEX; - def VCVTPS2PHrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), - (ins VR128:$src1, i32i8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", 
[]>, - TA, OpSize, VEX; - def VCVTPS2PHYmr : Ii8<0x1D, MRMDestMem, (outs f128mem:$dst), - (ins VR256:$src1, i32i8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - TA, OpSize, VEX; - def VCVTPS2PHYrr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), - (ins VR256:$src1, i32i8imm:$src2), - "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, - TA, OpSize, VEX; + def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", + [(set RC:$dst, (Int VR128:$src))]>, + T8, OpSize, VEX; + let neverHasSideEffects = 1, mayLoad = 1 in + def rm : I<0x13, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), + "vcvtph2ps\t{$src, $dst|$dst, $src}", []>, T8, OpSize, VEX; } +} + +multiclass f16c_ps2ph { +let Predicates = [HasAVX, HasF16C] in { + def rr : Ii8<0x1D, MRMDestReg, (outs VR128:$dst), + (ins RC:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, (Int RC:$src1, imm:$src2))]>, + TA, OpSize, VEX; + let neverHasSideEffects = 1, mayLoad = 1 in + def mr : Ii8<0x1D, MRMDestMem, (outs x86memop:$dst), + (ins RC:$src1, i32i8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, + TA, OpSize, VEX; +} +} + +defm VCVTPH2PS : f16c_ph2ps; +defm VCVTPH2PSY : f16c_ph2ps; +defm VCVTPS2PH : f16c_ps2ph; +defm VCVTPS2PHY : f16c_ps2ph; -- cgit v1.1 From efd2d5e1c48e9e530d2323123d28120f50dfed76 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 4 Nov 2011 13:52:17 +0000 Subject: Simplify code. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143695 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/Archive.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index a1bd8b6..8bdab16 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -242,10 +242,9 @@ error_code Archive::Symbol::getMember(child_iterator &Result) const { Archive::Symbol Archive::Symbol::getNext() const { Symbol t(*this); - const char *buf = Parent->SymbolTable->getBuffer()->getBufferStart(); - buf += t.StringIndex; - while (*buf++); // Go to one past next null. - t.StringIndex = buf - Parent->SymbolTable->getBuffer()->getBufferStart(); + // Go to one past next null. + t.StringIndex = + Parent->SymbolTable->getBuffer()->getBuffer().find('\0', t.StringIndex) + 1; ++t.SymbolIndex; return t; } -- cgit v1.1 From 5b7cc334c8803284df7cf3aaee378b78f775b6ef Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 4 Nov 2011 17:29:35 +0000 Subject: Add missing argument for atomic instructions in c++ backend. PR11268, part 2. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143712 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CppBackend/CPPBackend.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp index 17ca23a..394ea2b 100644 --- a/lib/Target/CppBackend/CPPBackend.cpp +++ b/lib/Target/CppBackend/CPPBackend.cpp @@ -1492,7 +1492,7 @@ void CppWriter::printInstruction(const Instruction *I, StringRef CrossThread = ConvertAtomicSynchScope(fi->getSynchScope()); Out << "FenceInst* " << iName << " = new FenceInst(mod->getContext(), " - << Ordering << ", " << CrossThread + << Ordering << ", " << CrossThread << ", " << bbname << ");"; break; } @@ -1503,7 +1503,7 @@ void CppWriter::printInstruction(const Instruction *I, Out << "AtomicCmpXchgInst* " << iName << " = new AtomicCmpXchgInst(" << opNames[0] << ", " << opNames[1] << ", " << opNames[2] << ", " - << Ordering << ", " << CrossThread + << Ordering << ", " << CrossThread << ", " << bbname << ");"; nl(Out) << iName << "->setName(\""; printEscapedString(cxi->getName()); @@ -1533,7 +1533,7 @@ void CppWriter::printInstruction(const Instruction *I, << " = new AtomicRMWInst(" << Operation << ", " << opNames[0] << ", " << opNames[1] << ", " - << Ordering << ", " << CrossThread + << Ordering << ", " << CrossThread << ", " << bbname << ");"; nl(Out) << iName << "->setName(\""; printEscapedString(rmwi->getName()); -- cgit v1.1 From 71d050315704c23b235594d3fad5268c12d825e3 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Fri, 4 Nov 2011 18:32:42 +0000 Subject: Teach instsimplify to simplify calls to undef. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143719 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index c2ddc6d..c141632 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -2474,6 +2474,14 @@ Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, return ::SimplifyCmpInst(Predicate, LHS, RHS, TD, DT, RecursionLimit); } +static Value *SimplifyCallInst(CallInst *CI) { + // call undef -> undef + if (isa(CI->getCalledValue())) + return UndefValue::get(CI->getType()); + + return 0; +} + /// SimplifyInstruction - See if we can compute a simplified version of this /// instruction. If not, this returns null. Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, @@ -2569,6 +2577,9 @@ Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD, case Instruction::PHI: Result = SimplifyPHINode(cast(I), DT); break; + case Instruction::Call: + Result = SimplifyCallInst(cast(I)); + break; } /// If called on unreachable code, the above logic may report that the -- cgit v1.1 From b0527283682c5f44fae24ae0a7e2e794f019fade Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Fri, 4 Nov 2011 19:00:29 +0000 Subject: Emit declarations before definitions if they are available. This causes DW_AT_specification to point back in the file in the included testcase. Fixes PR11300. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143726 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 0f4ea05..b022c43 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -194,11 +194,7 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP(SPNode); DISubprogram SPDecl = SP.getFunctionDeclaration(); - if (SPDecl.isSubprogram()) - // Refer function declaration directly. - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPCU->getOrCreateSubprogramDIE(SPDecl)); - else { + if (!SPDecl.isSubprogram()) { // There is not any need to generate specification DIE for a function // defined at compile unit level. If a function is defined inside another // function then gdb prefers the definition at top level and but does not @@ -512,14 +508,31 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { /// construct SubprogramDIE - Construct subprogram DIE. void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { + CompileUnit *&CURef = SPMap[N]; + if (CURef) + return; + CURef = TheCU; + DISubprogram SP(N); if (!SP.isDefinition()) // This is a method declaration which will be handled while constructing // class type. return; + DISubprogram SPDecl = SP.getFunctionDeclaration(); + DIE *DeclDie = NULL; + if (SPDecl.isSubprogram()) { + DeclDie = TheCU->getOrCreateSubprogramDIE(SPDecl); + } + DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP); + if (DeclDie) { + // Refer function declaration directly. + TheCU->addDIEEntry(SubprogramDie, dwarf::DW_AT_specification, + dwarf::DW_FORM_ref4, DeclDie); + } + // Add to map. 
TheCU->insertDIE(N, SubprogramDie); @@ -529,7 +542,6 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, // Expose as global. TheCU->addGlobal(SP.getName(), SubprogramDie); - SPMap[N] = TheCU; return; } -- cgit v1.1 From b0d9ce567f5aee3af94c290d7cd52b1582c27b4f Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 4 Nov 2011 19:04:23 +0000 Subject: build/cmake: Use tblgen macro directly instead of llvm_tablegen, which just added a layer of indirection with no value (not even conciseness). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143727 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/CMakeLists.txt | 26 +++++++++++++------------- lib/Target/CellSPU/CMakeLists.txt | 14 +++++++------- lib/Target/MBlaze/CMakeLists.txt | 20 ++++++++++---------- lib/Target/MSP430/CMakeLists.txt | 12 ++++++------ lib/Target/Mips/CMakeLists.txt | 14 +++++++------- lib/Target/PTX/CMakeLists.txt | 10 +++++----- lib/Target/PowerPC/CMakeLists.txt | 16 ++++++++-------- lib/Target/Sparc/CMakeLists.txt | 12 ++++++------ lib/Target/X86/CMakeLists.txt | 22 +++++++++++----------- lib/Target/XCore/CMakeLists.txt | 12 ++++++------ 10 files changed, 79 insertions(+), 79 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt index d6a5c57..baa55f2 100644 --- a/lib/Target/ARM/CMakeLists.txt +++ b/lib/Target/ARM/CMakeLists.txt @@ -1,18 +1,18 @@ set(LLVM_TARGET_DEFINITIONS ARM.td) -llvm_tablegen(ARMGenRegisterInfo.inc -gen-register-info) -llvm_tablegen(ARMGenInstrInfo.inc -gen-instr-info) -llvm_tablegen(ARMGenCodeEmitter.inc -gen-emitter) -llvm_tablegen(ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter) -llvm_tablegen(ARMGenMCPseudoLowering.inc -gen-pseudo-lowering) -llvm_tablegen(ARMGenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(ARMGenAsmMatcher.inc -gen-asm-matcher) -llvm_tablegen(ARMGenDAGISel.inc -gen-dag-isel) -llvm_tablegen(ARMGenFastISel.inc -gen-fast-isel) -llvm_tablegen(ARMGenCallingConv.inc -gen-callingconv) 
-llvm_tablegen(ARMGenSubtargetInfo.inc -gen-subtarget) -llvm_tablegen(ARMGenEDInfo.inc -gen-enhanced-disassembly-info) -llvm_tablegen(ARMGenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM ARMGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM ARMGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM ARMGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM ARMGenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM ARMGenMCPseudoLowering.inc -gen-pseudo-lowering) +tablegen(LLVM ARMGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM ARMGenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM ARMGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM ARMGenFastISel.inc -gen-fast-isel) +tablegen(LLVM ARMGenCallingConv.inc -gen-callingconv) +tablegen(LLVM ARMGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM ARMGenEDInfo.inc -gen-enhanced-disassembly-info) +tablegen(LLVM ARMGenDisassemblerTables.inc -gen-disassembler) add_public_tablegen_target(ARMCommonTableGen) add_llvm_target(ARMCodeGen diff --git a/lib/Target/CellSPU/CMakeLists.txt b/lib/Target/CellSPU/CMakeLists.txt index 158fb3e..b442a5c 100644 --- a/lib/Target/CellSPU/CMakeLists.txt +++ b/lib/Target/CellSPU/CMakeLists.txt @@ -1,12 +1,12 @@ set(LLVM_TARGET_DEFINITIONS SPU.td) -llvm_tablegen(SPUGenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(SPUGenCodeEmitter.inc -gen-emitter) -llvm_tablegen(SPUGenRegisterInfo.inc -gen-register-info) -llvm_tablegen(SPUGenInstrInfo.inc -gen-instr-info) -llvm_tablegen(SPUGenDAGISel.inc -gen-dag-isel) -llvm_tablegen(SPUGenSubtargetInfo.inc -gen-subtarget) -llvm_tablegen(SPUGenCallingConv.inc -gen-callingconv) +tablegen(LLVM SPUGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM SPUGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM SPUGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM SPUGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM SPUGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM SPUGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM SPUGenCallingConv.inc -gen-callingconv) 
add_public_tablegen_target(CellSPUCommonTableGen) add_llvm_target(CellSPUCodeGen diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt index 47b0db2..d3f1383 100644 --- a/lib/Target/MBlaze/CMakeLists.txt +++ b/lib/Target/MBlaze/CMakeLists.txt @@ -1,15 +1,15 @@ set(LLVM_TARGET_DEFINITIONS MBlaze.td) -llvm_tablegen(MBlazeGenRegisterInfo.inc -gen-register-info) -llvm_tablegen(MBlazeGenInstrInfo.inc -gen-instr-info) -llvm_tablegen(MBlazeGenCodeEmitter.inc -gen-emitter) -llvm_tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(MBlazeGenAsmMatcher.inc -gen-asm-matcher) -llvm_tablegen(MBlazeGenDAGISel.inc -gen-dag-isel) -llvm_tablegen(MBlazeGenCallingConv.inc -gen-callingconv) -llvm_tablegen(MBlazeGenSubtargetInfo.inc -gen-subtarget) -llvm_tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic) -llvm_tablegen(MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info) +tablegen(LLVM MBlazeGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM MBlazeGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM MBlazeGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM MBlazeGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM MBlazeGenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM MBlazeGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM MBlazeGenCallingConv.inc -gen-callingconv) +tablegen(LLVM MBlazeGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM MBlazeGenIntrinsics.inc -gen-tgt-intrinsic) +tablegen(LLVM MBlazeGenEDInfo.inc -gen-enhanced-disassembly-info) add_public_tablegen_target(MBlazeCommonTableGen) add_llvm_target(MBlazeCodeGen diff --git a/lib/Target/MSP430/CMakeLists.txt b/lib/Target/MSP430/CMakeLists.txt index 0952b76..55c2d7d 100644 --- a/lib/Target/MSP430/CMakeLists.txt +++ b/lib/Target/MSP430/CMakeLists.txt @@ -1,11 +1,11 @@ set(LLVM_TARGET_DEFINITIONS MSP430.td) -llvm_tablegen(MSP430GenRegisterInfo.inc -gen-register-info) -llvm_tablegen(MSP430GenInstrInfo.inc -gen-instr-info) -llvm_tablegen(MSP430GenAsmWriter.inc -gen-asm-writer) 
-llvm_tablegen(MSP430GenDAGISel.inc -gen-dag-isel) -llvm_tablegen(MSP430GenCallingConv.inc -gen-callingconv) -llvm_tablegen(MSP430GenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM MSP430GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM MSP430GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM MSP430GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM MSP430GenDAGISel.inc -gen-dag-isel) +tablegen(LLVM MSP430GenCallingConv.inc -gen-callingconv) +tablegen(LLVM MSP430GenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(MSP430CommonTableGen) add_llvm_target(MSP430CodeGen diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index 71391f3..e81ba6f 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -1,12 +1,12 @@ set(LLVM_TARGET_DEFINITIONS Mips.td) -llvm_tablegen(MipsGenRegisterInfo.inc -gen-register-info) -llvm_tablegen(MipsGenInstrInfo.inc -gen-instr-info) -llvm_tablegen(MipsGenCodeEmitter.inc -gen-emitter) -llvm_tablegen(MipsGenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(MipsGenDAGISel.inc -gen-dag-isel) -llvm_tablegen(MipsGenCallingConv.inc -gen-callingconv) -llvm_tablegen(MipsGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM MipsGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) +tablegen(LLVM MipsGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(MipsCommonTableGen) add_llvm_target(MipsCodeGen diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt index 6e87b17..46a458c 100644 --- a/lib/Target/PTX/CMakeLists.txt +++ b/lib/Target/PTX/CMakeLists.txt @@ -1,10 +1,10 @@ set(LLVM_TARGET_DEFINITIONS PTX.td) -llvm_tablegen(PTXGenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(PTXGenDAGISel.inc -gen-dag-isel) 
-llvm_tablegen(PTXGenInstrInfo.inc -gen-instr-info) -llvm_tablegen(PTXGenRegisterInfo.inc -gen-register-info) -llvm_tablegen(PTXGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM PTXGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM PTXGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM PTXGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM PTXGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM PTXGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(PTXCommonTableGen) add_llvm_target(PTXCodeGen diff --git a/lib/Target/PowerPC/CMakeLists.txt b/lib/Target/PowerPC/CMakeLists.txt index 73b4aba..05c1ffd 100644 --- a/lib/Target/PowerPC/CMakeLists.txt +++ b/lib/Target/PowerPC/CMakeLists.txt @@ -1,13 +1,13 @@ set(LLVM_TARGET_DEFINITIONS PPC.td) -llvm_tablegen(PPCGenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(PPCGenCodeEmitter.inc -gen-emitter) -llvm_tablegen(PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) -llvm_tablegen(PPCGenRegisterInfo.inc -gen-register-info) -llvm_tablegen(PPCGenInstrInfo.inc -gen-instr-info) -llvm_tablegen(PPCGenDAGISel.inc -gen-dag-isel) -llvm_tablegen(PPCGenCallingConv.inc -gen-callingconv) -llvm_tablegen(PPCGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM PPCGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM PPCGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM PPCGenMCCodeEmitter.inc -gen-emitter -mc-emitter) +tablegen(LLVM PPCGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM PPCGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM PPCGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM PPCGenCallingConv.inc -gen-callingconv) +tablegen(LLVM PPCGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(PowerPCCommonTableGen) add_llvm_target(PowerPCCodeGen diff --git a/lib/Target/Sparc/CMakeLists.txt b/lib/Target/Sparc/CMakeLists.txt index 5b87849..9687951 100644 --- a/lib/Target/Sparc/CMakeLists.txt +++ b/lib/Target/Sparc/CMakeLists.txt @@ -1,11 +1,11 @@ set(LLVM_TARGET_DEFINITIONS Sparc.td) -llvm_tablegen(SparcGenRegisterInfo.inc 
-gen-register-info) -llvm_tablegen(SparcGenInstrInfo.inc -gen-instr-info) -llvm_tablegen(SparcGenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(SparcGenDAGISel.inc -gen-dag-isel) -llvm_tablegen(SparcGenSubtargetInfo.inc -gen-subtarget) -llvm_tablegen(SparcGenCallingConv.inc -gen-callingconv) +tablegen(LLVM SparcGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM SparcGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM SparcGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM SparcGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM SparcGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM SparcGenCallingConv.inc -gen-callingconv) add_public_tablegen_target(SparcCommonTableGen) add_llvm_target(SparcCodeGen diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index 351e767..b590199 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -1,16 +1,16 @@ set(LLVM_TARGET_DEFINITIONS X86.td) -llvm_tablegen(X86GenRegisterInfo.inc -gen-register-info) -llvm_tablegen(X86GenDisassemblerTables.inc -gen-disassembler) -llvm_tablegen(X86GenInstrInfo.inc -gen-instr-info) -llvm_tablegen(X86GenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) -llvm_tablegen(X86GenAsmMatcher.inc -gen-asm-matcher) -llvm_tablegen(X86GenDAGISel.inc -gen-dag-isel) -llvm_tablegen(X86GenFastISel.inc -gen-fast-isel) -llvm_tablegen(X86GenCallingConv.inc -gen-callingconv) -llvm_tablegen(X86GenSubtargetInfo.inc -gen-subtarget) -llvm_tablegen(X86GenEDInfo.inc -gen-enhanced-disassembly-info) +tablegen(LLVM X86GenRegisterInfo.inc -gen-register-info) +tablegen(LLVM X86GenDisassemblerTables.inc -gen-disassembler) +tablegen(LLVM X86GenInstrInfo.inc -gen-instr-info) +tablegen(LLVM X86GenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM X86GenAsmWriter1.inc -gen-asm-writer -asmwriternum=1) +tablegen(LLVM X86GenAsmMatcher.inc -gen-asm-matcher) +tablegen(LLVM X86GenDAGISel.inc -gen-dag-isel) +tablegen(LLVM X86GenFastISel.inc -gen-fast-isel) 
+tablegen(LLVM X86GenCallingConv.inc -gen-callingconv) +tablegen(LLVM X86GenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM X86GenEDInfo.inc -gen-enhanced-disassembly-info) add_public_tablegen_target(X86CommonTableGen) set(sources diff --git a/lib/Target/XCore/CMakeLists.txt b/lib/Target/XCore/CMakeLists.txt index 3dc51e1..d91da8c 100644 --- a/lib/Target/XCore/CMakeLists.txt +++ b/lib/Target/XCore/CMakeLists.txt @@ -1,11 +1,11 @@ set(LLVM_TARGET_DEFINITIONS XCore.td) -llvm_tablegen(XCoreGenRegisterInfo.inc -gen-register-info) -llvm_tablegen(XCoreGenInstrInfo.inc -gen-instr-info) -llvm_tablegen(XCoreGenAsmWriter.inc -gen-asm-writer) -llvm_tablegen(XCoreGenDAGISel.inc -gen-dag-isel) -llvm_tablegen(XCoreGenCallingConv.inc -gen-callingconv) -llvm_tablegen(XCoreGenSubtargetInfo.inc -gen-subtarget) +tablegen(LLVM XCoreGenRegisterInfo.inc -gen-register-info) +tablegen(LLVM XCoreGenInstrInfo.inc -gen-instr-info) +tablegen(LLVM XCoreGenAsmWriter.inc -gen-asm-writer) +tablegen(LLVM XCoreGenDAGISel.inc -gen-dag-isel) +tablegen(LLVM XCoreGenCallingConv.inc -gen-callingconv) +tablegen(LLVM XCoreGenSubtargetInfo.inc -gen-subtarget) add_public_tablegen_target(XCoreCommonTableGen) add_llvm_target(XCoreCodeGen -- cgit v1.1 From 7b1dd9aaf10e5e7db2c29c7e4cf187b0dff6304e Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 4 Nov 2011 19:04:42 +0000 Subject: build/cmake: Coalesce the configuration time header include fragment generation for target definitions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143731 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CMakeLists.txt | 37 +------------------------------------ 1 file changed, 1 insertion(+), 36 deletions(-) (limited to 'lib') diff --git a/lib/Target/CMakeLists.txt b/lib/Target/CMakeLists.txt index 030f808..60e2189 100644 --- a/lib/Target/CMakeLists.txt +++ b/lib/Target/CMakeLists.txt @@ -19,42 +19,7 @@ add_llvm_library_dependencies(LLVMTarget LLVMSupport ) -set(LLVM_ENUM_ASM_PRINTERS "") -set(LLVM_ENUM_ASM_PARSERS "") -set(LLVM_ENUM_DISASSEMBLERS "") foreach(t ${LLVM_TARGETS_TO_BUILD}) message(STATUS "Targeting ${t}") add_subdirectory(${t}) - set( td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t} ) - file(GLOB asmp_file "${td}/*AsmPrinter.cpp") - if( asmp_file ) - set(LLVM_ENUM_ASM_PRINTERS - "${LLVM_ENUM_ASM_PRINTERS}LLVM_ASM_PRINTER(${t})\n") - endif() - if( EXISTS ${td}/AsmParser/CMakeLists.txt ) - set(LLVM_ENUM_ASM_PARSERS - "${LLVM_ENUM_ASM_PARSERS}LLVM_ASM_PARSER(${t})\n") - endif() - if( EXISTS ${td}/Disassembler/CMakeLists.txt ) - set(LLVM_ENUM_DISASSEMBLERS - "${LLVM_ENUM_DISASSEMBLERS}LLVM_DISASSEMBLER(${t})\n") - endif() -endforeach(t) - -# Produce llvm/Config/AsmPrinters.def -configure_file( - ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmPrinters.def.in - ${LLVM_BINARY_DIR}/include/llvm/Config/AsmPrinters.def - ) - -# Produce llvm/Config/AsmParsers.def -configure_file( - ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/AsmParsers.def.in - ${LLVM_BINARY_DIR}/include/llvm/Config/AsmParsers.def - ) - -# Produce llvm/Config/Disassemblers.def -configure_file( - ${LLVM_MAIN_INCLUDE_DIR}/llvm/Config/Disassemblers.def.in - ${LLVM_BINARY_DIR}/include/llvm/Config/Disassemblers.def - ) +endforeach() -- cgit v1.1 From a0dd4cbc8739ab24535542b58c4c25e15146b44d Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Fri, 4 Nov 2011 22:24:36 +0000 Subject: Add mips ELF relocation types. Patch by Jack Carter! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143738 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/ELFObjectWriter.cpp | 61 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 55 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/MC/ELFObjectWriter.cpp b/lib/MC/ELFObjectWriter.cpp index 3d16de5..eee002a 100644 --- a/lib/MC/ELFObjectWriter.cpp +++ b/lib/MC/ELFObjectWriter.cpp @@ -28,6 +28,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" +#include "../Target/Mips/MCTargetDesc/MipsFixupKinds.h" #include "../Target/X86/MCTargetDesc/X86FixupKinds.h" #include "../Target/ARM/MCTargetDesc/ARMFixupKinds.h" #include "../Target/PowerPC/MCTargetDesc/PPCFixupKinds.h" @@ -277,7 +278,7 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, MCDataFragment *ShndxF, const MCAssembler &Asm, const MCAsmLayout &Layout, - const SectionIndexMapTy &SectionIndexMap) { + const SectionIndexMapTy &SectionIndexMap) { // The string table must be emitted first because we need the index // into the string table for all the symbol names. 
assert(StringTable.size() && "Missing string table"); @@ -306,7 +307,8 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF, Section.getType() == ELF::SHT_SYMTAB_SHNDX) continue; WriteSymbolEntry(SymtabF, ShndxF, 0, ELF::STT_SECTION, 0, 0, - ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section), false); + ELF::STV_DEFAULT, SectionIndexMap.lookup(&Section), + false); LastLocalSymbolIndex++; } @@ -416,7 +418,7 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm, // Offset of the symbol in the section int64_t a = Layout.getSymbolOffset(&SDB); - // Ofeset of the relocation in the section + // Offset of the relocation in the section int64_t b = Layout.getFragmentOffset(Fragment) + Fixup.getOffset(); Value += b - a; } @@ -1273,7 +1275,6 @@ MCObjectWriter *llvm::createELFObjectWriter(MCELFObjectTargetWriter *MOTW, } } - /// START OF SUBCLASSES for ELFObjectWriter //===- ARMELFObjectWriter -------------------------------------------===// @@ -1815,6 +1816,8 @@ unsigned X86ELFObjectWriter::GetRelocType(const MCValue &Target, return Type; } +//===- MipsELFObjectWriter -------------------------------------------===// + MipsELFObjectWriter::MipsELFObjectWriter(MCELFObjectTargetWriter *MOTW, raw_ostream &_OS, bool IsLittleEndian) @@ -1827,6 +1830,52 @@ unsigned MipsELFObjectWriter::GetRelocType(const MCValue &Target, bool IsPCRel, bool IsRelocWithSymbol, int64_t Addend) { - // tbd - return 1; + // determine the type of the relocation + unsigned Type = (unsigned)ELF::R_MIPS_NONE; + unsigned Kind = (unsigned)Fixup.getKind(); + + switch (Kind) { + default: + llvm_unreachable("invalid fixup kind!"); + case FK_Data_4: + Type = ELF::R_MIPS_32; + break; + case Mips::fixup_Mips_GPREL16: + Type = ELF::R_MIPS_GPREL16; + break; + case Mips::fixup_Mips_26: + Type = ELF::R_MIPS_26; + break; + case Mips::fixup_Mips_CALL16: + Type = ELF::R_MIPS_CALL16; + break; + case Mips::fixup_Mips_GOT16: + Type = ELF::R_MIPS_GOT16; + break; + case Mips::fixup_Mips_HI16: + Type = 
ELF::R_MIPS_HI16; + break; + case Mips::fixup_Mips_LO16: + Type = ELF::R_MIPS_LO16; + break; + case Mips::fixup_Mips_TLSGD: + Type = ELF::R_MIPS_TLS_GD; + break; + case Mips::fixup_Mips_GOTTPREL: + Type = ELF::R_MIPS_TLS_GOTTPREL; + break; + case Mips::fixup_Mips_TPREL_HI: + Type = ELF::R_MIPS_TLS_TPREL_HI16; + break; + case Mips::fixup_Mips_TPREL_LO: + Type = ELF::R_MIPS_TLS_TPREL_LO16; + break; + case Mips::fixup_Mips_Branch_PCRel: + case Mips::fixup_Mips_PC16: + Type = ELF::R_MIPS_PC16; + break; + } + + return Type; } + -- cgit v1.1 From 44e895761f289029657a8d066f67f0c9d18693b3 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 4 Nov 2011 22:29:00 +0000 Subject: Enable support for materializing i1, i8, and i16 integers via move immediate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143739 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 0290c9d..e8e8124 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -545,22 +545,27 @@ unsigned ARMFastISel::ARMMaterializeFP(const ConstantFP *CFP, EVT VT) { unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { - // For now 32-bit only. - if (VT != MVT::i32) return false; - - unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + if (VT != MVT::i32 && VT != MVT::i16 && VT != MVT::i8 && VT != MVT::i1) + return false; // If we can do this in a single instruction without a constant pool entry // do so now. const ConstantInt *CI = cast(C); if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) { unsigned Opc = isThumb ? 
ARM::t2MOVi16 : ARM::MOVi16; + unsigned ImmReg = createResultReg(TLI.getRegClassFor(VT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(Opc), DestReg) + TII.get(Opc), ImmReg) .addImm(CI->getSExtValue())); - return DestReg; + return ImmReg; } + // For now 32-bit only. + if (VT != MVT::i32) + return false; + + unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); + // MachineConstantPool wants an explicit alignment. unsigned Align = TD.getPrefTypeAlignment(C->getType()); if (Align == 0) { -- cgit v1.1 From a4e07270bccb3cb6774af975300628e072bf03f1 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 4 Nov 2011 23:09:49 +0000 Subject: When materializing an i32, SExt vs ZExt doesn't matter when we're trying to fit in a 16-bit immediate. However, for the shorter non-legal types (i.e., i1, i8, i16) we should not sign-extend. This prevents us from materializing things such as 'true' (i.e., i1 1). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143743 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index e8e8124..a315c30 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -551,7 +551,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { // If we can do this in a single instruction without a constant pool entry // do so now. const ConstantInt *CI = cast(C); - if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getSExtValue())) { + if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) { unsigned Opc = isThumb ? 
ARM::t2MOVi16 : ARM::MOVi16; unsigned ImmReg = createResultReg(TLI.getRegClassFor(VT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, -- cgit v1.1 From 451afbc6a22a3a662eea7e86088c65c36e84949f Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 4 Nov 2011 23:45:39 +0000 Subject: Cannot create a result register for non-legal types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143749 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index a315c30..517f73f 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -552,8 +552,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { // do so now. const ConstantInt *CI = cast(C); if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) { + EVT SrcVT = MVT::i32; unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16; - unsigned ImmReg = createResultReg(TLI.getRegClassFor(VT)); + unsigned ImmReg = createResultReg(TLI.getRegClassFor(SrcVT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg) .addImm(CI->getSExtValue())); -- cgit v1.1 From bd00a934c653fb1666fa7d18267644b4e9d14e5e Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 4 Nov 2011 23:46:11 +0000 Subject: Enhanced vzeroupper insertion pass that avoids inserting vzeroupper where it is unnecessary through local analysis. Patch from Bruno Cardoso Lopes, with some additional changes. I'm going to wait for any review comments and perform some additional testing before turning this on by default. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143750 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86VZeroUpper.cpp | 234 ++++++++++++++++++++++++++++++++++----- 1 file changed, 207 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86VZeroUpper.cpp b/lib/Target/X86/X86VZeroUpper.cpp index 3958494..9bb54a8 100644 --- a/lib/Target/X86/X86VZeroUpper.cpp +++ b/lib/Target/X86/X86VZeroUpper.cpp @@ -14,14 +14,16 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "x86-codegen" +#define DEBUG_TYPE "x86-vzeroupper" #include "X86.h" #include "X86InstrInfo.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/GlobalValue.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" using namespace llvm; @@ -41,6 +43,60 @@ namespace { private: const TargetInstrInfo *TII; // Machine instruction info. MachineBasicBlock *MBB; // Current basic block + + // Any YMM register live-in to this function? + bool FnHasLiveInYmm; + + // BBState - Contains the state of each MBB: unknown, clean, dirty + SmallVector BBState; + + // BBSolved - Keep track of all MBB which had been already analyzed + // and there is no further processing required. + BitVector BBSolved; + + // Machine Basic Blocks are classified according this pass: + // + // ST_UNKNOWN - The MBB state is unknown, meaning from the entry state + // until the MBB exit there isn't a instruction using YMM to change + // the state to dirty, or one of the incoming predecessors is unknown + // and there's not a dirty predecessor between them. + // + // ST_CLEAN - No YMM usage in the end of the MBB. 
A MBB could have + // instructions using YMM and be marked ST_CLEAN, as long as the state + // is cleaned by a vzeroupper before any call. + // + // ST_DIRTY - Any MBB ending with a YMM usage not cleaned up by a + // vzeroupper instruction. + // + // ST_INIT - Placeholder for an empty state set + // + enum { + ST_UNKNOWN = 0, + ST_CLEAN = 1, + ST_DIRTY = 2, + ST_INIT = 3 + }; + + // computeState - Given two states, compute the resulting state, in + // the following way + // + // 1) One dirty state yields another dirty state + // 2) All states must be clean for the result to be clean + // 3) If none above and one unknown, the result state is also unknown + // + unsigned computeState(unsigned PrevState, unsigned CurState) { + if (PrevState == ST_INIT) + return CurState; + + if (PrevState == ST_DIRTY || CurState == ST_DIRTY) + return ST_DIRTY; + + if (PrevState == ST_CLEAN && CurState == ST_CLEAN) + return ST_CLEAN; + + return ST_UNKNOWN; + } + }; char VZeroUpperInserter::ID = 0; } @@ -49,37 +105,82 @@ FunctionPass *llvm::createX86IssueVZeroUpperPass() { return new VZeroUpperInserter(); } +static bool isYmmReg(unsigned Reg) { + if (Reg >= X86::YMM0 && Reg <= X86::YMM15) + return true; + + return false; +} + +static bool checkFnHasLiveInYmm(MachineRegisterInfo &MRI) { + for (MachineRegisterInfo::livein_iterator I = MRI.livein_begin(), + E = MRI.livein_end(); I != E; ++I) + if (isYmmReg(I->first)) + return true; + + return false; +} + +static bool hasYmmReg(MachineInstr *MI) { + for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isDebug()) + continue; + if (isYmmReg(MO.getReg())) + return true; + } + return false; +} + /// runOnMachineFunction - Loop over all of the basic blocks, inserting /// vzero upper instructions before function calls. 
bool VZeroUpperInserter::runOnMachineFunction(MachineFunction &MF) { TII = MF.getTarget().getInstrInfo(); - bool Changed = false; - - // Process any unreachable blocks in arbitrary order now. - for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) - Changed |= processBasicBlock(MF, *BB); + MachineRegisterInfo &MRI = MF.getRegInfo(); + bool EverMadeChange = false; - return Changed; -} + // Fast check: if the function doesn't use any ymm registers, we don't need + // to insert any VZEROUPPER instructions. This is constant-time, so it is + // cheap in the common case of no ymm use. + bool YMMUsed = false; + TargetRegisterClass *RC = X86::VR256RegisterClass; + for (TargetRegisterClass::iterator i = RC->begin(), e = RC->end(); + i != e; i++) { + if (MRI.isPhysRegUsed(*i)) { + YMMUsed = true; + break; + } + } + if (!YMMUsed) + return EverMadeChange; -static bool isCallToModuleFn(const MachineInstr *MI) { - assert(MI->getDesc().isCall() && "Isn't a call instruction"); + // Pre-compute the existence of any live-in YMM registers to this function + FnHasLiveInYmm = checkFnHasLiveInYmm(MRI); - for (int i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); + assert(BBState.empty()); + BBState.resize(MF.getNumBlockIDs(), 0); + BBSolved.resize(MF.getNumBlockIDs(), 0); - if (!MO.isGlobal()) - continue; + // Each BB state depends on all predecessors, loop over until everything + // converges. (Once we converge, we can implicitly mark everything that is + // still ST_UNKNOWN as ST_CLEAN.) + while (1) { + bool MadeChange = false; - const GlobalValue *GV = MO.getGlobal(); - GlobalValue::LinkageTypes LT = GV->getLinkage(); - if (GV->isInternalLinkage(LT) || GV->isPrivateLinkage(LT) || - (GV->isExternalLinkage(LT) && !GV->isDeclaration())) - return true; + // Process all basic blocks. 
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + MadeChange |= processBasicBlock(MF, *I); - return false; + // If this iteration over the code changed anything, keep iterating. + if (!MadeChange) break; + EverMadeChange = true; } - return false; + + BBState.clear(); + BBSolved.clear(); + return EverMadeChange; } /// processBasicBlock - Loop over all of the instructions in the basic block, @@ -87,19 +188,98 @@ static bool isCallToModuleFn(const MachineInstr *MI) { bool VZeroUpperInserter::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) { bool Changed = false; + unsigned BBNum = BB.getNumber(); MBB = &BB; + // Don't process already solved BBs + if (BBSolved[BBNum]) + return false; // No changes + + // Check the state of all predecessors + unsigned EntryState = ST_INIT; + for (MachineBasicBlock::const_pred_iterator PI = BB.pred_begin(), + PE = BB.pred_end(); PI != PE; ++PI) { + EntryState = computeState(EntryState, BBState[(*PI)->getNumber()]); + if (EntryState == ST_DIRTY) + break; + } + + + // The entry MBB for the function may set the inital state to dirty if + // the function receives any YMM incoming arguments + if (MBB == MF.begin()) { + EntryState = ST_CLEAN; + if (FnHasLiveInYmm) + EntryState = ST_DIRTY; + } + + // The current state is initialized according to the predecessors + unsigned CurState = EntryState; + bool BBHasCall = false; + for (MachineBasicBlock::iterator I = BB.begin(); I != BB.end(); ++I) { MachineInstr *MI = I; DebugLoc dl = I->getDebugLoc(); + bool isControlFlow = MI->getDesc().isCall() || MI->getDesc().isReturn(); + + // Shortcut: don't need to check regular instructions in dirty state. + if (!isControlFlow && CurState == ST_DIRTY) + continue; + + if (hasYmmReg(MI)) { + // We found a ymm-using instruction; this could be an AVX instruction, + // or it could be control flow. 
+ CurState = ST_DIRTY; + continue; + } - // Insert a vzeroupper instruction before each control transfer - // to functions outside this module - if (MI->getDesc().isCall() && !isCallToModuleFn(MI)) { - BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER)); - ++NumVZU; + // Check for control-flow out of the current function (which might + // indirectly execute SSE instructions). + if (!isControlFlow) + continue; + + BBHasCall = true; + + // The VZEROUPPER instruction resets the upper 128 bits of all Intel AVX + // registers. This instruction has zero latency. In addition, the processor + // changes back to Clean state, after which execution of Intel SSE + // instructions or Intel AVX instructions has no transition penalty. Add + // the VZEROUPPER instruction before any function call/return that might + // execute SSE code. + // FIXME: In some cases, we may want to move the VZEROUPPER into a + // predecessor block. + if (CurState == ST_DIRTY) { + // Only insert the VZEROUPPER in case the entry state isn't unknown. + // When unknown, only compute the information within the block to have + // it available in the exit if possible, but don't change the block. + if (EntryState != ST_UNKNOWN) { + BuildMI(*MBB, I, dl, TII->get(X86::VZEROUPPER)); + ++NumVZU; + } + + // After the inserted VZEROUPPER the state becomes clean again, but + // other YMM may appear before other subsequent calls or even before + // the end of the BB. + CurState = ST_CLEAN; } } + DEBUG(dbgs() << "MBB #" << BBNum + << ", current state: " << CurState << '\n'); + + // A BB can only be considered solved when we both have done all the + // necessary transformations, and have computed the exit state. This happens + // in two cases: + // 1) We know the entry state: this immediately implies the exit state and + // all the necessary transformations. + // 2) There are no calls, and and a non-call instruction marks this block: + // no transformations are necessary, and we know the exit state. 
+ if (EntryState != ST_UNKNOWN || (!BBHasCall && CurState != ST_UNKNOWN)) + BBSolved[BBNum] = true; + + if (CurState != BBState[BBNum]) + Changed = true; + + BBState[BBNum] = CurState; return Changed; } -- cgit v1.1 From 9c58aa743d5232a3eb78c0ee329281709a50b2f6 Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Fri, 4 Nov 2011 23:49:14 +0000 Subject: Added missing &. Fixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143753 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/IfConversion.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index ce7ed29..d888939 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -1319,7 +1319,7 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // fold the tail block in as well. Otherwise, unless it falls through to the // tail, add a unconditional branch to it. if (TailBB) { - BBInfo TailBBI = BBAnalysis[TailBB->getNumber()]; + BBInfo &TailBBI = BBAnalysis[TailBB->getNumber()]; bool CanMergeTail = !TailBBI.HasFallThrough; // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; // check if there are any other predecessors besides those. -- cgit v1.1 From 38f5c0da6d3097ddd65aaab15ce22c1b95d52902 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Sat, 5 Nov 2011 00:02:56 +0000 Subject: Allow i1 to be promoted to i32 for ARM APCS calling convention. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143755 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMCallingConv.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index d2981c0..5539d28 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -25,7 +25,7 @@ def CC_ARM_APCS : CallingConv<[ // Handles byval parameters. 
CCIfByVal>, - CCIfType<[i8, i16], CCPromoteToType>, + CCIfType<[i1, i8, i16], CCPromoteToType>, // Handle all vector types as either f64 or v2f64. CCIfType<[v1i64, v2i32, v4i16, v8i8, v2f32], CCBitConvertToType>, -- cgit v1.1 From 41a964931a0e0943ceef28b0c691843bf8ca87b7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 5 Nov 2011 08:57:40 +0000 Subject: Add more PRI.64 macros for MSVC and use them throughout the codebase. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143799 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 3 ++- lib/CodeGen/AsmPrinter/DIE.cpp | 4 ++-- lib/DebugInfo/DWARFDebugArangeSet.cpp | 5 +++-- lib/DebugInfo/DWARFDebugAranges.cpp | 7 ++++--- lib/DebugInfo/DWARFDebugLine.cpp | 2 +- lib/DebugInfo/DWARFFormValue.cpp | 6 +++--- lib/Support/Timer.cpp | 2 +- lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 2 +- 8 files changed, 17 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 0c0fdc1..ec4d30c 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1759,7 +1759,8 @@ static void EmitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, case 4: case 8: if (AP.isVerbose()) - AP.OutStreamer.GetCommentOS() << format("0x%llx\n", CI->getZExtValue()); + AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", + CI->getZExtValue()); AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); return; default: diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index a2dc873..4f2d3e5 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -230,8 +230,8 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { #ifndef NDEBUG void DIEInteger::print(raw_ostream &O) { - O << "Int: " << (int64_t)Integer - << format(" 0x%llx", (unsigned long long)Integer); + O << "Int: " << (int64_t)Integer << " 
0x"; + O.write_hex(Integer); } #endif diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp index b0c0354..2efbfd1 100644 --- a/lib/DebugInfo/DWARFDebugArangeSet.cpp +++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp @@ -122,8 +122,9 @@ void DWARFDebugArangeSet::dump(raw_ostream &OS) const { const uint32_t hex_width = Header.AddrSize * 2; for (DescriptorConstIter pos = ArangeDescriptors.begin(), end = ArangeDescriptors.end(); pos != end; ++pos) - OS << format("[0x%*.*llx -", hex_width, hex_width, pos->Address) - << format(" 0x%*.*llx)\n", hex_width, hex_width, pos->getEndAddress()); + OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address) + << format(" 0x%*.*" PRIx64 ")\n", + hex_width, hex_width, pos->getEndAddress()); } diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp index 576d37d..1788145 100644 --- a/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARFDebugAranges.cpp @@ -100,13 +100,14 @@ void DWARFDebugAranges::dump(raw_ostream &OS) const { const uint32_t num_ranges = getNumRanges(); for (uint32_t i = 0; i < num_ranges; ++i) { const Range &range = Aranges[i]; - OS << format("0x%8.8x: [0x%8.8llx - 0x%8.8llx)\n", range.Offset, - (uint64_t)range.LoPC, (uint64_t)range.HiPC()); + OS << format("0x%8.8x: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n", + range.Offset, (uint64_t)range.LoPC, (uint64_t)range.HiPC()); } } void DWARFDebugAranges::Range::dump(raw_ostream &OS) const { - OS << format("{0x%8.8x}: [0x%8.8llx - 0x%8.8llx)\n", Offset, LoPC, HiPC()); + OS << format("{0x%8.8x}: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n", + Offset, LoPC, HiPC()); } void DWARFDebugAranges::appendRange(uint32_t offset, uint64_t low_pc, diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index fe1ef78..0248ad3 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -68,7 +68,7 @@ void DWARFDebugLine::Row::reset(bool default_is_stmt) { } 
void DWARFDebugLine::Row::dump(raw_ostream &OS) const { - OS << format("0x%16.16llx %6u %6u", Address, Line, Column) + OS << format("0x%16.16" PRIx64 " %6u %6u", Address, Line, Column) << format(" %6u %3u ", File, Isa) << (IsStmt ? " is_stmt" : "") << (BasicBlock ? " basic_block" : "") diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp index 705efe5..86e28bd 100644 --- a/lib/DebugInfo/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARFFormValue.cpp @@ -280,7 +280,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { case DW_FORM_block4: if (uvalue > 0) { switch (Form) { - case DW_FORM_block: OS << format("<0x%llx> ", uvalue); break; + case DW_FORM_block: OS << format("<0x%" PRIx64 "> ", uvalue); break; case DW_FORM_block1: OS << format("<0x%2.2x> ", (uint8_t)uvalue); break; case DW_FORM_block2: OS << format("<0x%4.4x> ", (uint16_t)uvalue); break; case DW_FORM_block4: OS << format("<0x%8.8x> ", (uint32_t)uvalue); break; @@ -330,11 +330,11 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { break; case DW_FORM_ref8: cu_relative_offset = true; - OS << format("cu + 0x%8.8llx", uvalue); + OS << format("cu + 0x%8.8" PRIx64, uvalue); break; case DW_FORM_ref_udata: cu_relative_offset = true; - OS << format("cu + 0x%llx", uvalue); + OS << format("cu + 0x%" PRIx64, uvalue); break; // All DW_FORM_indirect attributes should be resolved prior to calling diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp index 03ac963..598e8ad 100644 --- a/lib/Support/Timer.cpp +++ b/lib/Support/Timer.cpp @@ -184,7 +184,7 @@ void TimeRecord::print(const TimeRecord &Total, raw_ostream &OS) const { OS << " "; if (Total.getMemUsed()) - OS << format("%9lld ", (long long)getMemUsed()); + OS << format("%9" PRId64 " ", (int64_t)getMemUsed()); } diff --git a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp index 029d491..8f26d9f 100644 --- 
a/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp +++ b/lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp @@ -97,7 +97,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << '$' << (int64_t)Op.getImm(); if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256)) - *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm()); + *CommentStream << format("imm = 0x%" PRIX64 "\n", (uint64_t)Op.getImm()); } else { assert(Op.isExpr() && "unknown operand kind in printOperand"); -- cgit v1.1 From 336b88dac8054d6ed6cda6d6198b7d4bb026b3e1 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Sat, 5 Nov 2011 10:48:42 +0000 Subject: Do simple cross-block DSE when we encounter a free statement. Fixes PR11240. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143808 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/DeadStoreElimination.cpp | 81 ++++++++++++++++++-------- 1 file changed, 56 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index c0738a9..f114418 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/Debug.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ADT/STLExtras.h" using namespace llvm; STATISTIC(NumFastStores, "Number of stores deleted"); @@ -43,25 +44,26 @@ namespace { struct DSE : public FunctionPass { AliasAnalysis *AA; MemoryDependenceAnalysis *MD; + DominatorTree *DT; static char ID; // Pass identification, replacement for typeid - DSE() : FunctionPass(ID), AA(0), MD(0) { + DSE() : FunctionPass(ID), AA(0), MD(0), DT(0) { initializeDSEPass(*PassRegistry::getPassRegistry()); } virtual bool runOnFunction(Function &F) { AA = &getAnalysis(); MD = &getAnalysis(); - DominatorTree &DT = getAnalysis(); + DT = &getAnalysis(); bool Changed = false; for (Function::iterator I 
= F.begin(), E = F.end(); I != E; ++I) // Only check non-dead blocks. Dead blocks may have strange pointer // cycles that will confuse alias analysis. - if (DT.isReachableFromEntry(I)) + if (DT->isReachableFromEntry(I)) Changed |= runOnBasicBlock(*I); - AA = 0; MD = 0; + AA = 0; MD = 0; DT = 0; return Changed; } @@ -549,37 +551,66 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { return MadeChange; } +/// Find all blocks that will unconditionally lead to the block BB and append +/// them to F. +static void FindUnconditionalPreds(SmallVectorImpl &Blocks, + BasicBlock *BB, DominatorTree *DT) { + for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { + BasicBlock *Pred = *I; + TerminatorInst *PredTI = Pred->getTerminator(); + if (PredTI->getNumSuccessors() != 1) + continue; + + if (DT->isReachableFromEntry(Pred)) + Blocks.push_back(Pred); + } +} + /// HandleFree - Handle frees of entire structures whose dependency is a store /// to a field of that structure. bool DSE::HandleFree(CallInst *F) { bool MadeChange = false; - MemDepResult Dep = MD->getDependency(F); + AliasAnalysis::Location Loc = AliasAnalysis::Location(F->getOperand(0)); + SmallVector Blocks; + Blocks.push_back(F->getParent()); + + while (!Blocks.empty()) { + BasicBlock *BB = Blocks.pop_back_val(); + Instruction *InstPt = BB->getTerminator(); + if (BB == F->getParent()) InstPt = F; + + MemDepResult Dep = MD->getPointerDependencyFrom(Loc, false, InstPt, BB); + while (Dep.isDef() || Dep.isClobber()) { + Instruction *Dependency = Dep.getInst(); + if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency)) + break; - while (Dep.isDef() || Dep.isClobber()) { - Instruction *Dependency = Dep.getInst(); - if (!hasMemoryWrite(Dependency) || !isRemovable(Dependency)) - return MadeChange; + Value *DepPointer = + GetUnderlyingObject(getStoredPointerOperand(Dependency)); - Value *DepPointer = - GetUnderlyingObject(getStoredPointerOperand(Dependency)); + // Check for aliasing. 
+ if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) + break; - // Check for aliasing. - if (!AA->isMustAlias(F->getArgOperand(0), DepPointer)) - return MadeChange; + Instruction *Next = llvm::next(BasicBlock::iterator(Dependency)); - // DCE instructions only used to calculate that store - DeleteDeadInstruction(Dependency, *MD); - ++NumFastStores; - MadeChange = true; + // DCE instructions only used to calculate that store + DeleteDeadInstruction(Dependency, *MD); + ++NumFastStores; + MadeChange = true; - // Inst's old Dependency is now deleted. Compute the next dependency, - // which may also be dead, as in - // s[0] = 0; - // s[1] = 0; // This has just been deleted. - // free(s); - Dep = MD->getDependency(F); - }; + // Inst's old Dependency is now deleted. Compute the next dependency, + // which may also be dead, as in + // s[0] = 0; + // s[1] = 0; // This has just been deleted. + // free(s); + Dep = MD->getPointerDependencyFrom(Loc, false, Next, BB); + } + + if (Dep.isNonLocal()) + FindUnconditionalPreds(Blocks, BB, DT); + } return MadeChange; } -- cgit v1.1 From c25c90897781f116d5dbabab170bafcf65900fa7 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 5 Nov 2011 11:52:44 +0000 Subject: Add an option to pad an uleb128 to MCObjectWriter and remove the uleb128 encoding from the DWARF asm printer. As a side effect we now print dwarf ulebs with .ascii directives. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143809 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 37 ++---------------------------- lib/MC/MCObjectWriter.cpp | 12 ++++++++-- lib/MC/MCStreamer.cpp | 5 ++-- 3 files changed, 15 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 4d6c281..605b775 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -35,23 +35,8 @@ using namespace llvm; void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - - if (MAI->hasLEB128()) { - OutStreamer.EmitSLEB128IntValue(Value); - return; - } - // If we don't have .sleb128, emit as .bytes. - int Sign = Value >> (8 * sizeof(Value) - 1); - bool IsMore; - - do { - unsigned char Byte = static_cast(Value & 0x7f); - Value >>= 7; - IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0; - if (IsMore) Byte |= 0x80; - OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); - } while (IsMore); + OutStreamer.EmitSLEB128IntValue(Value); } /// EmitULEB128 - emit the specified signed leb128 value. @@ -60,25 +45,7 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - // FIXME: Should we add a PadTo option to the streamer? - if (MAI->hasLEB128() && PadTo == 0) { - OutStreamer.EmitULEB128IntValue(Value); - return; - } - - // If we don't have .uleb128 or we want to emit padding, emit as .bytes. 
- do { - unsigned char Byte = static_cast(Value & 0x7f); - Value >>= 7; - if (Value || PadTo != 0) Byte |= 0x80; - OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0); - } while (Value); - - if (PadTo) { - if (PadTo > 1) - OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/); - OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/); - } + OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo); } /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. diff --git a/lib/MC/MCObjectWriter.cpp b/lib/MC/MCObjectWriter.cpp index efe9f68..1888739 100644 --- a/lib/MC/MCObjectWriter.cpp +++ b/lib/MC/MCObjectWriter.cpp @@ -33,14 +33,22 @@ void MCObjectWriter::EncodeSLEB128(int64_t Value, raw_ostream &OS) { } /// Utility function to encode a ULEB128 value. -void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS) { +void MCObjectWriter::EncodeULEB128(uint64_t Value, raw_ostream &OS, + unsigned Padding) { do { uint8_t Byte = Value & 0x7f; Value >>= 7; - if (Value != 0) + if (Value != 0 || Padding != 0) Byte |= 0x80; // Mark this byte that that more bytes will follow. OS << char(Byte); } while (Value != 0); + + // Pad with 0x80 and emit a null byte at the end. + if (Padding != 0) { + for (; Padding != 1; --Padding) + OS << '\x80'; + OS << '\x00'; + } } bool diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 2a37f82..62abe54 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -94,10 +94,11 @@ void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size, /// EmitULEB128Value - Special case of EmitULEB128Value that avoids the /// client having to pass in a MCExpr for constant integers. 
-void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace) { +void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned AddrSpace, + unsigned Padding) { SmallString<32> Tmp; raw_svector_ostream OSE(Tmp); - MCObjectWriter::EncodeULEB128(Value, OSE); + MCObjectWriter::EncodeULEB128(Value, OSE, Padding); EmitBytes(OSE.str(), AddrSpace); } -- cgit v1.1 From ef56d1d35d741da0de6e27717d51564eade63e6d Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 5 Nov 2011 12:13:21 +0000 Subject: MachOObject: Use DataExtractor's uleb parser instead of rolling our own. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143810 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/MachOObject.cpp | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/Object/MachOObject.cpp b/lib/Object/MachOObject.cpp index 9cdac86..ff17683 100644 --- a/lib/Object/MachOObject.cpp +++ b/lib/Object/MachOObject.cpp @@ -10,11 +10,12 @@ #include "llvm/Object/MachOObject.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Host.h" -#include "llvm/Support/SwapByteOrder.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Debug.h" +#include "llvm/Support/SwapByteOrder.h" using namespace llvm; using namespace llvm::object; @@ -359,25 +360,13 @@ void MachOObject::ReadSymbol64TableEntry(uint64_t SymbolTableOffset, void MachOObject::ReadULEB128s(uint64_t Index, SmallVectorImpl &Out) const { - const char *ptr = Buffer->getBufferStart() + Index; + DataExtractor extractor(Buffer->getBuffer(), true, 0); + + uint32_t offset = Index; uint64_t data = 0; - uint64_t delta = 0; - uint32_t shift = 0; - while (true) { - assert(ptr < Buffer->getBufferEnd() && "index out of bounds"); - assert(shift < 64 && "too big for uint64_t"); 
- - uint8_t byte = *ptr++; - delta |= ((byte & 0x7F) << shift); - shift += 7; - if (byte < 0x80) { - if (delta == 0) - break; - data += delta; - Out.push_back(data); - delta = 0; - shift = 0; - } + while (uint64_t delta = extractor.getULEB128(&offset)) { + data += delta; + Out.push_back(data); } } -- cgit v1.1 From 3f4c979e1b1e02e51443cf88f4b7ec47a6865aae Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 5 Nov 2011 13:11:25 +0000 Subject: Twinify. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143811 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCStreamer.cpp | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCStreamer.cpp b/lib/MC/MCStreamer.cpp index 62abe54..60a0a9d 100644 --- a/lib/MC/MCStreamer.cpp +++ b/lib/MC/MCStreamer.cpp @@ -16,7 +16,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include using namespace llvm; @@ -188,9 +187,8 @@ void MCStreamer::EmitDataRegion() { if (!MAI.getSupportsDataRegions()) return; // Generate a unique symbol name. - MCSymbol *NewSym = Context.GetOrCreateSymbol( - Twine(MAI.getDataBeginLabelName()) + - utostr(UniqueDataBeginSuffix++)); + MCSymbol *NewSym = Context.GetOrCreateSymbol(MAI.getDataBeginLabelName() + + Twine(UniqueDataBeginSuffix++)); EmitLabel(NewSym); RegionIndicator = Data; @@ -204,9 +202,8 @@ void MCStreamer::EmitCodeRegion() { if (!MAI.getSupportsDataRegions()) return; // Generate a unique symbol name. 
- MCSymbol *NewSym = Context.GetOrCreateSymbol( - Twine(MAI.getCodeBeginLabelName()) + - utostr(UniqueCodeBeginSuffix++)); + MCSymbol *NewSym = Context.GetOrCreateSymbol(MAI.getCodeBeginLabelName() + + Twine(UniqueCodeBeginSuffix++)); EmitLabel(NewSym); RegionIndicator = Code; @@ -220,9 +217,9 @@ void MCStreamer::EmitJumpTable8Region() { if (!MAI.getSupportsDataRegions()) return; // Generate a unique symbol name. - MCSymbol *NewSym = Context.GetOrCreateSymbol( - Twine(MAI.getJumpTable8BeginLabelName()) + - utostr(UniqueDataBeginSuffix++)); + MCSymbol *NewSym = + Context.GetOrCreateSymbol(MAI.getJumpTable8BeginLabelName() + + Twine(UniqueDataBeginSuffix++)); EmitLabel(NewSym); RegionIndicator = JumpTable8; @@ -236,9 +233,9 @@ void MCStreamer::EmitJumpTable16Region() { if (!MAI.getSupportsDataRegions()) return; // Generate a unique symbol name. - MCSymbol *NewSym = Context.GetOrCreateSymbol( - Twine(MAI.getJumpTable16BeginLabelName()) + - utostr(UniqueDataBeginSuffix++)); + MCSymbol *NewSym = + Context.GetOrCreateSymbol(MAI.getJumpTable16BeginLabelName() + + Twine(UniqueDataBeginSuffix++)); EmitLabel(NewSym); RegionIndicator = JumpTable16; @@ -253,9 +250,9 @@ void MCStreamer::EmitJumpTable32Region() { if (!MAI.getSupportsDataRegions()) return; // Generate a unique symbol name. - MCSymbol *NewSym = Context.GetOrCreateSymbol( - Twine(MAI.getJumpTable32BeginLabelName()) + - utostr(UniqueDataBeginSuffix++)); + MCSymbol *NewSym = + Context.GetOrCreateSymbol(MAI.getJumpTable32BeginLabelName() + + Twine(UniqueDataBeginSuffix++)); EmitLabel(NewSym); RegionIndicator = JumpTable32; -- cgit v1.1 From 80cc2598f89d09a6df2b84a5f8cea813b280b17b Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 5 Nov 2011 15:35:00 +0000 Subject: Reduce the offsets in DwarfDebugInfoEntry to 32 bit, they're printed with %x and that breaks on big-endian machines. I have to clean up the 32/64 bit confusion in libDebugInfo some day. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143812 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugInfoEntry.cpp | 2 +- lib/DebugInfo/DWARFDebugInfoEntry.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp index 1b089ad..67ab111 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -26,7 +26,7 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, uint32_t offset = Offset; if (debug_info_data.isValidOffset(offset)) { - uint64_t abbrCode = debug_info_data.getULEB128(&offset); + uint32_t abbrCode = debug_info_data.getULEB128(&offset); OS << format("\n0x%8.8x: ", Offset); if (abbrCode) { diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.h b/lib/DebugInfo/DWARFDebugInfoEntry.h index aff2e85..37b3bcd 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -23,7 +23,7 @@ class DWARFFormValue; /// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data. class DWARFDebugInfoEntryMinimal { /// Offset within the .debug_info of the start of this entry. - uint64_t Offset; + uint32_t Offset; /// How many to subtract from "this" to get the parent. /// If zero this die has no parent. @@ -52,7 +52,7 @@ public: uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; } bool isNULL() const { return AbbrevDecl == 0; } - uint64_t getOffset() const { return Offset; } + uint32_t getOffset() const { return Offset; } uint32_t getNumAttributes() const { return !isNULL() ? AbbrevDecl->getNumAttributes() : 0; } -- cgit v1.1 From 5eccd36f1a8d917201b39697f3f1b6f45ecc139c Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sat, 5 Nov 2011 16:01:13 +0000 Subject: Audited all the format strings in libDebugInfo and fixed those that didn't match the types. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143814 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/DebugInfo/DWARFDebugAbbrev.cpp | 2 +- lib/DebugInfo/DWARFDebugAbbrev.h | 6 +++--- lib/DebugInfo/DWARFDebugLine.cpp | 5 +++-- lib/DebugInfo/DWARFFormValue.cpp | 14 +++++++------- 4 files changed, 14 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/DebugInfo/DWARFDebugAbbrev.cpp b/lib/DebugInfo/DWARFDebugAbbrev.cpp index a11ae3f..6e6c37e 100644 --- a/lib/DebugInfo/DWARFDebugAbbrev.cpp +++ b/lib/DebugInfo/DWARFDebugAbbrev.cpp @@ -83,7 +83,7 @@ void DWARFDebugAbbrev::dump(raw_ostream &OS) const { DWARFAbbreviationDeclarationCollMapConstIter pos; for (pos = AbbrevCollMap.begin(); pos != AbbrevCollMap.end(); ++pos) { - OS << format("Abbrev table for offset: 0x%8.8x\n", pos->first); + OS << format("Abbrev table for offset: 0x%8.8" PRIx64 "\n", pos->first); pos->second.dump(OS); } } diff --git a/lib/DebugInfo/DWARFDebugAbbrev.h b/lib/DebugInfo/DWARFDebugAbbrev.h index 03189b1..c7c0436 100644 --- a/lib/DebugInfo/DWARFDebugAbbrev.h +++ b/lib/DebugInfo/DWARFDebugAbbrev.h @@ -25,21 +25,21 @@ typedef DWARFAbbreviationDeclarationColl::const_iterator DWARFAbbreviationDeclarationCollConstIter; class DWARFAbbreviationDeclarationSet { - uint64_t Offset; + uint32_t Offset; uint32_t IdxOffset; std::vector Decls; public: DWARFAbbreviationDeclarationSet() : Offset(0), IdxOffset(0) {} - DWARFAbbreviationDeclarationSet(uint64_t offset, uint32_t idxOffset) + DWARFAbbreviationDeclarationSet(uint32_t offset, uint32_t idxOffset) : Offset(offset), IdxOffset(idxOffset) {} void clear() { IdxOffset = 0; Decls.clear(); } - uint64_t getOffset() const { return Offset; } + uint32_t getOffset() const { return Offset; } void dump(raw_ostream &OS) const; bool extract(DataExtractor data, uint32_t* offset_ptr); diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index 0248ad3..117fa31 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ 
b/lib/DebugInfo/DWARFDebugLine.cpp @@ -41,8 +41,9 @@ void DWARFDebugLine::Prologue::dump(raw_ostream &OS) const { "----------------\n"; for (uint32_t i = 0; i < FileNames.size(); ++i) { const FileNameEntry& fileEntry = FileNames[i]; - OS << format("file_names[%3u] %4u ", i+1, fileEntry.DirIdx) - << format("0x%8.8x 0x%8.8x ", fileEntry.ModTime, fileEntry.Length) + OS << format("file_names[%3u] %4" PRIu64 " ", i+1, fileEntry.DirIdx) + << format("0x%8.8" PRIx64 " 0x%8.8" PRIx64 " ", + fileEntry.ModTime, fileEntry.Length) << fileEntry.Name << '\n'; } } diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp index 86e28bd..1c7b9d7 100644 --- a/lib/DebugInfo/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARFFormValue.cpp @@ -263,12 +263,12 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { bool cu_relative_offset = false; switch (Form) { - case DW_FORM_addr: OS << format("0x%016x", uvalue); break; + case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break; case DW_FORM_flag: - case DW_FORM_data1: OS << format("0x%02x", uvalue); break; - case DW_FORM_data2: OS << format("0x%04x", uvalue); break; - case DW_FORM_data4: OS << format("0x%08x", uvalue); break; - case DW_FORM_data8: OS << format("0x%016x", uvalue); break; + case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break; + case DW_FORM_data2: OS << format("0x%04x", (uint16_t)uvalue); break; + case DW_FORM_data4: OS << format("0x%08x", (uint32_t)uvalue); break; + case DW_FORM_data8: OS << format("0x%016" PRIx64, uvalue); break; case DW_FORM_string: OS << '"'; OS.write_escaped(getAsCString(NULL)); @@ -314,7 +314,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { break; } case DW_FORM_ref_addr: - OS << format("0x%016x", uvalue); + OS << format("0x%016" PRIx64, uvalue); break; case DW_FORM_ref1: cu_relative_offset = true; @@ -348,7 +348,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { } if 
(cu_relative_offset) - OS << format(" => {0x%8.8x}", (uvalue + (cu ? cu->getOffset() : 0))); + OS << format(" => {0x%8.8"PRIx64"}", (uvalue + (cu ? cu->getOffset() : 0))); } const char* -- cgit v1.1 From 42536af5ce152593f489ca88bd0732218594d536 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Sat, 5 Nov 2011 20:16:15 +0000 Subject: Add support for passing i1, i8, and i16 call parameters. Also, be sure to zero-extend the constant integer encoding. Test case provides testing for both call parameters and materialization of i1, i8, and i16 types. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143821 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 44 +++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 28 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 517f73f..c98156e 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -557,7 +557,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { unsigned ImmReg = createResultReg(TLI.getRegClassFor(SrcVT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg) - .addImm(CI->getSExtValue())); + .addImm(CI->getZExtValue())); return ImmReg; } @@ -1599,33 +1599,21 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, switch (VA.getLocInfo()) { case CCValAssign::Full: break; case CCValAssign::SExt: { - bool Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), - Arg, ArgVT, Arg); - assert(Emitted && "Failed to emit a sext!"); (void)Emitted; - Emitted = true; - ArgVT = VA.getLocVT(); + EVT DestVT = VA.getLocVT(); + unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, + /*isZExt*/false); + assert (ResultReg != 0 && "Failed to emit a sext"); + Arg = ResultReg; break; } + case CCValAssign::AExt: + // Intentional fall-through. Handle AExt and ZExt. 
case CCValAssign::ZExt: { - bool Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), - Arg, ArgVT, Arg); - assert(Emitted && "Failed to emit a zext!"); (void)Emitted; - Emitted = true; - ArgVT = VA.getLocVT(); - break; - } - case CCValAssign::AExt: { - bool Emitted = FastEmitExtend(ISD::ANY_EXTEND, VA.getLocVT(), - Arg, ArgVT, Arg); - if (!Emitted) - Emitted = FastEmitExtend(ISD::ZERO_EXTEND, VA.getLocVT(), - Arg, ArgVT, Arg); - if (!Emitted) - Emitted = FastEmitExtend(ISD::SIGN_EXTEND, VA.getLocVT(), - Arg, ArgVT, Arg); - - assert(Emitted && "Failed to emit a aext!"); (void)Emitted; - ArgVT = VA.getLocVT(); + EVT DestVT = VA.getLocVT(); + unsigned ResultReg = ARMEmitIntExt(ArgVT, Arg, DestVT, + /*isZExt*/true); + assert (ResultReg != 0 && "Failed to emit a sext"); + Arg = ResultReg; break; } case CCValAssign::BCvt: { @@ -1643,7 +1631,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, if (VA.isRegLoc() && !VA.needsCustom()) { BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), VA.getLocReg()) - .addReg(Arg); + .addReg(Arg); RegArgs.push_back(VA.getLocReg()); } else if (VA.needsCustom()) { // TODO: We need custom lowering for vector (v2f64) args. @@ -1962,8 +1950,8 @@ bool ARMFastISel::SelectCall(const Instruction *I) { Type *ArgTy = (*i)->getType(); MVT ArgVT; - // FIXME: Should be able to handle i1, i8, and/or i16 parameters. - if (!isTypeLegal(ArgTy, ArgVT)) + if (!isTypeLegal(ArgTy, ArgVT) && ArgVT != MVT::i16 && ArgVT != MVT::i8 && + ArgVT != MVT::i1) return false; unsigned OriginalAlignment = TD.getABITypeAlignment(ArgTy); Flags.setOrigAlign(OriginalAlignment); -- cgit v1.1 From 27e5d0c72a54e0d26fd0248f302043d20f73721b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 6 Nov 2011 06:12:20 +0000 Subject: Add more AVX2 instructions and intrinsics. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143861 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrFormats.td | 14 ++++++ lib/Target/X86/X86InstrSSE.td | 95 +++++++++++++++++++++++++++++++++------ 2 files changed, 96 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index 5236daf..b7c172e 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -451,6 +451,20 @@ class AVXAIi8 o, Format F, dag outs, dag ins, string asm, : Ii8, TA, OpSize, Requires<[HasAVX]>; +// AVX2 Instruction Templates: +// Instructions introduced in AVX2 (no SSE equivalent forms) +// +// AVX28I - AVX2 instructions with T8 and OpSize prefix. +// AVX2AIi8 - AVX2 instructions with TA, OpSize prefix and ImmT = Imm8. +class AVX28I o, Format F, dag outs, dag ins, string asm, + list pattern> + : I, T8, OpSize, + Requires<[HasAVX2]>; +class AVX2Ii8 o, Format F, dag outs, dag ins, string asm, + list pattern> + : Ii8, TA, OpSize, + Requires<[HasAVX2]>; + // AES Instruction Templates: // // AES8I diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index d2291a2..acd9a80 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7083,11 +7083,12 @@ class avx_broadcast opc, string OpcodeStr, RegisterClass RC, !strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"), [(set RC:$dst, (Int addr:$src))]>, VEX; -class avx_broadcast_reg opc, string OpcodeStr, RegisterClass RC, - Intrinsic Int> : - AVX8I, VEX; +// AVX2 adds register forms +class avx2_broadcast_reg opc, string OpcodeStr, RegisterClass RC, + Intrinsic Int> : + AVX28I, VEX; def VBROADCASTSSrm : avx_broadcast<0x18, "vbroadcastss", VR128, f32mem, int_x86_avx_vbroadcast_ss>; @@ -7098,16 +7099,16 @@ def VBROADCASTSDrm : avx_broadcast<0x19, "vbroadcastsd", VR256, f64mem, def VBROADCASTF128 : avx_broadcast<0x1A, "vbroadcastf128", VR256, f128mem, 
int_x86_avx_vbroadcastf128_pd_256>; -let Predicates = [HasAVX2] in { -def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, - int_x86_avx2_vbroadcasti128>; -def VBROADCASTSSrr : avx_broadcast_reg<0x18, "vbroadcastss", VR128, +def VBROADCASTSSrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR128, int_x86_avx2_vbroadcast_ss_ps>; -def VBROADCASTSSYrr : avx_broadcast_reg<0x18, "vbroadcastss", VR256, +def VBROADCASTSSYrr : avx2_broadcast_reg<0x18, "vbroadcastss", VR256, int_x86_avx2_vbroadcast_ss_ps_256>; -def VBROADCASTSDrr : avx_broadcast_reg<0x19, "vbroadcastsd", VR256, +def VBROADCASTSDrr : avx2_broadcast_reg<0x19, "vbroadcastsd", VR256, int_x86_avx2_vbroadcast_sd_pd_256>; -} + +let Predicates = [HasAVX2] in +def VBROADCASTI128 : avx_broadcast<0x5A, "vbroadcasti128", VR256, i128mem, + int_x86_avx2_vbroadcasti128>; def : Pat<(int_x86_avx_vbroadcastf128_ps_256 addr:$src), (VBROADCASTF128 addr:$src)>; @@ -7364,7 +7365,7 @@ let Defs = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7, //===----------------------------------------------------------------------===// // Half precision conversion instructions -// +//===----------------------------------------------------------------------===// multiclass f16c_ph2ps { let Predicates = [HasAVX, HasF16C] in { def rr : I<0x13, MRMSrcReg, (outs RC:$dst), (ins VR128:$src), @@ -7396,3 +7397,71 @@ defm VCVTPH2PS : f16c_ph2ps; defm VCVTPH2PSY : f16c_ph2ps; defm VCVTPS2PH : f16c_ps2ph; defm VCVTPS2PHY : f16c_ps2ph; + +//===----------------------------------------------------------------------===// +// AVX2 Instructions +//===----------------------------------------------------------------------===// + +/// AVX2I_binop_rmi_int - AVX2 binary operator with 8-bit immediate +multiclass AVX2I_binop_rmi_int opc, string OpcodeStr, + Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, + X86MemOperand x86memop> { + let isCommutable = 1 in + def rri : AVX2Ii8, + VEX_4V; + def rmi : AVX2Ii8, + VEX_4V; +} + +let isCommutable = 
0 in { +defm VPBLENDD : AVX2I_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, + VR128, memopv16i8, i128mem>; +defm VPBLENDDY : AVX2I_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, + VR256, memopv32i8, i256mem>; +} + +//===----------------------------------------------------------------------===// +// VPBROADCAST - Load from memory and broadcast to all elements of the +// destination operand +// +multiclass avx2_broadcast opc, string OpcodeStr, + X86MemOperand x86memop, PatFrag ld_frag, + Intrinsic Int128, Intrinsic Int256> { + def rr : AVX28I, VEX; + def rm : AVX28I, VEX; + def Yrr : AVX28I, VEX; + def Yrm : AVX28I, VEX; +} + +defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, + int_x86_avx2_pbroadcastb_128, + int_x86_avx2_pbroadcastb_256>; +defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, + int_x86_avx2_pbroadcastw_128, + int_x86_avx2_pbroadcastw_256>; +defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, + int_x86_avx2_pbroadcastd_128, + int_x86_avx2_pbroadcastd_256>; +defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, + int_x86_avx2_pbroadcastq_128, + int_x86_avx2_pbroadcastq_256>; -- cgit v1.1 From 5ced70d8f8458f41e6dc7ac76ad2e7c63ae33664 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Sun, 6 Nov 2011 16:45:46 +0000 Subject: Return only the least significant 8 bits of the exit status from Process::Wait on Windows (mimicking POSIX behaviour). 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143876 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Windows/Program.inc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Support/Windows/Program.inc b/lib/Support/Windows/Program.inc index e486e6e..7e38168 100644 --- a/lib/Support/Windows/Program.inc +++ b/lib/Support/Windows/Program.inc @@ -367,7 +367,7 @@ Program::Wait(const Path &path, return -2; } - return status; + return status & 0377; } bool -- cgit v1.1 From 589fbb1770df5f7bee1c5e24e9e8f4ca5091d528 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Sun, 6 Nov 2011 18:04:43 +0000 Subject: ADT/StringRef: Add ::lower() and ::upper() methods. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143880 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/StringRef.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'lib') diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index 576b95f..c78b6d0 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -25,6 +25,12 @@ static char ascii_tolower(char x) { return x; } +static char ascii_toupper(char x) { + if (x >= 'a' && x <= 'z') + return x - 'a' + 'A'; + return x; +} + static bool ascii_isdigit(char x) { return x >= '0' && x <= '9'; } @@ -132,6 +138,26 @@ unsigned StringRef::edit_distance(llvm::StringRef Other, } //===----------------------------------------------------------------------===// +// String Operations +//===----------------------------------------------------------------------===// + +std::string StringRef::lower() const { + std::string Result(size(), char()); + for (size_type i = 0, e = size(); i != e; ++i) { + Result[i] = ascii_tolower(Data[i]); + } + return Result; +} + +std::string StringRef::upper() const { + std::string Result(size(), char()); + for (size_type i = 0, e = size(); i != e; ++i) { + Result[i] = ascii_tolower(Data[i]); + } + return Result; +} + 
+//===----------------------------------------------------------------------===// // String Searching //===----------------------------------------------------------------------===// -- cgit v1.1 From a7b966fc8d63b9b9432e1b33b33d4be6179e1fff Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 6 Nov 2011 20:36:50 +0000 Subject: Fix a typo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143890 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/StringRef.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index c78b6d0..e73c6e3 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -152,7 +152,7 @@ std::string StringRef::lower() const { std::string StringRef::upper() const { std::string Result(size(), char()); for (size_type i = 0, e = size(); i != e; ++i) { - Result[i] = ascii_tolower(Data[i]); + Result[i] = ascii_toupper(Data[i]); } return Result; } -- cgit v1.1 From 590853667345d6fb191764b9d0bd2ff13589e3a3 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Sun, 6 Nov 2011 20:37:06 +0000 Subject: Replace (Lower|Upper)caseString in favor of StringRef's newest methods. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143891 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/SubtargetFeature.cpp | 3 +-- lib/Target/ARM/ARMAsmPrinter.cpp | 7 +++---- lib/Target/ARM/AsmParser/ARMAsmLexer.cpp | 7 ++----- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 13 +++++-------- lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp | 7 +------ lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 3 +-- lib/Target/Mips/MipsAsmPrinter.cpp | 9 ++++----- lib/Target/Sparc/SparcAsmPrinter.cpp | 5 ++--- lib/Target/X86/AsmParser/X86AsmLexer.cpp | 7 +------ lib/Target/X86/AsmParser/X86AsmParser.cpp | 3 +-- 10 files changed, 21 insertions(+), 43 deletions(-) (limited to 'lib') diff --git a/lib/MC/SubtargetFeature.cpp b/lib/MC/SubtargetFeature.cpp index 4f23a85..be41579 100644 --- a/lib/MC/SubtargetFeature.cpp +++ b/lib/MC/SubtargetFeature.cpp @@ -15,7 +15,6 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/StringExtras.h" #include #include #include @@ -115,7 +114,7 @@ void SubtargetFeatures::AddFeature(const StringRef String, // Don't add empty features if (!String.empty()) { // Convert to lowercase, prepend flag and add to vector - Features.push_back(PrependFlag(LowercaseString(String), IsEnabled)); + Features.push_back(PrependFlag(String.lower(), IsEnabled)); } } diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index ea3319f..bbca228 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -47,7 +47,6 @@ #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -86,12 +85,12 @@ namespace { void EmitTextAttribute(unsigned Attribute, StringRef String) { switch (Attribute) { case ARMBuildAttrs::CPU_name: - 
Streamer.EmitRawText(StringRef("\t.cpu ") + LowercaseString(String)); + Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower()); break; /* GAS requires .fpu to be emitted regardless of EABI attribute */ case ARMBuildAttrs::Advanced_SIMD_arch: case ARMBuildAttrs::VFP_arch: - Streamer.EmitRawText(StringRef("\t.fpu ") + LowercaseString(String)); + Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower()); break; default: assert(0 && "Unsupported Text attribute in ASM Mode"); break; } @@ -201,7 +200,7 @@ namespace { Streamer.EmitULEB128IntValue(item.IntValue, 0); break; case AttributeItemType::TextAttribute: - Streamer.EmitBytes(UppercaseString(item.StringValue), 0); + Streamer.EmitBytes(item.StringValue.upper(), 0); Streamer.EmitIntValue(0, 1); // '\0' break; default: diff --git a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp index 14d35ba..eb8aaf2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmLexer.cpp @@ -19,7 +19,6 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include @@ -107,11 +106,9 @@ AsmToken ARMBaseAsmLexer::LexTokenUAL() { SetError(Lexer->getErrLoc(), Lexer->getErr()); break; case AsmToken::Identifier: { - std::string upperCase = lexedToken.getString().str(); - std::string lowerCase = LowercaseString(upperCase); - StringRef lowerRef(lowerCase); + std::string lowerCase = lexedToken.getString().lower(); - unsigned regID = MatchRegisterName(lowerRef); + unsigned regID = MatchRegisterName(lowerCase); // Check for register aliases. 
// r13 -> sp // r14 -> lr diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 0abfabe..cb0c97b 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -30,7 +30,6 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" @@ -2063,8 +2062,7 @@ int ARMAsmParser::tryParseRegister() { // FIXME: Validate register for the current architecture; we have to do // validation later, so maybe there is no need for this here. - std::string upperCase = Tok.getString().str(); - std::string lowerCase = LowercaseString(upperCase); + std::string lowerCase = Tok.getString().lower(); unsigned RegNum = MatchRegisterName(lowerCase); if (!RegNum) { RegNum = StringSwitch(lowerCase) @@ -2092,8 +2090,7 @@ int ARMAsmParser::tryParseShiftRegister( const AsmToken &Tok = Parser.getTok(); assert(Tok.is(AsmToken::Identifier) && "Token is not an Identifier"); - std::string upperCase = Tok.getString().str(); - std::string lowerCase = LowercaseString(upperCase); + std::string lowerCase = Tok.getString().lower(); ARM_AM::ShiftOpc ShiftTy = StringSwitch(lowerCase) .Case("lsl", ARM_AM::lsl) .Case("lsr", ARM_AM::lsr) @@ -2688,7 +2685,7 @@ parseMSRMaskOperand(SmallVectorImpl &Operands) { // Split spec_reg from flag, example: CPSR_sxf => "CPSR" and "sxf" size_t Start = 0, Next = Mask.find('_'); StringRef Flags = ""; - std::string SpecReg = LowercaseString(Mask.slice(Start, Next)); + std::string SpecReg = Mask.slice(Start, Next).lower(); if (Next != StringRef::npos) Flags = Mask.slice(Next+1, Mask.size()); @@ -2756,8 +2753,8 @@ parsePKHImm(SmallVectorImpl &Operands, StringRef Op, return MatchOperand_ParseFail; } StringRef ShiftName = Tok.getString(); - std::string LowerOp = LowercaseString(Op); - std::string UpperOp = UppercaseString(Op); + std::string LowerOp = 
Op.lower(); + std::string UpperOp = Op.upper(); if (ShiftName != LowerOp && ShiftName != UpperOp) { Error(Parser.getTok().getLoc(), Op + " operand expected."); return MatchOperand_ParseFail; diff --git a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp index 2d357bb..7105b2e 100644 --- a/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp +++ b/lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp @@ -11,7 +11,6 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCParser/MCAsmLexer.h" @@ -100,11 +99,7 @@ AsmToken MBlazeBaseAsmLexer::LexTokenUAL() { return AsmToken(lexedToken); case AsmToken::Identifier: { - std::string upperCase = lexedToken.getString().str(); - std::string lowerCase = LowercaseString(upperCase); - StringRef lowerRef(lowerCase); - - unsigned regID = MatchRegisterName(lowerRef); + unsigned regID = MatchRegisterName(lexedToken.getString().lower()); if (regID) { return AsmToken(AsmToken::Register, diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 3dafc61..53282ab 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -17,7 +17,6 @@ #include "llvm/MC/MCInst.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; #define GET_INSTRUCTION_NAME @@ -66,7 +65,7 @@ StringRef MipsInstPrinter::getOpcodeName(unsigned Opcode) const { } void MipsInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { - OS << '$' << LowercaseString(getRegisterName(RegNo)); + OS << '$' << StringRef(getRegisterName(RegNo)).lower(); } void MipsInstPrinter::printInst(const MCInst *MI, raw_ostream &O, diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index 0e82681..a44d97f 100644 --- 
a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -36,7 +36,6 @@ #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -177,7 +176,7 @@ void MipsAsmPrinter::printSavedRegsBitmask(raw_ostream &O) { void MipsAsmPrinter::printHex32(unsigned Value, raw_ostream &O) { O << "0x"; for (int i = 7; i >= 0; i--) - O << utohexstr((Value & (0xF << (i*4))) >> (i*4)); + O.write_hex((Value & (0xF << (i*4))) >> (i*4)); } //===----------------------------------------------------------------------===// @@ -193,9 +192,9 @@ void MipsAsmPrinter::emitFrameDirective() { unsigned stackSize = MF->getFrameInfo()->getStackSize(); OutStreamer.EmitRawText("\t.frame\t$" + - Twine(LowercaseString(MipsInstPrinter::getRegisterName(stackReg))) + + StringRef(MipsInstPrinter::getRegisterName(stackReg)).lower() + "," + Twine(stackSize) + ",$" + - Twine(LowercaseString(MipsInstPrinter::getRegisterName(returnReg)))); + StringRef(MipsInstPrinter::getRegisterName(returnReg)).lower()); } /// Emit Set directives. 
@@ -335,7 +334,7 @@ void MipsAsmPrinter::printOperand(const MachineInstr *MI, int opNum, switch (MO.getType()) { case MachineOperand::MO_Register: O << '$' - << LowercaseString(MipsInstPrinter::getRegisterName(MO.getReg())); + << StringRef(MipsInstPrinter::getRegisterName(MO.getReg())).lower(); break; case MachineOperand::MO_Immediate: diff --git a/lib/Target/Sparc/SparcAsmPrinter.cpp b/lib/Target/Sparc/SparcAsmPrinter.cpp index 345e1bc..deb39d9 100644 --- a/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -23,7 +23,6 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Target/Mangler.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -82,7 +81,7 @@ void SparcAsmPrinter::printOperand(const MachineInstr *MI, int opNum, } switch (MO.getType()) { case MachineOperand::MO_Register: - O << "%" << LowercaseString(getRegisterName(MO.getReg())); + O << "%" << StringRef(getRegisterName(MO.getReg())).lower(); break; case MachineOperand::MO_Immediate: @@ -147,7 +146,7 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum, "Operand is not a physical register "); assert(MO.getReg() != SP::O7 && "%o7 is assigned as destination for getpcx!"); - operand = "%" + LowercaseString(getRegisterName(MO.getReg())); + operand = "%" + StringRef(getRegisterName(MO.getReg())).lower(); break; } diff --git a/lib/Target/X86/AsmParser/X86AsmLexer.cpp b/lib/Target/X86/AsmParser/X86AsmLexer.cpp index 1eaccff..2794e60 100644 --- a/lib/Target/X86/AsmParser/X86AsmLexer.cpp +++ b/lib/Target/X86/AsmParser/X86AsmLexer.cpp @@ -14,7 +14,6 @@ #include "llvm/MC/MCTargetAsmLexer.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; @@ -144,11 +143,7 @@ AsmToken X86AsmLexer::LexTokenIntel() { SetError(Lexer->getErrLoc(), Lexer->getErr()); return lexedToken; 
case AsmToken::Identifier: { - std::string upperCase = lexedToken.getString().str(); - std::string lowerCase = LowercaseString(upperCase); - StringRef lowerRef(lowerCase); - - unsigned regID = MatchRegisterName(lowerRef); + unsigned regID = MatchRegisterName(lexedToken.getString().lower()); if (regID) return AsmToken(AsmToken::Register, diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 6bedd52..f4639a3 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -20,7 +20,6 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" #include "llvm/Support/SourceMgr.h" @@ -412,7 +411,7 @@ bool X86ATTAsmParser::ParseRegister(unsigned &RegNo, // If the match failed, try the register name as lowercase. if (RegNo == 0) - RegNo = MatchRegisterName(LowercaseString(Tok.getString())); + RegNo = MatchRegisterName(Tok.getString().lower()); if (!is64BitMode()) { // FIXME: This should be done using Requires and -- cgit v1.1 From c8eb880a7fb0958a3a048a82c8558beec11f1209 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 6 Nov 2011 23:04:08 +0000 Subject: More AVX2 instructions and their intrinsics. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143895 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../X86/Disassembler/X86DisassemblerDecoder.c | 11 +-- .../Disassembler/X86DisassemblerDecoderCommon.h | 3 +- lib/Target/X86/X86InstrFormats.td | 2 +- lib/Target/X86/X86InstrSSE.td | 78 +++++++++++++++++++--- 4 files changed, 80 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c index f9b0fe5..1a24807 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.c @@ -773,17 +773,20 @@ static int getID(struct InternalInstruction* insn) { if (insn->rexPrefix & 0x08) attrMask |= ATTR_REXW; - + if (getIDWithAttrMask(&instructionID, insn, attrMask)) return -1; - + /* The following clauses compensate for limitations of the tables. */ - - if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW)) { + + if ((attrMask & ATTR_VEXL) && (attrMask & ATTR_REXW) && + !(attrMask & ATTR_OPSIZE)) { /* * Some VEX instructions ignore the L-bit, but use the W-bit. Normally L-bit * has precedence since there are no L-bit with W-bit entries in the tables. * So if the L-bit isn't significant we should use the W-bit instead. + * We only need to do this if the instruction doesn't specify OpSize since + * there is a VEX_L_W_OPSIZE table. 
*/ const struct InstructionSpecifier *spec; diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h index 8b79335..a7ef0cc 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoderCommon.h @@ -111,7 +111,8 @@ enum attributeBits { ENUM_ENTRY(IC_VEX_L, 3, "requires VEX and the L prefix") \ ENUM_ENTRY(IC_VEX_L_XS, 4, "requires VEX and the L and XS prefix")\ ENUM_ENTRY(IC_VEX_L_XD, 4, "requires VEX and the L and XD prefix")\ - ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") + ENUM_ENTRY(IC_VEX_L_OPSIZE, 4, "requires VEX, L, and OpSize") \ + ENUM_ENTRY(IC_VEX_L_W_OPSIZE, 5, "requires VEX, L, W and OpSize") #define ENUM_ENTRY(n, r, d) n, diff --git a/lib/Target/X86/X86InstrFormats.td b/lib/Target/X86/X86InstrFormats.td index b7c172e..ecd6a93 100644 --- a/lib/Target/X86/X86InstrFormats.td +++ b/lib/Target/X86/X86InstrFormats.td @@ -460,7 +460,7 @@ class AVX28I o, Format F, dag outs, dag ins, string asm, list pattern> : I, T8, OpSize, Requires<[HasAVX2]>; -class AVX2Ii8 o, Format F, dag outs, dag ins, string asm, +class AVX2AIi8 o, Format F, dag outs, dag ins, string asm, list pattern> : Ii8, TA, OpSize, Requires<[HasAVX2]>; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index acd9a80..de7326a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7310,14 +7310,17 @@ def : Pat<(v4i64 (X86VPermilpdy VR256:$src1, (i8 imm:$imm))), //===----------------------------------------------------------------------===// // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // +let neverHasSideEffects = 1 in { def VPERM2F128rr : AVXAIi8<0x06, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; +let mayLoad = 1 in def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, 
(outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, i8imm:$src3), "vperm2f128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; +} def : Pat<(int_x86_avx_vperm2f128_ps_256 VR256:$src1, VR256:$src2, imm:$src3), (VPERM2F128rr VR256:$src1, VR256:$src2, imm:$src3)>; @@ -7402,18 +7405,18 @@ defm VCVTPS2PHY : f16c_ps2ph; // AVX2 Instructions //===----------------------------------------------------------------------===// -/// AVX2I_binop_rmi_int - AVX2 binary operator with 8-bit immediate -multiclass AVX2I_binop_rmi_int opc, string OpcodeStr, +/// AVX2_binop_rmi_int - AVX2 binary operator with 8-bit immediate +multiclass AVX2_binop_rmi_int opc, string OpcodeStr, Intrinsic IntId, RegisterClass RC, PatFrag memop_frag, X86MemOperand x86memop> { let isCommutable = 1 in - def rri : AVX2Ii8, VEX_4V; - def rmi : AVX2Ii8 opc, string OpcodeStr, } let isCommutable = 0 in { -defm VPBLENDD : AVX2I_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, - VR128, memopv16i8, i128mem>; -defm VPBLENDDY : AVX2I_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, - VR256, memopv32i8, i256mem>; +defm VPBLENDD : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_128, + VR128, memopv16i8, i128mem>; +defm VPBLENDDY : AVX2_binop_rmi_int<0x02, "vpblendd", int_x86_avx2_pblendd_256, + VR256, memopv32i8, i256mem>; } //===----------------------------------------------------------------------===// @@ -7465,3 +7468,62 @@ defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, int_x86_avx2_pbroadcastq_128, int_x86_avx2_pbroadcastq_256>; + +//===----------------------------------------------------------------------===// +// VPERM - Permute instructions +// + +multiclass avx2_perm opc, string OpcodeStr, PatFrag mem_frag, + Intrinsic Int> { + def Yrr : AVX28I, VEX_4V; + def Yrm : AVX28I, + VEX_4V; +} + +defm VPERMD : avx2_perm<0x36, "vpermd", memopv8i32, int_x86_avx2_permd>; 
+defm VPERMPS : avx2_perm<0x16, "vpermps", memopv8f32, int_x86_avx2_permps>; + +multiclass avx2_perm_imm opc, string OpcodeStr, PatFrag mem_frag, + Intrinsic Int> { + def Yrr : AVX2AIi8, VEX; + def Yrm : AVX2AIi8, + VEX; +} + +defm VPERMQ : avx2_perm_imm<0x00, "vpermq", memopv4i64, int_x86_avx2_permq>, + VEX_W; +defm VPERMPD : avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>, + VEX_W; + +//===----------------------------------------------------------------------===// +// VPERM2F128 - Permute Floating-Point Values in 128-bit chunks +// +def VPERM2I128rr : AVXAIi8<0x46, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR256:$src2, i8imm:$src3), + "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR256:$dst, + (int_x86_avx2_vperm2i128 VR256:$src1, VR256:$src2, imm:$src3))]>, + VEX_4V; +def VPERM2I128rm : AVXAIi8<0x46, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, f256mem:$src2, i8imm:$src3), + "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR256:$dst, + (int_x86_avx2_vperm2i128 VR256:$src1, (memopv4i64 addr:$src2), + imm:$src3))]>, + VEX_4V; -- cgit v1.1 From 69f5df777819cf7a00975280b46b4ef9afa2f745 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Nov 2011 02:00:04 +0000 Subject: Add AVX2 VEXTRACTI128 and VINSERTI128 instructions. Fix VPERM2I128 to be qualified with HasAVX2 instead of HasAVX. Mark VINSERTF128 and VEXTRACTF128 as never having side effects. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143902 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index de7326a..fc36884 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7130,14 +7130,17 @@ def : Pat<(v4i32 (X86VBroadcast (loadi32 addr:$src))), //===----------------------------------------------------------------------===// // VINSERTF128 - Insert packed floating-point values // +let neverHasSideEffects = 1 in { def VINSERTF128rr : AVXAIi8<0x18, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR128:$src2, i8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; +let mayLoad = 1 in def VINSERTF128rm : AVXAIi8<0x18, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f128mem:$src2, i8imm:$src3), "vinsertf128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>, VEX_4V; +} def : Pat<(int_x86_avx_vinsertf128_pd_256 VR256:$src1, VR128:$src2, imm:$src3), (VINSERTF128rr VR256:$src1, VR128:$src2, imm:$src3)>; @@ -7174,14 +7177,17 @@ def : Pat<(vinsertf128_insert:$ins (v16i16 VR256:$src1), (v8i16 VR128:$src2), //===----------------------------------------------------------------------===// // VEXTRACTF128 - Extract packed floating-point values // +let neverHasSideEffects = 1 in { def VEXTRACTF128rr : AVXAIi8<0x19, MRMDestReg, (outs VR128:$dst), (ins VR256:$src1, i8imm:$src2), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; +let mayStore = 1 in def VEXTRACTF128mr : AVXAIi8<0x19, MRMDestMem, (outs), (ins f128mem:$dst, VR256:$src1, i8imm:$src2), "vextractf128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; +} def : Pat<(int_x86_avx_vextractf128_pd_256 VR256:$src1, imm:$src2), (VEXTRACTF128rr VR256:$src1, imm:$src2)>; @@ -7514,16 +7520,46 @@ defm VPERMPD : 
avx2_perm_imm<0x01, "vpermpd", memopv4f64, int_x86_avx2_permpd>, //===----------------------------------------------------------------------===// // VPERM2F128 - Permute Floating-Point Values in 128-bit chunks // -def VPERM2I128rr : AVXAIi8<0x46, MRMSrcReg, (outs VR256:$dst), +def VPERM2I128rr : AVX2AIi8<0x46, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (int_x86_avx2_vperm2i128 VR256:$src1, VR256:$src2, imm:$src3))]>, VEX_4V; -def VPERM2I128rm : AVXAIi8<0x46, MRMSrcMem, (outs VR256:$dst), +def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, f256mem:$src2, i8imm:$src3), "vperm2i128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", [(set VR256:$dst, (int_x86_avx2_vperm2i128 VR256:$src1, (memopv4i64 addr:$src2), imm:$src3))]>, VEX_4V; + +//===----------------------------------------------------------------------===// +// VINSERTI128 - Insert packed integer values +// +def VINSERTI128rr : AVX2AIi8<0x38, MRMSrcReg, (outs VR256:$dst), + (ins VR256:$src1, VR128:$src2, i8imm:$src3), + "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR256:$dst, + (int_x86_avx2_vinserti128 VR256:$src1, VR128:$src2, imm:$src3))]>, + VEX_4V; +def VINSERTI128rm : AVX2AIi8<0x38, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, i128mem:$src2, i8imm:$src3), + "vinserti128\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", + [(set VR256:$dst, + (int_x86_avx2_vinserti128 VR256:$src1, (memopv2i64 addr:$src2), + imm:$src3))]>, VEX_4V; + +//===----------------------------------------------------------------------===// +// VEXTRACTI128 - Extract packed integer values +// +def VEXTRACTI128rr : AVX2AIi8<0x39, MRMDestReg, (outs VR128:$dst), + (ins VR256:$src1, i8imm:$src2), + "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", + [(set VR128:$dst, + (int_x86_avx2_vextracti128 VR256:$src1, imm:$src2))]>, + VEX; 
+let neverHasSideEffects = 1, mayStore = 1 in +def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), + (ins i128mem:$dst, VR256:$src1, i8imm:$src2), + "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; -- cgit v1.1 From 28692044db488c57084cb0d421f7688f2c42ace7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Nov 2011 03:20:35 +0000 Subject: Add AVX2 VPMOVMASK instructions and intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143904 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index fc36884..b46e5d1 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7563,3 +7563,41 @@ let neverHasSideEffects = 1, mayStore = 1 in def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs), (ins i128mem:$dst, VR256:$src1, i8imm:$src2), "vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>, VEX; + +//===----------------------------------------------------------------------===// +// VPMASKMOV - Conditional SIMD Integer Packed Loads and Stores +// +multiclass avx2_pmovmask { + def rm : AVX28I<0x8c, MRMSrcMem, (outs VR128:$dst), + (ins VR128:$src1, i128mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR128:$dst, (IntLd128 addr:$src2, VR128:$src1))]>, VEX_4V; + def Yrm : AVX28I<0x8c, MRMSrcMem, (outs VR256:$dst), + (ins VR256:$src1, i256mem:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set VR256:$dst, (IntLd256 addr:$src2, VR256:$src1))]>, VEX_4V; + def mr : AVX28I<0x8e, MRMDestMem, (outs), + (ins i128mem:$dst, VR128:$src1, VR128:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(IntSt128 addr:$dst, VR128:$src1, VR128:$src2)]>, VEX_4V; + def Ymr : AVX28I<0x8e, MRMDestMem, (outs), + (ins i256mem:$dst, VR256:$src1, 
VR256:$src2), + !strconcat(OpcodeStr, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(IntSt256 addr:$dst, VR256:$src1, VR256:$src2)]>, VEX_4V; +} + +defm VPMASKMOVD : avx2_pmovmask<"vpmaskmovd", + int_x86_avx2_maskload_d, + int_x86_avx2_maskload_d_256, + int_x86_avx2_maskstore_d, + int_x86_avx2_maskstore_d_256, + memopv4i32, memopv8i32>; +defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", + int_x86_avx2_maskload_q, + int_x86_avx2_maskload_q_256, + int_x86_avx2_maskstore_q, + int_x86_avx2_maskstore_q_256, + memopv2i64, memopv4i64>, VEX_W; -- cgit v1.1 From 4c763ee61333acbf7121e89d284124bc514325bb Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 7 Nov 2011 08:26:24 +0000 Subject: Add AVX2 variable shift instructions and intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143915 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index b46e5d1..db4382a 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7601,3 +7601,43 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", int_x86_avx2_maskstore_q, int_x86_avx2_maskstore_q_256, memopv2i64, memopv4i64>, VEX_W; + + +//===----------------------------------------------------------------------===// +// Variable Bit Shifts +// +multiclass avx2_var_shift opc, string OpcodeStr, + PatFrag pf128, PatFrag pf256, + Intrinsic Int128, Intrinsic Int256> { + def rr : AVX28I, VEX_4V; + def rm : AVX28I, + VEX_4V; + def Yrr : AVX28I, VEX_4V; + def Yrm : AVX28I, + VEX_4V; +} + +defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", memopv4i32, memopv8i32, + int_x86_avx2_psllv_d, int_x86_avx2_psllv_d_256>; +defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", memopv2i64, memopv4i64, + int_x86_avx2_psllv_q, int_x86_avx2_psllv_q_256>, + VEX_W; +defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", memopv4i32, memopv8i32, + 
int_x86_avx2_psrlv_d, int_x86_avx2_psrlv_d_256>; +defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", memopv2i64, memopv4i64, + int_x86_avx2_psrlv_q, int_x86_avx2_psrlv_q_256>, + VEX_W; +defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", memopv4i32, memopv8i32, + int_x86_avx2_psrav_d, int_x86_avx2_psrav_d_256>; -- cgit v1.1 From 33aa20f737e9ebb3c29c93508da0c041a6a772c7 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 09:18:32 +0000 Subject: Typo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143918 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 96d7902..efab32e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -29,7 +29,7 @@ class ConstantInt; class DbgVariable; //===----------------------------------------------------------------------===// -/// CompileUnit - This dwarf writer support class manages information associate +/// CompileUnit - This dwarf writer support class manages information associated /// with a source file. class CompileUnit { /// ID - File identifier for source. -- cgit v1.1 From 6370118b16da30f20b5a68ccb814f3a8b8bc58b6 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 09:18:35 +0000 Subject: Fix up comment. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143919 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index b022c43..d1e4ca4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1744,7 +1744,7 @@ void DwarfDebug::emitDebugPubTypes() { for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; - // Start the dwarf pubnames section. + // Start the dwarf pubtypes section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfPubTypesSection()); Asm->OutStreamer.AddComment("Length of Public Types Info"); -- cgit v1.1 From d8a8752d5b7f546fd7ebb295366a43b2b76afbd6 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 09:18:38 +0000 Subject: Expose a way to get the beginning of the dwarf string section. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143920 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 31 +++++++++++++++++++------------ lib/CodeGen/AsmPrinter/DwarfDebug.h | 4 ++++ 2 files changed, 23 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index d1e4ca4..037b4ac 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -136,6 +136,18 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfDebug::~DwarfDebug() { } +/// EmitSectionSym - Switch to the specified MCSection and emit an assembler +/// temporary label to it if SymbolStem is specified. 
+static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section, + const char *SymbolStem = 0) { + Asm->OutStreamer.SwitchSection(Section); + if (!SymbolStem) return 0; + + MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); + Asm->OutStreamer.EmitLabel(TmpSym); + return TmpSym; +} + MCSymbol *DwarfDebug::getStringPool() { return Asm->GetTempSymbol("section_str"); } @@ -148,6 +160,13 @@ MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { return Entry.first = Asm->GetTempSymbol("string", Entry.second); } +MCSymbol *DwarfDebug::getDwarfStrSectionSym(void) { + if (DwarfStrSectionSym) return DwarfStrSectionSym; + DwarfStrSectionSym = + EmitSectionSym(Asm, Asm->getObjFileLowering().getDwarfStrSection(), + "section_str"); + return DwarfStrSectionSym; +} /// assignAbbrevNumber - Define a unique number for the abbreviation. /// @@ -1465,18 +1484,6 @@ void DwarfDebug::computeSizeAndOffsets() { } } -/// EmitSectionSym - Switch to the specified MCSection and emit an assembler -/// temporary label to it if SymbolStem is specified. -static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section, - const char *SymbolStem = 0) { - Asm->OutStreamer.SwitchSection(Section); - if (!SymbolStem) return 0; - - MCSymbol *TmpSym = Asm->GetTempSymbol(SymbolStem); - Asm->OutStreamer.EmitLabel(TmpSym); - return TmpSym; -} - /// EmitSectionLabels - Emit initial Dwarf sections with a label at /// the start of each one. void DwarfDebug::EmitSectionLabels() { diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index d5ce696..7bcc5b4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -513,6 +513,10 @@ public: /// getStringPoolEntry - returns an entry into the string pool with the given /// string text. MCSymbol *getStringPoolEntry(StringRef Str); + + /// getDwarfStrSectionSym - returns the symbol that starts the dwarf string + /// section. 
+ MCSymbol *getDwarfStrSectionSym(); }; } // End of namespace llvm -- cgit v1.1 From bcbd3a4637f33036d05833364e180f9dfaabb67c Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 09:18:42 +0000 Subject: Add a new dwarf accelerator table prototype with the goal of replacing the pubnames and pubtypes tables. LLDB can currently use this format and a full spec is forthcoming and submission for standardization is planned. A basic summary: The dwarf accelerator tables are an indirect hash table optimized for null lookup rather than access to known data. They are output into an on-disk format that looks like this: .-------------. | HEADER | |-------------| | BUCKETS | |-------------| | HASHES | |-------------| | OFFSETS | |-------------| | DATA | `-------------' where the header contains a magic number, version, type of hash function, the number of buckets, total number of hashes, and room for a special struct of data and the length of that struct. The buckets contain an index (e.g. 6) into the hashes array. The hashes section contains all of the 32-bit hash values in contiguous memory, and the offsets contain the offset into the data area for the particular hash. For a lookup example, we could hash a function name and take it modulo the number of buckets giving us our bucket. From there we take the bucket value as an index into the hashes table and look at each successive hash as long as the hash value is still the same modulo result (bucket value) as earlier. If we have a match we look at that same entry in the offsets table and grab the offset in the data for our final match. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143921 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 250 ++++++++++++++++++++++++++++ lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 254 +++++++++++++++++++++++++++++ 2 files changed, 504 insertions(+) create mode 100644 lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp create mode 100644 lib/CodeGen/AsmPrinter/DwarfAccelTable.h (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp new file mode 100644 index 0000000..b7c8c6e --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -0,0 +1,250 @@ +//=-- llvm/CodeGen/DwarfAccelTable.cpp - Dwarf Accelerator Tables -*- C++ -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Debug.h" +#include "DwarfAccelTable.h" +#include "DwarfDebug.h" +#include "DIE.h" + +using namespace llvm; + +const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) { + switch (AT) { + default: llvm_unreachable("invalid AtomType!"); + case eAtomTypeNULL: return "eAtomTypeNULL"; + case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset"; + case eAtomTypeCUOffset: return "eAtomTypeCUOffset"; + case eAtomTypeTag: return "eAtomTypeTag"; + case eAtomTypeNameFlags: return "eAtomTypeNameFlags"; + case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags"; + } +} + +// The general case would need to have a less hard coded size for the +// length of the HeaderData, however, if we're constructing based on a +// single Atom then we know it will always be: 4 + 4 + 2 + 2. +DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) : + Header(12), + HeaderData(atom) { +} + +void DwarfAccelTable::AddName(StringRef Name, DIE* die) { + // If the string is in the list already then add this die to the list + // otherwise add a new one. + DIEArray &DIEs = Entries[Name]; + DIEs.push_back(die); +} + +void DwarfAccelTable::ComputeBucketCount(void) { + // First get the number of unique hashes. + std::vector uniques; + uniques.resize(Data.size()); + for (size_t i = 0; i < Data.size(); ++i) + uniques[i] = Data[i]->HashValue; + std::sort(uniques.begin(), uniques.end()); + std::vector::iterator p = + std::unique(uniques.begin(), uniques.end()); + uint32_t num = std::distance(uniques.begin(), p); + + // Then compute the bucket size, minimum of 1 bucket. + if (num > 1024) Header.bucket_count = num/4; + if (num > 16) Header.bucket_count = num/2; + else Header.bucket_count = num > 0 ? 
num : 1; + + Header.hashes_count = num; +} + +void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { + // Create the individual hash data outputs. + for (StringMap::const_iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + struct HashData *Entry = new HashData((*EI).getKeyData()); + for (DIEArray::const_iterator DI = (*EI).second.begin(), + DE = (*EI).second.end(); + DI != DE; ++DI) + Entry->addOffset((*DI)->getOffset()); + Data.push_back(Entry); + } + + // Figure out how many buckets we need, then compute the bucket + // contents and the final ordering. We'll emit the hashes and offsets + // by doing a walk during the emission phase. We add temporary + // symbols to the data so that we can reference them during the offset + // later, we'll emit them when we emit the data. + ComputeBucketCount(); + + // Compute bucket contents and final ordering. + Buckets.resize(Header.bucket_count); + for (size_t i = 0; i < Data.size(); ++i) { + uint32_t bucket = Data[i]->HashValue % Header.bucket_count; + Buckets[bucket].push_back(Data[i]); + Data[i]->Sym = Asm->GetTempSymbol(Prefix, i); + } +} + +// Emits the header for the table via the AsmPrinter. 
+void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { + Asm->OutStreamer.AddComment("Header Magic"); + Asm->EmitInt32(Header.magic); + Asm->OutStreamer.AddComment("Header Version"); + Asm->EmitInt16(Header.version); + Asm->OutStreamer.AddComment("Header Hash Function"); + Asm->EmitInt16(Header.hash_function); + Asm->OutStreamer.AddComment("Header Bucket Count"); + Asm->EmitInt32(Header.bucket_count); + Asm->OutStreamer.AddComment("Header Hash Count"); + Asm->EmitInt32(Header.hashes_count); + Asm->OutStreamer.AddComment("Header Data Length"); + Asm->EmitInt32(Header.header_data_len); + Asm->OutStreamer.AddComment("HeaderData Die Offset Base"); + Asm->EmitInt32(HeaderData.die_offset_base); + Asm->OutStreamer.AddComment("HeaderData Atom Count"); + Asm->EmitInt32(HeaderData.Atoms.size()); + for (size_t i = 0; i < HeaderData.Atoms.size(); i++) { + Atom A = HeaderData.Atoms[i]; + Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type)); + Asm->EmitInt16(A.type); + Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form)); + Asm->EmitInt16(A.form); + } +} + +// Walk through and emit the buckets for the table. This will look +// like a list of numbers of how many elements are in each bucket. +void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { + unsigned index = 0; + for (size_t i = 0; i < Buckets.size(); ++i) { + Twine Comment = Twine("Bucket ") + Twine(i); + Asm->OutStreamer.AddComment(Comment); + if (Buckets[i].size() != 0) + Asm->EmitInt32(index); + else + Asm->EmitInt32(UINT32_MAX); + index += Buckets[i].size(); + } +} + +// Walk through the buckets and emit the individual hashes for each +// bucket. 
+void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { + for (size_t i = 0; i < Buckets.size(); ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Twine Comment = Twine("Hash in Bucket ") + Twine(i); + Asm->OutStreamer.AddComment(Comment); + Asm->EmitInt32((*HI)->HashValue); + } + } +} + +// Walk through the buckets and emit the individual offsets for each +// element in each bucket. This is done via a symbol subtraction from the +// beginning of the section. The non-section symbol will be output later +// when we emit the actual data. +void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { + for (size_t i = 0; i < Buckets.size(); ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + Twine Comment = Twine("Offset in Bucket ") + Twine(i); + Asm->OutStreamer.AddComment(Comment); + MCContext &Context = Asm->OutStreamer.getContext(); + const MCExpr *Sub = + MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), + MCSymbolRefExpr::Create(SecBegin, Context), + Context); + Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0); + } + } +} + +// Walk through the buckets and emit the full data for each element in +// the bucket. For the string case emit the dies and the various offsets. +// Terminate each HashData bucket with 0. +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { + uint64_t PrevHash = UINT64_MAX; + for (size_t i = 0; i < Buckets.size(); ++i) { + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) { + // Remember to emit the label for our offset. 
+ Asm->OutStreamer.EmitLabel((*HI)->Sym); + Asm->OutStreamer.AddComment((*HI)->Str); + Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str), + D->getDwarfStrSectionSym()); + Asm->OutStreamer.AddComment("Num DIEs"); + Asm->EmitInt32((*HI)->DIEOffsets.size()); + for (std::vector::const_iterator + DI = (*HI)->DIEOffsets.begin(), DE = (*HI)->DIEOffsets.end(); + DI != DE; ++DI) { + Asm->EmitInt32((*DI)); + } + // Emit a 0 to terminate the data unless we have a hash collision. + if (PrevHash != (*HI)->HashValue) + Asm->EmitInt32(0); + PrevHash = (*HI)->HashValue; + } + } +} + +// Emit the entire data structure to the output file. +void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, + DwarfDebug *D) { + // Emit the header. + EmitHeader(Asm); + + // Emit the buckets. + EmitBuckets(Asm); + + // Emit the hashes. + EmitHashes(Asm); + + // Emit the offsets. + EmitOffsets(Asm, SecBegin); + + // Emit the hash data. + EmitData(Asm, D); +} + +#ifndef NDEBUG +void DwarfAccelTable::print(raw_ostream &O) { + + Header.print(O); + HeaderData.print(O); + + O << "Entries: \n"; + for (StringMap::const_iterator + EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { + O << "Name: " << (*EI).getKeyData() << "\n"; + for (DIEArray::const_iterator DI = (*EI).second.begin(), + DE = (*EI).second.end(); + DI != DE; ++DI) + (*DI)->print(O); + } + + O << "Buckets and Hashes: \n"; + for (size_t i = 0; i < Buckets.size(); ++i) + for (HashList::const_iterator HI = Buckets[i].begin(), + HE = Buckets[i].end(); HI != HE; ++HI) + (*HI)->print(O); + + O << "Data: \n"; + for (std::vector::const_iterator + DI = Data.begin(), DE = Data.end(); DI != DE; ++DI) + (*DI)->print(O); + + +} +#endif diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h new file mode 100644 index 0000000..242841a --- /dev/null +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -0,0 +1,254 @@ +//==-- llvm/CodeGen/DwarfAccelTable.h - Dwarf Accelerator Tables -*- C++ -*-==// 
+// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains support for writing dwarf accelerator tables. +// +//===----------------------------------------------------------------------===// + +#ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ +#define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ + +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/FormattedStream.h" +#include +#include + +// The apple dwarf accelerator tables are an indirect hash table optimized +// for null lookup rather than access to known data. They are output into +// an on-disk format that looks like this: +// +// .-------------. +// | HEADER | +// |-------------| +// | BUCKETS | +// |-------------| +// | HASHES | +// |-------------| +// | OFFSETS | +// |-------------| +// | DATA | +// `-------------' +// +// where the header contains a magic number, version, type of hash function, +// the number of buckets, total number of hashes, and room for a special +// struct of data and the length of that struct. +// +// The buckets contain an index (e.g. 6) into the hashes array. The hashes +// section contains all of the 32-bit hash values in contiguous memory, and +// the offsets contain the offset into the data area for the particular +// hash. +// +// For a lookup example, we could hash a function name and take it modulo the +// number of buckets giving us our bucket. From there we take the bucket value +// as an index into the hashes table and look at each successive hash as long +// as the hash value is still the same modulo result (bucket value) as earlier. 
+// If we have a match we look at that same entry in the offsets table and +// grab the offset in the data for our final match. + +namespace llvm { + +class AsmPrinter; +class DIE; +class DwarfDebug; + +class DwarfAccelTable { + + enum HashFunctionType { + eHashFunctionDJB = 0u + }; + + static uint32_t HashDJB (const char *s) { + uint32_t h = 5381; + for (unsigned char c = *s; c; c = *++s) + h = ((h << 5) + h) + c; + return h; + } + + // Helper function to compute the number of buckets needed based on + // the number of unique hashes. + void ComputeBucketCount (void); + + struct TableHeader { + uint32_t magic; // 'HASH' magic value to allow endian detection + uint16_t version; // Version number. + uint16_t hash_function; // The hash function enumeration that was used. + uint32_t bucket_count; // The number of buckets in this hash table. + uint32_t hashes_count; // The total number of unique hash values + // and hash data offsets in this table. + uint32_t header_data_len; // The bytes to skip to get to the hash + // indexes (buckets) for correct alignment. + // Also written to disk is the implementation specific header data. + + static const uint32_t MagicHash = 0x48415348; + + TableHeader (uint32_t data_len) : + magic (MagicHash), version (1), hash_function (eHashFunctionDJB), + bucket_count (0), hashes_count (0), header_data_len (data_len) + {}; + +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Magic: " << format("0x%x", magic) << "\n" + << "Version: " << version << "\n" + << "Hash Function: " << hash_function << "\n" + << "Bucket Count: " << bucket_count << "\n" + << "Header Data Length: " << header_data_len << "\n"; + } + void dump() { print(dbgs()); } +#endif + }; + +public: + // The HeaderData describes the form of each set of data. In general this + // is as a list of atoms (atom_count) where each atom contains a type + // (AtomType type) of data, and an encoding form (form). 
In the case of + // data that is referenced via DW_FORM_ref_* the die_offset_base is + // used to describe the offset for all forms in the list of atoms. + // This also serves as a public interface of sorts. + // When written to disk this will have the form: + // + // uint32_t die_offset_base + // uint32_t atom_count + // atom_count Atoms + enum AtomType { + eAtomTypeNULL = 0u, + eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding + eAtomTypeCUOffset = 2u, // DIE offset of the compiler unit header that + // contains the item in question + eAtomTypeTag = 3u, // DW_TAG_xxx value, should be encoded as + // DW_FORM_data1 (if no tags exceed 255) or + // DW_FORM_data2. + eAtomTypeNameFlags = 4u, // Flags from enum NameFlags + eAtomTypeTypeFlags = 5u // Flags from enum TypeFlags + }; + + // Make these public so that they can be used as a general interface to + // the class. + struct Atom { + AtomType type; // enum AtomType + uint16_t form; // DWARF DW_FORM_ defines + + Atom(AtomType type, uint16_t form) : type(type), form(form) {}; + static const char * AtomTypeString(enum AtomType); +#ifndef NDEBUG + void print(raw_ostream &O) { + O << "Type: " << dwarf::TagString(type) << "\n" + << "Form: " << dwarf::FormEncodingString(form) << "\n"; + } + void dump() { + print(dbgs()); + } +#endif + }; + + private: + struct TableHeaderData { + + uint32_t die_offset_base; + std::vector Atoms; + + TableHeaderData(DwarfAccelTable::Atom Atom, uint32_t offset = 0) + : die_offset_base(offset) { + Atoms.push_back(Atom); + } + +#ifndef NDEBUG + void print (raw_ostream &O) { + O << "die_offset_base: " << die_offset_base << "\n"; + for (size_t i = 0; i < Atoms.size(); i++) + Atoms[i].print(O); + } + void dump() { + print(dbgs()); + } +#endif + }; + + // The data itself consists of a str_offset (to deal with collisions in + // some magical way? 
this looks like the KeyType from the spec, which + // should mean an integer compare on read), a count of the DIEs in the + // hash and the offsets to the DIEs themselves. + // On disk each data section is ended with a 0 KeyType as the end of the + // hash chain. + // On output this looks like: + // uint32_t str_offset + // uint32_t hash_data_count + // HashData[hash_data_count] + struct HashData { + StringRef Str; + uint32_t HashValue; + MCSymbol *Sym; + std::vector DIEOffsets; // offsets + HashData(StringRef S) : Str(S) { + HashValue = DwarfAccelTable::HashDJB(S.str().c_str()); + } + void addOffset(uint32_t off) { DIEOffsets.push_back(off); } + #ifndef NDEBUG + void print(raw_ostream &O) { + O << "Name: " << Str << "\n"; + O << " Hash Value: " << format("0x%x", HashValue) << "\n"; + O << " Symbol: " ; + if (Sym) Sym->print(O); + else O << ""; + O << "\n"; + for (size_t i = 0; i < DIEOffsets.size(); i++) + O << " Offset: " << DIEOffsets[i] << "\n"; + } + void dump() { + print(dbgs()); + } + #endif + }; + + DwarfAccelTable(const DwarfAccelTable&); // DO NOT IMPLEMENT + void operator=(const DwarfAccelTable&); // DO NOT IMPLEMENT + + // Internal Functions + void EmitHeader(AsmPrinter *); + void EmitBuckets(AsmPrinter *); + void EmitHashes(AsmPrinter *); + void EmitOffsets(AsmPrinter *, MCSymbol *); + void EmitData(AsmPrinter *, DwarfDebug *D); + + // Output Variables + TableHeader Header; + TableHeaderData HeaderData; + std::vector Data; + + // String Data + typedef std::vector DIEArray; + typedef StringMap StringEntries; + StringEntries Entries; + + // Buckets/Hashes/Offsets + typedef std::vector HashList; + typedef std::vector BucketList; + BucketList Buckets; + HashList Hashes; + + // Public Implementation + public: + DwarfAccelTable(DwarfAccelTable::Atom Atom); + void AddName(StringRef, DIE*); + void FinalizeTable(AsmPrinter *, const char *); + void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *); +#ifndef NDEBUG + void print(raw_ostream &O); + void dump() { 
print(dbgs()); } +#endif +}; + +} +#endif -- cgit v1.1 From 09ac3d841367d5d56328eade506c951e0dc3a72d Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 09:24:32 +0000 Subject: Add the support code to enable the dwarf accelerator tables. Upcoming patches to fix the types section (all types, not just global types), and testcases. The code to do the final emission is disabled by default. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143923 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 6 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 13 ++- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 28 +++++ lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 168 ++++++++++++++++++++++++++++ lib/CodeGen/AsmPrinter/DwarfDebug.h | 15 +++ lib/MC/MCObjectFileInfo.cpp | 26 ++++- 6 files changed, 248 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 242841a..2d3f4fd 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -25,7 +25,7 @@ #include #include -// The apple dwarf accelerator tables are an indirect hash table optimized +// The dwarf accelerator tables are an indirect hash table optimized // for null lookup rather than access to known data. They are output into // an on-disk format that looks like this: // @@ -176,9 +176,7 @@ public: #endif }; - // The data itself consists of a str_offset (to deal with collisions in - // some magical way? this looks like the KeyType from the spec, which - // should mean an integer compare on read), a count of the DIEs in the + // The data itself consists of a str_offset, a count of the DIEs in the // hash and the offsets to the DIEs themselves. // On disk each data section is ended with a 0 KeyType as the end of the // hash chain. 
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 95f1f92..cb80bd8 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -904,8 +904,11 @@ DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { return NDie; NDie = new DIE(dwarf::DW_TAG_namespace); insertDIE(NS, NDie); - if (!NS.getName().empty()) + if (!NS.getName().empty()) { addString(NDie, dwarf::DW_AT_name, NS.getName()); + addAccelNamespace(NS.getName(), NDie); + } else + addAccelNamespace("(anonymous namespace)", NDie); addSourceLine(NDie, NS); addToContextOwner(NDie, NS.getContext()); return NDie; @@ -1078,7 +1081,9 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { DIDescriptor GVContext = GV.getContext(); addToContextOwner(VariableDIE, GVContext); // Add location. + bool addToAccelTable = false; if (isGlobalVariable) { + addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); addLabel(Block, 0, dwarf::DW_FORM_udata, @@ -1097,11 +1102,12 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addDie(VariableSpecDIE); } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); - } + } } else if (const ConstantInt *CI = dyn_cast_or_null(GV.getConstant())) addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + addToAccelTable = true; // GV is a merged global. 
DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); Value *Ptr = CE->getOperand(0); @@ -1116,6 +1122,9 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } + if (addToAccelTable) + addAccelName(GV.getName(), VariableDIE); + return; } diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index efab32e..b994ce3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -64,6 +64,13 @@ class CompileUnit { /// StringMap GlobalTypes; + /// AccelNames - A map of names for the name accelerator table. + /// + StringMap AccelNames; + StringMap > AccelObjC; + StringMap AccelNamespace; + StringMap AccelTypes; + /// DIEBlocks - A list of all the DIEBlocks in use. std::vector DIEBlocks; @@ -82,6 +89,13 @@ public: const StringMap &getGlobals() const { return Globals; } const StringMap &getGlobalTypes() const { return GlobalTypes; } + const StringMap &getAccelNames() const { return AccelNames; } + const StringMap > &getAccelObjC() const { + return AccelObjC; + } + const StringMap &getAccelNamespace() const { return AccelNamespace; } + const StringMap &getAccelTypes() const { return AccelTypes; } + /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } @@ -94,6 +108,20 @@ public: /// void addGlobalType(DIType Ty); + + /// addAccelName - Add a new name to the name accelerator table. + void addAccelName(StringRef Name, DIE *Die) { AccelNames[Name] = Die; } + void addAccelObjC(StringRef Name, DIE *Die) { + std::vector &DIEs = AccelObjC[Name]; + DIEs.push_back(Die); + } + void addAccelNamespace(StringRef Name, DIE *Die) { + AccelNamespace[Name] = Die; + } + void addAccelType(StringRef Name, DIE *Die) { + AccelTypes[Name] = Die; + } + /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. 
DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 037b4ac..8acc857 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "llvm/Constants.h" #include "llvm/Module.h" @@ -52,6 +53,10 @@ static cl::opt UnknownLocations("use-unknown-locations", cl::Hidden, cl::desc("Make an absence of debug location information explicit."), cl::init(false)); +static cl::opt DwarfAccelTables("dwarf-accel-tables", cl::Hidden, + cl::desc("Output prototype dwarf accelerator tables."), + cl::init(false)); + namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; @@ -444,6 +449,9 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (DS.isSubprogram()) TheCU->addPubTypes(DISubprogram(DS)); + if (DS.isSubprogram() && !Scope->isAbstractScope()) + TheCU->addAccelName(DISubprogram(DS).getName(), ScopeDIE); + return ScopeDIE; } @@ -524,6 +532,36 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { return NewCU; } +static bool isObjCClass(StringRef Name) { + return Name[0] == '+' || Name[0] == '-'; +} + +static bool hasObjCCategory(StringRef Name) { + if (Name[0] != '+' && Name[0] != '-') + return false; + + size_t pos = Name.find(')'); + if (pos != std::string::npos) { + if (Name[pos+1] != ' ') return false; + return true; + } + + return false; +} + +static void getObjCClassCategory(StringRef In, StringRef &Class, + StringRef &Category) { + if (!hasObjCCategory(In)) { + Class = In.slice(In.find('[') + 1, In.find(' ')); + Category = ""; + return; + } + + Class = In.slice(In.find('[') + 1, In.find('(')); + Category = In.slice(In.find('[') + 1, In.find(' ')); + return; +} + /// construct 
SubprogramDIE - Construct subprogram DIE. void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { @@ -561,6 +599,18 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, // Expose as global. TheCU->addGlobal(SP.getName(), SubprogramDie); + // Add to Accel Names + TheCU->addAccelName(SP.getName(), SubprogramDie); + + // If this is an Objective-C selector name add it to the ObjC accelerator too. + if (isObjCClass(SP.getName())) { + StringRef Class, Category; + getObjCClassCategory(SP.getName(), Class, Category); + TheCU->addAccelObjC(Class, SubprogramDie); + if (Category != "") + TheCU->addAccelObjC(Category, SubprogramDie); + } + return; } @@ -757,6 +807,14 @@ void DwarfDebug::endModule() { // Corresponding abbreviations into a abbrev section. emitAbbreviations(); + // Emit info into a dwarf accelerator table sections. + if (DwarfAccelTables) { + emitAccelNames(); + emitAccelObjC(); + emitAccelNamespaces(); + emitAccelTypes(); + } + // Emit info into a debug pubnames section. emitDebugPubNames(); @@ -1696,6 +1754,116 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); } +/// emitAccelNames - Emit visible names into a hashed accelerator table +/// section. +void DwarfDebug::emitAccelNames() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap &Names = TheCU->getAccelNames(); + for (StringMap::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + DIE *Entity = GI->second; + AT.AddName(Name, Entity); + } + } + + AT.FinalizeTable(Asm, "Names"); + Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfAccelNamesSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("names_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. 
+ AT.Emit(Asm, SectionBegin, this); +} + +/// emitAccelObjC - Emit objective C classes and categories into a hashed +/// accelerator table section. +void DwarfDebug::emitAccelObjC() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap > &Names = TheCU->getAccelObjC(); + for (StringMap >::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + std::vector Entities = GI->second; + for (std::vector::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); + } + } + + AT.FinalizeTable(Asm, "ObjC"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelObjCSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("objc_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); +} + +/// emitAccelNamespace - Emit namespace dies into a hashed accelerator +/// table. +void DwarfDebug::emitAccelNamespaces() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap &Names = TheCU->getAccelNamespace(); + for (StringMap::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + DIE *Entity = GI->second; + AT.AddName(Name, Entity); + } + } + + AT.FinalizeTable(Asm, "namespac"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelNamespaceSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("namespac_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. 
+ AT.Emit(Asm, SectionBegin, this); +} + +/// emitAccelTypes() - Emit type dies into a hashed accelerator table. +void DwarfDebug::emitAccelTypes() { + DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + dwarf::DW_FORM_data4)); + for (DenseMap::iterator I = CUMap.begin(), + E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + const StringMap &Names = TheCU->getGlobalTypes(); + //TODO: TheCU->getAccelTypes(); + for (StringMap::const_iterator + GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + DIE *Entity = GI->second; + AT.AddName(Name, Entity); + } + } + + AT.FinalizeTable(Asm, "types"); + Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering() + .getDwarfAccelTypesSection()); + MCSymbol *SectionBegin = Asm->GetTempSymbol("types_begin"); + Asm->OutStreamer.EmitLabel(SectionBegin); + + // Emit the full data. + AT.Emit(Asm, SectionBegin, this); +} + /// emitDebugPubNames - Emit visible names into a debug pubnames section. /// void DwarfDebug::emitDebugPubNames() { diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 7bcc5b4..ce5e59e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -367,6 +367,21 @@ private: /// void emitEndOfLineMatrix(unsigned SectionEnd); + /// emitAccelNames - Emit visible names into a hashed accelerator table + /// section. + void emitAccelNames(); + + /// emitAccelObjC - Emit objective C classes and categories into a hashed + /// accelerator table section. + void emitAccelObjC(); + + /// emitAccelNamespace - Emit namespace dies into a hashed accelerator + /// table. + void emitAccelNamespaces(); + + /// emitAccelTypes() - Emit type dies into a hashed accelerator table. + void emitAccelTypes(); + /// emitDebugPubNames - Emit visible names into a debug pubnames section. 
/// void emitDebugPubNames(); diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 397e9c3..1fcda31 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -152,6 +152,24 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { SectionKind::getReadOnly()); // Debug Information. + DwarfAccelNamesSection = + Ctx->getMachOSection("__DWARF", "__apple_names", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfAccelObjCSection = + Ctx->getMachOSection("__DWARF", "__apple_objc", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + // 16 character section limit... + DwarfAccelNamespaceSection = + Ctx->getMachOSection("__DWARF", "__apple_namespac", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfAccelTypesSection = + Ctx->getMachOSection("__DWARF", "__apple_types", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfAbbrevSection = Ctx->getMachOSection("__DWARF", "__debug_abbrev", MCSectionMachO::S_ATTR_DEBUG, @@ -506,8 +524,12 @@ void MCObjectFileInfo::InitMCObjectFileInfo(StringRef TT, Reloc::Model relocm, PersonalityEncoding = LSDAEncoding = FDEEncoding = FDECFIEncoding = TTypeEncoding = dwarf::DW_EH_PE_absptr; - EHFrameSection = 0; // Created on demand. - CompactUnwindSection = 0; // Used only by selected targets. + EHFrameSection = 0; // Created on demand. + CompactUnwindSection = 0; // Used only by selected targets. + DwarfAccelNamesSection = 0; // Used only by selected targets. + DwarfAccelObjCSection = 0; // Used only by selected targets. + DwarfAccelNamespaceSection = 0; // Used only by selected targets. + DwarfAccelTypesSection = 0; // Used only by selected targets. Triple T(TT); Triple::ArchType Arch = T.getArch(); -- cgit v1.1 From 2c802b7e88718f2c033fe1e7e602e341501b5d80 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 09:37:06 +0000 Subject: Add new files to cmake. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143924 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 67d9273..f6ce17d 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_library(LLVMAsmPrinter AsmPrinterDwarf.cpp AsmPrinterInlineAsm.cpp DIE.cpp + DwarfAccelTable.cpp DwarfCFIException.cpp DwarfCompileUnit.cpp DwarfDebug.cpp -- cgit v1.1 From 76a4e1a0682c8e855f8b7c0a05d802dce8c07d7a Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 09:38:42 +0000 Subject: Remove unnecessary addition to API. Replace with something much simpler. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143925 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 2 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 8 -------- lib/CodeGen/AsmPrinter/DwarfDebug.h | 4 ---- 3 files changed, 1 insertion(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index b7c8c6e..84a2839 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -183,7 +183,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { Asm->OutStreamer.EmitLabel((*HI)->Sym); Asm->OutStreamer.AddComment((*HI)->Str); Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str), - D->getDwarfStrSectionSym()); + D->getStringPool()); Asm->OutStreamer.AddComment("Num DIEs"); Asm->EmitInt32((*HI)->DIEOffsets.size()); for (std::vector::const_iterator diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 8acc857..633f216 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -165,14 +165,6 @@ MCSymbol 
*DwarfDebug::getStringPoolEntry(StringRef Str) { return Entry.first = Asm->GetTempSymbol("string", Entry.second); } -MCSymbol *DwarfDebug::getDwarfStrSectionSym(void) { - if (DwarfStrSectionSym) return DwarfStrSectionSym; - DwarfStrSectionSym = - EmitSectionSym(Asm, Asm->getObjFileLowering().getDwarfStrSection(), - "section_str"); - return DwarfStrSectionSym; -} - /// assignAbbrevNumber - Define a unique number for the abbreviation. /// void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index ce5e59e..174333e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -528,10 +528,6 @@ public: /// getStringPoolEntry - returns an entry into the string pool with the given /// string text. MCSymbol *getStringPoolEntry(StringRef Str); - - /// getDwarfStrSectionSym - returns the symbol that starts the dwarf string - /// section. - MCSymbol *getDwarfStrSectionSym(); }; } // End of namespace llvm -- cgit v1.1 From 19a4daff9bbe18dab2620e25ac6cbf0635639ec6 Mon Sep 17 00:00:00 2001 From: Richard Osborne Date: Mon, 7 Nov 2011 17:09:05 +0000 Subject: Don't introduce custom nodes after legalization in TargetLowering::BuildSDIV() and TargetLowering::BuildUDIV(). 
Fixes PR11283 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143964 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 4 ++-- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 22 ++++++++++++++-------- 2 files changed, 16 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0a142bd..064cee2 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7822,7 +7822,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, /// SDValue DAGCombiner::BuildSDIV(SDNode *N) { std::vector Built; - SDValue S = TLI.BuildSDIV(N, DAG, &Built); + SDValue S = TLI.BuildSDIV(N, DAG, LegalOperations, &Built); for (std::vector::iterator ii = Built.begin(), ee = Built.end(); ii != ee; ++ii) @@ -7836,7 +7836,7 @@ SDValue DAGCombiner::BuildSDIV(SDNode *N) { /// SDValue DAGCombiner::BuildUDIV(SDNode *N) { std::vector Built; - SDValue S = TLI.BuildUDIV(N, DAG, &Built); + SDValue S = TLI.BuildUDIV(N, DAG, LegalOperations, &Built); for (std::vector::iterator ii = Built.begin(), ee = Built.end(); ii != ee; ++ii) diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index f1807ca..6cbdb8b 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3242,8 +3242,9 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. 
See: /// -SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, - std::vector* Created) const { +SDValue TargetLowering:: +BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, + std::vector* Created) const { EVT VT = N->getValueType(0); DebugLoc dl= N->getDebugLoc(); @@ -3258,10 +3259,12 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type SDValue Q; - if (isOperationLegalOrCustom(ISD::MULHS, VT)) + if (IsAfterLegalization ? isOperationLegal(ISD::MULHS, VT) : + isOperationLegalOrCustom(ISD::MULHS, VT)) Q = DAG.getNode(ISD::MULHS, dl, VT, N->getOperand(0), DAG.getConstant(magics.m, VT)); - else if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) + else if (IsAfterLegalization ? isOperationLegal(ISD::SMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), N->getOperand(0), DAG.getConstant(magics.m, VT)).getNode(), 1); @@ -3299,8 +3302,9 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG, /// return a DAG expression to select that will generate the same value by /// multiplying by a magic number. See: /// -SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, - std::vector* Created) const { +SDValue TargetLowering:: +BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, + std::vector* Created) const { EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); @@ -3332,9 +3336,11 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG, // Multiply the numerator (operand 0) by the magic value // FIXME: We should support doing a MUL in a wider type - if (isOperationLegalOrCustom(ISD::MULHU, VT)) + if (IsAfterLegalization ? 
isOperationLegal(ISD::MULHU, VT) : + isOperationLegalOrCustom(ISD::MULHU, VT)) Q = DAG.getNode(ISD::MULHU, dl, VT, Q, DAG.getConstant(magics.m, VT)); - else if (isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) + else if (IsAfterLegalization ? isOperationLegal(ISD::UMUL_LOHI, VT) : + isOperationLegalOrCustom(ISD::UMUL_LOHI, VT)) Q = SDValue(DAG.getNode(ISD::UMUL_LOHI, dl, DAG.getVTList(VT, VT), Q, DAG.getConstant(magics.m, VT)).getNode(), 1); else -- cgit v1.1 From 7135457efff83960bfad420f2dc3a5b4b58e8d07 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 18:10:17 +0000 Subject: Allow for the case where the name of the subprogram is "". Fixes a self-host error. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143970 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 633f216..d3414d7 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -525,6 +525,7 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { } static bool isObjCClass(StringRef Name) { + if (Name == "") return false; return Name[0] == '+' || Name[0] == '-'; } -- cgit v1.1 From c545322c276f933759e4140027e5f84e480d15d6 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 18:34:47 +0000 Subject: Avoid the use of a local temporary for comment twines. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143974 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 84a2839..2f175d4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -128,8 +128,7 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { unsigned index = 0; for (size_t i = 0; i < Buckets.size(); ++i) { - Twine Comment = Twine("Bucket ") + Twine(i); - Asm->OutStreamer.AddComment(Comment); + Asm->OutStreamer.AddComment("Bucket " + Twine(i)); if (Buckets[i].size() != 0) Asm->EmitInt32(index); else @@ -144,8 +143,7 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { for (size_t i = 0; i < Buckets.size(); ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { - Twine Comment = Twine("Hash in Bucket ") + Twine(i); - Asm->OutStreamer.AddComment(Comment); + Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); Asm->EmitInt32((*HI)->HashValue); } } @@ -159,8 +157,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { for (size_t i = 0; i < Buckets.size(); ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { - Twine Comment = Twine("Offset in Bucket ") + Twine(i); - Asm->OutStreamer.AddComment(Comment); + Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); MCContext &Context = Asm->OutStreamer.getContext(); const MCExpr *Sub = MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), -- cgit v1.1 From 2ea402541fa1bd2aa9942abf7b281ef01f9a10ff Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 18:53:23 +0000 Subject: Use StringRef::startswith to do some string comparisons. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143982 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index d3414d7..39d0d1c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -525,20 +525,17 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { } static bool isObjCClass(StringRef Name) { - if (Name == "") return false; - return Name[0] == '+' || Name[0] == '-'; + return Name.startswith("+") || Name.startswith("-"); } static bool hasObjCCategory(StringRef Name) { - if (Name[0] != '+' && Name[0] != '-') - return false; + if (!isObjCClass(Name)) return false; size_t pos = Name.find(')'); if (pos != std::string::npos) { if (Name[pos+1] != ' ') return false; return true; } - return false; } -- cgit v1.1 From bce22b48fee6a0b0295cc18c7994f3a515e63398 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 7 Nov 2011 18:57:41 +0000 Subject: Add 64-bit to 32-bit trunc pattern. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143988 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 05470d5..3708c4a 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -172,3 +172,8 @@ defm : SetlePats; defm : SetgtPats; defm : SetgePats; defm : SetgeImmPats; + +// truncate +def : Pat<(i32 (trunc CPU64Regs:$src)), + (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>; + -- cgit v1.1 From 68698cc20d7e1fa1b45a30e7c25313796f40d5c6 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 7 Nov 2011 18:59:49 +0000 Subject: Make the type of shift amount i32 in order to reduce the number of shift instruction definitions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143989 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 6 +++--- lib/Target/Mips/MipsISelLowering.h | 2 ++ lib/Target/Mips/MipsInstrInfo.td | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 3708c4a..83bd7b8 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -32,7 +32,7 @@ def Subtract32 : SDNodeXForm; // imm32_63 predicate - True if imm is in range [32, 63]. 
-def imm32_63 : ImmLeaf= 32 && (int32_t)Imm < 64;}], Subtract32>; @@ -43,12 +43,12 @@ def imm32_63 : ImmLeaf func, bits<5> isRotate, string instr_asm, SDNode OpNode>: - shift_rotate_imm; class shift_rotate_imm64_32 func, bits<5> isRotate, string instr_asm, SDNode OpNode>: - shift_rotate_imm; // Mul, Div diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 62e7f09..f2b64e3 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -98,6 +98,8 @@ namespace llvm { public: explicit MipsTargetLowering(MipsTargetMachine &TM); + virtual MVT getShiftAmountTy(EVT LHSTy) const { return MVT::i32; } + virtual bool allowsUnalignedMemoryAccesses (EVT VT) const; /// LowerOperation - Provide custom lowering hooks for some operations. diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index fd99b13..64dbd26 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -327,9 +327,9 @@ class shift_rotate_imm32 func, bits<5> isRotate, string instr_asm, class shift_rotate_reg func, bits<5> isRotate, string instr_asm, SDNode OpNode, RegisterClass RC>: - FR<0x00, func, (outs RC:$rd), (ins RC:$rs, RC:$rt), + FR<0x00, func, (outs RC:$rd), (ins CPURegs:$rs, RC:$rt), !strconcat(instr_asm, "\t$rd, $rt, $rs"), - [(set RC:$rd, (OpNode RC:$rt, RC:$rs))], IIAlu> { + [(set RC:$rd, (OpNode RC:$rt, CPURegs:$rs))], IIAlu> { let shamt = isRotate; } -- cgit v1.1 From 778404601266effff17d5c43d0ad08b8ca2522d0 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 7 Nov 2011 19:01:49 +0000 Subject: Fix patterns for unaligned 32-bit load. DSLL32 or DSRL32 should be emitted when shift amount is larger than 32. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143990 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 83bd7b8..1e8bf70 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -155,9 +155,9 @@ def : Pat<(i64 immZExt16:$in), (ORi64 ZERO_64, imm:$in)>; // zextloadi32_u -def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64_P8 addr:$a), 32), 32)>, +def : Pat<(zextloadi32_u addr:$a), (DSRL32 (DSLL32 (ULW64_P8 addr:$a), 0), 0)>, Requires<[IsN64]>; -def : Pat<(zextloadi32_u addr:$a), (DSRL (DSLL (ULW64 addr:$a), 32), 32)>, +def : Pat<(zextloadi32_u addr:$a), (DSRL32 (DSLL32 (ULW64 addr:$a), 0), 0)>, Requires<[NotN64]>; // hi/lo relocs -- cgit v1.1 From c4d6fd569d7524dc39045d54b58ad692650c6542 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 7 Nov 2011 19:03:40 +0000 Subject: Use array_lengthof to compute the number of iterations of a loop. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143991 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsRegisterInfo.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp index f8c0fda..5331f09 100644 --- a/lib/Target/Mips/MipsRegisterInfo.cpp +++ b/lib/Target/Mips/MipsRegisterInfo.cpp @@ -191,23 +191,23 @@ BitVector MipsRegisterInfo:: getReservedRegs(const MachineFunction &MF) const { static const unsigned ReservedCPURegs[] = { Mips::ZERO, Mips::AT, Mips::K0, Mips::K1, - Mips::GP, Mips::SP, Mips::FP, Mips::RA, 0 + Mips::GP, Mips::SP, Mips::FP, Mips::RA }; static const unsigned ReservedCPU64Regs[] = { Mips::ZERO_64, Mips::AT_64, Mips::K0_64, Mips::K1_64, - Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64, 0 + Mips::GP_64, Mips::SP_64, Mips::FP_64, Mips::RA_64 }; BitVector Reserved(getNumRegs()); typedef TargetRegisterClass::iterator RegIter; - for (const unsigned *Reg = ReservedCPURegs; *Reg; ++Reg) - Reserved.set(*Reg); + for (unsigned I = 0; I < array_lengthof(ReservedCPURegs); ++I) + Reserved.set(ReservedCPURegs[I]); if (Subtarget.hasMips64()) { - for (const unsigned *Reg = ReservedCPU64Regs; *Reg; ++Reg) - Reserved.set(*Reg); + for (unsigned I = 0; I < array_lengthof(ReservedCPU64Regs); ++I) + Reserved.set(ReservedCPU64Regs[I]); // Reserve all registers in AFGR64. for (RegIter Reg = Mips::AFGR64RegisterClass->begin(); -- cgit v1.1 From 213b2a218840ce81196ac5914de47647c927b287 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 7 Nov 2011 19:06:10 +0000 Subject: Define functions that get or set the size of area on callee's stack frame which is used to save va_arg or byval arguments passed in registers. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143992 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsMachineFunction.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index bc30b6b..be27606 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -51,12 +51,16 @@ private: mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; + // Size of area on callee's stack frame which is used to save va_arg or + // byval arguments passed in registers. + unsigned RegSaveAreaSize; + public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0), - MaxCallFrameSize(0) + MaxCallFrameSize(0), RegSaveAreaSize(0) {} bool isInArgFI(int FI) const { @@ -100,6 +104,11 @@ public: unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; } void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; } + + unsigned getRegSaveAreaSize() const { return RegSaveAreaSize; } + void setRegSaveAreaSize(unsigned S) { + if (RegSaveAreaSize < S) RegSaveAreaSize = S; + } }; } // end of namespace llvm -- cgit v1.1 From f83ba3293fb6a3ef84483b5513e2e28af15e1222 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 7 Nov 2011 19:07:35 +0000 Subject: Include RegSaveAreaSize in the computation of stack size. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143993 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsFrameLowering.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 22d1e47..71f3116 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -157,6 +157,7 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { (MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) : MipsFI->getMaxCallFrameSize(); unsigned StackSize = AlignOffset(LocalVarAreaOffset, StackAlign) + + AlignOffset(MipsFI->getRegSaveAreaSize(), StackAlign) + AlignOffset(MFI->getStackSize(), StackAlign); // Update stack size -- cgit v1.1 From d83d98d4eb9595a88b830f5e3f5c6c24fae80df1 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 7 Nov 2011 19:10:49 +0000 Subject: Add definition of 64-bit load upper immediate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143994 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 1 + lib/Target/Mips/MipsInstrInfo.td | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 1e8bf70..9a769e8 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -69,6 +69,7 @@ def SLTi64 : SetCC_I<0x0a, "slti", setlt, simm16_64, immSExt16, CPU64Regs>; def SLTiu64 : SetCC_I<0x0b, "sltiu", setult, simm16_64, immSExt16, CPU64Regs>; def ORi64 : ArithLogicI<0x0d, "ori", or, uimm16_64, immZExt16, CPU64Regs>; def XORi64 : ArithLogicI<0x0e, "xori", xor, uimm16_64, immZExt16, CPU64Regs>; +def LUi64 : LoadUpper<0x0f, "lui", CPU64Regs, uimm16_64>; /// Arithmetic Instructions (3-Operand, R-Type) def DADDu : ArithLogicR<0x00, 0x2d, "daddu", add, IIAlu, CPU64Regs, 1>; diff --git a/lib/Target/Mips/MipsInstrInfo.td 
b/lib/Target/Mips/MipsInstrInfo.td index 64dbd26..1cc3841 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -334,8 +334,8 @@ class shift_rotate_reg func, bits<5> isRotate, string instr_asm, } // Load Upper Imediate -class LoadUpper op, string instr_asm>: - FI op, string instr_asm, RegisterClass RC, Operand Imm>: + FI { let rs = 0; } @@ -680,7 +680,7 @@ def SLTiu : SetCC_I<0x0b, "sltiu", setult, simm16, immSExt16, CPURegs>; def ANDi : ArithLogicI<0x0c, "andi", and, uimm16, immZExt16, CPURegs>; def ORi : ArithLogicI<0x0d, "ori", or, uimm16, immZExt16, CPURegs>; def XORi : ArithLogicI<0x0e, "xori", xor, uimm16, immZExt16, CPURegs>; -def LUi : LoadUpper<0x0f, "lui">; +def LUi : LoadUpper<0x0f, "lui", CPURegs, uimm16>; /// Arithmetic Instructions (3-Operand, R-Type) def ADDu : ArithLogicR<0x00, 0x21, "addu", add, IIAlu, CPURegs, 1>; -- cgit v1.1 From 3e5d5c53a03e4a08cdb67f8a7f44567f925be9a5 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 7 Nov 2011 19:15:58 +0000 Subject: Expand V_SET0 to xorps by default. The xorps instruction is smaller than pxor, so prefer that encoding. The ExecutionDepsFix pass will switch the encoding to pxor and xorpd when appropriate. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@143996 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 3a02de0..8dcd637 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2420,7 +2420,7 @@ bool X86InstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { bool HasAVX = TM.getSubtarget().hasAVX(); switch (MI->getOpcode()) { case X86::V_SET0: - return Expand2AddrUndef(MI, get(HasAVX ? X86::VPXORrr : X86::PXORrr)); + return Expand2AddrUndef(MI, get(HasAVX ? 
X86::VXORPSrr : X86::XORPSrr)); case X86::TEST8ri_NOREX: MI->setDesc(get(X86::TEST8ri)); return true; -- cgit v1.1 From ac101e584873d72715b4fc4d2e35e3ba0ec8217b Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 7 Nov 2011 19:38:34 +0000 Subject: Make sure we don't insert instructions before a landingpad instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144000 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/DemoteRegToStack.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Utils/DemoteRegToStack.cpp b/lib/Transforms/Utils/DemoteRegToStack.cpp index 8cc2649..3ef6b01 100644 --- a/lib/Transforms/Utils/DemoteRegToStack.cpp +++ b/lib/Transforms/Utils/DemoteRegToStack.cpp @@ -97,8 +97,8 @@ AllocaInst* llvm::DemoteRegToStack(Instruction &I, bool VolatileLoads, InsertPt = II.getNormalDest()->begin(); } - for (; isa(InsertPt); ++InsertPt) - /* empty */; // Don't insert before any PHI nodes. + for (; isa(InsertPt) || isa(InsertPt); ++InsertPt) + /* empty */; // Don't insert before any PHI nodes or landingpad instrs. new StoreInst(&I, Slot, InsertPt); return Slot; -- cgit v1.1 From 055a647a9dbce8ea4291a46c0db8f3b716ed4af9 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 7 Nov 2011 21:00:43 +0000 Subject: Simplify code. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144012 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PTX/PTXAsmPrinter.cpp | 246 +++++++++++++++------------------------ 1 file changed, 91 insertions(+), 155 deletions(-) (limited to 'lib') diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index e329d5d..45a6afc 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -25,7 +25,6 @@ #include "llvm/Function.h" #include "llvm/Module.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/DebugInfo.h" #include "llvm/CodeGen/AsmPrinter.h" @@ -139,15 +138,15 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) const PTXSubtarget& ST = TM.getSubtarget(); // Emit the PTX .version and .target attributes - OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString())); - OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() + + OutStreamer.EmitRawText(Twine("\t.version ") + ST.getPTXVersionString()); + OutStreamer.EmitRawText(Twine("\t.target ") + ST.getTargetString() + (ST.supportsDouble() ? "" - : ", map_f64_to_f32"))); + : ", map_f64_to_f32")); // .address_size directive is optional, but it must immediately follow // the .target directive if present within a module if (ST.supportsPTX23()) { - std::string addrSize = ST.is64Bit() ? "64" : "32"; - OutStreamer.EmitRawText(Twine("\t.address_size " + addrSize)); + const char *addrSize = ST.is64Bit() ? 
"64" : "32"; + OutStreamer.EmitRawText(Twine("\t.address_size ") + addrSize); } OutStreamer.AddBlankLine(); @@ -179,68 +178,47 @@ void PTXAsmPrinter::EmitFunctionBodyStart() { const PTXParamManager &PM = MFI->getParamManager(); // Print register definitions - std::string regDefs; + SmallString<128> regDefs; + raw_svector_ostream os(regDefs); unsigned numRegs; // pred numRegs = MFI->getNumRegistersForClass(PTX::RegPredRegisterClass); - if(numRegs > 0) { - regDefs += "\t.reg .pred %p<"; - regDefs += utostr(numRegs); - regDefs += ">;\n"; - } + if(numRegs > 0) + os << "\t.reg .pred %p<" << numRegs << ">;\n"; // i16 numRegs = MFI->getNumRegistersForClass(PTX::RegI16RegisterClass); - if(numRegs > 0) { - regDefs += "\t.reg .b16 %rh<"; - regDefs += utostr(numRegs); - regDefs += ">;\n"; - } + if(numRegs > 0) + os << "\t.reg .b16 %rh<" << numRegs << ">;\n"; // i32 numRegs = MFI->getNumRegistersForClass(PTX::RegI32RegisterClass); - if(numRegs > 0) { - regDefs += "\t.reg .b32 %r<"; - regDefs += utostr(numRegs); - regDefs += ">;\n"; - } + if(numRegs > 0) + os << "\t.reg .b32 %r<" << numRegs << ">;\n"; // i64 numRegs = MFI->getNumRegistersForClass(PTX::RegI64RegisterClass); - if(numRegs > 0) { - regDefs += "\t.reg .b64 %rd<"; - regDefs += utostr(numRegs); - regDefs += ">;\n"; - } + if(numRegs > 0) + os << "\t.reg .b64 %rd<" << numRegs << ">;\n"; // f32 numRegs = MFI->getNumRegistersForClass(PTX::RegF32RegisterClass); - if(numRegs > 0) { - regDefs += "\t.reg .f32 %f<"; - regDefs += utostr(numRegs); - regDefs += ">;\n"; - } + if(numRegs > 0) + os << "\t.reg .f32 %f<" << numRegs << ">;\n"; // f64 numRegs = MFI->getNumRegistersForClass(PTX::RegF64RegisterClass); - if(numRegs > 0) { - regDefs += "\t.reg .f64 %fd<"; - regDefs += utostr(numRegs); - regDefs += ">;\n"; - } + if(numRegs > 0) + os << "\t.reg .f64 %fd<" << numRegs << ">;\n"; // Local params for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end(); - i != e; ++i) { - regDefs += "\t.param .b"; - regDefs 
+= utostr(PM.getParamSize(*i)); - regDefs += " "; - regDefs += PM.getParamName(*i); - regDefs += ";\n"; - } + i != e; ++i) + os << "\t.param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i) + << ";\n"; - OutStreamer.EmitRawText(Twine(regDefs)); + OutStreamer.EmitRawText(os.str()); const MachineFrameInfo* FrameInfo = MF->getFrameInfo(); @@ -249,16 +227,13 @@ void PTXAsmPrinter::EmitFunctionBodyStart() { for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) { DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n"); if (FrameInfo->getObjectSize(i) > 0) { - std::string def = "\t.local .align "; - def += utostr(FrameInfo->getObjectAlignment(i)); - def += " .b8"; - def += " __local"; - def += utostr(i); - def += "["; - def += utostr(FrameInfo->getObjectSize(i)); // Convert to bits - def += "]"; - def += ";"; - OutStreamer.EmitRawText(Twine(def)); + OutStreamer.EmitRawText("\t.local .align " + + Twine(FrameInfo->getObjectAlignment(i)) + + " .b8 __local" + + Twine(i) + + "[" + + Twine(FrameInfo->getObjectSize(i)) + + "];"); } } @@ -295,32 +270,27 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { assert(gvsym->isUndefined() && "Cannot define a symbol twice!"); - std::string decl; + SmallString<128> decl; + raw_svector_ostream os(decl); // check if it is defined in some other translation unit if (gv->isDeclaration()) - decl += ".extern "; + os << ".extern "; // state space: e.g., .global - decl += "."; - decl += getStateSpaceName(gv->getType()->getAddressSpace()); - decl += " "; + os << '.' 
<< getStateSpaceName(gv->getType()->getAddressSpace()) << ' '; // alignment (optional) unsigned alignment = gv->getAlignment(); - if (alignment != 0) { - decl += ".align "; - decl += utostr(gv->getAlignment()); - decl += " "; - } + if (alignment != 0) + os << ".align " << gv->getAlignment() << ' '; if (PointerType::classof(gv->getType())) { PointerType* pointerTy = dyn_cast(gv->getType()); Type* elementTy = pointerTy->getElementType(); - - if (elementTy->isArrayTy()) - { + + if (elementTy->isArrayTy()) { assert(elementTy->isArrayTy() && "Only pointers to arrays are supported"); ArrayType* arrayTy = dyn_cast(elementTy); @@ -329,7 +299,6 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { unsigned numElements = arrayTy->getNumElements(); while (elementTy->isArrayTy()) { - arrayTy = dyn_cast(elementTy); elementTy = arrayTy->getElementType(); @@ -338,64 +307,46 @@ void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) { // FIXME: isPrimitiveType() == false for i16? 
assert(elementTy->isSingleValueType() && - "Non-primitive types are not handled"); - + "Non-primitive types are not handled"); + // Find the size of the element in bits unsigned elementSize = elementTy->getPrimitiveSizeInBits(); - decl += ".b"; - decl += utostr(elementSize); - decl += " "; - decl += gvsym->getName(); - decl += "["; - decl += utostr(numElements); - decl += "]"; - } - else - { - decl += ".b8 "; - decl += gvsym->getName(); - decl += "[]"; + os << ".b" << elementSize << ' ' << gvsym->getName() + << '[' << numElements << ']'; + } else { + os << ".b8" << gvsym->getName() << "[]"; } // handle string constants (assume ConstantArray means string) - - if (gv->hasInitializer()) - { + if (gv->hasInitializer()) { const Constant *C = gv->getInitializer(); - if (const ConstantArray *CA = dyn_cast(C)) - { - decl += " = {"; + if (const ConstantArray *CA = dyn_cast(C)) { + os << " = {"; - for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) - { - if (i > 0) decl += ","; + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + if (i > 0) + os << ','; - decl += "0x" + - utohexstr(cast(CA->getOperand(i))->getZExtValue()); + os << "0x"; + os.write_hex(cast(CA->getOperand(i))->getZExtValue()); } - decl += "}"; + os << '}'; } } - } - else { + } else { // Note: this is currently the fall-through case and most likely generates // incorrect code. 
- decl += getTypeName(gv->getType()); - decl += " "; - - decl += gvsym->getName(); + os << getTypeName(gv->getType()) << ' ' << gvsym->getName(); - if (ArrayType::classof(gv->getType()) || - PointerType::classof(gv->getType())) - decl += "[]"; + if (isa(gv->getType()) || isa(gv->getType())) + os << "[]"; } - decl += ";"; - - OutStreamer.EmitRawText(Twine(decl)); + os << ';'; + OutStreamer.EmitRawText(os.str()); OutStreamer.AddBlankLine(); } @@ -414,43 +365,36 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() { const PTXSubtarget& ST = TM.getSubtarget(); const MachineRegisterInfo& MRI = MF->getRegInfo(); - std::string decl = isKernel ? ".entry" : ".func"; + SmallString<128> decl; + raw_svector_ostream os(decl); + os << (isKernel ? ".entry" : ".func"); if (!isKernel) { - decl += " ("; + os << " ("; if (ST.useParamSpaceForDeviceArgs()) { for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(), b = i; i != e; ++i) { - if (i != b) { - decl += ", "; - } + if (i != b) + os << ", "; - decl += ".param .b"; - decl += utostr(PM.getParamSize(*i)); - decl += " "; - decl += PM.getParamName(*i); + os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i); } } else { for (PTXMachineFunctionInfo::reg_iterator i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i; i != e; ++i) { - if (i != b) { - decl += ", "; - } - decl += ".reg ."; - decl += getRegisterTypeName(*i, MRI); - decl += " "; - decl += MFI->getRegisterName(*i); + if (i != b) + os << ", "; + + os << ".reg ." 
<< getRegisterTypeName(*i, MRI) << ' ' + << MFI->getRegisterName(*i); } } - decl += ")"; + os << ')'; } // Print function name - decl += " "; - decl += CurrentFnSym->getName().str(); - - decl += " ("; + os << ' ' << CurrentFnSym->getName() << " ("; const Function *F = MF->getFunction(); @@ -458,64 +402,56 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() { if (isKernel || ST.useParamSpaceForDeviceArgs()) { /*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(), b = i; i != e; ++i) { - if (i != b) { - decl += ", "; - } + if (i != b) + os << ", "; - decl += ".param .b"; - decl += utostr(PM.getParamSize(*i)); - decl += " "; - decl += PM.getParamName(*i); + os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i); }*/ int Counter = 1; for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(), b = i; i != e; ++i) { if (i != b) - decl += ", "; + os << ", "; const Type *ArgType = (*i).getType(); - decl += ".param .b"; + os << ".param .b"; if (ArgType->isPointerTy()) { if (ST.is64Bit()) - decl += "64"; + os << "64"; else - decl += "32"; + os << "32"; } else { - decl += utostr(ArgType->getPrimitiveSizeInBits()); + os << ArgType->getPrimitiveSizeInBits(); } if (ArgType->isPointerTy() && ST.emitPtrAttribute()) { const PointerType *PtrType = dyn_cast(ArgType); - decl += " .ptr"; + os << " .ptr"; switch (PtrType->getAddressSpace()) { default: llvm_unreachable("Unknown address space in argument"); case PTXStateSpace::Global: - decl += " .global"; + os << " .global"; break; case PTXStateSpace::Shared: - decl += " .shared"; + os << " .shared"; break; } } - decl += " __param_"; - decl += utostr(Counter++); + os << " __param_" << Counter++; } } else { for (PTXMachineFunctionInfo::reg_iterator i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i; i != e; ++i) { - if (i != b) { - decl += ", "; - } + if (i != b) + os << ", "; - decl += ".reg ."; - decl += getRegisterTypeName(*i, MRI); - decl += " "; - decl += MFI->getRegisterName(*i); + os 
<< ".reg ." << getRegisterTypeName(*i, MRI) << ' ' + << MFI->getRegisterName(*i); } } - decl += ")"; + os << ')'; - OutStreamer.EmitRawText(Twine(decl)); + OutStreamer.EmitRawText(os.str()); } unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, -- cgit v1.1 From 70be28a5adba5bcae0c6dcd63f17592864c351fc Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 7 Nov 2011 21:00:59 +0000 Subject: Simplify some uses of utohexstr. As a side effect hex is printed lowercase instead of uppercase now. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144013 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 5 ++--- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 4 ++-- lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 4658a30..4982808 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -21,7 +21,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" using namespace llvm; @@ -738,8 +737,8 @@ bool FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, // Compact Encoding Size = getSizeForEncoding(Streamer, dwarf::DW_EH_PE_udata4); - if (VerboseAsm) Streamer.AddComment(Twine("Compact Unwind Encoding: 0x") + - Twine(llvm::utohexstr(Encoding))); + if (VerboseAsm) Streamer.AddComment("Compact Unwind Encoding: 0x" + + Twine::utohexstr(Encoding)); Streamer.EmitIntValue(Encoding, Size); diff --git a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp index 844e3ab..6c6c021 100644 --- a/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp +++ b/lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp @@ -18,7 +18,6 @@ #include "llvm/MC/MCInst.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCExpr.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/raw_ostream.h" using 
namespace llvm; @@ -967,7 +966,8 @@ void ARMInstPrinter::printNEONModImmOperand(const MCInst *MI, unsigned OpNum, unsigned EncodedImm = MI->getOperand(OpNum).getImm(); unsigned EltBits; uint64_t Val = ARM_AM::decodeNEONModImm(EncodedImm, EltBits); - O << "#0x" << utohexstr(Val); + O << "#0x"; + O.write_hex(Val); } void ARMInstPrinter::printImmPlusOneOperand(const MCInst *MI, unsigned OpNum, diff --git a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp index 97bd083..ff051e3 100644 --- a/lib/Target/MBlaze/MBlazeAsmPrinter.cpp +++ b/lib/Target/MBlaze/MBlazeAsmPrinter.cpp @@ -39,7 +39,6 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" @@ -119,7 +118,7 @@ namespace { static void printHex32(unsigned int Value, raw_ostream &O) { O << "0x"; for (int i = 7; i >= 0; i--) - O << utohexstr((Value & (0xF << (i*4))) >> (i*4)); + O.write_hex((Value & (0xF << (i*4))) >> (i*4)); } // Create a bitmask with all callee saved registers for CPU or Floating Point -- cgit v1.1 From cd7dcad82a30363132d2dbabb45d60f1d2164a92 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 7 Nov 2011 21:23:39 +0000 Subject: Fix pass name after the source was moved. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144014 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 01dccdb..3786d57 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -131,7 +131,7 @@ public: virtual bool runOnMachineFunction(MachineFunction &MF); virtual const char *getPassName() const { - return "SSE execution domain fixup"; + return "Execution dependency fix"; } private: -- cgit v1.1 From d8f9f342307b1351a05c081cd5c7fe92678d3c1f Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 7 Nov 2011 21:23:42 +0000 Subject: MBB doesn't need to be a class member. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144015 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 3786d57..7f04224 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -111,7 +111,6 @@ class ExeDepsFix : public MachineFunctionPass { MachineFunction *MF; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - MachineBasicBlock *MBB; std::vector AliasMap; const unsigned NumRegs; DomainValue **LiveRegs; @@ -149,7 +148,7 @@ private: void Collapse(DomainValue *dv, unsigned domain); bool Merge(DomainValue *A, DomainValue *B); - void enterBasicBlock(); + void enterBasicBlock(MachineBasicBlock*); void visitGenericInstr(MachineInstr*); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); @@ -271,7 +270,7 @@ bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { return true; } -void ExeDepsFix::enterBasicBlock() { +void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Try to 
coalesce live-out registers from predecessors. for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { @@ -451,7 +450,6 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); - MBB = 0; LiveRegs = 0; Distance = 0; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); @@ -482,8 +480,8 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { for (df_ext_iterator > DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); DFI != DFE; ++DFI) { - MBB = *DFI; - enterBasicBlock(); + MachineBasicBlock *MBB = *DFI; + enterBasicBlock(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { MachineInstr *mi = I; -- cgit v1.1 From e7126ebd549d34a1c864db86ddfac1226994b6b0 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 7 Nov 2011 21:32:58 +0000 Subject: Add definitions of 64-bit instructions which move data between integer and floating pointer registers. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144016 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFPU.td | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 62cee12..0778c3d 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -183,6 +183,14 @@ def MTC1 : FFRGPR<0x04, (outs FGR32:$fs), (ins CPURegs:$rt), "mtc1\t$rt, $fs", [(set FGR32:$fs, (bitconvert CPURegs:$rt))]>; +def DMFC1 : FFRGPR<0x01, (outs CPU64Regs:$rt), (ins FGR64:$fs), + "dmfc1\t$rt, $fs", + [(set CPU64Regs:$rt, (bitconvert FGR64:$fs))]>; + +def DMTC1 : FFRGPR<0x05, (outs FGR64:$fs), (ins CPU64Regs:$rt), + "dmtc1\t$rt, $fs", + [(set FGR64:$fs, (bitconvert CPU64Regs:$rt))]>; + def FMOV_S : FFR1<0x6, 16, "mov", "s", FGR32, FGR32>; def FMOV_D32 : FFR1<0x6, 17, "mov", "d", AFGR64, AFGR64>, Requires<[NotFP64bit]>; -- cgit v1.1 From 29d525a3edf6df1e7797ecb60ebfa445a27e37c6 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 7 Nov 2011 21:35:45 +0000 Subject: Add code needed for copying between 64-bit integer and floating pointer registers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144017 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrInfo.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp index 559943a..5358dc0 100644 --- a/lib/Target/Mips/MipsInstrInfo.cpp +++ b/lib/Target/Mips/MipsInstrInfo.cpp @@ -131,6 +131,8 @@ copyPhysReg(MachineBasicBlock &MBB, Opc = Mips::FMOV_S; else if (Mips::AFGR64RegClass.contains(DestReg, SrcReg)) Opc = Mips::FMOV_D32; + else if (Mips::FGR64RegClass.contains(DestReg, SrcReg)) + Opc = Mips::FMOV_D64; else if (Mips::CCRRegClass.contains(DestReg, SrcReg)) Opc = Mips::MOVCCRToCCR; else if (Mips::CPU64RegsRegClass.contains(DestReg)) { // Copy to CPU64 Reg. 
@@ -140,12 +142,16 @@ copyPhysReg(MachineBasicBlock &MBB, Opc = Mips::MFHI64, SrcReg = 0; else if (SrcReg == Mips::LO64) Opc = Mips::MFLO64, SrcReg = 0; + else if (Mips::FGR64RegClass.contains(SrcReg)) + Opc = Mips::DMFC1; } else if (Mips::CPU64RegsRegClass.contains(SrcReg)) { // Copy from CPU64 Reg. if (DestReg == Mips::HI64) Opc = Mips::MTHI64, DestReg = 0; else if (DestReg == Mips::LO64) Opc = Mips::MTLO64, DestReg = 0; + else if (Mips::FGR64RegClass.contains(DestReg)) + Opc = Mips::DMTC1; } assert(Opc && "Cannot copy registers"); -- cgit v1.1 From c370619bac3ad2e9c9ae08a4c2fa9652b7ab31f7 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 7 Nov 2011 21:37:33 +0000 Subject: Add definition of the base class for floating point comparison instructions and add Mips64's version too. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144018 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFPU.td | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index 0778c3d..baf3ebd 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -267,16 +267,16 @@ def MIPS_FCOND_NGE : PatLeaf<(i32 13)>; def MIPS_FCOND_LE : PatLeaf<(i32 14)>; def MIPS_FCOND_NGT : PatLeaf<(i32 15)>; +class FCMP fmt, RegisterClass RC, string typestr> : + FCC; + /// Floating Point Compare let Defs=[FCR31] in { - def FCMP_S32 : FCC<0x10, (outs), (ins FGR32:$fs, FGR32:$ft, condcode:$cc), - "c.$cc.s\t$fs, $ft", - [(MipsFPCmp FGR32:$fs, FGR32:$ft, imm:$cc)]>; - - def FCMP_D32 : FCC<0x11, (outs), (ins AFGR64:$fs, AFGR64:$ft, condcode:$cc), - "c.$cc.d\t$fs, $ft", - [(MipsFPCmp AFGR64:$fs, AFGR64:$ft, imm:$cc)]>, - Requires<[NotFP64bit]>; + def FCMP_S32 : FCMP<0x10, FGR32, "s">; + def FCMP_D32 : FCMP<0x11, AFGR64, "d">, Requires<[NotFP64bit]>; + def FCMP_D64 : FCMP<0x11, FGR64, "d">, Requires<[IsFP64bit]>; } 
//===----------------------------------------------------------------------===// -- cgit v1.1 From 4cae74bd7450c1ec29369229e50052dfac5c09d7 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 7 Nov 2011 21:38:58 +0000 Subject: Various Mips64 floating point instruction patterns. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144019 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsInstrFPU.td | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td index baf3ebd..7638f54 100644 --- a/lib/Target/Mips/MipsInstrFPU.td +++ b/lib/Target/Mips/MipsInstrFPU.td @@ -317,13 +317,28 @@ def : Pat<(f32 fpimm0), (MTC1 ZERO)>; def : Pat<(f32 fpimm0neg), (FNEG_S (MTC1 ZERO))>; def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVT_S_W (MTC1 CPURegs:$src))>; -def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>; - def : Pat<(i32 (fp_to_sint FGR32:$src)), (MFC1 (TRUNC_W_S FGR32:$src))>; -def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>; let Predicates = [NotFP64bit] in { + def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D32_W (MTC1 CPURegs:$src))>; + def : Pat<(i32 (fp_to_sint AFGR64:$src)), (MFC1 (TRUNC_W_D32 AFGR64:$src))>; def : Pat<(f32 (fround AFGR64:$src)), (CVT_S_D32 AFGR64:$src)>; def : Pat<(f64 (fextend FGR32:$src)), (CVT_D32_S FGR32:$src)>; } +let Predicates = [IsFP64bit] in { + def : Pat<(f64 fpimm0), (DMTC1 ZERO_64)>; + def : Pat<(f64 fpimm0neg), (FNEG_D64 (DMTC1 ZERO_64))>; + + def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVT_D64_W (MTC1 CPURegs:$src))>; + def : Pat<(f32 (sint_to_fp CPU64Regs:$src)), + (CVT_S_L (DMTC1 CPU64Regs:$src))>; + def : Pat<(f64 (sint_to_fp CPU64Regs:$src)), + (CVT_D64_L (DMTC1 CPU64Regs:$src))>; + + def : Pat<(i32 (fp_to_sint FGR64:$src)), (MFC1 (TRUNC_W_D64 FGR64:$src))>; + def : Pat<(i64 (fp_to_sint FGR64:$src)), (DMFC1 (TRUNC_L_D64 FGR64:$src))>; + + def : 
Pat<(f32 (fround FGR64:$src)), (CVT_S_D64 FGR64:$src)>; + def : Pat<(f64 (fextend FGR32:$src)), (CVT_D64_S FGR32:$src)>; +} \ No newline at end of file -- cgit v1.1 From 25265d0e7af83f30e64851458c29c5b0c01befeb Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 7 Nov 2011 21:40:27 +0000 Subject: Extract two methods. No functional change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144020 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 44 ++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 7f04224..8b002e7 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -149,6 +149,8 @@ private: bool Merge(DomainValue *A, DomainValue *B); void enterBasicBlock(MachineBasicBlock*); + void leaveBasicBlock(MachineBasicBlock*); + void visitInstr(MachineInstr*); void visitGenericInstr(MachineInstr*); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); @@ -305,6 +307,27 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { } } +void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { + // Save live registers at end of MBB - used by enterBasicBlock(). + if (LiveRegs) + LiveOuts.insert(std::make_pair(MBB, LiveRegs)); + LiveRegs = 0; +} + +void ExeDepsFix::visitInstr(MachineInstr *MI) { + if (MI->isDebugValue()) + return; + ++Distance; + std::pair domp = TII->getExecutionDomain(MI); + if (domp.first) + if (domp.second) + visitSoftInstr(MI, domp.second); + else + visitHardInstr(MI, domp.first); + else if (LiveRegs) + visitGenericInstr(MI); +} + // A hard instruction only works in one domain. All input registers will be // forced into that domain. 
void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { @@ -483,24 +506,9 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock *MBB = *DFI; enterBasicBlock(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; - ++I) { - MachineInstr *mi = I; - if (mi->isDebugValue()) continue; - ++Distance; - std::pair domp = TII->getExecutionDomain(mi); - if (domp.first) - if (domp.second) - visitSoftInstr(mi, domp.second); - else - visitHardInstr(mi, domp.first); - else if (LiveRegs) - visitGenericInstr(mi); - } - - // Save live registers at end of MBB - used by enterBasicBlock(). - if (LiveRegs) - LiveOuts.insert(std::make_pair(MBB, LiveRegs)); - LiveRegs = 0; + ++I) + visitInstr(I); + leaveBasicBlock(MBB); } // Clear the LiveOuts vectors. Should we also collapse any remaining -- cgit v1.1 From 62c8e8e3f65a2943cedbce37a6b9b47653f0ea0a Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 7 Nov 2011 21:43:40 +0000 Subject: Allow i1 to be promoted to i32 for ARM AAPCS and AAPCS-VFP calling convention as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144021 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMCallingConv.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index 5539d28..a482a20 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -89,7 +89,7 @@ def RetFastCC_ARM_APCS : CallingConv<[ def CC_ARM_AAPCS_Common : CallingConv<[ - CCIfType<[i8, i16], CCPromoteToType>, + CCIfType<[i1, i8, i16], CCPromoteToType>, // i64/f64 is passed in even pairs of GPRs // i64 is 8-aligned i32 here, so we may need to eat R1 as a pad register -- cgit v1.1 From e77546c3c3634863a79ffc3adea52882685db454 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 21:49:28 +0000 Subject: Simple destructor to delete the hash data we created earlier. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144023 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 5 +++++ lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 1 + 2 files changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 2f175d4..6a32ab6 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -42,6 +42,11 @@ DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) : HeaderData(atom) { } +DwarfAccelTable::~DwarfAccelTable() { + for (size_t i = 0 ; i < Data.size(); ++i) + delete Data[i]; +} + void DwarfAccelTable::AddName(StringRef Name, DIE* die) { // If the string is in the list already then add this die to the list // otherwise add a new one. diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 2d3f4fd..4fc6118 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -239,6 +239,7 @@ public: // Public Implementation public: DwarfAccelTable(DwarfAccelTable::Atom Atom); + ~DwarfAccelTable(); void AddName(StringRef, DIE*); void FinalizeTable(AsmPrinter *, const char *); void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *); -- cgit v1.1 From 2dd5e1e64d718a0aeaaf988a54d5acc0ec70f243 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 21:49:35 +0000 Subject: Move the hash function to using and taking a StringRef. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144024 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 4fc6118..a0f64f0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -69,10 +69,10 @@ class DwarfAccelTable { eHashFunctionDJB = 0u }; - static uint32_t HashDJB (const char *s) { + static uint32_t HashDJB (StringRef Str) { uint32_t h = 5381; - for (unsigned char c = *s; c; c = *++s) - h = ((h << 5) + h) + c; + for (unsigned i = 0, e = Str.size(); i != e; ++i) + h = ((h << 5) + h) + Str[i]; return h; } @@ -190,7 +190,7 @@ public: MCSymbol *Sym; std::vector DIEOffsets; // offsets HashData(StringRef S) : Str(S) { - HashValue = DwarfAccelTable::HashDJB(S.str().c_str()); + HashValue = DwarfAccelTable::HashDJB(S); } void addOffset(uint32_t off) { DIEOffsets.push_back(off); } #ifndef NDEBUG -- cgit v1.1 From a59ce0379134b249a3c949f7dcd6ec3566c4d7e3 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 7 Nov 2011 21:59:29 +0000 Subject: Use a reverse post order instead of a DFS order. The enterBasicBlock() function is combining live-out values from predecessor blocks. The RPO traversal means that more predecessors have been visited when that happens, only back-edges are missing. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144025 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 8b002e7..3d6f256 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -26,7 +26,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" @@ -499,11 +499,10 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { } MachineBasicBlock *Entry = MF->begin(); - SmallPtrSet Visited; - for (df_ext_iterator > - DFI = df_ext_begin(Entry, Visited), DFE = df_ext_end(Entry, Visited); - DFI != DFE; ++DFI) { - MachineBasicBlock *MBB = *DFI; + ReversePostOrderTraversal RPOT(Entry); + for (ReversePostOrderTraversal::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + MachineBasicBlock *MBB = *MBBI; enterBasicBlock(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) -- cgit v1.1 From 56c2b109f73f895b591d2303128c8adcf0bc2c3d Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 7 Nov 2011 22:11:16 +0000 Subject: Add all completed and named types to the dwarf type accelerator tables. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144027 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 6 ++++++ lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 3 +-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index cb80bd8..660992a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -634,6 +634,12 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { // If this is a complete composite type then include it in the // list of global types. addGlobalType(Ty); + + // If this is a named finished type then include it in the list of types + // for the accelerator tables. + if (!Ty.getName().empty() && !Ty.isForwardDecl()) + if (DIEEntry *Entry = getDIEEntry(Ty)) + AccelTypes[Ty.getName()] = Entry->getEntry(); } /// addGlobalType - Add a new global type to the compile unit. diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 39d0d1c..64e0e92 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1834,8 +1834,7 @@ void DwarfDebug::emitAccelTypes() { for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; - const StringMap &Names = TheCU->getGlobalTypes(); - //TODO: TheCU->getAccelTypes(); + const StringMap &Names = TheCU->getAccelTypes(); for (StringMap::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); -- cgit v1.1 From 1b4f6f2532e9a6a99fcab5fde5b4a2187c9c9c2b Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 7 Nov 2011 22:51:10 +0000 Subject: Add a bunch of calls to RemoveDeadNode in LegalizeDAG, so legalization doesn't get confused by CSE later on. Fixes PR11318. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144034 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7425669..9cce6fe 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -285,6 +285,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), Alignment); DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + DAG.RemoveDeadNode(ST, DUL); return; } // Do a (aligned) store to a stack slot, then copy from the stack slot @@ -349,6 +350,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], Stores.size()); DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + DAG.RemoveDeadNode(ST, DUL); return; } assert(ST->getMemoryVT().isInteger() && @@ -381,6 +383,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + DAG.RemoveDeadNode(ST, DUL); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. 
@@ -1144,6 +1147,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { DAG.ReplaceAllUsesWith(ST, OptStore, this); + DAG.RemoveDeadNode(ST, this); break; } @@ -1169,8 +1173,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) + if (Tmp1.getNode()) { DAG.ReplaceAllUsesWith(SDValue(Node, 0), Tmp1, this); + DAG.RemoveDeadNode(Node, this); + } break; case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); @@ -1181,6 +1187,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + DAG.RemoveDeadNode(Node, this); break; } } @@ -1203,6 +1210,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), NVT, isVolatile, isNonTemporal, Alignment); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + DAG.RemoveDeadNode(Node, this); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1258,6 +1266,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // The order of the stores doesn't matter. 
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + DAG.RemoveDeadNode(Node, this); } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) @@ -1280,6 +1289,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.ReplaceAllUsesWith(SDValue(Node, 0), TLI.LowerOperation(SDValue(Node, 0), DAG), this); + DAG.RemoveDeadNode(Node, this); break; case TargetLowering::Expand: assert(!StVT.isVector() && @@ -1292,6 +1302,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + DAG.RemoveDeadNode(Node, this); break; } } @@ -3361,6 +3372,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Scalars[0], Scalars.size()); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + DAG.RemoveDeadNode(Node, this); break; } case ISD::GLOBAL_OFFSET_TABLE: @@ -3377,8 +3389,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) + if (!Results.empty()) { DAG.ReplaceAllUsesWith(Node, Results.data(), this); + DAG.RemoveDeadNode(Node, this); + } } void SelectionDAGLegalize::PromoteNode(SDNode *Node) { @@ -3512,8 +3526,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) + if (!Results.empty()) { DAG.ReplaceAllUsesWith(Node, Results.data(), this); + DAG.RemoveDeadNode(Node, this); + } } // SelectionDAG::Legalize - This is the entry point for the file. 
-- cgit v1.1 From a29fc806fe02cea76f7896b7e344bb919dd7ac25 Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Mon, 7 Nov 2011 23:04:49 +0000 Subject: InstCombine now optimizes vector udiv by power of 2 to shifts Fixes r8429 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144036 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 7f48125..2f82b7b 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -441,19 +441,23 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { // Handle the integer div common cases if (Instruction *Common = commonIDivTransforms(I)) return Common; - - if (ConstantInt *C = dyn_cast(Op1)) { + + { // X udiv 2^C -> X >> C // Check to see if this is an unsigned division with an exact power of 2, // if so, convert to a right shift. - if (C->getValue().isPowerOf2()) { // 0 not included in isPowerOf2 + const APInt *C; + if (match(Op1, m_Power2(C))) { BinaryOperator *LShr = - BinaryOperator::CreateLShr(Op0, - ConstantInt::get(Op0->getType(), C->getValue().logBase2())); + BinaryOperator::CreateLShr(Op0, + ConstantInt::get(Op0->getType(), + C->logBase2())); if (I.isExact()) LShr->setIsExact(); return LShr; } + } + if (ConstantInt *C = dyn_cast(Op1)) { // X udiv C, where C >= signbit if (C->getValue().isNegative()) { Value *IC = Builder->CreateICmpULT(Op0, C); -- cgit v1.1 From b26c7727c9a45613d9bae69995cfd719c57c5614 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 7 Nov 2011 23:08:21 +0000 Subject: Kill and collapse outstanding DomainValues. DomainValues that are only used by "don't care" instructions are now collapsed to the first possible execution domain after all basic blocks have been processed. 
This typically means the PS domain on x86. For example, the vsel_i64 and vsel_double functions in sse2-blend.ll are completely collapsed to the PS domain instead of containing a mix of execution domains created by isel. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144037 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 3d6f256..bd77f65 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -510,11 +510,20 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { leaveBasicBlock(MBB); } - // Clear the LiveOuts vectors. Should we also collapse any remaining - // DomainValues? - for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end(); - i != e; ++i) - delete[] i->second; + // Clear the LiveOuts vectors and collapse any remaining DomainValues. + for (ReversePostOrderTraversal::rpo_iterator + MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { + LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); + if (FI == LiveOuts.end()) + continue; + assert(FI->second && "Null entry"); + // The DomainValue is collapsed when the last reference is killed. + LiveRegs = FI->second; + for (unsigned i = 0, e = NumRegs; i != e; ++i) + if (LiveRegs[i]) + Kill(i); + delete[] LiveRegs; + } LiveOuts.clear(); Avail.clear(); Allocator.DestroyAll(); -- cgit v1.1 From e13eba26711c8ea48059697e406f90cebccc9243 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Mon, 7 Nov 2011 23:36:48 +0000 Subject: This code is dead, what with the new EH model and the auto-upgraders in place. Delete! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144043 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/DwarfEHPrepare.cpp | 651 +---------------------------------------- 1 file changed, 12 insertions(+), 639 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index ed9e409..aa44223 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -28,98 +28,33 @@ #include "llvm/Transforms/Utils/SSAUpdater.h" using namespace llvm; -STATISTIC(NumLandingPadsSplit, "Number of landing pads split"); -STATISTIC(NumUnwindsLowered, "Number of unwind instructions lowered"); -STATISTIC(NumResumesLowered, "Number of eh.resume calls lowered"); -STATISTIC(NumExceptionValuesMoved, "Number of eh.exception calls moved"); +STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { const TargetMachine *TM; const TargetLowering *TLI; - // The eh.exception intrinsic. - Function *ExceptionValueIntrinsic; - - // The eh.selector intrinsic. - Function *SelectorIntrinsic; - - // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call. - Constant *URoR; - - // The EH language-specific catch-all type. - GlobalVariable *EHCatchAllValue; - - // _Unwind_Resume or the target equivalent. + // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; - // We both use and preserve dominator info. - DominatorTree *DT; - - // The function we are running on. - Function *F; - - // The landing pads for this function. 
- typedef SmallPtrSet BBSet; - BBSet LandingPads; - - bool InsertUnwindResumeCalls(); - - bool NormalizeLandingPads(); - bool LowerUnwindsAndResumes(); - bool MoveExceptionValueCalls(); - - Instruction *CreateExceptionValueCall(BasicBlock *BB); - - /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still - /// use the "llvm.eh.catch.all.value" call need to convert to using its - /// initializer instead. - bool CleanupSelectors(SmallPtrSet &Sels); - - bool HasCatchAllInSelector(IntrinsicInst *); + bool InsertUnwindResumeCalls(Function &Fn); - /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. - void FindAllCleanupSelectors(SmallPtrSet &Sels, - SmallPtrSet &CatchAllSels); - - /// FindAllURoRInvokes - Find all URoR invokes in the function. - void FindAllURoRInvokes(SmallPtrSet &URoRInvokes); - - /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or - /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to - /// a landing pad within the current function. This is a candidate to merge - /// the selector associated with the URoR invoke with the one from the - /// URoR's landing pad. - bool HandleURoRInvokes(); - - /// FindSelectorAndURoR - Find the eh.selector call and URoR call associated - /// with the eh.exception call. This recursively looks past instructions - /// which don't change the EH pointer value, like casts or PHI nodes. - bool FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet &SelCalls, - SmallPtrSet &SeenPHIs); - public: static char ID; // Pass identification, replacement for typeid. 
DwarfEHPrepare(const TargetMachine *tm) : FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), - ExceptionValueIntrinsic(0), SelectorIntrinsic(0), - URoR(0), EHCatchAllValue(0), RewindFunction(0) { + RewindFunction(0) { initializeDominatorTreePass(*PassRegistry::getPassRegistry()); } virtual bool runOnFunction(Function &Fn); - // getAnalysisUsage - We need the dominator tree for handling URoR. - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addPreserved(); - } + virtual void getAnalysisUsage(AnalysisUsage &AU) const { } const char *getPassName() const { return "Exception handling preparation"; } - }; } // end anonymous namespace @@ -129,543 +64,12 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { return new DwarfEHPrepare(tm); } -/// HasCatchAllInSelector - Return true if the intrinsic instruction has a -/// catch-all. -bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) { - if (!EHCatchAllValue) return false; - - unsigned ArgIdx = II->getNumArgOperands() - 1; - GlobalVariable *GV = dyn_cast(II->getArgOperand(ArgIdx)); - return GV == EHCatchAllValue; -} - -/// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. -void DwarfEHPrepare:: -FindAllCleanupSelectors(SmallPtrSet &Sels, - SmallPtrSet &CatchAllSels) { - for (Value::use_iterator - I = SelectorIntrinsic->use_begin(), - E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *II = cast(*I); - - if (II->getParent()->getParent() != F) - continue; - - if (!HasCatchAllInSelector(II)) - Sels.insert(II); - else - CatchAllSels.insert(II); - } -} - -/// FindAllURoRInvokes - Find all URoR invokes in the function. 
-void DwarfEHPrepare:: -FindAllURoRInvokes(SmallPtrSet &URoRInvokes) { - for (Value::use_iterator - I = URoR->use_begin(), - E = URoR->use_end(); I != E; ++I) { - if (InvokeInst *II = dyn_cast(*I)) - URoRInvokes.insert(II); - } -} - -/// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use -/// the "llvm.eh.catch.all.value" call need to convert to using its -/// initializer instead. -bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet &Sels) { - if (!EHCatchAllValue) return false; - - if (!SelectorIntrinsic) { - SelectorIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); - if (!SelectorIntrinsic) return false; - } - - bool Changed = false; - for (SmallPtrSet::iterator - I = Sels.begin(), E = Sels.end(); I != E; ++I) { - IntrinsicInst *Sel = *I; - - // Index of the "llvm.eh.catch.all.value" variable. - unsigned OpIdx = Sel->getNumArgOperands() - 1; - GlobalVariable *GV = dyn_cast(Sel->getArgOperand(OpIdx)); - if (GV != EHCatchAllValue) continue; - Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer()); - Changed = true; - } - - return Changed; -} - -/// FindSelectorAndURoR - Find the eh.selector call associated with the -/// eh.exception call. And indicate if there is a URoR "invoke" associated with -/// the eh.exception call. This recursively looks past instructions which don't -/// change the EH pointer value, like casts or PHI nodes. 
-bool -DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, - SmallPtrSet &SelCalls, - SmallPtrSet &SeenPHIs) { - bool Changed = false; - - for (Value::use_iterator - I = Inst->use_begin(), E = Inst->use_end(); I != E; ++I) { - Instruction *II = dyn_cast(*I); - if (!II || II->getParent()->getParent() != F) continue; - - if (IntrinsicInst *Sel = dyn_cast(II)) { - if (Sel->getIntrinsicID() == Intrinsic::eh_selector) - SelCalls.insert(Sel); - } else if (InvokeInst *Invoke = dyn_cast(II)) { - if (Invoke->getCalledFunction() == URoR) - URoRInvoke = true; - } else if (CastInst *CI = dyn_cast(II)) { - Changed |= FindSelectorAndURoR(CI, URoRInvoke, SelCalls, SeenPHIs); - } else if (PHINode *PN = dyn_cast(II)) { - if (SeenPHIs.insert(PN)) - // Don't process a PHI node more than once. - Changed |= FindSelectorAndURoR(PN, URoRInvoke, SelCalls, SeenPHIs); - } - } - - return Changed; -} - -/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or -/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a -/// landing pad within the current function. This is a candidate to merge the -/// selector associated with the URoR invoke with the one from the URoR's -/// landing pad. 
-bool DwarfEHPrepare::HandleURoRInvokes() { - if (!EHCatchAllValue) { - EHCatchAllValue = - F->getParent()->getNamedGlobal("llvm.eh.catch.all.value"); - if (!EHCatchAllValue) return false; - } - - if (!SelectorIntrinsic) { - SelectorIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_selector); - if (!SelectorIntrinsic) return false; - } - - SmallPtrSet Sels; - SmallPtrSet CatchAllSels; - FindAllCleanupSelectors(Sels, CatchAllSels); - - if (!URoR) { - URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); - if (!URoR) return CleanupSelectors(CatchAllSels); - } - - SmallPtrSet URoRInvokes; - FindAllURoRInvokes(URoRInvokes); - - SmallPtrSet SelsToConvert; - - for (SmallPtrSet::iterator - SI = Sels.begin(), SE = Sels.end(); SI != SE; ++SI) { - const BasicBlock *SelBB = (*SI)->getParent(); - for (SmallPtrSet::iterator - UI = URoRInvokes.begin(), UE = URoRInvokes.end(); UI != UE; ++UI) { - const BasicBlock *URoRBB = (*UI)->getParent(); - if (DT->dominates(SelBB, URoRBB)) { - SelsToConvert.insert(*SI); - break; - } - } - } - - bool Changed = false; - - if (Sels.size() != SelsToConvert.size()) { - // If we haven't been able to convert all of the clean-up selectors, then - // loop through the slow way to see if they still need to be converted. 
- if (!ExceptionValueIntrinsic) { - ExceptionValueIntrinsic = - Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception); - if (!ExceptionValueIntrinsic) - return CleanupSelectors(CatchAllSels); - } - - for (Value::use_iterator - I = ExceptionValueIntrinsic->use_begin(), - E = ExceptionValueIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *EHPtr = dyn_cast(*I); - if (!EHPtr || EHPtr->getParent()->getParent() != F) continue; - - bool URoRInvoke = false; - SmallPtrSet SelCalls; - SmallPtrSet SeenPHIs; - Changed |= FindSelectorAndURoR(EHPtr, URoRInvoke, SelCalls, SeenPHIs); - - if (URoRInvoke) { - // This EH pointer is being used by an invoke of an URoR instruction and - // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we - // need to convert it to a 'catch-all'. - for (SmallPtrSet::iterator - SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) - if (!HasCatchAllInSelector(*SI)) - SelsToConvert.insert(*SI); - } - } - } - - if (!SelsToConvert.empty()) { - // Convert all clean-up eh.selectors, which are associated with "invokes" of - // URoR calls, into catch-all eh.selectors. - Changed = true; - - for (SmallPtrSet::iterator - SI = SelsToConvert.begin(), SE = SelsToConvert.end(); - SI != SE; ++SI) { - IntrinsicInst *II = *SI; - - // Use the exception object pointer and the personality function - // from the original selector. - CallSite CS(II); - IntrinsicInst::op_iterator I = CS.arg_begin(); - IntrinsicInst::op_iterator E = CS.arg_end(); - IntrinsicInst::op_iterator B = prior(E); - - // Exclude last argument if it is an integer. - if (isa(B)) E = B; - - // Add exception object pointer (front). - // Add personality function (next). - // Add in any filter IDs (rest). - SmallVector Args(I, E); - - Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. 
- - CallInst *NewSelector = - CallInst::Create(SelectorIntrinsic, Args, "eh.sel.catch.all", II); - - NewSelector->setTailCall(II->isTailCall()); - NewSelector->setAttributes(II->getAttributes()); - NewSelector->setCallingConv(II->getCallingConv()); - - II->replaceAllUsesWith(NewSelector); - II->eraseFromParent(); - } - } - - Changed |= CleanupSelectors(CatchAllSels); - return Changed; -} - -/// NormalizeLandingPads - Normalize and discover landing pads, noting them -/// in the LandingPads set. A landing pad is normal if the only CFG edges -/// that end at it are unwind edges from invoke instructions. If we inlined -/// through an invoke we could have a normal branch from the previous -/// unwind block through to the landing pad for the original invoke. -/// Abnormal landing pads are fixed up by redirecting all unwind edges to -/// a new basic block which falls through to the original. -bool DwarfEHPrepare::NormalizeLandingPads() { - bool Changed = false; - - const MCAsmInfo *MAI = TM->getMCAsmInfo(); - bool usingSjLjEH = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj; - - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { - TerminatorInst *TI = I->getTerminator(); - if (!isa(TI)) - continue; - BasicBlock *LPad = TI->getSuccessor(1); - // Skip landing pads that have already been normalized. - if (LandingPads.count(LPad)) - continue; - - // Check that only invoke unwind edges end at the landing pad. - bool OnlyUnwoundTo = true; - bool SwitchOK = usingSjLjEH; - for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); - PI != PE; ++PI) { - TerminatorInst *PT = (*PI)->getTerminator(); - // The SjLj dispatch block uses a switch instruction. This is effectively - // an unwind edge, so we can disregard it here. There will only ever - // be one dispatch, however, so if there are multiple switches, one - // of them truly is a normal edge, not an unwind edge. 
- if (SwitchOK && isa(PT)) { - SwitchOK = false; - continue; - } - if (!isa(PT) || LPad == PT->getSuccessor(0)) { - OnlyUnwoundTo = false; - break; - } - } - - if (OnlyUnwoundTo) { - // Only unwind edges lead to the landing pad. Remember the landing pad. - LandingPads.insert(LPad); - continue; - } - - // At least one normal edge ends at the landing pad. Redirect the unwind - // edges to a new basic block which falls through into this one. - - // Create the new basic block. - BasicBlock *NewBB = BasicBlock::Create(F->getContext(), - LPad->getName() + "_unwind_edge"); - - // Insert it into the function right before the original landing pad. - LPad->getParent()->getBasicBlockList().insert(LPad, NewBB); - - // Redirect unwind edges from the original landing pad to NewBB. - for (pred_iterator PI = pred_begin(LPad), PE = pred_end(LPad); PI != PE; ) { - TerminatorInst *PT = (*PI++)->getTerminator(); - if (isa(PT) && PT->getSuccessor(1) == LPad) - // Unwind to the new block. - PT->setSuccessor(1, NewBB); - } - - // If there are any PHI nodes in LPad, we need to update them so that they - // merge incoming values from NewBB instead. - for (BasicBlock::iterator II = LPad->begin(); isa(II); ++II) { - PHINode *PN = cast(II); - pred_iterator PB = pred_begin(NewBB), PE = pred_end(NewBB); - - // Check to see if all of the values coming in via unwind edges are the - // same. If so, we don't need to create a new PHI node. - Value *InVal = PN->getIncomingValueForBlock(*PB); - for (pred_iterator PI = PB; PI != PE; ++PI) { - if (PI != PB && InVal != PN->getIncomingValueForBlock(*PI)) { - InVal = 0; - break; - } - } - - if (InVal == 0) { - // Different unwind edges have different values. Create a new PHI node - // in NewBB. - PHINode *NewPN = PHINode::Create(PN->getType(), - PN->getNumIncomingValues(), - PN->getName()+".unwind", NewBB); - // Add an entry for each unwind edge, using the value from the old PHI. 
- for (pred_iterator PI = PB; PI != PE; ++PI) - NewPN->addIncoming(PN->getIncomingValueForBlock(*PI), *PI); - - // Now use this new PHI as the common incoming value for NewBB in PN. - InVal = NewPN; - } - - // Revector exactly one entry in the PHI node to come from NewBB - // and delete all other entries that come from unwind edges. If - // there are both normal and unwind edges from the same predecessor, - // this leaves an entry for the normal edge. - for (pred_iterator PI = PB; PI != PE; ++PI) - PN->removeIncomingValue(*PI); - PN->addIncoming(InVal, NewBB); - } - - // Add a fallthrough from NewBB to the original landing pad. - BranchInst::Create(LPad, NewBB); - - // Now update DominatorTree analysis information. - DT->splitBlock(NewBB); - - // Remember the newly constructed landing pad. The original landing pad - // LPad is no longer a landing pad now that all unwind edges have been - // revectored to NewBB. - LandingPads.insert(NewBB); - ++NumLandingPadsSplit; - Changed = true; - } - - return Changed; -} - -/// LowerUnwinds - Turn unwind instructions into calls to _Unwind_Resume, -/// rethrowing any previously caught exception. This will crash horribly -/// at runtime if there is no such exception: using unwind to throw a new -/// exception is currently not supported. -bool DwarfEHPrepare::LowerUnwindsAndResumes() { - SmallVector ResumeInsts; - - for (Function::iterator fi = F->begin(), fe = F->end(); fi != fe; ++fi) { - for (BasicBlock::iterator bi = fi->begin(), be = fi->end(); bi != be; ++bi){ - if (isa(bi)) - ResumeInsts.push_back(bi); - else if (CallInst *call = dyn_cast(bi)) - if (Function *fn = dyn_cast(call->getCalledValue())) - if (fn->getName() == "llvm.eh.resume") - ResumeInsts.push_back(bi); - } - } - - if (ResumeInsts.empty()) return false; - - // Find the rewind function if we didn't already. 
- if (!RewindFunction) { - LLVMContext &Ctx = ResumeInsts[0]->getContext(); - FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), - Type::getInt8PtrTy(Ctx), false); - const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); - } - - bool Changed = false; - - for (SmallVectorImpl::iterator - I = ResumeInsts.begin(), E = ResumeInsts.end(); I != E; ++I) { - Instruction *RI = *I; - - // Replace the resuming instruction with a call to _Unwind_Resume (or the - // appropriate target equivalent). - - llvm::Value *ExnValue; - if (isa(RI)) - ExnValue = CreateExceptionValueCall(RI->getParent()); - else - ExnValue = cast(RI)->getArgOperand(0); - - // Create the call... - CallInst *CI = CallInst::Create(RewindFunction, ExnValue, "", RI); - CI->setCallingConv(TLI->getLibcallCallingConv(RTLIB::UNWIND_RESUME)); - - // ...followed by an UnreachableInst, if it was an unwind. - // Calls to llvm.eh.resume are typically already followed by this. - if (isa(RI)) - new UnreachableInst(RI->getContext(), RI); - - if (isa(RI)) - ++NumUnwindsLowered; - else - ++NumResumesLowered; - - // Nuke the resume instruction. - RI->eraseFromParent(); - - Changed = true; - } - - return Changed; -} - -/// MoveExceptionValueCalls - Ensure that eh.exception is only ever called from -/// landing pads by replacing calls outside of landing pads with direct use of -/// a register holding the appropriate value; this requires adding calls inside -/// all landing pads to initialize the register. Also, move eh.exception calls -/// inside landing pads to the start of the landing pad (optional, but may make -/// things simpler for later passes). -bool DwarfEHPrepare::MoveExceptionValueCalls() { - // If the eh.exception intrinsic is not declared in the module then there is - // nothing to do. Speed up compilation by checking for this common case. 
- if (!ExceptionValueIntrinsic && - !F->getParent()->getFunction(Intrinsic::getName(Intrinsic::eh_exception))) - return false; - - bool Changed = false; - - // Move calls to eh.exception that are inside a landing pad to the start of - // the landing pad. - for (BBSet::const_iterator LI = LandingPads.begin(), LE = LandingPads.end(); - LI != LE; ++LI) { - BasicBlock *LP = *LI; - for (BasicBlock::iterator II = LP->getFirstNonPHIOrDbg(), IE = LP->end(); - II != IE;) - if (EHExceptionInst *EI = dyn_cast(II++)) { - // Found a call to eh.exception. - if (!EI->use_empty()) { - // If there is already a call to eh.exception at the start of the - // landing pad, then get hold of it; otherwise create such a call. - Value *CallAtStart = CreateExceptionValueCall(LP); - - // If the call was at the start of a landing pad then leave it alone. - if (EI == CallAtStart) - continue; - EI->replaceAllUsesWith(CallAtStart); - } - EI->eraseFromParent(); - ++NumExceptionValuesMoved; - Changed = true; - } - } - - // Look for calls to eh.exception that are not in a landing pad. If one is - // found, then a register that holds the exception value will be created in - // each landing pad, and the SSAUpdater will be used to compute the values - // returned by eh.exception calls outside of landing pads. - SSAUpdater SSA; - - // Remember where we found the eh.exception call, to avoid rescanning earlier - // basic blocks which we already know contain no eh.exception calls. - bool FoundCallOutsideLandingPad = false; - Function::iterator BB = F->begin(); - for (Function::iterator BE = F->end(); BB != BE; ++BB) { - // Skip over landing pads. 
- if (LandingPads.count(BB)) - continue; - - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE; ++II) - if (isa(II)) { - SSA.Initialize(II->getType(), II->getName()); - FoundCallOutsideLandingPad = true; - break; - } - - if (FoundCallOutsideLandingPad) - break; - } - - // If all calls to eh.exception are in landing pads then we are done. - if (!FoundCallOutsideLandingPad) - return Changed; - - // Add a call to eh.exception at the start of each landing pad, and tell the - // SSAUpdater that this is the value produced by the landing pad. - for (BBSet::iterator LI = LandingPads.begin(), LE = LandingPads.end(); - LI != LE; ++LI) - SSA.AddAvailableValue(*LI, CreateExceptionValueCall(*LI)); - - // Now turn all calls to eh.exception that are not in a landing pad into a use - // of the appropriate register. - for (Function::iterator BE = F->end(); BB != BE; ++BB) { - // Skip over landing pads. - if (LandingPads.count(BB)) - continue; - - for (BasicBlock::iterator II = BB->getFirstNonPHIOrDbg(), IE = BB->end(); - II != IE;) - if (EHExceptionInst *EI = dyn_cast(II++)) { - // Found a call to eh.exception, replace it with the value from any - // upstream landing pad(s). - EI->replaceAllUsesWith(SSA.GetValueAtEndOfBlock(BB)); - EI->eraseFromParent(); - ++NumExceptionValuesMoved; - } - } - - return true; -} - -/// CreateExceptionValueCall - Insert a call to the eh.exception intrinsic at -/// the start of the basic block (unless there already is one, in which case -/// the existing call is returned). -Instruction *DwarfEHPrepare::CreateExceptionValueCall(BasicBlock *BB) { - Instruction *Start = BB->getFirstNonPHIOrDbg(); - // Is this a call to eh.exception? - if (IntrinsicInst *CI = dyn_cast(Start)) - if (CI->getIntrinsicID() == Intrinsic::eh_exception) - // Reuse the existing call. - return Start; - - // Find the eh.exception intrinsic if we didn't already. 
- if (!ExceptionValueIntrinsic) - ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), - Intrinsic::eh_exception); - - // Create the call. - return CallInst::Create(ExceptionValueIntrinsic, "eh.value.call", Start); -} - /// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. -bool DwarfEHPrepare::InsertUnwindResumeCalls() { +bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { bool UsesNewEH = false; SmallVector Resumes; - for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { + for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (ResumeInst *RI = dyn_cast(TI)) Resumes.push_back(RI); @@ -682,18 +86,17 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), Type::getInt8PtrTy(Ctx), false); const char *RewindName = TLI->getLibcallName(RTLIB::UNWIND_RESUME); - RewindFunction = F->getParent()->getOrInsertFunction(RewindName, FTy); + RewindFunction = Fn.getParent()->getOrInsertFunction(RewindName, FTy); } // Create the basic block where the _Unwind_Resume call will live. - LLVMContext &Ctx = F->getContext(); - BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", F); + LLVMContext &Ctx = Fn.getContext(); + BasicBlock *UnwindBB = BasicBlock::Create(Ctx, "unwind_resume", &Fn); PHINode *PN = PHINode::Create(Type::getInt8PtrTy(Ctx), Resumes.size(), "exn.obj", UnwindBB); // Extract the exception object from the ResumeInst and add it to the PHI node // that feeds the _Unwind_Resume call. 
- BasicBlock *UnwindBBDom = Resumes[0]->getParent(); for (SmallVectorImpl::iterator I = Resumes.begin(), E = Resumes.end(); I != E; ++I) { ResumeInst *RI = *I; @@ -701,8 +104,8 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { ExtractValueInst *ExnObj = ExtractValueInst::Create(RI->getOperand(0), 0, "exn.obj", RI); PN->addIncoming(ExnObj, RI->getParent()); - UnwindBBDom = DT->findNearestCommonDominator(RI->getParent(), UnwindBBDom); RI->eraseFromParent(); + ++NumResumesLowered; } // Call the function. @@ -711,40 +114,10 @@ bool DwarfEHPrepare::InsertUnwindResumeCalls() { // We never expect _Unwind_Resume to return. new UnreachableInst(Ctx, UnwindBB); - - // Now update DominatorTree analysis information. - DT->addNewBlock(UnwindBB, UnwindBBDom); return true; } bool DwarfEHPrepare::runOnFunction(Function &Fn) { - bool Changed = false; - - // Initialize internal state. - DT = &getAnalysis(); // FIXME: We won't need this with the new EH. - F = &Fn; - - if (InsertUnwindResumeCalls()) { - // FIXME: The reset of this function can go once the new EH is done. - LandingPads.clear(); - return true; - } - - // Ensure that only unwind edges end at landing pads (a landing pad is a - // basic block where an invoke unwind edge ends). - Changed |= NormalizeLandingPads(); - - // Turn unwind instructions and eh.resume calls into libcalls. - Changed |= LowerUnwindsAndResumes(); - - // TODO: Move eh.selector calls to landing pads and combine them. - - // Move eh.exception calls to landing pads. - Changed |= MoveExceptionValueCalls(); - - Changed |= HandleURoRInvokes(); - - LandingPads.clear(); - + bool Changed = InsertUnwindResumeCalls(Fn); return Changed; } -- cgit v1.1 From 58dd0fec4d4b8b1dc87e8a6a080b776d0d425cad Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Mon, 7 Nov 2011 23:53:20 +0000 Subject: Revert r144034 while I try to track down a crash. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144044 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9cce6fe..7425669 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -285,7 +285,6 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), Alignment); DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - DAG.RemoveDeadNode(ST, DUL); return; } // Do a (aligned) store to a stack slot, then copy from the stack slot @@ -350,7 +349,6 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], Stores.size()); DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - DAG.RemoveDeadNode(ST, DUL); return; } assert(ST->getMemoryVT().isInteger() && @@ -383,7 +381,6 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - DAG.RemoveDeadNode(ST, DUL); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. 
@@ -1147,7 +1144,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { DAG.ReplaceAllUsesWith(ST, OptStore, this); - DAG.RemoveDeadNode(ST, this); break; } @@ -1173,10 +1169,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) { + if (Tmp1.getNode()) DAG.ReplaceAllUsesWith(SDValue(Node, 0), Tmp1, this); - DAG.RemoveDeadNode(Node, this); - } break; case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); @@ -1187,7 +1181,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); break; } } @@ -1210,7 +1203,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), NVT, isVolatile, isNonTemporal, Alignment); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1266,7 +1258,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // The order of the stores doesn't matter. 
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) @@ -1289,7 +1280,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.ReplaceAllUsesWith(SDValue(Node, 0), TLI.LowerOperation(SDValue(Node, 0), DAG), this); - DAG.RemoveDeadNode(Node, this); break; case TargetLowering::Expand: assert(!StVT.isVector() && @@ -1302,7 +1292,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); break; } } @@ -3372,7 +3361,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Scalars[0], Scalars.size()); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); break; } case ISD::GLOBAL_OFFSET_TABLE: @@ -3389,10 +3377,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) { + if (!Results.empty()) DAG.ReplaceAllUsesWith(Node, Results.data(), this); - DAG.RemoveDeadNode(Node, this); - } } void SelectionDAGLegalize::PromoteNode(SDNode *Node) { @@ -3526,10 +3512,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) { + if (!Results.empty()) DAG.ReplaceAllUsesWith(Node, Results.data(), this); - DAG.RemoveDeadNode(Node, this); - } } // SelectionDAG::Legalize - This is the entry point for the file. -- cgit v1.1 From 0eff39f2e25e9d8dd52b1eb7fa4e7cc6cc77875f Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 8 Nov 2011 00:03:32 +0000 Subject: Enable support for returning i1, i8, and i16. 
Nothing special to do as it's the callee's responsibility to sign or zero-extend the return value. The additional test case just checks to make sure the calls are selected (i.e., -fast-isel-abort doesn't assert). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144047 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMCallingConv.td | 2 ++ lib/Target/ARM/ARMFastISel.cpp | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td index a482a20..c22a08e 100644 --- a/lib/Target/ARM/ARMCallingConv.td +++ b/lib/Target/ARM/ARMCallingConv.td @@ -43,6 +43,7 @@ def CC_ARM_APCS : CallingConv<[ ]>; def RetCC_ARM_APCS : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, CCIfType<[f32], CCBitConvertToType>, // Handle all vector types as either f64 or v2f64. @@ -106,6 +107,7 @@ def CC_ARM_AAPCS_Common : CallingConv<[ ]>; def RetCC_ARM_AAPCS_Common : CallingConv<[ + CCIfType<[i1, i8, i16], CCPromoteToType>, CCIfType<[i32], CCAssignToReg<[R0, R1, R2, R3]>>, CCIfType<[i64], CCAssignToRegWithShadow<[R0, R2], [R1, R3]>> ]>; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index c98156e..8acdd91 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1697,6 +1697,11 @@ bool ARMFastISel::FinishCall(MVT RetVT, SmallVectorImpl &UsedRegs, } else { assert(RVLocs.size() == 1 &&"Can't handle non-double multi-reg retvals!"); EVT CopyVT = RVLocs[0].getValVT(); + + // Special handling for extended integers. 
+ if (RetVT == MVT::i1 || RetVT == MVT::i8 || RetVT == MVT::i16) + CopyVT = MVT::i32; + TargetRegisterClass* DstRC = TLI.getRegClassFor(CopyVT); unsigned ResultReg = createResultReg(DstRC); @@ -1913,7 +1918,8 @@ bool ARMFastISel::SelectCall(const Instruction *I) { MVT RetVT; if (RetTy->isVoidTy()) RetVT = MVT::isVoid; - else if (!isTypeLegal(RetTy, RetVT)) + else if (!isTypeLegal(RetTy, RetVT) && RetVT != MVT::i16 && + RetVT != MVT::i8 && RetVT != MVT::i1) return false; // TODO: For now if we have long calls specified we don't handle the call. -- cgit v1.1 From 7bc389b6b00e26e07fa5ac64e43d9b06b73828c1 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 8 Nov 2011 00:31:58 +0000 Subject: Add x86 isel logic and patterns to match movlps from clang generated IR for _mm_loadl_pi(). rdar://10134392, rdar://10050222 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144052 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 11 +++++------ lib/Target/X86/X86InstrSSE.td | 6 ++++++ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index b15dfac..aab7c73 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6190,6 +6190,10 @@ static bool MayFoldVectorLoad(SDValue V) { V = V.getOperand(0); if (V.hasOneUse() && V.getOpcode() == ISD::SCALAR_TO_VECTOR) V = V.getOperand(0); + if (V.hasOneUse() && V.getOpcode() == ISD::BUILD_VECTOR && + V.getNumOperands() == 2 && V.getOperand(1).getOpcode() == ISD::UNDEF) + // BUILD_VECTOR (load), undef + V = V.getOperand(0); if (MayFoldLoad(V)) return true; return false; @@ -6372,15 +6376,10 @@ SDValue getMOVLP(SDValue &Op, DebugLoc &dl, SelectionDAG &DAG, bool HasXMMInt) { // turns into: // (MOVLPSmr addr:$src1, VR128:$src2) // So, recognize this potential and also use MOVLPS or MOVLPD - if (MayFoldVectorLoad(V1) && MayFoldIntoStore(Op)) + else if (MayFoldVectorLoad(V1) && 
MayFoldIntoStore(Op)) CanFoldLoad = true; ShuffleVectorSDNode *SVOp = cast(Op); - - // Both of them can't be memory operations though. - if (MayFoldVectorLoad(V1) && MayFoldVectorLoad(V2)) - CanFoldLoad = false; - if (CanFoldLoad) { if (HasXMMInt && NumElems == 2) return getTargetShuffleNode(X86ISD::MOVLPD, dl, VT, V1, V2, DAG); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index db4382a..4b6ba5d 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1035,6 +1035,9 @@ let Predicates = [HasSSE1] in { } // (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS + def : Pat<(store (i64 (vector_extract (bc_v2i64 (v4f32 VR128:$src2)), + (iPTR 0))), addr:$src1), + (MOVLPSmr addr:$src1, VR128:$src2)>; def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1), (MOVLPSmr addr:$src1, VR128:$src2)>; def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), @@ -1049,6 +1052,9 @@ let Predicates = [HasSSE1] in { def : Pat<(X86Movlps VR128:$src1, (bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))), (MOVLPSrm VR128:$src1, addr:$src2)>; + def : Pat<(X86Movlps VR128:$src1, + (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))), + (MOVLPSrm VR128:$src1, addr:$src2)>; // Store patterns def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)), -- cgit v1.1 From 2efa35f779213a828fa15d6aa3a508fc81d75d73 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 8 Nov 2011 01:25:24 +0000 Subject: Add a bunch of calls to RemoveDeadNode in LegalizeDAG, so legalization doesn't get confused by CSE later on. Fixes PR11318. Re-commit of r144034, with an extra fix so that RemoveDeadNode doesn't blow up. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144055 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 22 +++++++++++++++++++--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6 ++++++ 2 files changed, 25 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 7425669..9cce6fe 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -285,6 +285,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), Alignment); DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + DAG.RemoveDeadNode(ST, DUL); return; } // Do a (aligned) store to a stack slot, then copy from the stack slot @@ -349,6 +350,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], Stores.size()); DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + DAG.RemoveDeadNode(ST, DUL); return; } assert(ST->getMemoryVT().isInteger() && @@ -381,6 +383,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); + DAG.RemoveDeadNode(ST, DUL); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. 
@@ -1144,6 +1147,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { DAG.ReplaceAllUsesWith(ST, OptStore, this); + DAG.RemoveDeadNode(ST, this); break; } @@ -1169,8 +1173,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) + if (Tmp1.getNode()) { DAG.ReplaceAllUsesWith(SDValue(Node, 0), Tmp1, this); + DAG.RemoveDeadNode(Node, this); + } break; case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); @@ -1181,6 +1187,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + DAG.RemoveDeadNode(Node, this); break; } } @@ -1203,6 +1210,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), NVT, isVolatile, isNonTemporal, Alignment); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + DAG.RemoveDeadNode(Node, this); } else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1258,6 +1266,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // The order of the stores doesn't matter. 
SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + DAG.RemoveDeadNode(Node, this); } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) @@ -1280,6 +1289,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.ReplaceAllUsesWith(SDValue(Node, 0), TLI.LowerOperation(SDValue(Node, 0), DAG), this); + DAG.RemoveDeadNode(Node, this); break; case TargetLowering::Expand: assert(!StVT.isVector() && @@ -1292,6 +1302,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + DAG.RemoveDeadNode(Node, this); break; } } @@ -3361,6 +3372,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Scalars[0], Scalars.size()); DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); + DAG.RemoveDeadNode(Node, this); break; } case ISD::GLOBAL_OFFSET_TABLE: @@ -3377,8 +3389,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) + if (!Results.empty()) { DAG.ReplaceAllUsesWith(Node, Results.data(), this); + DAG.RemoveDeadNode(Node, this); + } } void SelectionDAGLegalize::PromoteNode(SDNode *Node) { @@ -3512,8 +3526,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) + if (!Results.empty()) { DAG.ReplaceAllUsesWith(Node, Results.data(), this); + DAG.RemoveDeadNode(Node, this); + } } // SelectionDAG::Legalize - This is the entry point for the file. 
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 010a740..b3f94a4 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -564,6 +564,12 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl &DeadNodes, void SelectionDAG::RemoveDeadNode(SDNode *N, DAGUpdateListener *UpdateListener){ SmallVector DeadNodes(1, N); + + // Create a dummy node that adds a reference to the root node, preventing + // it from being deleted. (This matters if the root is an operand of the + // dead node.) + HandleSDNode Dummy(getRoot()); + RemoveDeadNodes(DeadNodes, UpdateListener); } -- cgit v1.1 From 9f1f26aefaf0bc02e732a1d36e664165d228901a Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 8 Nov 2011 01:43:53 +0000 Subject: Make sure to mark vector extload's as expand on ARM. Fixes PR11319. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144057 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 2f533c2..dc1f4bc 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -126,14 +126,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRL, VT.getSimpleVT(), Custom); - setLoadExtAction(ISD::SEXTLOAD, VT.getSimpleVT(), Expand); - setLoadExtAction(ISD::ZEXTLOAD, VT.getSimpleVT(), Expand); - for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; - InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) - setTruncStoreAction(VT.getSimpleVT(), - (MVT::SimpleValueType)InnerVT, Expand); } - setLoadExtAction(ISD::EXTLOAD, VT.getSimpleVT(), Expand); // Promote all bit-wise 
operations. if (VT.isInteger() && VT != PromotedBitwiseVT) { @@ -442,6 +435,17 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f32, Expand); } + for (unsigned VT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + VT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++VT) { + for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; + InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT) + setTruncStoreAction((MVT::SimpleValueType)VT, + (MVT::SimpleValueType)InnerVT, Expand); + setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand); + setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand); + } + if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); addDRTypeForNEON(MVT::v8i8); @@ -483,8 +487,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Expand); setOperationAction(ISD::FFLOOR, MVT::v2f64, Expand); - setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); - // Neon does not support some operations on v1i64 and v2i64 types. setOperationAction(ISD::MUL, MVT::v1i64, Expand); // Custom handling for some quad-vector types to detect VMULL. -- cgit v1.1 From 0839033cbca20a1d1348bdf1c9f63fb263b248a6 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Tue, 8 Nov 2011 04:00:07 +0000 Subject: PPCInstrInfo.cpp: Fix one "unused" warning. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144071 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCInstrInfo.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 36a10f4..f148e9d 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -52,6 +52,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( // Should use subtarget info to pick the right hazard recognizer. 
For // now, always return a PPC970 recognizer. const TargetInstrInfo *TII = TM->getInstrInfo(); + (void)TII; assert(TII && "No InstrInfo?"); unsigned Directive = TM->getSubtarget().getDarwinDirective(); -- cgit v1.1 From ad6eef4a6518ea5736cfec60b174019be805060d Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Tue, 8 Nov 2011 12:47:11 +0000 Subject: This patch handles unaligned loads and stores in Mips JIT. Mips backend implements unaligned loads and stores with assembler macro-instructions ulw, usw, ulh, ulhu, ush, and this patch emits corresponding instructions instead of these macros. Each unaligned load/store is expanded into two corresponding loads/stores, where the offset for the second load/store is modified by +3 (for words) or +1 (for halfwords). Patch by Petar Jovanovic and Sasa Stankovic. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144081 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsCodeEmitter.cpp | 144 +++++++++++++++++++++++++++++++++++- lib/Target/Mips/MipsJITInfo.cpp | 10 ++- 2 files changed, 148 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index 23fabe3..dc4ecd6 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -108,6 +108,15 @@ class MipsCodeEmitter : public MachineFunctionPass { unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; + + int emitULW(const MachineInstr &MI); + int emitUSW(const MachineInstr &MI); + int emitULH(const MachineInstr &MI); + int emitULHu(const MachineInstr &MI); + int emitUSH(const MachineInstr &MI); + + void emitGlobalAddressUnaligned(const GlobalValue *GV, unsigned Reloc, + int Offset) const; }; } @@ -186,9 +195,15 @@ unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, return 
MipsRegisterInfo::getRegisterNumbering(MO.getReg()); else if (MO.isImm()) return static_cast(MO.getImm()); - else if (MO.isGlobal()) - emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true); - else if (MO.isSymbol()) + else if (MO.isGlobal()) { + if (MI.getOpcode() == Mips::ULW || MI.getOpcode() == Mips::USW || + MI.getOpcode() == Mips::ULH || MI.getOpcode() == Mips::ULHu) + emitGlobalAddressUnaligned(MO.getGlobal(), getRelocation(MI, MO), 4); + else if (MI.getOpcode() == Mips::USH) + emitGlobalAddressUnaligned(MO.getGlobal(), getRelocation(MI, MO), 8); + else + emitGlobalAddress(MO.getGlobal(), getRelocation(MI, MO), true); + } else if (MO.isSymbol()) emitExternalSymbolAddress(MO.getSymbolName(), getRelocation(MI, MO)); else if (MO.isCPI()) emitConstPoolAddress(MO.getIndex(), getRelocation(MI, MO)); @@ -207,6 +222,14 @@ void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, const_cast(GV), 0, MayNeedFarStub)); } +void MipsCodeEmitter::emitGlobalAddressUnaligned(const GlobalValue *GV, + unsigned Reloc, int Offset) const { + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, + const_cast(GV), 0, false)); + MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset() + Offset, + Reloc, const_cast(GV), 0, false)); +} + void MipsCodeEmitter:: emitExternalSymbolAddress(const char *ES, unsigned Reloc) const { MCE.addRelocation(MachineRelocation::getExtSym(MCE.getCurrentPCOffset(), @@ -230,6 +253,103 @@ void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, Reloc, BB)); } +int MipsCodeEmitter::emitUSW(const MachineInstr &MI) { + unsigned src = getMachineOpValue(MI, MI.getOperand(0)); + unsigned base = getMachineOpValue(MI, MI.getOperand(1)); + unsigned offset = getMachineOpValue(MI, MI.getOperand(2)); + // swr src, offset(base) + // swl src, offset+3(base) + MCE.emitWordLE( + (0x2e << 26) | (base << 21) | (src << 16) | (offset & 0xffff)); + MCE.emitWordLE( + (0x2a << 26) | (base << 21) | (src << 
16) | ((offset+3) & 0xffff)); + return 2; +} + +int MipsCodeEmitter::emitULW(const MachineInstr &MI) { + unsigned dst = getMachineOpValue(MI, MI.getOperand(0)); + unsigned base = getMachineOpValue(MI, MI.getOperand(1)); + unsigned offset = getMachineOpValue(MI, MI.getOperand(2)); + unsigned at = 1; + if (dst != base) { + // lwr dst, offset(base) + // lwl dst, offset+3(base) + MCE.emitWordLE( + (0x26 << 26) | (base << 21) | (dst << 16) | (offset & 0xffff)); + MCE.emitWordLE( + (0x22 << 26) | (base << 21) | (dst << 16) | ((offset+3) & 0xffff)); + return 2; + } else { + // lwr at, offset(base) + // lwl at, offset+3(base) + // addu dst, at, $zero + MCE.emitWordLE( + (0x26 << 26) | (base << 21) | (at << 16) | (offset & 0xffff)); + MCE.emitWordLE( + (0x22 << 26) | (base << 21) | (at << 16) | ((offset+3) & 0xffff)); + MCE.emitWordLE( + (0x0 << 26) | (at << 21) | (0x0 << 16) | (dst << 11) | (0x0 << 6) | 0x21); + return 3; + } +} + +int MipsCodeEmitter::emitUSH(const MachineInstr &MI) { + unsigned src = getMachineOpValue(MI, MI.getOperand(0)); + unsigned base = getMachineOpValue(MI, MI.getOperand(1)); + unsigned offset = getMachineOpValue(MI, MI.getOperand(2)); + unsigned at = 1; + // sb src, offset(base) + // srl at,src,8 + // sb at, offset+1(base) + MCE.emitWordLE( + (0x28 << 26) | (base << 21) | (src << 16) | (offset & 0xffff)); + MCE.emitWordLE( + (0x0 << 26) | (0x0 << 21) | (src << 16) | (at << 11) | (0x8 << 6) | 0x2); + MCE.emitWordLE( + (0x28 << 26) | (base << 21) | (at << 16) | ((offset+1) & 0xffff)); + return 3; +} + +int MipsCodeEmitter::emitULH(const MachineInstr &MI) { + unsigned dst = getMachineOpValue(MI, MI.getOperand(0)); + unsigned base = getMachineOpValue(MI, MI.getOperand(1)); + unsigned offset = getMachineOpValue(MI, MI.getOperand(2)); + unsigned at = 1; + // lbu at, offset(base) + // lb dst, offset+1(base) + // sll dst,dst,8 + // or dst,dst,at + MCE.emitWordLE( + (0x24 << 26) | (base << 21) | (at << 16) | (offset & 0xffff)); + MCE.emitWordLE( + (0x20 << 
26) | (base << 21) | (dst << 16) | ((offset+1) & 0xffff)); + MCE.emitWordLE( + (0x0 << 26) | (0x0 << 21) | (dst << 16) | (dst << 11) | (0x8 << 6) | 0x0); + MCE.emitWordLE( + (0x0 << 26) | (dst << 21) | (at << 16) | (dst << 11) | (0x0 << 6) | 0x25); + return 4; +} + +int MipsCodeEmitter::emitULHu(const MachineInstr &MI) { + unsigned dst = getMachineOpValue(MI, MI.getOperand(0)); + unsigned base = getMachineOpValue(MI, MI.getOperand(1)); + unsigned offset = getMachineOpValue(MI, MI.getOperand(2)); + unsigned at = 1; + // lbu at, offset(base) + // lbu dst, offset+1(base) + // sll dst,dst,8 + // or dst,dst,at + MCE.emitWordLE( + (0x24 << 26) | (base << 21) | (at << 16) | (offset & 0xffff)); + MCE.emitWordLE( + (0x24 << 26) | (base << 21) | (dst << 16) | ((offset+1) & 0xffff)); + MCE.emitWordLE( + (0x0 << 26) | (0x0 << 21) | (dst << 16) | (dst << 11) | (0x8 << 6) | 0x0); + MCE.emitWordLE( + (0x0 << 26) | (dst << 21) | (at << 16) | (dst << 11) | (0x0 << 6) | 0x25); + return 4; +} + void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) { DEBUG(errs() << "JIT: " << (void*)MCE.getCurrentPCValue() << ":\t" << MI); @@ -239,11 +359,27 @@ void MipsCodeEmitter::emitInstruction(const MachineInstr &MI) { if ((MI.getDesc().TSFlags & MipsII::FormMask) == MipsII::Pseudo) return; - ++NumEmitted; // Keep track of the # of mi's emitted switch (MI.getOpcode()) { + case Mips::USW: + NumEmitted += emitUSW(MI); + break; + case Mips::ULW: + NumEmitted += emitULW(MI); + break; + case Mips::ULH: + NumEmitted += emitULH(MI); + break; + case Mips::ULHu: + NumEmitted += emitULHu(MI); + break; + case Mips::USH: + NumEmitted += emitUSH(MI); + break; + default: emitWordLE(getBinaryCodeForInstr(MI)); + ++NumEmitted; // Keep track of the # of mi's emitted break; } diff --git a/lib/Target/Mips/MipsJITInfo.cpp b/lib/Target/Mips/MipsJITInfo.cpp index e3f6a75..a0ee722 100644 --- a/lib/Target/Mips/MipsJITInfo.cpp +++ b/lib/Target/Mips/MipsJITInfo.cpp @@ -218,10 +218,16 @@ void 
MipsJITInfo::relocate(void *Function, MachineRelocation *MR, *((unsigned*) RelocPos) |= (unsigned) ResultPtr; break; - case Mips::reloc_mips_lo: - ResultPtr = ResultPtr & 0xffff; + case Mips::reloc_mips_lo: { + // Addend is needed for unaligned load/store instructions, where offset + // for the second load/store in the expanded instruction sequence must + // be modified by +1 or +3. Otherwise, Addend is 0. + int Addend = *((unsigned*) RelocPos) & 0xffff; + ResultPtr = (ResultPtr + Addend) & 0xffff; + *((unsigned*) RelocPos) &= 0xffff0000; *((unsigned*) RelocPos) |= (unsigned) ResultPtr; break; + } default: llvm_unreachable("ERROR: Unknown Mips relocation."); -- cgit v1.1 From ec8ffc29c04e5e11d8637f1ba1df67d97bafb499 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 8 Nov 2011 18:22:25 +0000 Subject: Don't evaluate Data.size() on every iteration. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144095 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 6a32ab6..a684966 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -43,7 +43,7 @@ DwarfAccelTable::DwarfAccelTable(DwarfAccelTable::Atom atom) : } DwarfAccelTable::~DwarfAccelTable() { - for (size_t i = 0 ; i < Data.size(); ++i) + for (size_t i = 0, e = Data.size() ; i < e; ++i) delete Data[i]; } -- cgit v1.1 From 30b4d8b83b7b3995ac1b53f35d3110d48676b187 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 8 Nov 2011 18:38:40 +0000 Subject: A few more places where we can avoid multiple size queries. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144099 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index a684966..7c93dbf 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -58,7 +58,7 @@ void DwarfAccelTable::ComputeBucketCount(void) { // First get the number of unique hashes. std::vector uniques; uniques.resize(Data.size()); - for (size_t i = 0; i < Data.size(); ++i) + for (size_t i = 0, e = Data.size(); i < e; ++i) uniques[i] = Data[i]->HashValue; std::sort(uniques.begin(), uniques.end()); std::vector::iterator p = @@ -94,7 +94,7 @@ void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { // Compute bucket contents and final ordering. Buckets.resize(Header.bucket_count); - for (size_t i = 0; i < Data.size(); ++i) { + for (size_t i = 0, e = Data.size(); i < e; ++i) { uint32_t bucket = Data[i]->HashValue % Header.bucket_count; Buckets[bucket].push_back(Data[i]); Data[i]->Sym = Asm->GetTempSymbol(Prefix, i); @@ -132,7 +132,7 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) { // like a list of numbers of how many elements are in each bucket. void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { unsigned index = 0; - for (size_t i = 0; i < Buckets.size(); ++i) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { Asm->OutStreamer.AddComment("Bucket " + Twine(i)); if (Buckets[i].size() != 0) Asm->EmitInt32(index); @@ -145,7 +145,7 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) { // Walk through the buckets and emit the individual hashes for each // bucket. 
void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { - for (size_t i = 0; i < Buckets.size(); ++i) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); @@ -159,7 +159,7 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { // beginning of the section. The non-section symbol will be output later // when we emit the actual data. void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { - for (size_t i = 0; i < Buckets.size(); ++i) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i)); @@ -178,7 +178,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { // Terminate each HashData bucket with 0. void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { uint64_t PrevHash = UINT64_MAX; - for (size_t i = 0; i < Buckets.size(); ++i) { + for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) { // Remember to emit the label for our offset. @@ -237,7 +237,7 @@ void DwarfAccelTable::print(raw_ostream &O) { } O << "Buckets and Hashes: \n"; - for (size_t i = 0; i < Buckets.size(); ++i) + for (size_t i = 0, e = Buckets.size(); i < e; ++i) for (HashList::const_iterator HI = Buckets[i].begin(), HE = Buckets[i].end(); HI != HE; ++HI) (*HI)->print(O); -- cgit v1.1 From d752e0f7e64585839cb3a458ef52456eaebbea3c Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Tue, 8 Nov 2011 18:42:53 +0000 Subject: Added invariant field to the DAG.getLoad method and changed all calls. 
When this field is true it means that the load is from constant (run-time or compile-time) and so can be hoisted from loops or moved around other memory accesses git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144100 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineInstr.cpp | 1 + lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 20 +++--- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 45 ++++++++------ lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 6 +- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 5 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 2 +- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 10 +-- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 17 +++--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 34 ++++++----- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 8 ++- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 2 +- lib/Target/ARM/ARMISelLowering.cpp | 67 ++++++++++---------- lib/Target/ARM/ARMSelectionDAGInfo.cpp | 5 +- lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 2 +- lib/Target/CellSPU/SPUISelLowering.cpp | 13 ++-- lib/Target/MBlaze/MBlazeISelLowering.cpp | 2 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 8 +-- lib/Target/Mips/MipsISelLowering.cpp | 18 +++--- lib/Target/PTX/PTXSelectionDAGInfo.cpp | 5 +- lib/Target/PowerPC/PPCISelLowering.cpp | 47 +++++++------- lib/Target/Sparc/SparcISelLowering.cpp | 30 ++++----- lib/Target/X86/X86ISelLowering.cpp | 74 +++++++++++++---------- lib/Target/XCore/XCoreISelLowering.cpp | 14 +++-- 23 files changed, 241 insertions(+), 194 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index a240667..b0ef9d4 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -1253,6 +1253,7 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const { E = memoperands_end(); I != E; ++I) { if ((*I)->isVolatile()) return false; if ((*I)->isStore()) return false; + if ((*I)->isInvariant()) return true; if (const Value *V = 
(*I)->getValue()) { // A load from a constant PseudoSourceValue is invariant. diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 064cee2..e67016c 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4702,7 +4702,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { if (ExtType == ISD::NON_EXTLOAD) Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), NewAlign); else Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), @@ -4931,7 +4932,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), - false, false, Align); + false, false, false, Align); } return SDValue(); @@ -5001,7 +5002,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), - OrigAlign); + LN0->isInvariant(), OrigAlign); AddToWorkList(N); CombineTo(N0.getNode(), DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), @@ -6219,7 +6220,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), BetterChain, Ptr, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->isInvariant(), LD->getAlignment()); } else { ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), LD->getValueType(0), @@ -6483,7 +6484,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { LD->getChain(), NewPtr, LD->getPointerInfo().getWithOffset(PtrOff), LD->isVolatile(), 
LD->isNonTemporal(), - NewAlign); + LD->isInvariant(), NewAlign); SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD, DAG.getConstant(NewImm, NewVT)); SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(), @@ -6543,7 +6544,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), - false, false, LDAlign); + false, false, false, LDAlign); SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(), NewLD, ST->getBasePtr(), @@ -6928,7 +6929,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), Align); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align); } return SDValue(); @@ -7497,7 +7499,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // FIXME: Discards pointer info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), - LLD->getAlignment()); + LLD->isInvariant(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? 
RLD->getExtensionType() : LLD->getExtensionType(), @@ -7613,7 +7615,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, AddToWorkList(CPIdx.getNode()); return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), false, - false, Alignment); + false, false, Alignment); } } diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 9cce6fe..0e864fe 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -259,7 +259,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { } SDValue Result = DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, - MachinePointerInfo::getConstantPool(), false, false, + MachinePointerInfo::getConstantPool(), false, false, false, Alignment); return Result; } @@ -315,7 +315,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Load one integer register's worth from the stack slot. SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Store it to the final location. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr, ST->getPointerInfo().getWithOffset(Offset), @@ -403,7 +403,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // then bitconvert to floating point or vector. 
SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad); if (VT.isFloatingPoint() && LoadedVT != VT) Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result); @@ -434,6 +435,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), MinAlign(LD->getAlignment(), Offset)); // Follow the load with a store to the stack slot. Remember the store. Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr, @@ -570,7 +572,8 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx, false, false, 0); // Load the updated vector. return DAG.getLoad(VT, dl, Ch, StackPtr, - MachinePointerInfo::getFixedStack(SPFI), false, false, 0); + MachinePointerInfo::getFixedStack(SPFI), false, false, + false, 0); } @@ -911,7 +914,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->isInvariant(), LD->getAlignment()); Tmp3 = DAG.getNode(ISD::BITCAST, dl, VT, Tmp1); Tmp4 = Tmp1.getValue(1); break; @@ -1086,7 +1089,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->isInvariant(), LD->getAlignment()); unsigned ExtendOp; switch (ExtType) { case ISD::EXTLOAD: @@ -1336,7 +1339,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { if (Op.getValueType().isVector()) return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return 
DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, MachinePointerInfo(), Vec.getValueType().getVectorElementType(), @@ -1384,7 +1387,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) { // Finally, load the updated vector. return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo, - false, false, 0); + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { @@ -1434,7 +1437,8 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { StoreChain = DAG.getEntryNode(); // Result is a load from the stack slot. - return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0); + return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, + false, false, false, 0); } SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { @@ -1463,7 +1467,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { assert(FloatVT.isByteSized() && "Unsupported floating point type!"); // Load out a legal integer with the same sign bit as the float. SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } else { // Little endian SDValue LoadPtr = StackPtr; // The float may be wider than the integer we are going to load. Advance @@ -1474,7 +1478,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) { LoadPtr, DAG.getIntPtrConstant(ByteOffset)); // Load a legal integer containing the sign bit. SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Move the sign bit to the top bit of the loaded integer. unsigned BitShift = LoadTy.getSizeInBits() - (FloatVT.getSizeInBits() - 8 * ByteOffset); @@ -1616,7 +1620,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, // Result is a load from the stack slot. 
if (SlotSize == DestSize) return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, - false, false, DestAlign); + false, false, false, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, @@ -1639,7 +1643,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) { false, false, 0); return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr, MachinePointerInfo::getFixedStack(SPFI), - false, false, 0); + false, false, false, 0); } @@ -1713,7 +1717,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) { unsigned Alignment = cast(CPIdx)->getAlignment(); return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, Alignment); + false, false, false, Alignment); } if (!MoreThanTwoValues) { @@ -1975,7 +1979,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, // Remainder is loaded back from the stack frame. SDValue Rem = DAG.getLoad(RetVT, dl, CallInfo.second, FIPtr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); Results.push_back(CallInfo.first); Results.push_back(Rem); } @@ -2024,7 +2028,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, false, false, 0); // load the constructed double SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); // FP constant to bias correct the final result SDValue Bias = DAG.getConstantFP(isSigned ? 
BitsToDouble(0x4330000080000000ULL) : @@ -2164,7 +2168,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (DestVT == MVT::f32) FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, Alignment); + false, false, false, Alignment); else { SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, DAG.getEntryNode(), CPIdx, @@ -2703,7 +2707,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { unsigned Align = Node->getConstantOperandVal(3); SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, - MachinePointerInfo(V), false, false, 0); + MachinePointerInfo(V), + false, false, false, 0); SDValue VAList = VAListLoad; if (Align > TLI.getMinStackArgumentAlignment()) { @@ -2728,7 +2733,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { MachinePointerInfo(V), false, false, 0); // Load the actual argument out of the pointer VAList Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), - false, false, 0)); + false, false, false, 0)); Results.push_back(Results[0].getValue(1)); break; } @@ -2739,7 +2744,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { const Value *VS = cast(Node->getOperand(4))->getValue(); Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0), Node->getOperand(2), MachinePointerInfo(VS), - false, false, 0); + false, false, false, 0); Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), MachinePointerInfo(VD), false, false, 0); Results.push_back(Tmp1); diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 7c1cc69..6732d37 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -479,8 +479,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, 
L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, - L->isVolatile(), L->isNonTemporal(), L->getAlignment()); + L->getPointerInfo(), NVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -492,7 +492,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 4553071..f4164b2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -1778,6 +1778,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); + bool isInvariant = N->isInvariant(); DebugLoc dl = N->getDebugLoc(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1808,7 +1809,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. 
Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2310,7 +2311,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SplitInteger(CallInfo.first, Lo, Hi); SDValue Temp2 = DAG.getLoad(PtrVT, dl, CallInfo.second, Temp, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Temp2, DAG.getConstant(0, PtrVT), ISD::SETNE); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 7ed1b98..04a6a4a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -889,7 +889,7 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op, MachinePointerInfo(), false, false, 0); // Result is a load from the stack slot. return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } /// CustomLowerNode - Replace the node's results with custom code provided diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 8e7e498..84d334a 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -130,7 +130,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0); + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + false, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; @@ -140,7 +141,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { // Load the second half from the stack slot. 
Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo.getWithOffset(IncrementSize), false, - false, MinAlign(Alignment, IncrementSize)); + false, false, MinAlign(Alignment, IncrementSize)); // Handle endianness of the load. if (TLI.isBigEndian()) @@ -212,11 +213,12 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, unsigned Alignment = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -224,7 +226,7 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), - isVolatile, isNonTemporal, + isVolatile, isNonTemporal, isInvariant, MinAlign(Alignment, IncrementSize)); // Build a factor node to remember that this load is independent of the diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 5f8931d..cb5df05 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -194,7 +194,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment()); + N->isInvariant(), N->getOriginalAlignment()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -677,7 +677,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Lo part from the stack slot. 
Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; @@ -686,7 +686,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Load the Hi part from the stack slot. Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), - false, false, MinAlign(Alignment, IncrementSize)); + false, false, false, MinAlign(Alignment, IncrementSize)); } void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, @@ -713,20 +713,21 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, unsigned Alignment = LD->getOriginalAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - Alignment); + isInvariant, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, Alignment); + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment); // Build a factor node to remember that this load is independent of the // other one. 
@@ -2276,6 +2277,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + bool isInvariant = LD->isInvariant(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2285,7 +2287,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, Align); + isVolatile, isNonTemporal, isInvariant, Align); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2332,7 +2334,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, MinAlign(Align, Increment)); + isNonTemporal, isInvariant, + MinAlign(Align, Increment)); LdChain.push_back(LdOp.getValue(1)); LdOps.push_back(LdOp); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index b3f94a4..38dd7cc 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -475,7 +475,7 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) { /// static inline unsigned encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, - bool isNonTemporal) { + bool isNonTemporal, bool isInvariant) { assert((ConvType & 3) == ConvType && "ConvType may not require more than 2 bits!"); assert((AM & 7) == AM && @@ -483,7 +483,8 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile, return ConvType | (AM << 2) | (isVolatile << 5) | - (isNonTemporal << 6); + (isNonTemporal << 6) | + (isInvariant << 7); } 
//===----------------------------------------------------------------------===// @@ -3568,7 +3569,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = DAG.getLoad(VT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), SrcPtrInfo.getWithOffset(SrcOff), isVol, - false, SrcAlign); + false, false, SrcAlign); LoadValues.push_back(Value); LoadChains.push_back(Value.getValue(1)); SrcOff += VTSize; @@ -4144,7 +4145,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, - bool isVolatile, bool isNonTemporal, + bool isVolatile, bool isNonTemporal, bool isInvariant, unsigned Alignment, const MDNode *TBAAInfo) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); @@ -4156,6 +4157,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, Flags |= MachineMemOperand::MOVolatile; if (isNonTemporal) Flags |= MachineMemOperand::MONonTemporal; + if (isInvariant) + Flags |= MachineMemOperand::MOInvariant; // If we don't have a PtrInfo, infer the trivial frame index case to simplify // clients. 
@@ -4202,7 +4205,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3); ID.AddInteger(MemVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), + MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -4219,10 +4223,12 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, - unsigned Alignment, const MDNode *TBAAInfo) { + bool isInvariant, unsigned Alignment, + const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, - PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo); + PtrInfo, VT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); } SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, @@ -4232,7 +4238,7 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, unsigned Alignment, const MDNode *TBAAInfo) { SDValue Undef = getUNDEF(Ptr.getValueType()); return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, - PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment, + PtrInfo, MemVT, isVolatile, isNonTemporal, false, Alignment, TBAAInfo); } @@ -4245,8 +4251,8 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, "Load is already a indexed load!"); return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); + LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), + false, LD->getAlignment()); } SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, @@ -4288,7 +4294,7 @@ SDValue SelectionDAG::getStore(SDValue 
Chain, DebugLoc dl, SDValue Val, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(VT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -4355,7 +4361,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4); ID.AddInteger(SVT.getRawBits()); ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal())); + MMO->isNonTemporal(), MMO->isInvariant())); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { cast(E)->refineAlignment(MMO); @@ -5679,7 +5685,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, MachineMemOperand *mmo) : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal()); + MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(isNonTemporal() == MMO->isNonTemporal() && "Non-temporal encoding error!"); @@ -5692,7 +5698,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, : SDNode(Opc, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), - MMO->isNonTemporal()); + MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 4822da3..18c29b8 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -3175,6 +3175,7 @@ void 
SelectionDAGBuilder::visitLoad(const LoadInst &I) { bool isVolatile = I.isVolatile(); bool isNonTemporal = I.getMetadata("nontemporal") != 0; + bool isInvariant = I.getMetadata("invariant.load") != 0; unsigned Alignment = I.getAlignment(); const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa); @@ -3224,7 +3225,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { DAG.getConstant(Offsets[i], PtrVT)); SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, - isNonTemporal, Alignment, TBAAInfo); + isNonTemporal, isInvariant, Alignment, TBAAInfo); Values[i] = L; Chains[ChainI] = L.getValue(1); @@ -5264,7 +5265,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), - false, false, 1); + false, false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } @@ -5375,7 +5376,8 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, - false /*nontemporal*/, 1 /* align=1 */); + false /*nontemporal*/, + false /*isinvariant*/, 1 /* align=1 */); if (!ConstantMemory) Builder.PendingLoads.push_back(LoadVal.getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 6cbdb8b..3596d6c 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2060,7 +2060,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset); SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr, Lod->getPointerInfo().getWithOffset(bestOffset), - false, false, NewAlign); + false, false, false, NewAlign); return DAG.getSetCC(dl, VT, DAG.getNode(ISD::AND, 
dl, newVT, NewLoad, DAG.getConstant(bestMask.trunc(bestWidth), diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index dc1f4bc..84a34d5 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -1339,7 +1339,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } @@ -1432,7 +1432,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); } else if (ExternalSymbolSDNode *S=dyn_cast(Callee)) { const char *Sym = S->getSymbol(); @@ -1447,7 +1447,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); } } else if (GlobalAddressSDNode *G = dyn_cast(Callee)) { const GlobalValue *GV = G->getGlobal(); @@ -1468,7 +1468,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, getPointerTy(), Callee, PICLabel); @@ -1497,7 +1497,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee, Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); Callee = DAG.getNode(ARMISD::PIC_ADD, dl, 
getPointerTy(), Callee, PICLabel); @@ -1968,7 +1968,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, DL, PtrVT, CPAddr); SDValue Result = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); if (RelocM == Reloc::Static) return Result; SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -1992,7 +1992,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, Argument = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Argument); Argument = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Argument, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue Chain = Argument.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -2040,7 +2040,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); Chain = Offset.getValue(1); SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -2048,7 +2048,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); } else { // local exec model ARMConstantPoolValue *CPV = @@ -2057,7 +2057,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, Offset = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, Offset); Offset = DAG.getLoad(PtrVT, dl, Chain, Offset, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); } // The address of the thread local variable is the add of the thread @@ -2095,13 +2095,14 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, 
MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue Chain = Result.getValue(1); SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, GOT); if (!UseGOTOFF) Result = DAG.getLoad(PtrVT, dl, Chain, Result, - MachinePointerInfo::getGOT(), false, false, 0); + MachinePointerInfo::getGOT(), + false, false, false, 0); return Result; } @@ -2118,7 +2119,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); } } @@ -2146,7 +2147,8 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, DAG.getTargetGlobalAddress(GV, dl, PtrVT)); if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, 0); + MachinePointerInfo::getGOT(), + false, false, false, 0); return Result; } @@ -2166,7 +2168,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue Chain = Result.getValue(1); if (RelocM == Reloc::PIC_) { @@ -2176,7 +2178,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) Result = DAG.getLoad(PtrVT, dl, Chain, Result, MachinePointerInfo::getGOT(), - false, false, 0); + false, false, false, 0); return Result; } @@ -2198,7 +2200,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr); SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue PICLabel = 
DAG.getConstant(ARMPCLabelIndex, MVT::i32); return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel); } @@ -2256,7 +2258,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); if (RelocM == Reloc::PIC_) { SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32); @@ -2388,7 +2390,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::i32, dl, Root, FIN, MachinePointerInfo::getFixedStack(FI), - false, false, 0); + false, false, false, 0); } else { Reg = MF.addLiveIn(NextVA.getLocReg(), RC); ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32); @@ -2524,7 +2526,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); ArgValue2 = DAG.getLoad(MVT::f64, dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), - false, false, 0); + false, false, false, 0); } else { ArgValue2 = GetF64FormalArgument(VA, ArgLocs[++i], Chain, DAG, dl); @@ -2615,7 +2617,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + false, false, false, 0)); } lastInsIndex = index; } @@ -2850,7 +2852,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { return DAG.getLoad(MVT::i32, Op.getDebugLoc(), Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), - Ld->getAlignment()); + Ld->isInvariant(), Ld->getAlignment()); llvm_unreachable("Unknown VFP cmp argument!"); } @@ -2869,7 +2871,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), 
Ld->isNonTemporal(), - Ld->getAlignment()); + Ld->isInvariant(), Ld->getAlignment()); EVT PtrType = Ptr.getValueType(); unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); @@ -2879,7 +2881,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, Ld->getChain(), NewPtr, Ld->getPointerInfo().getWithOffset(4), Ld->isVolatile(), Ld->isNonTemporal(), - NewAlign); + Ld->isInvariant(), NewAlign); return; } @@ -3003,13 +3005,14 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { Addr = DAG.getLoad((EVT)MVT::i32, dl, Chain, Addr, MachinePointerInfo::getJumpTable(), - false, false, 0); + false, false, false, 0); Chain = Addr.getValue(1); Addr = DAG.getNode(ISD::ADD, dl, PTy, Addr, Table); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } else { Addr = DAG.getLoad(PTy, dl, Chain, Addr, - MachinePointerInfo::getJumpTable(), false, false, 0); + MachinePointerInfo::getJumpTable(), + false, false, false, 0); Chain = Addr.getValue(1); return DAG.getNode(ARMISD::BR_JT, dl, MVT::Other, Chain, Addr, JTI, UId); } @@ -3179,7 +3182,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ SDValue Offset = DAG.getConstant(4, MVT::i32); return DAG.getLoad(VT, dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); } // Return LR, which contains the return address. Mark it an implicit live-in. 
@@ -3200,7 +3203,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return FrameAddr; } @@ -4595,7 +4598,8 @@ static SDValue SkipExtension(SDNode *N, SelectionDAG &DAG) { if (LoadSDNode *LD = dyn_cast(N)) return DAG.getLoad(LD->getMemoryVT(), N->getDebugLoc(), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment()); // Otherwise, the value must be a BUILD_VECTOR. For v2i64, it will // have been legalized as a BITCAST from v4i32. if (N->getOpcode() == ISD::BITCAST) { @@ -7067,13 +7071,14 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, SDValue BasePtr = LD->getBasePtr(); SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), LD->isInvariant(), + LD->getAlignment()); SDValue OffsetPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, DAG.getConstant(4, MVT::i32)); SDValue NewLD2 = DAG.getLoad(MVT::i32, DL, NewLD1.getValue(1), OffsetPtr, LD->getPointerInfo(), LD->isVolatile(), - LD->isNonTemporal(), + LD->isNonTemporal(), LD->isInvariant(), std::min(4U, LD->getAlignment() / 2)); DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), NewLD2.getValue(1)); diff --git a/lib/Target/ARM/ARMSelectionDAGInfo.cpp b/lib/Target/ARM/ARMSelectionDAGInfo.cpp index a3a3d58..36d58de 100644 --- a/lib/Target/ARM/ARMSelectionDAGInfo.cpp +++ b/lib/Target/ARM/ARMSelectionDAGInfo.cpp @@ -67,7 +67,7 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), SrcPtrInfo.getWithOffset(SrcOff), isVolatile, - false, 0); + false, false, 0); TFOps[i] = Loads[i].getValue(1); SrcOff += 
VTSize; } @@ -105,7 +105,8 @@ ARMSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, MVT::i32, Src, DAG.getConstant(SrcOff, MVT::i32)), - SrcPtrInfo.getWithOffset(SrcOff), false, false, 0); + SrcPtrInfo.getWithOffset(SrcOff), + false, false, false, 0); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp index a297d03..99837df 100644 --- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp +++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp @@ -216,7 +216,7 @@ namespace { HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl, CurDAG->getEntryNode(), CGPoolOffset, MachinePointerInfo::getConstantPool(), - false, false, Alignment)); + false, false, false, Alignment)); CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue()); if (SDNode *N = SelectCode(Dummy.getValue().getNode())) return N; diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp index 08ebb92..d58e49b 100644 --- a/lib/Target/CellSPU/SPUISelLowering.cpp +++ b/lib/Target/CellSPU/SPUISelLowering.cpp @@ -667,7 +667,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Do the load as a i128 to allow possible shifting SDValue low = DAG.getLoad(MVT::i128, dl, the_chain, basePtr, lowMemPtr, - LN->isVolatile(), LN->isNonTemporal(), 16); + LN->isVolatile(), LN->isNonTemporal(), false, 16); // When the size is not greater than alignment we get all data with just // one load @@ -704,7 +704,8 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { basePtr, DAG.getConstant(16, PtrVT)), highMemPtr, - LN->isVolatile(), LN->isNonTemporal(), 16); + LN->isVolatile(), LN->isNonTemporal(), false, + 16); the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), high.getValue(1)); @@ -859,7 +860,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { // Load the 
lower part of the memory to which to store. SDValue low = DAG.getLoad(vecVT, dl, the_chain, basePtr, - lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), 16); + lowMemPtr, SN->isVolatile(), SN->isNonTemporal(), + false, 16); // if we don't need to store over the 16 byte boundary, one store suffices if (alignment >= StVT.getSizeInBits()/8) { @@ -959,7 +961,8 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG, const SPUSubtarget *ST) { DAG.getNode(ISD::ADD, dl, PtrVT, basePtr, DAG.getConstant( 16, PtrVT)), highMemPtr, - SN->isVolatile(), SN->isNonTemporal(), 16); + SN->isVolatile(), SN->isNonTemporal(), + false, 16); the_chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, low.getValue(1), hi.getValue(1)); @@ -1194,7 +1197,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain, int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); ArgOffset += StackSlotSize; } diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp index 8ec548f..148d906 100644 --- a/lib/Target/MBlaze/MBlazeISelLowering.cpp +++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp @@ -951,7 +951,7 @@ LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + false, false, false, 0)); } } diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp index e837ef8..9daeb2a 100644 --- a/lib/Target/MSP430/MSP430ISelLowering.cpp +++ b/lib/Target/MSP430/MSP430ISelLowering.cpp @@ -371,7 +371,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain, SDValue FIN = DAG.getFrameIndex(FI, MVT::i16); InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), - 
false, false, 0)); + false, false, false, 0)); } } @@ -907,13 +907,13 @@ SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, MachinePointerInfo(), false, false, 0); + RetAddrFI, MachinePointerInfo(), false, false, false, 0); } SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, @@ -929,7 +929,7 @@ SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return FrameAddr; } diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 31378a7..50aa78f 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1402,7 +1402,7 @@ SDValue MipsTargetLowering::LowerGlobalAddress(SDValue Op, GA = DAG.getNode(MipsISD::WrapperPIC, dl, ValTy, GA); SDValue ResNode = DAG.getLoad(ValTy, dl, DAG.getEntryNode(), GA, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // On functions and global targets not internal linked only // a load from got/GP is necessary for PIC to work. 
if (!HasGotOfst) @@ -1438,7 +1438,7 @@ SDValue MipsTargetLowering::LowerBlockAddress(SDValue Op, MipsII::MO_ABS_LO); SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), BAGOTOffset, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, BALOOffset); return DAG.getNode(ISD::ADD, dl, MVT::i32, Load, Lo); } @@ -1485,7 +1485,7 @@ LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const MipsII::MO_GOTTPREL); Offset = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), TGA, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } else { // Local Exec TLS Model SDVTList VTs = DAG.getVTList(MVT::i32); @@ -1524,7 +1524,7 @@ LowerJumpTable(SDValue Op, SelectionDAG &DAG) const JTI = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, JTI); HiPart = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), JTI, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, @@ -1568,7 +1568,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG) const CP = DAG.getNode(MipsISD::WrapperPIC, dl, MVT::i32, CP); SDValue Load = DAG.getLoad(MVT::i32, dl, DAG.getEntryNode(), CP, MachinePointerInfo::getConstantPool(), - false, false, 0); + false, false, false, 0); SDValue CPLo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(), N->getOffset(), MipsII::MO_ABS_LO); SDValue Lo = DAG.getNode(MipsISD::Lo, dl, MVT::i32, CPLo); @@ -1831,8 +1831,8 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, DAG.getConstant(Offset, MVT::i32)); SDValue LoadVal = DAG.getLoad(MVT::i32, dl, Chain, LoadPtr, MachinePointerInfo(), - false, false, std::min(ByValAlign, - (unsigned )4)); + false, false, false, std::min(ByValAlign, + (unsigned )4)); MemOpChains.push_back(LoadVal.getValue(1)); unsigned DstReg = O32IntRegs[LocMemOffset / 4]; RegsToPass.push_back(std::make_pair(DstReg, LoadVal)); @@ -2119,7 +2119,7 @@ 
MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, Callee = DAG.getNode(MipsISD::WrapperPIC, dl, getPointerTy(), Callee); SDValue LoadValue = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), - false, false, 0); + false, false, false, 0); // Use GOT+LO if callee has internal linkage. if (CalleeLo.getNode()) { @@ -2350,7 +2350,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); InVals.push_back(DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo::getFixedStack(LastFI), - false, false, 0)); + false, false, false, 0)); } } diff --git a/lib/Target/PTX/PTXSelectionDAGInfo.cpp b/lib/Target/PTX/PTXSelectionDAGInfo.cpp index 50ef14a..a116fab 100644 --- a/lib/Target/PTX/PTXSelectionDAGInfo.cpp +++ b/lib/Target/PTX/PTXSelectionDAGInfo.cpp @@ -70,7 +70,7 @@ PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, DAG.getNode(ISD::ADD, dl, PointerType, Src, DAG.getConstant(SrcOff, PointerType)), SrcPtrInfo.getWithOffset(SrcOff), isVolatile, - false, 0); + false, false, 0); TFOps[i] = Loads[i].getValue(1); SrcOff += VTSize; } @@ -108,7 +108,8 @@ PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, Loads[i] = DAG.getLoad(VT, dl, Chain, DAG.getNode(ISD::ADD, dl, PointerType, Src, DAG.getConstant(SrcOff, PointerType)), - SrcPtrInfo.getWithOffset(SrcOff), false, false, 0); + SrcPtrInfo.getWithOffset(SrcOff), false, false, + false, 0); TFOps[i] = Loads[i].getValue(1); ++i; SrcOff += VTSize; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 6502eb1..b188b90 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1223,7 +1223,7 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, // extra load to get the address of the global. 
if (MOHiFlag & PPCII::MO_NLP_FLAG) Ptr = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return Ptr; } @@ -1319,11 +1319,13 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, // areas SDValue OverflowArea = DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, + false, 0); InChain = OverflowArea.getValue(1); SDValue RegSaveArea = DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, + false, 0); InChain = RegSaveArea.getValue(1); // select overflow_area if index > 8 @@ -1372,7 +1374,8 @@ SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG, MachinePointerInfo(), MVT::i32, false, false, 0); - return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), false, false, 0); + return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo(), + false, false, false, 0); } SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op, @@ -1721,7 +1724,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( SDValue FIN = DAG.getFrameIndex(FI, PtrVT); InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo(), - false, false, 0)); + false, false, false, 0)); } } @@ -2138,7 +2141,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( isImmutable); SDValue FIN = DAG.getFrameIndex(FI, PtrVT); ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } InVals.push_back(ArgVal); @@ -2443,7 +2446,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, EVT VT = PPCSubTarget.isPPC64() ? 
MVT::i64 : MVT::i32; LROpOut = getReturnAddrFrameIndex(DAG); LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); Chain = SDValue(LROpOut.getNode(), 1); // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack @@ -2451,7 +2454,7 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, if (isDarwinABI) { FPOpOut = getFramePointerFrameIndex(DAG); FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); Chain = SDValue(FPOpOut.getNode(), 1); } } @@ -3212,7 +3215,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, if (GPR_idx != NumGPRs) { SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); ArgOffset += PtrByteSize; @@ -3250,7 +3253,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, // Float varargs are always shadowed in available integer registers if (GPR_idx != NumGPRs) { SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, + false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3259,7 +3263,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour); SDValue Load = DAG.getLoad(PtrVT, dl, Store, PtrOff, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3308,7 +3312,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, if (VR_idx != NumVRs) { SDValue Load = DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo(), - false, false, 0); + 
false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load)); } @@ -3319,7 +3323,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, DAG.getConstant(i, PtrVT)); SDValue Load = DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load)); } @@ -3483,7 +3487,7 @@ SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG, // Load the old link SP. SDValue LoadLinkSP = DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Restore the stack pointer. Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP); @@ -3674,7 +3678,7 @@ SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, DAG.getConstant(4, FIPtr.getValueType())); return DAG.getLoad(Op.getValueType(), dl, Chain, FIPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, @@ -3718,7 +3722,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, Ops, 4, MVT::i64, MMO); // Load the value as a double. SDValue Ld = DAG.getLoad(MVT::f64, dl, Store, FIdx, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // FCFID it and return it. 
SDValue FP = DAG.getNode(PPCISD::FCFID, dl, MVT::f64, Ld); @@ -3770,7 +3774,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue Four = DAG.getConstant(4, PtrVT); SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); SDValue CWD = DAG.getLoad(MVT::i32, dl, Store, Addr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Transform as necessary SDValue CWD1 = @@ -4441,7 +4445,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op, false, false, 0); // Load it out. return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { @@ -5729,13 +5733,13 @@ SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op, return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); } // Just load the return address off the stack. 
SDValue RetAddrFI = getReturnAddrFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, MachinePointerInfo(), false, false, 0); + RetAddrFI, MachinePointerInfo(), false, false, false, 0); } SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, @@ -5758,7 +5762,8 @@ SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op, PtrVT); while (Depth--) FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(), - FrameAddr, MachinePointerInfo(), false, false, 0); + FrameAddr, MachinePointerInfo(), false, false, + false, 0); return FrameAddr; } diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp index d70b163..25104d1 100644 --- a/lib/Target/Sparc/SparcISelLowering.cpp +++ b/lib/Target/Sparc/SparcISelLowering.cpp @@ -175,7 +175,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); SDValue Arg = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); InVals.push_back(Arg); continue; } @@ -197,7 +197,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32); LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } else { unsigned loReg = MF.addLiveIn(NextVA.getLocReg(), &SP::IntRegsRegClass); @@ -237,7 +237,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); SDValue Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, MachinePointerInfo(), - false,false, 0); + false,false, false, 0); InVals.push_back(Load); continue; } @@ -248,7 +248,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy()); SDValue HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); int FI2 = 
MF.getFrameInfo()->CreateFixedObject(4, Offset+4, true); @@ -256,7 +256,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, SDValue LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr2, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); SDValue WholeValue = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, LoVal, HiVal); @@ -273,7 +273,7 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain, if (VA.getValVT() == MVT::i32 || VA.getValVT() == MVT::f32) { Load = DAG.getLoad(VA.getValVT(), dl, Chain, FIPtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } else { ISD::LoadExtType LoadOp = ISD::SEXTLOAD; // Sparc is big endian, so add an offset based on the ObjectVT. @@ -467,13 +467,13 @@ SparcTargetLowering::LowerCall(SDValue Chain, SDValue Callee, false, false, 0); // Sparc is big-endian, so the high part comes first. SDValue Hi = DAG.getLoad(MVT::i32, dl, Store, StackPtr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); // Increment the pointer to the other half. StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, DAG.getIntPtrConstant(4)); // Load the low part. 
SDValue Lo = DAG.getLoad(MVT::i32, dl, Store, StackPtr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); if (VA.isRegLoc()) { RegsToPass.push_back(std::make_pair(VA.getLocReg(), Hi)); @@ -897,7 +897,7 @@ SDValue SparcTargetLowering::LowerGlobalAddress(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, MachinePointerInfo(), false, false, 0); + AbsAddr, MachinePointerInfo(), false, false, false, 0); } SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, @@ -918,7 +918,7 @@ SDValue SparcTargetLowering::LowerConstantPool(SDValue Op, SDValue AbsAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, GlobalBase, RelAddr); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - AbsAddr, MachinePointerInfo(), false, false, 0); + AbsAddr, MachinePointerInfo(), false, false, false, 0); } static SDValue LowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) { @@ -1026,7 +1026,7 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { const Value *SV = cast(Node->getOperand(2))->getValue(); DebugLoc dl = Node->getDebugLoc(); SDValue VAList = DAG.getLoad(MVT::i32, dl, InChain, VAListPtr, - MachinePointerInfo(SV), false, false, 0); + MachinePointerInfo(SV), false, false, false, 0); // Increment the pointer, VAList, to the next vaarg SDValue NextPtr = DAG.getNode(ISD::ADD, dl, MVT::i32, VAList, DAG.getConstant(VT.getSizeInBits()/8, @@ -1038,11 +1038,11 @@ static SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG) { // f64 load. if (VT != MVT::f64) return DAG.getLoad(VT, dl, InChain, VAList, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Otherwise, load it as i64, then do a bitconvert. SDValue V = DAG.getLoad(MVT::i64, dl, InChain, VAList, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Bit-Convert the value to f64. 
SDValue Ops[2] = { @@ -1103,7 +1103,7 @@ static SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) { FrameAddr = DAG.getLoad(MVT::i32, dl, Chain, Ptr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); } } return FrameAddr; @@ -1135,7 +1135,7 @@ static SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) { RetAddr = DAG.getLoad(MVT::i32, dl, Chain, Ptr, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); } } return RetAddr; diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index aab7c73..c1f7592 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1707,7 +1707,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain, SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); return DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), - false, false, 0); + false, false, false, 0); } } @@ -1810,7 +1810,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, // If value is passed via pointer - do a load. if (VA.getLocInfo() == CCValAssign::Indirect) ArgValue = DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); InVals.push_back(ArgValue); } @@ -2009,7 +2009,7 @@ X86TargetLowering::EmitTailCallLoadRetAddr(SelectionDAG &DAG, // Load the "old" Return address. 
OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return SDValue(OutRetAddr.getNode(), 1); } @@ -2371,7 +2371,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee, if (ExtraLoad) Callee = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Callee, MachinePointerInfo::getGOT(), - false, false, 0); + false, false, false, 0); } } else if (ExternalSymbolSDNode *S = dyn_cast(Callee)) { unsigned char OpFlags = 0; @@ -4982,7 +4982,7 @@ X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl, EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems); SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(StartOffset), - false, false, 0); + false, false, false, 0); // Canonicalize it to a v4i32 or v8i32 shuffle. SmallVector Mask; @@ -5047,11 +5047,12 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl &Elts, if (DAG.InferPtrAlignment(LDBase->getBasePtr()) >= 16) return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), - LDBase->isVolatile(), LDBase->isNonTemporal(), 0); + LDBase->isVolatile(), LDBase->isNonTemporal(), + LDBase->isInvariant(), 0); return DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), LDBase->getPointerInfo(), LDBase->isVolatile(), LDBase->isNonTemporal(), - LDBase->getAlignment()); + LDBase->isInvariant(), LDBase->getAlignment()); } else if (NumElems == 4 && LastLoadedElt == 1 && DAG.getTargetLoweringInfo().isTypeLegal(MVT::v2i64)) { SDVTList Tys = DAG.getVTList(MVT::v2i64, MVT::Other); @@ -7304,7 +7305,7 @@ X86TargetLowering::LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const { // load. 
if (isGlobalStubReference(OpFlag)) Result = DAG.getLoad(getPointerTy(), DL, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, 0); + MachinePointerInfo::getGOT(), false, false, false, 0); return Result; } @@ -7372,7 +7373,7 @@ X86TargetLowering::LowerGlobalAddress(const GlobalValue *GV, DebugLoc dl, // load. if (isGlobalStubReference(OpFlags)) Result = DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), Result, - MachinePointerInfo::getGOT(), false, false, 0); + MachinePointerInfo::getGOT(), false, false, false, 0); // If there was a non-zero offset that we didn't fold, create an explicit // addition for it. @@ -7451,7 +7452,8 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, SDValue ThreadPointer = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0), - MachinePointerInfo(Ptr), false, false, 0); + MachinePointerInfo(Ptr), + false, false, false, 0); unsigned char OperandFlags = 0; // Most TLS accesses are not RIP relative, even on x86-64. One exception is @@ -7477,7 +7479,7 @@ static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, if (model == TLSModel::InitialExec) Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, - MachinePointerInfo::getGOT(), false, false, 0); + MachinePointerInfo::getGOT(), false, false, false, 0); // The address of the thread local variable is the add of the thread // pointer with the offset of the variable. 
@@ -7701,7 +7703,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain, Op.getValueType(), MMO); Result = DAG.getLoad(Op.getValueType(), DL, Chain, StackSlot, MachinePointerInfo::getFixedStack(SSFI), - false, false, 0); + false, false, false, 0); } return Result; @@ -7775,12 +7777,12 @@ SDValue X86TargetLowering::LowerUINT_TO_FP_i64(SDValue Op, SDValue Unpck1 = getUnpackl(DAG, dl, MVT::v4i32, XR1, XR2); SDValue CLod0 = DAG.getLoad(MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, MachinePointerInfo::getConstantPool(), - false, false, 16); + false, false, false, 16); SDValue Unpck2 = getUnpackl(DAG, dl, MVT::v4i32, Unpck1, CLod0); SDValue XR2F = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Unpck2); SDValue CLod1 = DAG.getLoad(MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, MachinePointerInfo::getConstantPool(), - false, false, 16); + false, false, false, 16); SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); // Add the halves; easiest way is to swap them into another reg first. @@ -8012,7 +8014,8 @@ SDValue X86TargetLowering::LowerFP_TO_SINT(SDValue Op, // Load the result. return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, MachinePointerInfo(), false, false, 0); + FIST, StackSlot, MachinePointerInfo(), + false, false, false, 0); } SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, @@ -8023,7 +8026,8 @@ SDValue X86TargetLowering::LowerFP_TO_UINT(SDValue Op, // Load the result. 
return DAG.getLoad(Op.getValueType(), Op.getDebugLoc(), - FIST, StackSlot, MachinePointerInfo(), false, false, 0); + FIST, StackSlot, MachinePointerInfo(), + false, false, false, 0); } SDValue X86TargetLowering::LowerFABS(SDValue Op, @@ -8050,7 +8054,7 @@ SDValue X86TargetLowering::LowerFABS(SDValue Op, SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, 16); + false, false, false, 16); return DAG.getNode(X86ISD::FAND, dl, VT, Op.getOperand(0), Mask); } @@ -8077,7 +8081,7 @@ SDValue X86TargetLowering::LowerFNEG(SDValue Op, SelectionDAG &DAG) const { SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, 16); + false, false, false, 16); if (VT.isVector()) { return DAG.getNode(ISD::BITCAST, dl, VT, DAG.getNode(ISD::XOR, dl, MVT::v2i64, @@ -8126,7 +8130,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask1 = DAG.getLoad(SrcVT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, 16); + false, false, false, 16); SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, SrcVT, Op1, Mask1); // Shift sign bit right or left if the two operands have different types. @@ -8155,7 +8159,7 @@ SDValue X86TargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Mask2 = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, 16); + false, false, false, 16); SDValue Val = DAG.getNode(X86ISD::FAND, dl, VT, Op0, Mask2); // Or the value with the sign bit. 
@@ -9282,7 +9286,7 @@ SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const { Chain, VAARG, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } SDValue X86TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { @@ -9608,13 +9612,13 @@ SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameAddr, Offset), - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); } // Just load the return address. SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(), - RetAddrFI, MachinePointerInfo(), false, false, 0); + RetAddrFI, MachinePointerInfo(), false, false, false, 0); } SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { @@ -9629,7 +9633,7 @@ SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { while (Depth--) FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); return FrameAddr; } @@ -9861,7 +9865,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, // Load FP Control Word from stack slot SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, - MachinePointerInfo(), false, false, 0); + MachinePointerInfo(), false, false, false, 0); // Transform as necessary SDValue CWD1 = @@ -10201,7 +10205,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { SDValue CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue Addend = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, 16); + false, false, false, 16); Op = DAG.getNode(ISD::ADD, dl, VT, Op, Addend); Op = DAG.getNode(ISD::BITCAST, dl, MVT::v4f32, Op); @@ -10223,7 +10227,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { SDValue 
CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); SDValue M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, 16); + false, false, false, 16); // r = pblendv(r, psllw(r & (char16)15, 4), a); M = DAG.getNode(ISD::AND, dl, VT, R, M); @@ -10238,7 +10242,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { CPIdx = DAG.getConstantPool(C, getPointerTy(), 16); M = DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, MachinePointerInfo::getConstantPool(), - false, false, 16); + false, false, false, 16); // r = pblendv(r, psllw(r & (char16)63, 2), a); M = DAG.getNode(ISD::AND, dl, VT, R, M); @@ -10739,7 +10743,8 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, EVT VT = N->getValueType(0); // Return a load from the stack slot. Results.push_back(DAG.getLoad(VT, dl, FIST, StackSlot, - MachinePointerInfo(), false, false, 0)); + MachinePointerInfo(), + false, false, false, 0)); } return; } @@ -12807,7 +12812,7 @@ static SDValue PerformEXTRACT_VECTOR_ELTCombine(SDNode *N, SelectionDAG &DAG, // Load the scalar. SDValue LoadScalar = DAG.getLoad(Extract->getValueType(0), dl, Ch, ScalarAddr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); // Replace the exact with the load. DAG.ReplaceAllUsesOfValueWith(SDValue(Extract, 0), LoadScalar); @@ -13837,7 +13842,8 @@ static SDValue PerformLOADCombine(SDNode *N, SelectionDAG &DAG, SDValue ScalarLoad = DAG.getLoad(SclrLoadTy, dl, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), - Ld->isNonTemporal(), Ld->getAlignment()); + Ld->isNonTemporal(), Ld->isInvariant(), + Ld->getAlignment()); // Insert the word loaded into a vector. SDValue ScalarInVector = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, @@ -14033,7 +14039,8 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, EVT LdVT = Subtarget->is64Bit() ? 
MVT::i64 : MVT::f64; SDValue NewLd = DAG.getLoad(LdVT, LdDL, Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), - Ld->isNonTemporal(), Ld->getAlignment()); + Ld->isNonTemporal(), Ld->isInvariant(), + Ld->getAlignment()); SDValue NewChain = NewLd.getValue(1); if (TokenFactorIndex != -1) { Ops.push_back(NewChain); @@ -14054,10 +14061,11 @@ static SDValue PerformSTORECombine(SDNode *N, SelectionDAG &DAG, SDValue LoLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), LoAddr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), - Ld->getAlignment()); + Ld->isInvariant(), Ld->getAlignment()); SDValue HiLd = DAG.getLoad(MVT::i32, LdDL, Ld->getChain(), HiAddr, Ld->getPointerInfo().getWithOffset(4), Ld->isVolatile(), Ld->isNonTemporal(), + Ld->isInvariant(), MinAlign(Ld->getAlignment(), 4)); SDValue NewChain = LoLd.getValue(1); diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp index 4b74f96..d791daa 100644 --- a/lib/Target/XCore/XCoreISelLowering.cpp +++ b/lib/Target/XCore/XCoreISelLowering.cpp @@ -427,7 +427,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { // return DAG.getLoad(getPointerTy(), DL, Chain, BasePtr, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } // Lower to // ldw low, base[offset >> 2] @@ -444,9 +444,11 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG) const { SDValue HighAddr = DAG.getNode(ISD::ADD, DL, MVT::i32, Base, HighOffset); SDValue Low = DAG.getLoad(getPointerTy(), DL, Chain, - LowAddr, MachinePointerInfo(), false, false, 0); + LowAddr, MachinePointerInfo(), + false, false, false, 0); SDValue High = DAG.getLoad(getPointerTy(), DL, Chain, - HighAddr, MachinePointerInfo(), false, false, 0); + HighAddr, MachinePointerInfo(), + false, false, false, 0); SDValue LowShifted = DAG.getNode(ISD::SRL, DL, MVT::i32, Low, LowShift); SDValue HighShifted = DAG.getNode(ISD::SHL, DL, MVT::i32, High, HighShift); SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, LowShifted, 
HighShifted); @@ -761,7 +763,7 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const EVT VT = Node->getValueType(0); SDValue VAList = DAG.getLoad(getPointerTy(), dl, Node->getOperand(0), Node->getOperand(1), MachinePointerInfo(V), - false, false, 0); + false, false, false, 0); // Increment the pointer, VAList, to the next vararg SDValue Tmp3 = DAG.getNode(ISD::ADD, dl, getPointerTy(), VAList, DAG.getConstant(VT.getSizeInBits(), @@ -771,7 +773,7 @@ LowerVAARG(SDValue Op, SelectionDAG &DAG) const MachinePointerInfo(V), false, false, 0); // Load the actual argument out of the pointer VAList return DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(), - false, false, 0); + false, false, false, 0); } SDValue XCoreTargetLowering:: @@ -1146,7 +1148,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain, SDValue FIN = DAG.getFrameIndex(FI, MVT::i32); InVals.push_back(DAG.getLoad(VA.getLocVT(), dl, Chain, FIN, MachinePointerInfo::getFixedStack(FI), - false, false, 0)); + false, false, false, 0)); } } -- cgit v1.1 From 5207bf2177e9ef1e68d9408ea4b44f1c8a5ef9c0 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Tue, 8 Nov 2011 18:56:23 +0000 Subject: Lower mem-ops to unaligned i32/i16 load/stores on ARM where supported. Add support for trimming constants to GetDemandedBits. This fixes some funky constant generation that occurs when stores are expanded for targets that don't support unaligned stores natively. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144102 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ++++++++++ lib/Target/ARM/ARMISelLowering.cpp | 7 +++++++ 2 files changed, 17 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e67016c..8b28ea9 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4564,6 +4564,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { switch (V.getOpcode()) { default: break; + case ISD::Constant: { + const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); + assert(CV != 0 && "Const value should be ConstSDNode."); + const APInt &CVal = CV->getAPIntValue(); + APInt NewVal = CVal & Mask; + if (NewVal != CVal) { + return DAG.getConstant(NewVal, V.getValueType()); + } + break; + } case ISD::OR: case ISD::XOR: // If the LHS or RHS don't contribute bits to the or, drop them. diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 84a34d5..c51e7ae 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -8171,6 +8171,13 @@ EVT ARMTargetLowering::getOptimalMemOpType(uint64_t Size, } } + // Lowering to i32/i16 if the size permits. + if (Size >= 4) { + return MVT::i32; + } else if (Size >= 2) { + return MVT::i16; + } + // Let the target-independent logic figure it out. return MVT::Other; } -- cgit v1.1 From 67a917d495d38004e3a4a134eec81b312a71a164 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 8 Nov 2011 19:16:01 +0000 Subject: Add the base ObjC method name to the names lookup table as well.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144105 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 64e0e92..286075f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -552,6 +552,10 @@ static void getObjCClassCategory(StringRef In, StringRef &Class, return; } +static StringRef getObjCMethodName(StringRef In) { + return In.slice(In.find(' ') + 1, In.find(']')); +} + /// construct SubprogramDIE - Construct subprogram DIE. void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { @@ -599,6 +603,8 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, TheCU->addAccelObjC(Class, SubprogramDie); if (Category != "") TheCU->addAccelObjC(Category, SubprogramDie); + // Also add the base method name to the name table. + TheCU->addAccelName(getObjCMethodName(SP.getName()), SubprogramDie); } return; -- cgit v1.1 From 2d76a7846200fcbe518e6f73291a6cbda8864ea0 Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Tue, 8 Nov 2011 19:30:00 +0000 Subject: LICM pass now understands invariant load metadata. Nothing generates this yet so it will currently never get used in real tests git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144107 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/LICM.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp index b79bb13..8098b36 100644 --- a/lib/Transforms/Scalar/LICM.cpp +++ b/lib/Transforms/Scalar/LICM.cpp @@ -369,6 +369,8 @@ bool LICM::canSinkOrHoistInst(Instruction &I) { // in the same alias set as something that ends up being modified. 
if (AA->pointsToConstantMemory(LI->getOperand(0))) return true; + if (LI->getMetadata(LI->getContext().getMDKindID("invariant.load"))) + return true; // Don't hoist loads which have may-aliased stores in loop. uint64_t Size = 0; -- cgit v1.1 From 0d64632c9e8a2632b8804bcc3fbc3523588862a7 Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Tue, 8 Nov 2011 19:52:32 +0000 Subject: MC/COFF: Correctly emit the size of an empty string table. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144111 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/WinCOFFObjectWriter.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp index b15e225..4d3b59c 100644 --- a/lib/MC/WinCOFFObjectWriter.cpp +++ b/lib/MC/WinCOFFObjectWriter.cpp @@ -281,6 +281,7 @@ StringTable::StringTable() { // The string table data begins with the length of the entire string table // including the length header. Allocate space for this header. Data.resize(4); + update_length(); } size_t StringTable::size() const { -- cgit v1.1 From e1b3e11c15b705ad55c5ff2b04a0b790599013ea Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 8 Nov 2011 20:57:04 +0000 Subject: Clear old DomainValue after merging. The old value may still be referenced by some live-out list, and we don't want to collapse those instructions twice. This fixes the "Can only swizzle VMOVD" assertion in some armv7 SPEC builds.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144117 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index bd77f65..5aa80f9 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -266,6 +266,11 @@ bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { A->AvailableDomains = common; A->Dist = std::max(A->Dist, B->Dist); A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); + + // Clear the old DomainValue so we won't try to swizzle instructions twice. + B->Instrs.clear(); + B->AvailableDomains = 0; + for (unsigned rx = 0; rx != NumRegs; ++rx) if (LiveRegs[rx] == B) SetLiveReg(rx, A); @@ -283,7 +288,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { LiveOutMap::const_iterator fi = LiveOuts.find(*pi); if (fi == LiveOuts.end()) continue; DomainValue *pdv = fi->second[rx]; - if (!pdv) continue; + if (!pdv || !pdv->AvailableDomains) continue; if (!LiveRegs || !LiveRegs[rx]) { SetLiveReg(rx, pdv); continue; -- cgit v1.1 From 7781ae5be570940178295f6f808215089a3fe411 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Tue, 8 Nov 2011 21:08:02 +0000 Subject: Fix code to match comment. Fixes PR11340, a regression from r143209. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144121 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/InstructionSimplify.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index c141632..2f41f72 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -1612,7 +1612,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, Lower = (-Upper) + 1; } else if (match(LHS, m_UDiv(m_ConstantInt(CI2), m_Value()))) { // 'udiv CI2, x' produces [0, CI2]. - Upper = CI2->getValue(); + Upper = CI2->getValue() + 1; } else if (match(LHS, m_UDiv(m_Value(), m_ConstantInt(CI2)))) { // 'udiv x, CI2' produces [0, UINT_MAX / CI2]. APInt NegOne = APInt::getAllOnesValue(Width); -- cgit v1.1 From 66dc8ca04b719f3ab4aa650609dbd56b055ecb34 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Tue, 8 Nov 2011 21:12:00 +0000 Subject: ARMFastISel doesn't support thumb1. Rename isThumb to isThumb2 to reflect this. No functional change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144122 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 78 +++++++++++++++++++++--------------------- 1 file changed, 39 insertions(+), 39 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 8acdd91..3c6d1e8 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -90,7 +90,7 @@ class ARMFastISel : public FastISel { ARMFunctionInfo *AFI; // Convenience variables to avoid some queries. 
- bool isThumb; + bool isThumb2; LLVMContext *Context; public: @@ -101,7 +101,7 @@ class ARMFastISel : public FastISel { TLI(*TM.getTargetLowering()) { Subtarget = &TM.getSubtarget(); AFI = funcInfo.MF->getInfo(); - isThumb = AFI->isThumbFunction(); + isThumb2 = AFI->isThumbFunction(); Context = &funcInfo.Fn->getContext(); } @@ -553,7 +553,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { const ConstantInt *CI = cast(C); if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) { EVT SrcVT = MVT::i32; - unsigned Opc = isThumb ? ARM::t2MOVi16 : ARM::MOVi16; + unsigned Opc = isThumb2 ? ARM::t2MOVi16 : ARM::MOVi16; unsigned ImmReg = createResultReg(TLI.getRegClassFor(SrcVT)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg) @@ -575,7 +575,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { } unsigned Idx = MCP.getConstantPoolIndex(C, Align); - if (isThumb) + if (isThumb2) AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRpci), DestReg) .addConstantPoolIndex(Idx)); @@ -596,7 +596,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { Reloc::Model RelocM = TM.getRelocationModel(); // TODO: Need more magic for ARM PIC. - if (!isThumb && (RelocM == Reloc::PIC_)) return 0; + if (!isThumb2 && (RelocM == Reloc::PIC_)) return 0; // MachineConstantPool wants an explicit alignment. unsigned Align = TD.getPrefTypeAlignment(GV->getType()); @@ -616,7 +616,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { // Load value. MachineInstrBuilder MIB; unsigned DestReg = createResultReg(TLI.getRegClassFor(VT)); - if (isThumb) { + if (isThumb2) { unsigned Opc = (RelocM != Reloc::PIC_) ? 
ARM::t2LDRpci : ARM::t2LDRpci_pic; MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg) .addConstantPoolIndex(Idx); @@ -633,7 +633,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, EVT VT) { if (Subtarget->GVIsIndirectSymbol(GV, RelocM)) { unsigned NewDestReg = createResultReg(TLI.getRegClassFor(VT)); - if (isThumb) + if (isThumb2) MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRi12), NewDestReg) .addReg(DestReg) @@ -681,7 +681,7 @@ unsigned ARMFastISel::TargetMaterializeAlloca(const AllocaInst *AI) { if (SI != FuncInfo.StaticAllocaMap.end()) { TargetRegisterClass* RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); - unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; + unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addFrameIndex(SI->second) @@ -864,10 +864,10 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { // put the alloca address into a register, set the base type back to // register and continue. This should almost never happen. if (needsLowering && Addr.BaseType == Address::FrameIndexBase) { - TargetRegisterClass *RC = isThumb ? ARM::tGPRRegisterClass : + TargetRegisterClass *RC = isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass; unsigned ResultReg = createResultReg(RC); - unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri; + unsigned Opc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, *FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addFrameIndex(Addr.Base.FI) @@ -908,7 +908,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, MIB.addFrameIndex(FI); // ARM halfword load/stores need an additional operand. 
- if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); + if (!isThumb2 && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); MIB.addImm(Addr.Offset); MIB.addMemOperand(MMO); @@ -917,7 +917,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, MIB.addReg(Addr.Base.Reg); // ARM halfword load/stores need an additional operand. - if (!isThumb && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); + if (!isThumb2 && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); MIB.addImm(Addr.Offset); } @@ -933,15 +933,15 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) { // This is mostly going to be Neon/vector support. default: return false; case MVT::i16: - Opc = isThumb ? ARM::t2LDRHi12 : ARM::LDRH; + Opc = isThumb2 ? ARM::t2LDRHi12 : ARM::LDRH; RC = ARM::GPRRegisterClass; break; case MVT::i8: - Opc = isThumb ? ARM::t2LDRBi12 : ARM::LDRBi12; + Opc = isThumb2 ? ARM::t2LDRBi12 : ARM::LDRBi12; RC = ARM::GPRRegisterClass; break; case MVT::i32: - Opc = isThumb ? ARM::t2LDRi12 : ARM::LDRi12; + Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; RC = ARM::GPRRegisterClass; break; case MVT::f32: @@ -990,22 +990,22 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { // This is mostly going to be Neon/vector support. default: return false; case MVT::i1: { - unsigned Res = createResultReg(isThumb ? ARM::tGPRRegisterClass : + unsigned Res = createResultReg(isThumb2 ? ARM::tGPRRegisterClass : ARM::GPRRegisterClass); - unsigned Opc = isThumb ? ARM::t2ANDri : ARM::ANDri; + unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), Res) .addReg(SrcReg).addImm(1)); SrcReg = Res; } // Fallthrough here. case MVT::i8: - StrOpc = isThumb ? ARM::t2STRBi12 : ARM::STRBi12; + StrOpc = isThumb2 ? ARM::t2STRBi12 : ARM::STRBi12; break; case MVT::i16: - StrOpc = isThumb ? ARM::t2STRHi12 : ARM::STRH; + StrOpc = isThumb2 ? 
ARM::t2STRHi12 : ARM::STRH; break; case MVT::i32: - StrOpc = isThumb ? ARM::t2STRi12 : ARM::STRi12; + StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12; break; case MVT::f32: if (!Subtarget->hasVFP2()) return false; @@ -1129,7 +1129,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { if (!ARMEmitCmp(CI->getOperand(0), CI->getOperand(1), CI->isUnsigned())) return false; - unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) .addMBB(TBB).addImm(ARMPred).addReg(ARM::CPSR); FastEmitBranch(FBB, DL); @@ -1140,7 +1140,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { MVT SourceVT; if (TI->hasOneUse() && TI->getParent() == I->getParent() && (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) { - unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; unsigned OpReg = getRegForValue(TI->getOperand(0)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) @@ -1152,7 +1152,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { CCMode = ARMCC::EQ; } - unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); @@ -1178,7 +1178,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // Regardless, the compare has been done in the predecessor block, // and it left a value for us in a virtual register. Ergo, we test // the one-bit value left in the virtual register. - unsigned TstOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + unsigned TstOpc = isThumb2 ? 
ARM::t2TSTri : ARM::TSTri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) .addReg(CmpReg).addImm(1)); @@ -1188,7 +1188,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { CCMode = ARMCC::EQ; } - unsigned BrOpc = isThumb ? ARM::t2Bcc : ARM::Bcc; + unsigned BrOpc = isThumb2 ? ARM::t2Bcc : ARM::Bcc; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(BrOpc)) .addMBB(TBB).addImm(CCMode).addReg(ARM::CPSR); FastEmitBranch(FBB, DL); @@ -1223,7 +1223,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, needsExt = true; // Intentional fall-through. case MVT::i32: - CmpOpc = isThumb ? ARM::t2CMPrr : ARM::CMPrr; + CmpOpc = isThumb2 ? ARM::t2CMPrr : ARM::CMPrr; break; } @@ -1272,8 +1272,8 @@ bool ARMFastISel::SelectCmp(const Instruction *I) { // Now set a register based on the comparison. Explicitly set the predicates // here. - unsigned MovCCOpc = isThumb ? ARM::t2MOVCCi : ARM::MOVCCi; - TargetRegisterClass *RC = isThumb ? ARM::rGPRRegisterClass + unsigned MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; + TargetRegisterClass *RC = isThumb2 ? ARM::rGPRRegisterClass : ARM::GPRRegisterClass; unsigned DestReg = createResultReg(RC); Constant *Zero = ConstantInt::get(Type::getInt32Ty(*Context), 0); @@ -1418,11 +1418,11 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { unsigned Op2Reg = getRegForValue(I->getOperand(2)); if (Op2Reg == 0) return false; - unsigned CmpOpc = isThumb ? ARM::t2TSTri : ARM::TSTri; + unsigned CmpOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) .addReg(CondReg).addImm(1)); unsigned ResultReg = createResultReg(RC); - unsigned MovCCOpc = isThumb ? ARM::t2MOVCCr : ARM::MOVCCr; + unsigned MovCCOpc = isThumb2 ? 
ARM::t2MOVCCr : ARM::MOVCCr; BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) .addReg(Op1Reg).addReg(Op2Reg) .addImm(ARMCC::EQ).addReg(ARM::CPSR); @@ -1788,7 +1788,7 @@ bool ARMFastISel::SelectRet(const Instruction *I) { MRI.addLiveOut(VA.getLocReg()); } - unsigned RetOpc = isThumb ? ARM::tBX_RET : ARM::BX_RET; + unsigned RetOpc = isThumb2 ? ARM::tBX_RET : ARM::BX_RET; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(RetOpc))); return true; @@ -1798,7 +1798,7 @@ unsigned ARMFastISel::ARMSelectCallOp(const GlobalValue *GV) { // Darwin needs the r9 versions of the opcodes. bool isDarwin = Subtarget->isTargetDarwin(); - if (isThumb) { + if (isThumb2) { return isDarwin ? ARM::tBLr9 : ARM::tBL; } else { return isDarwin ? ARM::BLr9 : ARM::BL; @@ -1864,7 +1864,7 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { // TODO: Turn this into the table of arm call ops. MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(NULL); - if(isThumb) + if(isThumb2) // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))) @@ -1979,7 +1979,7 @@ bool ARMFastISel::SelectCall(const Instruction *I) { MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); // Explicitly adding the predicate here. - if(isThumb) + if(isThumb2) // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))) @@ -2040,20 +2040,20 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, case MVT::i16: if (!Subtarget->hasV6Ops()) return 0; if (isZExt) - Opc = isThumb ? ARM::t2UXTH : ARM::UXTH; + Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH; else - Opc = isThumb ? ARM::t2SXTH : ARM::SXTH; + Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; break; case MVT::i8: if (!Subtarget->hasV6Ops()) return 0; if (isZExt) - Opc = isThumb ? ARM::t2UXTB : ARM::UXTB; + Opc = isThumb2 ? 
ARM::t2UXTB : ARM::UXTB; else - Opc = isThumb ? ARM::t2SXTB : ARM::SXTB; + Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; break; case MVT::i1: if (isZExt) { - Opc = isThumb ? ARM::t2ANDri : ARM::ANDri; + Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; isBoolZext = true; break; } -- cgit v1.1 From 3568a1051efb9a9edbd4914b04b44e9d7bc1b004 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Tue, 8 Nov 2011 21:21:09 +0000 Subject: Add workaround for Cortex-M3 errata 602117 by replacing ldrd x, y, [x] with ldm or ldr pairs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144123 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index faa8ba7..a871ed7 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -62,6 +62,7 @@ namespace { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const ARMSubtarget *STI; ARMFunctionInfo *AFI; RegScavenger *RS; bool isThumb2; @@ -1071,11 +1072,17 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, unsigned Opcode = MI->getOpcode(); if (Opcode == ARM::LDRD || Opcode == ARM::STRD || Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) { + const MachineOperand &BaseOp = MI->getOperand(2); + unsigned BaseReg = BaseOp.getReg(); unsigned EvenReg = MI->getOperand(0).getReg(); unsigned OddReg = MI->getOperand(1).getReg(); unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); - if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) + // ARM errata 602117: LDRD with base in list may result in incorrect base + // register when interrupted or faulted. 
+ bool Errata602117 = EvenReg == BaseReg && STI->getCPUString() == "cortex-m3"; + if (!Errata602117 && + ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)) return false; MachineBasicBlock::iterator NewBBI = MBBI; @@ -1087,8 +1094,6 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, bool OddDeadKill = isLd ? MI->getOperand(1).isDead() : MI->getOperand(1).isKill(); bool OddUndef = MI->getOperand(1).isUndef(); - const MachineOperand &BaseOp = MI->getOperand(2); - unsigned BaseReg = BaseOp.getReg(); bool BaseKill = BaseOp.isKill(); bool BaseUndef = BaseOp.isUndef(); bool OffKill = isT2 ? false : MI->getOperand(3).isKill(); @@ -1380,6 +1385,7 @@ bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { AFI = Fn.getInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); + STI = &TM.getSubtarget(); RS = new RegScavenger(); isThumb2 = AFI->isThumb2Function(); -- cgit v1.1 From 9cae2d2225ba58a70ef8ff057feab6873f4af520 Mon Sep 17 00:00:00 2001 From: Dan Gohman Date: Tue, 8 Nov 2011 21:29:06 +0000 Subject: Add a hack to the scheduler to disable pseudo-two-address dependencies in basic blocks containing calls. This works around a problem in which these artificial dependencies can get tied up in calling sequence scheduling in a way that makes the graph unschedulable with the current approach of using artificial physical register dependencies for calling sequences. This fixes PR11314. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144124 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index cab303d..f965a5e 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1666,7 +1666,7 @@ public: protected: bool canClobber(const SUnit *SU, const SUnit *Op); - void AddPseudoTwoAddrDeps(); + void AddPseudoTwoAddrDeps(const TargetInstrInfo *TII); void PrescheduleNodesWithMultipleUses(); void CalculateSethiUllmanNumbers(); }; @@ -2628,7 +2628,7 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { void RegReductionPQBase::initNodes(std::vector &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); + AddPseudoTwoAddrDeps(TII); // Reroute edges to nodes with multiple uses. if (!TracksRegPressure) PrescheduleNodesWithMultipleUses(); @@ -2855,7 +2855,17 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { /// one that has a CopyToReg use (more likely to be a loop induction update). /// If both are two-address, but one is commutable while the other is not /// commutable, favor the one that's not commutable. -void RegReductionPQBase::AddPseudoTwoAddrDeps() { +void RegReductionPQBase::AddPseudoTwoAddrDeps(const TargetInstrInfo *TII) { + // If the graph contains any calls, disable this optimization. + // FIXME: This is a kludge to work around the fact that the artificial edges + // can combine with the way call sequences use physical register dependencies + // to model their resource usage to create unschedulable graphs. 
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i) + for (SDNode *Node = (*SUnits)[i].getNode(); Node; Node = Node->getGluedNode()) + if (Node->isMachineOpcode() && + Node->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) + return; + for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { SUnit *SU = &(*SUnits)[i]; if (!SU->isTwoAddress) -- cgit v1.1 From 74d8a87f40dd89ec263071c0af4d77104b166f4a Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Tue, 8 Nov 2011 21:56:23 +0000 Subject: Also add the linkage name to the name accelerator tables if it exists and is different than the normal name. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144130 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 5 +++++ lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 660992a..3d353b3 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1131,6 +1131,11 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (addToAccelTable) addAccelName(GV.getName(), VariableDIE); + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. 
+ if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) + addAccelName(GV.getLinkageName(), VariableDIE); + return; } diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 286075f..085ad2c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -441,8 +441,15 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (DS.isSubprogram()) TheCU->addPubTypes(DISubprogram(DS)); - if (DS.isSubprogram() && !Scope->isAbstractScope()) - TheCU->addAccelName(DISubprogram(DS).getName(), ScopeDIE); + if (DS.isSubprogram() && !Scope->isAbstractScope()) { + DISubprogram SP = DISubprogram(DS); + TheCU->addAccelName(SP.getName(), ScopeDIE); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) + TheCU->addAccelName(SP.getLinkageName(), ScopeDIE); + } return ScopeDIE; } @@ -596,6 +603,11 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, // Add to Accel Names TheCU->addAccelName(SP.getName(), SubprogramDie); + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) + TheCU->addAccelName(SP.getLinkageName(), SubprogramDie); + // If this is an Objective-C selector name add it to the ObjC accelerator too. if (isObjCClass(SP.getName())) { StringRef Class, Category; -- cgit v1.1 From 35e932483a86a2b417d874648b903f6290ec3157 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 8 Nov 2011 21:57:44 +0000 Subject: Handle reference counts in one function: release(). This new function will decrement the reference count, and collapse a domain value when the last reference is gone. This simplifies DomainValue reference counting, and decouples it from the LiveRegs array. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144131 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 5aa80f9..b4f99b9 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -139,7 +139,7 @@ private: // DomainValue allocation. DomainValue *Alloc(int domain = -1); - void Recycle(DomainValue*); + void release(DomainValue*); // LiveRegs manipulations. void SetLiveReg(int rx, DomainValue *DV); @@ -176,10 +176,19 @@ DomainValue *ExeDepsFix::Alloc(int domain) { return dv; } -void ExeDepsFix::Recycle(DomainValue *dv) { - assert(dv && "Cannot recycle NULL"); - dv->clear(); - Avail.push_back(dv); +/// release - Release a reference to DV. When the last reference is released, +/// collapse if needed. +void ExeDepsFix::release(DomainValue *DV) { + assert(DV && DV->Refs && "Bad DomainValue"); + if (--DV->Refs) + return; + + // There are no more DV references. Collapse any contained instructions. + if (DV->AvailableDomains && !DV->isCollapsed()) + Collapse(DV, DV->getFirstDomain()); + + DV->clear(); + Avail.push_back(DV); } /// Set LiveRegs[rx] = dv, updating reference counts. @@ -192,10 +201,8 @@ void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) { if (LiveRegs[rx] == dv) return; - if (LiveRegs[rx]) { - assert(LiveRegs[rx]->Refs && "Bad refcount"); - if (--LiveRegs[rx]->Refs == 0) Recycle(LiveRegs[rx]); - } + if (LiveRegs[rx]) + release(LiveRegs[rx]); LiveRegs[rx] = dv; if (dv) ++dv->Refs; } @@ -205,12 +212,8 @@ void ExeDepsFix::Kill(int rx) { assert(unsigned(rx) < NumRegs && "Invalid index"); if (!LiveRegs || !LiveRegs[rx]) return; - // Before killing the last reference to an open DomainValue, collapse it to - // the first available domain. 
- if (LiveRegs[rx]->Refs == 1 && !LiveRegs[rx]->isCollapsed()) - Collapse(LiveRegs[rx], LiveRegs[rx]->getFirstDomain()); - else - SetLiveReg(rx, 0); + release(LiveRegs[rx]); + LiveRegs[rx] = 0; } /// Force register rx into domain. -- cgit v1.1 From 6bcb9a783b3220561ee3413322ad1037983d63cb Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 8 Nov 2011 21:57:47 +0000 Subject: Rename all methods to follow style guide. No functional change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144132 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 84 ++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index b4f99b9..994a5c3 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -135,18 +135,18 @@ public: private: // Register mapping. - int RegIndex(unsigned Reg); + int regIndex(unsigned Reg); // DomainValue allocation. - DomainValue *Alloc(int domain = -1); + DomainValue *alloc(int domain = -1); void release(DomainValue*); // LiveRegs manipulations. - void SetLiveReg(int rx, DomainValue *DV); - void Kill(int rx); - void Force(int rx, unsigned domain); - void Collapse(DomainValue *dv, unsigned domain); - bool Merge(DomainValue *A, DomainValue *B); + void setLiveReg(int rx, DomainValue *DV); + void kill(int rx); + void force(int rx, unsigned domain); + void collapse(DomainValue *dv, unsigned domain); + bool merge(DomainValue *A, DomainValue *B); void enterBasicBlock(MachineBasicBlock*); void leaveBasicBlock(MachineBasicBlock*); @@ -161,12 +161,12 @@ char ExeDepsFix::ID = 0; /// Translate TRI register number to an index into our smaller tables of /// interesting registers. Return -1 for boring registers. 
-int ExeDepsFix::RegIndex(unsigned Reg) { +int ExeDepsFix::regIndex(unsigned Reg) { assert(Reg < AliasMap.size() && "Invalid register"); return AliasMap[Reg]; } -DomainValue *ExeDepsFix::Alloc(int domain) { +DomainValue *ExeDepsFix::alloc(int domain) { DomainValue *dv = Avail.empty() ? new(Allocator.Allocate()) DomainValue : Avail.pop_back_val(); @@ -185,14 +185,14 @@ void ExeDepsFix::release(DomainValue *DV) { // There are no more DV references. Collapse any contained instructions. if (DV->AvailableDomains && !DV->isCollapsed()) - Collapse(DV, DV->getFirstDomain()); + collapse(DV, DV->getFirstDomain()); DV->clear(); Avail.push_back(DV); } /// Set LiveRegs[rx] = dv, updating reference counts. -void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) { +void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) { assert(unsigned(rx) < NumRegs && "Invalid index"); if (!LiveRegs) { LiveRegs = new DomainValue*[NumRegs]; @@ -208,7 +208,7 @@ void ExeDepsFix::SetLiveReg(int rx, DomainValue *dv) { } // Kill register rx, recycle or collapse any DomainValue. -void ExeDepsFix::Kill(int rx) { +void ExeDepsFix::kill(int rx) { assert(unsigned(rx) < NumRegs && "Invalid index"); if (!LiveRegs || !LiveRegs[rx]) return; @@ -217,30 +217,30 @@ void ExeDepsFix::Kill(int rx) { } /// Force register rx into domain. -void ExeDepsFix::Force(int rx, unsigned domain) { +void ExeDepsFix::force(int rx, unsigned domain) { assert(unsigned(rx) < NumRegs && "Invalid index"); DomainValue *dv; if (LiveRegs && (dv = LiveRegs[rx])) { if (dv->isCollapsed()) dv->addDomain(domain); else if (dv->hasDomain(domain)) - Collapse(dv, domain); + collapse(dv, domain); else { // This is an incompatible open DomainValue. Collapse it to whatever and // force the new value into domain. This costs a domain crossing. - Collapse(dv, dv->getFirstDomain()); + collapse(dv, dv->getFirstDomain()); assert(LiveRegs[rx] && "Not live after collapse?"); LiveRegs[rx]->addDomain(domain); } } else { // Set up basic collapsed DomainValue. 
- SetLiveReg(rx, Alloc(domain)); + setLiveReg(rx, alloc(domain)); } } /// Collapse open DomainValue into given domain. If there are multiple /// registers using dv, they each get a unique collapsed DomainValue. -void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) { +void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { assert(dv->hasDomain(domain) && "Cannot collapse"); // Collapse all the instructions. @@ -252,12 +252,12 @@ void ExeDepsFix::Collapse(DomainValue *dv, unsigned domain) { if (LiveRegs && dv->Refs > 1) for (unsigned rx = 0; rx != NumRegs; ++rx) if (LiveRegs[rx] == dv) - SetLiveReg(rx, Alloc(domain)); + setLiveReg(rx, alloc(domain)); } /// Merge - All instructions and registers in B are moved to A, and B is /// released. -bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { +bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { assert(!A->isCollapsed() && "Cannot merge into collapsed"); assert(!B->isCollapsed() && "Cannot merge from collapsed"); if (A == B) @@ -276,7 +276,7 @@ bool ExeDepsFix::Merge(DomainValue *A, DomainValue *B) { for (unsigned rx = 0; rx != NumRegs; ++rx) if (LiveRegs[rx] == B) - SetLiveReg(rx, A); + setLiveReg(rx, A); return true; } @@ -284,7 +284,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Try to coalesce live-out registers from predecessors. 
for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - int rx = RegIndex(*i); + int rx = regIndex(*i); if (rx < 0) continue; for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), pe = MBB->pred_end(); pi != pe; ++pi) { @@ -293,7 +293,7 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { DomainValue *pdv = fi->second[rx]; if (!pdv || !pdv->AvailableDomains) continue; if (!LiveRegs || !LiveRegs[rx]) { - SetLiveReg(rx, pdv); + setLiveReg(rx, pdv); continue; } @@ -302,15 +302,15 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // We are already collapsed, but predecessor is not. Force him. unsigned domain = LiveRegs[rx]->getFirstDomain(); if (!pdv->isCollapsed() && pdv->hasDomain(domain)) - Collapse(pdv, domain); + collapse(pdv, domain); continue; } // Currently open, merge in predecessor. if (!pdv->isCollapsed()) - Merge(LiveRegs[rx], pdv); + merge(LiveRegs[rx], pdv); else - Force(rx, pdv->getFirstDomain()); + force(rx, pdv->getFirstDomain()); } } } @@ -344,19 +344,19 @@ void ExeDepsFix::visitHardInstr(MachineInstr *mi, unsigned domain) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - Force(rx, domain); + force(rx, domain); } // Kill all defs and force them. 
for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - Kill(rx); - Force(rx, domain); + kill(rx); + force(rx, domain); } } @@ -373,7 +373,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; if (DomainValue *dv = LiveRegs[rx]) { // Bitmask of domains that dv and available have in common. @@ -390,7 +390,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { else // Open DomainValue is not compatible with instruction. It is useless // now. - Kill(rx); + kill(rx); } } @@ -410,7 +410,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { DomainValue *dv = LiveRegs[rx]; // This useless DomainValue could have been missed above. if (!dv->getCommonDomains(available)) { - Kill(*i); + kill(*i); continue; } // sorted, uniqued insert. @@ -438,17 +438,17 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { } DomainValue *latest = doms.pop_back_val(); - if (Merge(dv, latest)) continue; + if (merge(dv, latest)) continue; // If latest didn't merge, it is useless now. Kill all registers using it. for (SmallVector::iterator i=used.begin(), e=used.end(); i != e; ++i) if (LiveRegs[*i] == latest) - Kill(*i); + kill(*i); } // dv is the DomainValue we are going to use for this instruction. 
if (!dv) - dv = Alloc(); + dv = alloc(); dv->Dist = Distance; dv->AvailableDomains = available; dv->Instrs.push_back(mi); @@ -457,11 +457,11 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { for (unsigned i = 0, e = mi->getDesc().getNumOperands(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { - Kill(rx); - SetLiveReg(rx, dv); + kill(rx); + setLiveReg(rx, dv); } } } @@ -471,9 +471,9 @@ void ExeDepsFix::visitGenericInstr(MachineInstr *mi) { for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { MachineOperand &mo = mi->getOperand(i); if (!mo.isReg()) continue; - int rx = RegIndex(mo.getReg()); + int rx = regIndex(mo.getReg()); if (rx < 0) continue; - Kill(rx); + kill(rx); } } @@ -529,7 +529,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { LiveRegs = FI->second; for (unsigned i = 0, e = NumRegs; i != e; ++i) if (LiveRegs[i]) - Kill(i); + kill(i); delete[] LiveRegs; } LiveOuts.clear(); -- cgit v1.1 From 0fdb05deb9ccbebe55c05f2fb4af6ea813c97a98 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 8 Nov 2011 22:05:17 +0000 Subject: Call release() directly when cleaning up the remaining DomainValues. There is no need to involve the LiveRegs array and kill() any longer. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144133 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 994a5c3..5a75fde 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -525,12 +525,10 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { if (FI == LiveOuts.end()) continue; assert(FI->second && "Null entry"); - // The DomainValue is collapsed when the last reference is killed. - LiveRegs = FI->second; for (unsigned i = 0, e = NumRegs; i != e; ++i) - if (LiveRegs[i]) - kill(i); - delete[] LiveRegs; + if (FI->second[i]) + release(FI->second[i]); + delete[] FI->second; } LiveOuts.clear(); Avail.clear(); -- cgit v1.1 From a2ff3e2c3c0d8be862525c91f7b0e153480f4f5e Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Tue, 8 Nov 2011 22:23:43 +0000 Subject: Emit the compact unwind *if* we have a compact unwind encoding. 
*headdesk* git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144138 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 4982808..a68215f 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -1020,7 +1020,7 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, if (IsEH && MOFI->getCompactUnwindSection()) for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); - if (!Frame.CompactUnwindEncoding) + if (Frame.CompactUnwindEncoding) Emitter.EmitCompactUnwind(Streamer, Frame); } -- cgit v1.1 From ce1a538ab5b7ae7e0ed48d18c02571280fe105aa Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Tue, 8 Nov 2011 22:26:47 +0000 Subject: Properly handle Mips MC relocations and lower cpload and cprestore macros to MCInsts. Patch by Jack Carter. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144139 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 50 ++++++++++++- lib/Target/Mips/MipsAsmPrinter.cpp | 86 ++++++++++++++++------ lib/Target/Mips/MipsMCInstLower.cpp | 97 ++++++++++++++++++++----- lib/Target/Mips/MipsMCInstLower.h | 3 + 4 files changed, 191 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 53282ab..4a815f3 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -13,8 +13,10 @@ #define DEBUG_TYPE "asm-printer" #include "MipsInstPrinter.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -74,6 +76,52 @@ void MipsInstPrinter::printInst(const MCInst 
*MI, raw_ostream &O, printAnnotation(O, Annot); } +static void printExpr(const MCExpr *Expr, raw_ostream &OS) { + int Offset = 0; + const MCSymbolRefExpr *SRE; + + if (const MCBinaryExpr *BE = dyn_cast(Expr)) { + SRE = dyn_cast(BE->getLHS()); + const MCConstantExpr *CE = dyn_cast(BE->getRHS()); + assert(SRE && CE && "Binary expression must be sym+const."); + Offset = CE->getValue(); + } + else if (!(SRE = dyn_cast(Expr))) + assert(false && "Unexpected MCExpr type."); + + MCSymbolRefExpr::VariantKind Kind = SRE->getKind(); + + switch (Kind) { + default: assert(0 && "Invalid kind!"); + case MCSymbolRefExpr::VK_None: break; + case MCSymbolRefExpr::VK_Mips_GPREL: OS << "%gp_rel("; break; + case MCSymbolRefExpr::VK_Mips_GOT_CALL: OS << "%call16("; break; + case MCSymbolRefExpr::VK_Mips_GOT: OS << "%got("; break; + case MCSymbolRefExpr::VK_Mips_ABS_HI: OS << "%hi("; break; + case MCSymbolRefExpr::VK_Mips_ABS_LO: OS << "%lo("; break; + case MCSymbolRefExpr::VK_Mips_TLSGD: OS << "%tlsgd("; break; + case MCSymbolRefExpr::VK_Mips_GOTTPREL: OS << "%gottprel("; break; + case MCSymbolRefExpr::VK_Mips_TPREL_HI: OS << "%tprel_hi("; break; + case MCSymbolRefExpr::VK_Mips_TPREL_LO: OS << "%tprel_lo("; break; + case MCSymbolRefExpr::VK_Mips_GPOFF_HI: OS << "%hi(%neg(%gp_rel("; break; + case MCSymbolRefExpr::VK_Mips_GPOFF_LO: OS << "%lo(%neg(%gp_rel("; break; + case MCSymbolRefExpr::VK_Mips_GOT_DISP: OS << "%got_disp("; break; + case MCSymbolRefExpr::VK_Mips_GOT_PAGE: OS << "%got_page("; break; + case MCSymbolRefExpr::VK_Mips_GOT_OFST: OS << "%got_ofst("; break; + } + + OS << SRE->getSymbol(); + + if (Offset) { + if (Offset > 0) + OS << '+'; + OS << Offset; + } + + if (Kind != MCSymbolRefExpr::VK_None) + OS << ')'; +} + void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O) { const MCOperand &Op = MI->getOperand(OpNo); @@ -88,7 +136,7 @@ void MipsInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, } assert(Op.isExpr() && "unknown operand kind in 
printOperand"); - O << *Op.getExpr(); + printExpr(Op.getExpr(), O); } void MipsInstPrinter::printUnsignedImm(const MCInst *MI, int opNum, diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index a44d97f..d7b7f06 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -13,13 +13,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "mips-asm-printer" -#include "MipsAsmPrinter.h" #include "Mips.h" +#include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" #include "MipsMCInstLower.h" #include "MipsMCSymbolRefExpr.h" #include "InstPrinter/MipsInstPrinter.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/DebugInfo.h" #include "llvm/BasicBlock.h" #include "llvm/Instructions.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -27,19 +31,17 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/Instructions.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Analysis/DebugInfo.h" using namespace llvm; @@ -50,6 +52,12 @@ static bool isUnalignedLoadStore(unsigned Opc) { Opc == Mips::USW_P8 || Opc == Mips::USH_P8; } +static bool isDirective(unsigned Opc) { + return Opc == Mips::MACRO || Opc == Mips::NOMACRO || + Opc == Mips::REORDER || Opc == Mips::NOREORDER || + Opc == Mips::ATMACRO || Opc == 
Mips::NOAT; +} + void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream OS(Str); @@ -62,8 +70,12 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { MipsMCInstLower MCInstLowering(Mang, *MF, *this); unsigned Opc = MI->getOpcode(); MCInst TmpInst0; + SmallVector MCInsts; MCInstLowering.Lower(MI, TmpInst0); - + + if (!OutStreamer.hasRawTextSupport() && isDirective(Opc)) + return; + // Enclose unaligned load or store with .macro & .nomacro directives. if (isUnalignedLoadStore(Opc)) { MCInst Directive; @@ -75,6 +87,23 @@ void MipsAsmPrinter::EmitInstruction(const MachineInstr *MI) { return; } + if (!OutStreamer.hasRawTextSupport()) { + // Lower CPLOAD and CPRESTORE + if (Opc == Mips::CPLOAD) { + MCInstLowering.LowerCPLOAD(MI, MCInsts); + for (SmallVector::iterator I = MCInsts.begin(); + I != MCInsts.end(); ++I) + OutStreamer.EmitInstruction(*I); + return; + } + + if (Opc == Mips::CPRESTORE) { + MCInstLowering.LowerCPRESTORE(MI, TmpInst0); + OutStreamer.EmitInstruction(TmpInst0); + return; + } + } + OutStreamer.EmitInstruction(TmpInst0); } @@ -191,7 +220,8 @@ void MipsAsmPrinter::emitFrameDirective() { unsigned returnReg = RI.getRARegister(); unsigned stackSize = MF->getFrameInfo()->getStackSize(); - OutStreamer.EmitRawText("\t.frame\t$" + + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText("\t.frame\t$" + StringRef(MipsInstPrinter::getRegisterName(stackReg)).lower() + "," + Twine(stackSize) + ",$" + StringRef(MipsInstPrinter::getRegisterName(returnReg)).lower()); @@ -212,7 +242,8 @@ const char *MipsAsmPrinter::getCurrentABIString() const { } void MipsAsmPrinter::EmitFunctionEntryLabel() { - OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText("\t.ent\t" + Twine(CurrentFnSym->getName())); OutStreamer.EmitLabel(CurrentFnSym); } @@ -221,10 +252,12 @@ void MipsAsmPrinter::EmitFunctionEntryLabel() { void 
MipsAsmPrinter::EmitFunctionBodyStart() { emitFrameDirective(); - SmallString<128> Str; - raw_svector_ostream OS(Str); - printSavedRegsBitmask(OS); - OutStreamer.EmitRawText(OS.str()); + if (OutStreamer.hasRawTextSupport()) { + SmallString<128> Str; + raw_svector_ostream OS(Str); + printSavedRegsBitmask(OS); + OutStreamer.EmitRawText(OS.str()); + } } /// EmitFunctionBodyEnd - Targets can override this to emit stuff after @@ -233,12 +266,13 @@ void MipsAsmPrinter::EmitFunctionBodyEnd() { // There are instruction for this macros, but they must // always be at the function end, and we can't emit and // break with BB logic. - OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); - OutStreamer.EmitRawText(StringRef("\t.set\treorder")); - OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); + if (OutStreamer.hasRawTextSupport()) { + OutStreamer.EmitRawText(StringRef("\t.set\tmacro")); + OutStreamer.EmitRawText(StringRef("\t.set\treorder")); + OutStreamer.EmitRawText("\t.end\t" + Twine(CurrentFnSym->getName())); + } } - /// isBlockOnlyReachableByFallthough - Return true if the basic block has /// exactly one predecessor and the control transfer mechanism between /// the predecessor and this block is a fall-through. @@ -419,18 +453,22 @@ void MipsAsmPrinter::EmitStartOfAsmFile(Module &M) { // FIXME: Use SwitchSection. // Tell the assembler which ABI we are using - OutStreamer.EmitRawText("\t.section .mdebug." + Twine(getCurrentABIString())); + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText("\t.section .mdebug." 
+ Twine(getCurrentABIString())); // TODO: handle O64 ABI - if (Subtarget->isABI_EABI()) { - if (Subtarget->isGP32bit()) - OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32")); - else - OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64")); + if (OutStreamer.hasRawTextSupport()) { + if (Subtarget->isABI_EABI()) { + if (Subtarget->isGP32bit()) + OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long32")); + else + OutStreamer.EmitRawText(StringRef("\t.section .gcc_compiled_long64")); + } } // return to previous section - OutStreamer.EmitRawText(StringRef("\t.previous")); + if (OutStreamer.hasRawTextSupport()) + OutStreamer.EmitRawText(StringRef("\t.previous")); } MachineLocation diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 608a7d2..6c0e4f6 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -12,14 +12,14 @@ // //===----------------------------------------------------------------------===// -#include "MipsMCInstLower.h" #include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" -#include "MipsMCSymbolRefExpr.h" +#include "MipsMCInstLower.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/Target/Mangler.h" using namespace llvm; @@ -31,26 +31,26 @@ MipsMCInstLower::MipsMCInstLower(Mangler *mang, const MachineFunction &mf, MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const { - MipsMCSymbolRefExpr::VariantKind Kind; + MCSymbolRefExpr::VariantKind Kind; const MCSymbol *Symbol; switch(MO.getTargetFlags()) { default: assert(0 && "Invalid target flag!"); - case MipsII::MO_NO_FLAG: Kind = MipsMCSymbolRefExpr::VK_Mips_None; break; - case MipsII::MO_GPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GPREL; break; - 
case MipsII::MO_GOT_CALL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_CALL; break; - case MipsII::MO_GOT: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT; break; - case MipsII::MO_ABS_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_HI; break; - case MipsII::MO_ABS_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_ABS_LO; break; - case MipsII::MO_TLSGD: Kind = MipsMCSymbolRefExpr::VK_Mips_TLSGD; break; - case MipsII::MO_GOTTPREL: Kind = MipsMCSymbolRefExpr::VK_Mips_GOTTPREL; break; - case MipsII::MO_TPREL_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_HI; break; - case MipsII::MO_TPREL_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_TPREL_LO; break; - case MipsII::MO_GPOFF_HI: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_HI; break; - case MipsII::MO_GPOFF_LO: Kind = MipsMCSymbolRefExpr::VK_Mips_GPOFF_LO; break; - case MipsII::MO_GOT_DISP: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_DISP; break; - case MipsII::MO_GOT_PAGE: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_PAGE; break; - case MipsII::MO_GOT_OFST: Kind = MipsMCSymbolRefExpr::VK_Mips_GOT_OFST; break; + case MipsII::MO_NO_FLAG: Kind = MCSymbolRefExpr::VK_None; break; + case MipsII::MO_GPREL: Kind = MCSymbolRefExpr::VK_Mips_GPREL; break; + case MipsII::MO_GOT_CALL: Kind = MCSymbolRefExpr::VK_Mips_GOT_CALL; break; + case MipsII::MO_GOT: Kind = MCSymbolRefExpr::VK_Mips_GOT; break; + case MipsII::MO_ABS_HI: Kind = MCSymbolRefExpr::VK_Mips_ABS_HI; break; + case MipsII::MO_ABS_LO: Kind = MCSymbolRefExpr::VK_Mips_ABS_LO; break; + case MipsII::MO_TLSGD: Kind = MCSymbolRefExpr::VK_Mips_TLSGD; break; + case MipsII::MO_GOTTPREL: Kind = MCSymbolRefExpr::VK_Mips_GOTTPREL; break; + case MipsII::MO_TPREL_HI: Kind = MCSymbolRefExpr::VK_Mips_TPREL_HI; break; + case MipsII::MO_TPREL_LO: Kind = MCSymbolRefExpr::VK_Mips_TPREL_LO; break; + case MipsII::MO_GPOFF_HI: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_HI; break; + case MipsII::MO_GPOFF_LO: Kind = MCSymbolRefExpr::VK_Mips_GPOFF_LO; break; + case MipsII::MO_GOT_DISP: Kind = MCSymbolRefExpr::VK_Mips_GOT_DISP; break; + case 
MipsII::MO_GOT_PAGE: Kind = MCSymbolRefExpr::VK_Mips_GOT_PAGE; break; + case MipsII::MO_GOT_OFST: Kind = MCSymbolRefExpr::VK_Mips_GOT_OFST; break; } switch (MOTy) { @@ -84,10 +84,67 @@ MCOperand MipsMCInstLower::LowerSymbolOperand(const MachineOperand &MO, llvm_unreachable(""); } - return MCOperand::CreateExpr(MipsMCSymbolRefExpr::Create(Kind, Symbol, Offset, - Ctx)); + const MCSymbolRefExpr *MCSym = MCSymbolRefExpr::Create(Symbol, Kind, Ctx); + + if (!Offset) + return MCOperand::CreateExpr(MCSym); + + // Assume offset is never negative. + assert(Offset > 0); + + const MCConstantExpr *OffsetExpr = MCConstantExpr::Create(Offset, Ctx); + const MCBinaryExpr *AddExpr = MCBinaryExpr::CreateAdd(MCSym, OffsetExpr, Ctx); + return MCOperand::CreateExpr(AddExpr); +} + +// Lower ".cpload $reg" to +// "lui $gp, %hi(_gp_disp)" +// "addiu $gp, $gp, %lo(_gp_disp)" +// "addu $gp. $gp, $reg" +void MipsMCInstLower::LowerCPLOAD(const MachineInstr *MI, + SmallVector& MCInsts) { + MCInst Lui, Addiu, Addu; + StringRef SymName("_gp_disp"); + const MCSymbol *Symbol = Ctx.GetOrCreateSymbol(SymName); + const MCSymbolRefExpr *MCSym; + + // lui $gp, %hi(_gp_disp) + Lui.setOpcode(Mips::LUi); + Lui.addOperand(MCOperand::CreateReg(Mips::GP)); + MCSym = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_Mips_ABS_HI, Ctx); + Lui.addOperand(MCOperand::CreateExpr(MCSym)); + MCInsts.push_back(Lui); + + // addiu $gp, $gp, %lo(_gp_disp) + Addiu.setOpcode(Mips::ADDiu); + Addiu.addOperand(MCOperand::CreateReg(Mips::GP)); + Addiu.addOperand(MCOperand::CreateReg(Mips::GP)); + MCSym = MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_Mips_ABS_LO, Ctx); + Addiu.addOperand(MCOperand::CreateExpr(MCSym)); + MCInsts.push_back(Addiu); + + // addu $gp. 
$gp, $reg + Addu.setOpcode(Mips::ADDu); + Addu.addOperand(MCOperand::CreateReg(Mips::GP)); + Addu.addOperand(MCOperand::CreateReg(Mips::GP)); + const MachineOperand &MO = MI->getOperand(0); + assert(MO.isReg() && "CPLOAD's operand must be a register."); + Addu.addOperand(MCOperand::CreateReg(MO.getReg())); + MCInsts.push_back(Addu); } +// Lower ".cprestore offset" to "sw $gp, offset($sp)". +void MipsMCInstLower::LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI) { + OutMI.clear(); + OutMI.setOpcode(Mips::SW); + OutMI.addOperand(MCOperand::CreateReg(Mips::GP)); + OutMI.addOperand(MCOperand::CreateReg(Mips::SP)); + const MachineOperand &MO = MI->getOperand(0); + assert(MO.isImm() && "CPRESTORE's operand must be an immediate."); + OutMI.addOperand(MCOperand::CreateImm(MO.getImm())); +} + + MCOperand MipsMCInstLower::LowerOperand(const MachineOperand& MO) const { MachineOperandType MOTy = MO.getType(); diff --git a/lib/Target/Mips/MipsMCInstLower.h b/lib/Target/Mips/MipsMCInstLower.h index 223f23a..3a24da2 100644 --- a/lib/Target/Mips/MipsMCInstLower.h +++ b/lib/Target/Mips/MipsMCInstLower.h @@ -9,6 +9,7 @@ #ifndef MIPSMCINSTLOWER_H #define MIPSMCINSTLOWER_H +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/Support/Compiler.h" @@ -34,6 +35,8 @@ public: MipsMCInstLower(Mangler *mang, const MachineFunction &MF, MipsAsmPrinter &asmprinter); void Lower(const MachineInstr *MI, MCInst &OutMI) const; + void LowerCPLOAD(const MachineInstr *MI, SmallVector& MCInsts); + void LowerCPRESTORE(const MachineInstr *MI, MCInst &OutMI); private: MCOperand LowerSymbolOperand(const MachineOperand &MO, MachineOperandType MOTy, unsigned Offset) const; -- cgit v1.1 From 737e9a2db27b9c3b212ff64fda7af5537ecbfb45 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 8 Nov 2011 23:26:00 +0000 Subject: Track reference count independently from clear(). This allows clear() to be called on a DomainValue with references. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144147 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 5a75fde..9a7a289 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -92,10 +92,10 @@ struct DomainValue { return CountTrailingZeros_32(AvailableDomains); } - DomainValue() { clear(); } + DomainValue() : Refs(0) { clear(); } void clear() { - Refs = AvailableDomains = Dist = 0; + AvailableDomains = Dist = 0; Instrs.clear(); } }; @@ -173,6 +173,7 @@ DomainValue *ExeDepsFix::alloc(int domain) { dv->Dist = Distance; if (domain >= 0) dv->addDomain(domain); + assert(dv->Refs == 0 && "Reference count wasn't cleared"); return dv; } @@ -271,8 +272,7 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); // Clear the old DomainValue so we won't try to swizzle instructions twice. - B->Instrs.clear(); - B->AvailableDomains = 0; + B->clear(); for (unsigned rx = 0; rx != NumRegs; ++rx) if (LiveRegs[rx] == B) -- cgit v1.1 From 7151ddd6efb2f3f51297b35f27a30ddfa8a76a7e Mon Sep 17 00:00:00 2001 From: "Michael J. Spencer" Date: Tue, 8 Nov 2011 23:34:07 +0000 Subject: Object/COFF: Fix PE reading. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144148 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Object/COFFObjectFile.cpp | 53 ++++++++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp index 7de51a4..2abfb44 100644 --- a/lib/Object/COFFObjectFile.cpp +++ b/lib/Object/COFFObjectFile.cpp @@ -345,7 +345,7 @@ error_code COFFObjectFile::getSectionContents(DataRefImpl Sec, // data, as there's nothing that says that is not allowed. 
uintptr_t con_start = uintptr_t(base()) + sec->PointerToRawData; uintptr_t con_end = con_start + sec->SizeOfRawData; - if (con_end >= uintptr_t(Data->getBufferEnd())) + if (con_end > uintptr_t(Data->getBufferEnd())) return object_error::parse_failed; Result = StringRef(reinterpret_cast(con_start), sec->SizeOfRawData); @@ -424,7 +424,12 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const { } COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) - : ObjectFile(Binary::isCOFF, Object, ec) { + : ObjectFile(Binary::isCOFF, Object, ec) + , Header(0) + , SectionTable(0) + , SymbolTable(0) + , StringTable(0) + , StringTableSize(0) { // Check that we at least have enough room for a header. if (!checkSize(Data, ec, sizeof(coff_file_header))) return; @@ -437,7 +442,7 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) // PE/COFF, seek through MS-DOS compatibility stub and 4-byte // PE signature to find 'normal' COFF header. if (!checkSize(Data, ec, 0x3c + 8)) return; - HeaderStart += *reinterpret_cast(base() + 0x3c); + HeaderStart = *reinterpret_cast(base() + 0x3c); // Check the PE header. ("PE\0\0") if (std::memcmp(base() + HeaderStart, "PE\0\0", 4) != 0) { ec = object_error::parse_failed; @@ -459,28 +464,30 @@ COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) Header->NumberOfSections * sizeof(coff_section))) return; - SymbolTable = - reinterpret_cast(base() - + Header->PointerToSymbolTable); - if (!checkAddr(Data, ec, uintptr_t(SymbolTable), - Header->NumberOfSymbols * sizeof(coff_symbol))) - return; + if (Header->PointerToSymbolTable != 0) { + SymbolTable = + reinterpret_cast(base() + + Header->PointerToSymbolTable); + if (!checkAddr(Data, ec, uintptr_t(SymbolTable), + Header->NumberOfSymbols * sizeof(coff_symbol))) + return; - // Find string table. 
- StringTable = reinterpret_cast(base()) - + Header->PointerToSymbolTable - + Header->NumberOfSymbols * sizeof(coff_symbol); - if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t))) - return; + // Find string table. + StringTable = reinterpret_cast(base()) + + Header->PointerToSymbolTable + + Header->NumberOfSymbols * sizeof(coff_symbol); + if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t))) + return; - StringTableSize = *reinterpret_cast(StringTable); - if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize)) - return; - // Check that the string table is null terminated if has any in it. - if (StringTableSize < 4 - || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) { - ec = object_error::parse_failed; - return; + StringTableSize = *reinterpret_cast(StringTable); + if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize)) + return; + // Check that the string table is null terminated if has any in it. + if (StringTableSize < 4 + || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) { + ec = object_error::parse_failed; + return; + } } ec = object_error::success; -- cgit v1.1 From dbc372f47e3a77343e6ef1ab4a88bc46f532f774 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 9 Nov 2011 00:06:18 +0000 Subject: Link to the live DomainValue after merging. When merging two uncollapsed DomainValues, place a link to the active DomainValue from the passive DomainValue. This allows old stale references to the passive DomainValue to be updated to point to the active DomainValue. The new resolve() function finds the active DomainValue and updates the pointer. This change makes old live-out lists more useful since they may contain uncollapsed DomainValues that have since been merged into other DomainValues. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144149 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 63 +++++++++++++++++++++++++++++++--------- 1 file changed, 50 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index 9a7a289..c25f7db 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -60,6 +60,11 @@ struct DomainValue { // Position of the last defining instruction. unsigned Dist; + // Pointer to the next DomainValue in a chain. When two DomainValues are + // merged, Victim.Next is set to point to Victor, so old DomainValue + // references can be updated by folowing the chain. + DomainValue *Next; + // Twiddleable instructions using or defining these registers. SmallVector Instrs; @@ -94,8 +99,10 @@ struct DomainValue { DomainValue() : Refs(0) { clear(); } + // Clear this DomainValue and point to next which has all its data. void clear() { AvailableDomains = Dist = 0; + Next = 0; Instrs.clear(); } }; @@ -139,7 +146,12 @@ private: // DomainValue allocation. DomainValue *alloc(int domain = -1); + DomainValue *retain(DomainValue *DV) { + if (DV) ++DV->Refs; + return DV; + } void release(DomainValue*); + DomainValue *resolve(DomainValue*&); // LiveRegs manipulations. void setLiveReg(int rx, DomainValue *DV); @@ -174,22 +186,46 @@ DomainValue *ExeDepsFix::alloc(int domain) { if (domain >= 0) dv->addDomain(domain); assert(dv->Refs == 0 && "Reference count wasn't cleared"); + assert(!dv->Next && "Chained DomainValue shouldn't have been recycled"); return dv; } /// release - Release a reference to DV. When the last reference is released, /// collapse if needed. void ExeDepsFix::release(DomainValue *DV) { - assert(DV && DV->Refs && "Bad DomainValue"); - if (--DV->Refs) - return; - - // There are no more DV references. Collapse any contained instructions. 
- if (DV->AvailableDomains && !DV->isCollapsed()) - collapse(DV, DV->getFirstDomain()); + while (DV) { + assert(DV->Refs && "Bad DomainValue"); + if (--DV->Refs) + return; + + // There are no more DV references. Collapse any contained instructions. + if (DV->AvailableDomains && !DV->isCollapsed()) + collapse(DV, DV->getFirstDomain()); + + DomainValue *Next = DV->Next; + DV->clear(); + Avail.push_back(DV); + // Also release the next DomainValue in the chain. + DV = Next; + } +} - DV->clear(); - Avail.push_back(DV); +/// resolve - Follow the chain of dead DomainValues until a live DomainValue is +/// reached. Update the referenced pointer when necessary. +DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { + DomainValue *DV = DVRef; + if (!DV || !DV->Next) + return DV; + + // DV has a chain. Find the end. + do DV = DV->Next; + while (DV->Next); + + // Update DVRef to point to DV. + retain(DV); + release(DVRef); + DVRef = DV; + return DV; } /// Set LiveRegs[rx] = dv, updating reference counts. @@ -204,8 +240,7 @@ void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) { return; if (LiveRegs[rx]) release(LiveRegs[rx]); - LiveRegs[rx] = dv; - if (dv) ++dv->Refs; + LiveRegs[rx] = retain(dv); } // Kill register rx, recycle or collapse any DomainValue. @@ -273,6 +308,8 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { // Clear the old DomainValue so we won't try to swizzle instructions twice. B->clear(); + // All uses of B are referred to A. 
+ B->Next = retain(A); for (unsigned rx = 0; rx != NumRegs; ++rx) if (LiveRegs[rx] == B) @@ -290,8 +327,8 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { pe = MBB->pred_end(); pi != pe; ++pi) { LiveOutMap::const_iterator fi = LiveOuts.find(*pi); if (fi == LiveOuts.end()) continue; - DomainValue *pdv = fi->second[rx]; - if (!pdv || !pdv->AvailableDomains) continue; + DomainValue *pdv = resolve(fi->second[rx]); + if (!pdv) continue; if (!LiveRegs || !LiveRegs[rx]) { setLiveReg(rx, pdv); continue; -- cgit v1.1 From f4c4768fb2277cb940a90cb2f0e9a747ebc671c3 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Wed, 9 Nov 2011 01:06:56 +0000 Subject: Collapse DomainValues across loop back-edges. During the initial RPO traversal of the basic blocks, remember the ones that are incomplete because of back-edges from predecessors that haven't been visited yet. After the initial RPO, revisit all those loop headers so the incoming DomainValues on the back-edges can be properly collapsed. This will properly fix execution domains on software pipelined code, like the included test case. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144151 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 43 ++++++++++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index c25f7db..fc0b612 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -160,7 +160,7 @@ private: void collapse(DomainValue *dv, unsigned domain); bool merge(DomainValue *A, DomainValue *B); - void enterBasicBlock(MachineBasicBlock*); + bool enterBasicBlock(MachineBasicBlock*); void leaveBasicBlock(MachineBasicBlock*); void visitInstr(MachineInstr*); void visitGenericInstr(MachineInstr*); @@ -317,7 +317,13 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { return true; } -void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { +// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. +// Return true if some predecessor hasn't been processed yet (like on a loop +// back-edge). +bool ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { + // Detect back-edges from predecessors we haven't processed yet. + bool seenBackEdge = false; + // Try to coalesce live-out registers from predecessors. 
for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { @@ -326,7 +332,12 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), pe = MBB->pred_end(); pi != pe; ++pi) { LiveOutMap::const_iterator fi = LiveOuts.find(*pi); - if (fi == LiveOuts.end()) continue; + if (fi == LiveOuts.end()) { + seenBackEdge = true; + continue; + } + if (!fi->second) + continue; DomainValue *pdv = resolve(fi->second[rx]); if (!pdv) continue; if (!LiveRegs || !LiveRegs[rx]) { @@ -350,12 +361,19 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { force(rx, pdv->getFirstDomain()); } } + return seenBackEdge; } void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { // Save live registers at end of MBB - used by enterBasicBlock(). - if (LiveRegs) - LiveOuts.insert(std::make_pair(MBB, LiveRegs)); + // Also use LiveOuts as a visited set to detect back-edges. + if (!LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second && LiveRegs) { + // Insertion failed, this must be the second pass. + // Release all the DomainValues instead of keeping them. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + release(LiveRegs[i]); + delete[] LiveRegs; + } LiveRegs = 0; } @@ -545,23 +563,32 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MachineBasicBlock *Entry = MF->begin(); ReversePostOrderTraversal RPOT(Entry); + SmallVector Loops; for (ReversePostOrderTraversal::rpo_iterator MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock *MBB = *MBBI; - enterBasicBlock(MBB); + if (enterBasicBlock(MBB)) + Loops.push_back(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) visitInstr(I); leaveBasicBlock(MBB); } + // Visit all the loop blocks again in order to merge DomainValues from + // back-edges. 
+ for (unsigned i = 0, e = Loops.size(); i != e; ++i) { + MachineBasicBlock *MBB = Loops[i]; + enterBasicBlock(MBB); + leaveBasicBlock(MBB); + } + // Clear the LiveOuts vectors and collapse any remaining DomainValues. for (ReversePostOrderTraversal::rpo_iterator MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI); - if (FI == LiveOuts.end()) + if (FI == LiveOuts.end() || !FI->second) continue; - assert(FI->second && "Null entry"); for (unsigned i = 0, e = NumRegs; i != e; ++i) if (FI->second[i]) release(FI->second[i]); -- cgit v1.1 From 44ee4714a8c245d4fdfd03840efcf58c3f66c6bc Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 9 Nov 2011 01:57:03 +0000 Subject: Hide cpu name checking in ARMSubtarget. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144154 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 2 +- lib/Target/ARM/ARMSubtarget.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a871ed7..4c3be89 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -1080,7 +1080,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); // ARM errata 602117: LDRD with base in list may result in incorrect base // register when interrupted or faulted. 
- bool Errata602117 = EvenReg == BaseReg && STI->getCPUString() == "cortex-m3"; + bool Errata602117 = EvenReg == BaseReg && STI->isCortexM3(); if (!Errata602117 && ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum)) return false; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 5e884e0..a35f450 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -191,6 +191,7 @@ protected: bool isCortexA8() const { return ARMProcFamily == CortexA8; } bool isCortexA9() const { return ARMProcFamily == CortexA9; } + bool isCortexM3() const { return CPUString == "cortex-m3"; } bool hasARMOps() const { return !NoARM; } -- cgit v1.1 From 2f2fe417f98406140504ba3bbb65676d4a00ed87 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 9 Nov 2011 03:22:02 +0000 Subject: Add support for encoding immediates in icmp and fcmp. Hopefully, this will remove a fair number of unnecessary materialized constants. rdar://10412592 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144163 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 76 +++++++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 3c6d1e8..23629e7 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1206,16 +1206,42 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, if (isFloat && !Subtarget->hasVFP2()) return false; + // Check to see if the 2nd operand is a constant that we can encode directly + // in the compare. 
+ uint64_t Imm; + int EncodedImm = 0; + bool EncodeImm = false; + bool isNegativeImm = false; + if (const ConstantInt *ConstInt = dyn_cast(Src2Value)) { + if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 || + SrcVT == MVT::i1) { + const APInt &CIVal = ConstInt->getValue(); + + isNegativeImm = CIVal.isNegative(); + Imm = (isNegativeImm) ? (-CIVal).getZExtValue() : CIVal.getZExtValue(); + EncodedImm = (int)Imm; + EncodeImm = isThumb2 ? (ARM_AM::getT2SOImmVal(EncodedImm) != -1) : + (ARM_AM::getSOImmVal(EncodedImm) != -1); + } + } else if (const ConstantFP *ConstFP = dyn_cast(Src2Value)) { + if (SrcVT == MVT::f32 || SrcVT == MVT::f64) + if (ConstFP->isZero() && !ConstFP->isNegative()) + EncodeImm = true; + } + unsigned CmpOpc; + bool isICmp = true; bool needsExt = false; switch (SrcVT.getSimpleVT().SimpleTy) { default: return false; // TODO: Verify compares. case MVT::f32: - CmpOpc = ARM::VCMPES; + isICmp = false; + CmpOpc = EncodeImm ? ARM::VCMPEZS : ARM::VCMPES; break; case MVT::f64: - CmpOpc = ARM::VCMPED; + isICmp = false; + CmpOpc = EncodeImm ? ARM::VCMPEZD : ARM::VCMPED; break; case MVT::i1: case MVT::i8: @@ -1223,30 +1249,56 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, needsExt = true; // Intentional fall-through. case MVT::i32: - CmpOpc = isThumb2 ? ARM::t2CMPrr : ARM::CMPrr; + if (isThumb2) { + if (!EncodeImm) + CmpOpc = ARM::t2CMPrr; + else + CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri; + } else { + if (!EncodeImm) + CmpOpc = ARM::CMPrr; + else + CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri; + } break; } unsigned SrcReg1 = getRegForValue(Src1Value); if (SrcReg1 == 0) return false; - unsigned SrcReg2 = getRegForValue(Src2Value); - if (SrcReg2 == 0) return false; + unsigned SrcReg2; + if (!EncodeImm) { + SrcReg2 = getRegForValue(Src2Value); + if (SrcReg2 == 0) return false; + } // We have i1, i8, or i16, we need to either zero extend or sign extend. 
if (needsExt) { unsigned ResultReg; - EVT DestVT = MVT::i32; - ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, DestVT, isZExt); + ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); if (ResultReg == 0) return false; SrcReg1 = ResultReg; - ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, DestVT, isZExt); - if (ResultReg == 0) return false; - SrcReg2 = ResultReg; + if (!EncodeImm) { + ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); + if (ResultReg == 0) return false; + SrcReg2 = ResultReg; + } } - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(SrcReg1).addReg(SrcReg2)); + if (!EncodeImm) { + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CmpOpc)) + .addReg(SrcReg1).addReg(SrcReg2)); + } else { + MachineInstrBuilder MIB; + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) + .addReg(SrcReg1); + + // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0. + if (isICmp) + MIB.addImm(EncodedImm); + AddOptionalDefs(MIB); + } // For floating point we need to move the result to a comparison register // that we can then use for branches. -- cgit v1.1 From dfa30e1ab243990eda4732a6dffb91e965e7a755 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Wed, 9 Nov 2011 05:24:07 +0000 Subject: Remove the pubnames section, no one consumes it. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144169 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 6 +-- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 9 ----- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 58 ----------------------------- lib/CodeGen/AsmPrinter/DwarfDebug.h | 5 +-- lib/MC/MCObjectFileInfo.cpp | 12 ------ 5 files changed, 3 insertions(+), 87 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 3d353b3..a68da04 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1076,11 +1076,9 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addType(VariableDIE, GTy); // Add scoping info. - if (!GV.isLocalToUnit()) { + if (!GV.isLocalToUnit()) addUInt(VariableDIE, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1); - // Expose as global. - addGlobal(GV.getName(), VariableDIE); - } + // Add line number info. addSourceLine(VariableDIE, GV); // Add to context owner. diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index b994ce3..1cbe3a0 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -56,10 +56,6 @@ class CompileUnit { /// descriptors to debug information entries using a DIEEntry proxy. DenseMap MDNodeToDIEEntryMap; - /// Globals - A map of globally visible named entities for this unit. - /// - StringMap Globals; - /// GlobalTypes - A map of globally visible types for this unit. /// StringMap GlobalTypes; @@ -86,7 +82,6 @@ public: // Accessors. 
unsigned getID() const { return ID; } DIE* getCUDie() const { return CUDie.get(); } - const StringMap &getGlobals() const { return Globals; } const StringMap &getGlobalTypes() const { return GlobalTypes; } const StringMap &getAccelNames() const { return AccelNames; } @@ -100,10 +95,6 @@ public: /// bool hasContent() const { return !CUDie->getChildren().empty(); } - /// addGlobal - Add a new global entity to the compile unit. - /// - void addGlobal(StringRef Name, DIE *Die) { Globals[Name] = Die; } - /// addGlobalType - Add a new global type to the compile unit. /// void addGlobalType(DIType Ty); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 085ad2c..be7d18a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -597,9 +597,6 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, // Add to context owner. TheCU->addToContextOwner(SubprogramDie, SP.getContext()); - // Expose as global. - TheCU->addGlobal(SP.getName(), SubprogramDie); - // Add to Accel Names TheCU->addAccelName(SP.getName(), SubprogramDie); @@ -823,9 +820,6 @@ void DwarfDebug::endModule() { emitAccelTypes(); } - // Emit info into a debug pubnames section. - emitDebugPubNames(); - // Emit info into a debug pubtypes section. emitDebugPubTypes(); @@ -1567,7 +1561,6 @@ void DwarfDebug::EmitSectionLabels() { EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); EmitSectionSym(Asm, TLOF.getDwarfLocSection()); - EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str"); @@ -1871,57 +1864,6 @@ void DwarfDebug::emitAccelTypes() { AT.Emit(Asm, SectionBegin, this); } -/// emitDebugPubNames - Emit visible names into a debug pubnames section. 
-/// -void DwarfDebug::emitDebugPubNames() { - for (DenseMap::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { - CompileUnit *TheCU = I->second; - // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubNamesSection()); - - Asm->OutStreamer.AddComment("Length of Public Names Info"); - Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubnames_end", TheCU->getID()), - Asm->GetTempSymbol("pubnames_begin", TheCU->getID()), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", - TheCU->getID())); - - Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - - Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), - DwarfInfoSectionSym); - - Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()), - Asm->GetTempSymbol("info_begin", TheCU->getID()), - 4); - - const StringMap &Globals = TheCU->getGlobals(); - for (StringMap::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; - - Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(Entity->getOffset()); - - if (Asm->isVerbose()) - Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); - } - - Asm->OutStreamer.AddComment("End Mark"); - Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", - TheCU->getID())); - } -} - void DwarfDebug::emitDebugPubTypes() { for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 174333e..16e9137 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -380,12 +380,9 @@ private: void 
emitAccelNamespaces(); /// emitAccelTypes() - Emit type dies into a hashed accelerator table. + /// void emitAccelTypes(); - /// emitDebugPubNames - Emit visible names into a debug pubnames section. - /// - void emitDebugPubNames(); - /// emitDebugPubTypes - Emit visible types into a debug pubtypes section. /// void emitDebugPubTypes(); diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 1fcda31..d76e48b 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -186,10 +186,6 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { Ctx->getMachOSection("__DWARF", "__debug_frame", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getMetadata()); - DwarfPubNamesSection = - Ctx->getMachOSection("__DWARF", "__debug_pubnames", - MCSectionMachO::S_ATTR_DEBUG, - SectionKind::getMetadata()); DwarfPubTypesSection = Ctx->getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, @@ -365,9 +361,6 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { DwarfFrameSection = Ctx->getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); - DwarfPubNamesSection = - Ctx->getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0, - SectionKind::getMetadata()); DwarfPubTypesSection = Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); @@ -453,11 +446,6 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); - DwarfPubNamesSection = - Ctx->getCOFFSection(".debug_pubnames", - COFF::IMAGE_SCN_MEM_DISCARDABLE | - COFF::IMAGE_SCN_MEM_READ, - SectionKind::getMetadata()); DwarfPubTypesSection = Ctx->getCOFFSection(".debug_pubtypes", COFF::IMAGE_SCN_MEM_DISCARDABLE | -- cgit v1.1 From c6bcf4315c568610b89e4c79898cfe36ff2e4905 Mon Sep 17 00:00:00 2001 From: Devang Patel Date: Wed, 9 Nov 2011 06:20:49 +0000 Subject: Remove extra ';' git-svn-id: 
https://llvm.org/svn/llvm-project/llvm/trunk@144172 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index a0f64f0..0c1e949 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -96,7 +96,7 @@ class DwarfAccelTable { TableHeader (uint32_t data_len) : magic (MagicHash), version (1), hash_function (eHashFunctionDJB), bucket_count (0), hashes_count (0), header_data_len (data_len) - {}; + {} #ifndef NDEBUG void print(raw_ostream &O) { @@ -140,7 +140,7 @@ public: AtomType type; // enum AtomType uint16_t form; // DWARF DW_FORM_ defines - Atom(AtomType type, uint16_t form) : type(type), form(form) {}; + Atom(AtomType type, uint16_t form) : type(type), form(form) {} static const char * AtomTypeString(enum AtomType); #ifndef NDEBUG void print(raw_ostream &O) { -- cgit v1.1 From 89d093d5b69d21b5a4f81b969597bd76b6327cb5 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 9 Nov 2011 07:11:37 +0000 Subject: Don't forget to check FlagNW when determining whether an AddRecExpr will wrap or not. Patch by Brendon Cahoon! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144173 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index f65cf34..b940d93 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -6228,8 +6228,9 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, return getCouldNotCompute(); // Check to see if we have a flag which makes analysis easy. - bool NoWrap = isSigned ? AddRec->getNoWrapFlags(SCEV::FlagNSW) : - AddRec->getNoWrapFlags(SCEV::FlagNUW); + bool NoWrap = isSigned ? 
+ AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNSW | SCEV::FlagNW)) : + AddRec->getNoWrapFlags((SCEV::NoWrapFlags)(SCEV::FlagNUW | SCEV::FlagNW)); if (AddRec->isAffine()) { unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); -- cgit v1.1 From aaa643c70e6b252ac1f7b3de5950a1d6a6656690 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 9 Nov 2011 07:28:55 +0000 Subject: Add AVX2 instruction lowering for add, sub, and mul. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144174 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 105 ++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c1f7592..69de3a7 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1031,25 +1031,42 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::SELECT, MVT::v4i64, Custom); setOperationAction(ISD::SELECT, MVT::v8f32, Custom); - setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); - setOperationAction(ISD::VSELECT, MVT::v4i64, Legal); - setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); - setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); - - setOperationAction(ISD::ADD, MVT::v4i64, Custom); - setOperationAction(ISD::ADD, MVT::v8i32, Custom); - setOperationAction(ISD::ADD, MVT::v16i16, Custom); - setOperationAction(ISD::ADD, MVT::v32i8, Custom); - - setOperationAction(ISD::SUB, MVT::v4i64, Custom); - setOperationAction(ISD::SUB, MVT::v8i32, Custom); - setOperationAction(ISD::SUB, MVT::v16i16, Custom); - setOperationAction(ISD::SUB, MVT::v32i8, Custom); - - setOperationAction(ISD::MUL, MVT::v4i64, Custom); - setOperationAction(ISD::MUL, MVT::v8i32, Custom); - setOperationAction(ISD::MUL, MVT::v16i16, Custom); - // Don't lower v32i8 because there is no 128-bit byte mul + setOperationAction(ISD::VSELECT, MVT::v4f64, Legal); + 
setOperationAction(ISD::VSELECT, MVT::v4i64, Legal); + setOperationAction(ISD::VSELECT, MVT::v8i32, Legal); + setOperationAction(ISD::VSELECT, MVT::v8f32, Legal); + + if (Subtarget->hasAVX2()) { + setOperationAction(ISD::ADD, MVT::v4i64, Legal); + setOperationAction(ISD::ADD, MVT::v8i32, Legal); + setOperationAction(ISD::ADD, MVT::v16i16, Legal); + setOperationAction(ISD::ADD, MVT::v32i8, Legal); + + setOperationAction(ISD::SUB, MVT::v4i64, Legal); + setOperationAction(ISD::SUB, MVT::v8i32, Legal); + setOperationAction(ISD::SUB, MVT::v16i16, Legal); + setOperationAction(ISD::SUB, MVT::v32i8, Legal); + + setOperationAction(ISD::MUL, MVT::v4i64, Custom); + setOperationAction(ISD::MUL, MVT::v8i32, Legal); + setOperationAction(ISD::MUL, MVT::v16i16, Legal); + // Don't lower v32i8 because there is no 128-bit byte mul + } else { + setOperationAction(ISD::ADD, MVT::v4i64, Custom); + setOperationAction(ISD::ADD, MVT::v8i32, Custom); + setOperationAction(ISD::ADD, MVT::v16i16, Custom); + setOperationAction(ISD::ADD, MVT::v32i8, Custom); + + setOperationAction(ISD::SUB, MVT::v4i64, Custom); + setOperationAction(ISD::SUB, MVT::v8i32, Custom); + setOperationAction(ISD::SUB, MVT::v16i16, Custom); + setOperationAction(ISD::SUB, MVT::v32i8, Custom); + + setOperationAction(ISD::MUL, MVT::v4i64, Custom); + setOperationAction(ISD::MUL, MVT::v8i32, Custom); + setOperationAction(ISD::MUL, MVT::v16i16, Custom); + // Don't lower v32i8 because there is no 128-bit byte mul + } // Custom lower several nodes for 256-bit types. for (unsigned i = (unsigned)MVT::FIRST_VECTOR_VALUETYPE; @@ -10004,12 +10021,55 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); // Decompose 256-bit ops into smaller 128-bit ops. 
- if (VT.getSizeInBits() == 256) + if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) return Lower256IntArith(Op, DAG); - assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply"); DebugLoc dl = Op.getDebugLoc(); + SDValue A = Op.getOperand(0); + SDValue B = Op.getOperand(1); + + if (VT == MVT::v4i64) { + assert(Subtarget->hasAVX2() && "Lowering v4i64 multiply requires AVX2"); + + // ulong2 Ahi = __builtin_ia32_psrlqi256( a, 32); + // ulong2 Bhi = __builtin_ia32_psrlqi256( b, 32); + // ulong2 AloBlo = __builtin_ia32_pmuludq256( a, b ); + // ulong2 AloBhi = __builtin_ia32_pmuludq256( a, Bhi ); + // ulong2 AhiBlo = __builtin_ia32_pmuludq256( Ahi, b ); + // + // AloBhi = __builtin_ia32_psllqi256( AloBhi, 32 ); + // AhiBlo = __builtin_ia32_psllqi256( AhiBlo, 32 ); + // return AloBlo + AloBhi + AhiBlo; + + SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), + A, DAG.getConstant(32, MVT::i32)); + SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), + B, DAG.getConstant(32, MVT::i32)); + SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32), + A, B); + SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32), + A, Bhi); + SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pmulu_dq, MVT::i32), + Ahi, B); + AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), + AloBhi, DAG.getConstant(32, MVT::i32)); + AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), + AhiBlo, DAG.getConstant(32, MVT::i32)); + SDValue Res = DAG.getNode(ISD::ADD, dl, VT, AloBlo, AloBhi); + Res = DAG.getNode(ISD::ADD, dl, VT, Res, AhiBlo); + return Res; + } + + assert(VT == 
MVT::v2i64 && "Only know how to lower V2I64 multiply"); + // ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32); // ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32); // ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b ); @@ -10020,9 +10080,6 @@ SDValue X86TargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const { // AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 ); // return AloBlo + AloBhi + AhiBlo; - SDValue A = Op.getOperand(0); - SDValue B = Op.getOperand(1); - SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32), A, DAG.getConstant(32, MVT::i32)); -- cgit v1.1 From 0a15035f523a13177b83463779c8b07d8e2873ed Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 9 Nov 2011 08:06:13 +0000 Subject: Add instruction selection for AVX2 integer comparisons. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144176 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 15 +++++++-------- lib/Target/X86/X86InstrSSE.td | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 69de3a7..e5d3c91 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -8560,8 +8560,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { UNORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(3, MVT::i8)); EQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(0, MVT::i8)); return DAG.getNode(ISD::OR, dl, VT, UNORD, EQ); - } - else if (SetCCOpcode == ISD::SETONE) { + } else if (SetCCOpcode == ISD::SETONE) { SDValue ORD, NEQ; ORD = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(7, MVT::i8)); NEQ = DAG.getNode(Opc, dl, VT, Op0, Op1, DAG.getConstant(4, MVT::i8)); @@ -8574,7 +8573,7 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { } // Break 256-bit integer vector compare 
into smaller ones. - if (!isFP && VT.getSizeInBits() == 256) + if (VT.getSizeInBits() == 256 && !Subtarget->hasAVX2()) return Lower256IntVSETCC(Op, DAG); // We are handling one of the integer comparisons here. Since SSE only has @@ -8583,12 +8582,12 @@ SDValue X86TargetLowering::LowerVSETCC(SDValue Op, SelectionDAG &DAG) const { unsigned Opc = 0, EQOpc = 0, GTOpc = 0; bool Swap = false, Invert = false, FlipSigns = false; - switch (VT.getSimpleVT().SimpleTy) { + switch (VT.getVectorElementType().getSimpleVT().SimpleTy) { default: break; - case MVT::v16i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; - case MVT::v8i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; - case MVT::v4i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; - case MVT::v2i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; + case MVT::i8: EQOpc = X86ISD::PCMPEQB; GTOpc = X86ISD::PCMPGTB; break; + case MVT::i16: EQOpc = X86ISD::PCMPEQW; GTOpc = X86ISD::PCMPGTW; break; + case MVT::i32: EQOpc = X86ISD::PCMPEQD; GTOpc = X86ISD::PCMPGTD; break; + case MVT::i64: EQOpc = X86ISD::PCMPEQQ; GTOpc = X86ISD::PCMPGTQ; break; } switch (SetCCOpcode) { diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 4b6ba5d..8648d48 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -3917,6 +3917,32 @@ let Predicates = [HasAVX2] in { VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; defm VPCMPGTDY : PDI_binop_rm_int<0x66, "vpcmpgtd", int_x86_avx2_pcmpgt_d, VR256, memopv4i64, i256mem, 0, 0>, VEX_4V; + + def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, VR256:$src2)), + (VPCMPEQBYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v32i8 (X86pcmpeqb VR256:$src1, (memop addr:$src2))), + (VPCMPEQBYrm VR256:$src1, addr:$src2)>; + def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, VR256:$src2)), + (VPCMPEQWYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v16i16 (X86pcmpeqw VR256:$src1, (memop addr:$src2))), + (VPCMPEQWYrm VR256:$src1, addr:$src2)>; + def : 
Pat<(v8i32 (X86pcmpeqd VR256:$src1, VR256:$src2)), + (VPCMPEQDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86pcmpeqd VR256:$src1, (memop addr:$src2))), + (VPCMPEQDYrm VR256:$src1, addr:$src2)>; + + def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, VR256:$src2)), + (VPCMPGTBYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v32i8 (X86pcmpgtb VR256:$src1, (memop addr:$src2))), + (VPCMPGTBYrm VR256:$src1, addr:$src2)>; + def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, VR256:$src2)), + (VPCMPGTWYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v16i16 (X86pcmpgtw VR256:$src1, (memop addr:$src2))), + (VPCMPGTWYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, VR256:$src2)), + (VPCMPGTDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86pcmpgtd VR256:$src1, (memop addr:$src2))), + (VPCMPGTDYrm VR256:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in { @@ -6325,6 +6351,11 @@ let Predicates = [HasAVX2] in { int_x86_avx2_pmaxu_w>, VEX_4V; defm VPMULDQ : SS41I_binop_rm_int_y<0x28, "vpmuldq", int_x86_avx2_pmul_dq>, VEX_4V; + + def : Pat<(v4i64 (X86pcmpeqq VR256:$src1, VR256:$src2)), + (VPCMPEQQYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86pcmpeqq VR256:$src1, (memop addr:$src2))), + (VPCMPEQQYrm VR256:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in { @@ -6647,6 +6678,11 @@ let Predicates = [HasAVX] in { let Predicates = [HasAVX2] in { defm VPCMPGTQ : SS42I_binop_rm_int_y<0x37, "vpcmpgtq", int_x86_avx2_pcmpgt_q>, VEX_4V; + + def : Pat<(v4i64 (X86pcmpgtq VR256:$src1, VR256:$src2)), + (VPCMPGTQYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86pcmpgtq VR256:$src1, (memop addr:$src2))), + (VPCMPGTQYrm VR256:$src1, addr:$src2)>; } let Constraints = "$src1 = $dst" in -- cgit v1.1 From b80ada98c50df226e210eabc9547101c5dee2181 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 9 Nov 2011 09:37:21 +0000 Subject: Enable execution dependency fix pass for YMM registers when AVX2 is enabled. 
Add AVX2 logical operations to list of replaceable instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144179 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 8 ++++++++ lib/Target/X86/X86TargetMachine.cpp | 16 ++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 8dcd637..102911f 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -3367,6 +3367,14 @@ static const unsigned ReplaceableInstrs[][3] = { { X86::VMOVUPSYmr, X86::VMOVUPDYmr, X86::VMOVDQUYmr }, { X86::VMOVUPSYrm, X86::VMOVUPDYrm, X86::VMOVDQUYrm }, { X86::VMOVNTPSYmr, X86::VMOVNTPDYmr, X86::VMOVNTDQYmr }, + { X86::VANDNPSYrm, X86::VANDNPDYrm, X86::VPANDNYrm }, + { X86::VANDNPSYrr, X86::VANDNPDYrr, X86::VPANDNYrr }, + { X86::VANDPSYrm, X86::VANDPDYrm, X86::VPANDYrm }, + { X86::VANDPSYrr, X86::VANDPDYrr, X86::VPANDYrr }, + { X86::VORPSYrm, X86::VORPDYrm, X86::VPORYrm }, + { X86::VORPSYrr, X86::VORPDYrr, X86::VPORYrr }, + { X86::VXORPSYrm, X86::VXORPDYrm, X86::VPXORYrm }, + { X86::VXORPSYrr, X86::VXORPDYrr, X86::VPXORYrr }, }; // FIXME: Some shuffle and unpack instructions have equivalents in different diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 15c6c4e..4d4d7c0 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -135,10 +135,18 @@ bool X86TargetMachine::addPostRegAlloc(PassManagerBase &PM, bool X86TargetMachine::addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) { bool ShouldPrint = false; - if (OptLevel != CodeGenOpt::None && - (Subtarget.hasSSE2() || Subtarget.hasAVX())) { - PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass)); - ShouldPrint = true; + if (OptLevel != CodeGenOpt::None) { + if (Subtarget.hasXMMInt()) { + PM.add(createExecutionDependencyFixPass(&X86::VR128RegClass)); + ShouldPrint = true; 
+ } + if (Subtarget.hasAVX2()) { + // FIXME this should be turned on for just AVX, but the pass doesn't check + // that instructions are valid before replacing them and there are AVX2 + // integer instructions in the table. + PM.add(createExecutionDependencyFixPass(&X86::VR256RegClass)); + ShouldPrint = true; + } } if (Subtarget.hasAVX() && UseVZeroUpper) { -- cgit v1.1 From 0c45f7d1a7a158e94817b21b9cca5456414a4760 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 9 Nov 2011 12:12:04 +0000 Subject: Take advantage of the zero byte in StringMap when emitting dwarf stringpool entries. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144184 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index be7d18a..7015555 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1937,8 +1937,9 @@ void DwarfDebug::emitDebugStr() { Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first); // Emit the string itself. - Asm->OutStreamer.EmitBytes(Entries[i].second->getKey(), 0/*addrspace*/); - Asm->OutStreamer.EmitZeros(1, 0); + Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(), + Entries[i].second->getKeyLength()+1), + 0/*addrspace*/); } } -- cgit v1.1 From 2dd423987eafaa1d0bc5d0312283e24e60eeb2b5 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 9 Nov 2011 13:19:15 +0000 Subject: Simplify code. No functionality change. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144186 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index a68215f..c274b9d 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -371,10 +371,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, // it with DW_LNS_advance_line. if (Temp >= DWARF2_LINE_RANGE) { OS << char(dwarf::DW_LNS_advance_line); - SmallString<32> Tmp; - raw_svector_ostream OSE(Tmp); - MCObjectWriter::EncodeSLEB128(LineDelta, OSE); - OS << OSE.str(); + MCObjectWriter::EncodeSLEB128(LineDelta, OS); LineDelta = 0; Temp = 0 - DWARF2_LINE_BASE; @@ -410,10 +407,7 @@ void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, // Otherwise use DW_LNS_advance_pc. OS << char(dwarf::DW_LNS_advance_pc); - SmallString<32> Tmp; - raw_svector_ostream OSE(Tmp); - MCObjectWriter::EncodeULEB128(AddrDelta, OSE); - OS << OSE.str(); + MCObjectWriter::EncodeULEB128(AddrDelta, OS); if (NeedCopy) OS << char(dwarf::DW_LNS_copy); -- cgit v1.1 From bb539bf973bb861ffe8179ac8791fad19448cd52 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 9 Nov 2011 13:21:28 +0000 Subject: Add AVX2 support for vselect of v32i8 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144187 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 2 ++ lib/Target/X86/X86InstrSSE.td | 6 ++++++ 2 files changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e5d3c91..c34f225 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1050,6 +1050,8 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v4i64, Custom); setOperationAction(ISD::MUL, MVT::v8i32, Legal); setOperationAction(ISD::MUL, MVT::v16i16, Legal); + + 
setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); // Don't lower v32i8 because there is no 128-bit byte mul } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 8648d48..068e223 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -6568,6 +6568,12 @@ let Predicates = [HasAVX] in { (VBLENDVPDYrr VR256:$src2, VR256:$src1, VR256:$mask)>; } +let Predicates = [HasAVX2] in { + def : Pat<(v32i8 (vselect (v32i8 VR256:$mask), (v32i8 VR256:$src1), + (v32i8 VR256:$src2))), + (VPBLENDVBYrr VR256:$src2, VR256:$src1, VR256:$mask)>; +} + /// SS41I_ternary_int - SSE 4.1 ternary operator let Uses = [XMM0], Constraints = "$src1 = $dst" in { multiclass SS41I_ternary_int opc, string OpcodeStr, Intrinsic IntId> { -- cgit v1.1 From ef0b3ca3a8935b5390633dc7bb4adcdb99e0c26a Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Wed, 9 Nov 2011 14:20:48 +0000 Subject: Speculatively revert commit 144124 (djg) in the hope that the 32 bit dragonegg self-host buildbot will recover (it is complaining about object files differing between different build stages). Original commit message: Add a hack to the scheduler to disable pseudo-two-address dependencies in basic blocks containing calls. This works around a problem in which these artificial dependencies can get tied up in calling sequence scheduling in a way that makes the graph unschedulable with the current approach of using artificial physical register dependencies for calling sequences. This fixes PR11314.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144188 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index f965a5e..cab303d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1666,7 +1666,7 @@ public: protected: bool canClobber(const SUnit *SU, const SUnit *Op); - void AddPseudoTwoAddrDeps(const TargetInstrInfo *TII); + void AddPseudoTwoAddrDeps(); void PrescheduleNodesWithMultipleUses(); void CalculateSethiUllmanNumbers(); }; @@ -2628,7 +2628,7 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { void RegReductionPQBase::initNodes(std::vector &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(TII); + AddPseudoTwoAddrDeps(); // Reroute edges to nodes with multiple uses. if (!TracksRegPressure) PrescheduleNodesWithMultipleUses(); @@ -2855,17 +2855,7 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() { /// one that has a CopyToReg use (more likely to be a loop induction update). /// If both are two-address, but one is commutable while the other is not /// commutable, favor the one that's not commutable. -void RegReductionPQBase::AddPseudoTwoAddrDeps(const TargetInstrInfo *TII) { - // If the graph contains any calls, disable this optimization. - // FIXME: This is a kludge to work around the fact that the artificial edges - // can combine with the way call sequences use physical register dependencies - // to model their resource usage to create unschedulable graphs. 
- for (unsigned i = 0, e = SUnits->size(); i != e; ++i) - for (SDNode *Node = (*SUnits)[i].getNode(); Node; Node = Node->getGluedNode()) - if (Node->isMachineOpcode() && - Node->getMachineOpcode() == (unsigned)TII->getCallFrameSetupOpcode()) - return; - +void RegReductionPQBase::AddPseudoTwoAddrDeps() { for (unsigned i = 0, e = SUnits->size(); i != e; ++i) { SUnit *SU = &(*SUnits)[i]; if (!SU->isTwoAddress) -- cgit v1.1 From 983c4578b0e84eb3b4ae319b49a8079c5cd322c6 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Wed, 9 Nov 2011 18:16:11 +0000 Subject: Add comments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144194 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 7015555..edd4e45 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1901,6 +1901,7 @@ void DwarfDebug::emitDebugPubTypes() { Asm->EmitInt32(Entity->getOffset()); if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); + // Emit the name with a terminating null byte. Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0); } @@ -1936,7 +1937,7 @@ void DwarfDebug::emitDebugStr() { // Emit a label for reference from debug information entries. Asm->OutStreamer.EmitLabel(Entries[i].second->getValue().first); - // Emit the string itself. + // Emit the string itself with a terminating null byte. Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(), Entries[i].second->getKeyLength()+1), 0/*addrspace*/); -- cgit v1.1 From f4e0d5d83db8bf73328d59d35471d0edd0b984f4 Mon Sep 17 00:00:00 2001 From: Devang Patel Date: Wed, 9 Nov 2011 21:11:02 +0000 Subject: Remove unnecessary include. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144211 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCAsmPrinter.cpp | 1 - lib/Target/X86/X86AsmPrinter.h | 1 - 2 files changed, 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp index b8aad8f..56f622e 100644 --- a/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -50,7 +50,6 @@ #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/StringSet.h" #include "llvm/ADT/SmallString.h" #include "InstPrinter/PPCInstPrinter.h" using namespace llvm; diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index 3a50435..1058df5 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -17,7 +17,6 @@ #include "X86.h" #include "X86MachineFunctionInfo.h" #include "X86TargetMachine.h" -#include "llvm/ADT/StringSet.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/ValueTypes.h" -- cgit v1.1 From c6c7e85a71b3a9a7392beade7e345c1b79b66966 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Wed, 9 Nov 2011 21:22:13 +0000 Subject: AVX2: Add patterns for variable shift operations git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144212 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 12 ++++++++++++ lib/Target/X86/X86InstrSSE.td | 28 ++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index c34f225..93f7de8 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1052,6 +1052,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v16i16, Legal); setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); + + 
setOperationAction(ISD::SHL, MVT::v4i32, Legal); + setOperationAction(ISD::SHL, MVT::v2i64, Legal); + setOperationAction(ISD::SRL, MVT::v4i32, Legal); + setOperationAction(ISD::SRL, MVT::v2i64, Legal); + setOperationAction(ISD::SRA, MVT::v4i32, Legal); + + setOperationAction(ISD::SHL, MVT::v8i32, Legal); + setOperationAction(ISD::SHL, MVT::v4i64, Legal); + setOperationAction(ISD::SRL, MVT::v8i32, Legal); + setOperationAction(ISD::SRL, MVT::v4i64, Legal); + setOperationAction(ISD::SRA, MVT::v8i32, Legal); // Don't lower v32i8 because there is no 128-bit byte mul } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 068e223..ff4f749 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7689,3 +7689,31 @@ defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", memopv2i64, memopv4i64, VEX_W; defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", memopv4i32, memopv8i32, int_x86_avx2_psrav_d, int_x86_avx2_psrav_d_256>; + + +let Predicates = [HasAVX2] in { + def : Pat<(v4i32 (shl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), + (VPSLLVDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), + (VPSLLVQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (srl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), + (VPSRLVDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), + (VPSRLVQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (sra (v4i32 VR128:$src1), (v4i32 VR128:$src2))), + (VPSRAVDrr VR128:$src1, VR128:$src2)>; + + def : Pat<(v8i32 (shl (v8i32 VR256:$src1), (v8i32 VR256:$src2))), + (VPSLLVDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (v4i64 VR256:$src2))), + (VPSLLVQYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (srl (v8i32 VR256:$src1), (v8i32 VR256:$src2))), + (VPSRLVDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (v4i64 
VR256:$src2))), + (VPSRLVQYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (sra (v8i32 VR256:$src1), (v8i32 VR256:$src2))), + (VPSRAVDYrr VR256:$src1, VR256:$src2)>; +} + + + -- cgit v1.1 From 7346347674f03868e8c076c8c27a7f09f0a086c2 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Wed, 9 Nov 2011 21:30:12 +0000 Subject: The ARM LDRH/STRH instructions use a +/-imm8 encoding, not an imm12. rdar://10418009 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144213 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 23629e7..44c88aa 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -846,9 +846,17 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Unhandled load/store type!"); + case MVT::i16: + if (isThumb2) + // Integer loads/stores handle 12-bit offsets. + needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); + else + // ARM i16 integer loads/stores handle +/-imm8 offsets. + if (Addr.Offset > 255 || Addr.Offset < -255) + needsLowering = true; + break; case MVT::i1: case MVT::i8: - case MVT::i16: case MVT::i32: // Integer loads/stores handle 12-bit offsets. needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); @@ -932,14 +940,14 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) { switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; - case MVT::i16: - Opc = isThumb2 ? ARM::t2LDRHi12 : ARM::LDRH; - RC = ARM::GPRRegisterClass; - break; case MVT::i8: Opc = isThumb2 ? ARM::t2LDRBi12 : ARM::LDRBi12; RC = ARM::GPRRegisterClass; break; + case MVT::i16: + Opc = isThumb2 ? ARM::t2LDRHi12 : ARM::LDRH; + RC = ARM::GPRRegisterClass; + break; case MVT::i32: Opc = isThumb2 ? 
ARM::t2LDRi12 : ARM::LDRi12; RC = ARM::GPRRegisterClass; -- cgit v1.1 From 0948f0acca745eef8db6922edfd8836f717396b4 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Wed, 9 Nov 2011 22:25:12 +0000 Subject: Add check so we don't try to perform an impossible transformation. Fixes issue from PR11319. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144216 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 3596d6c..d7bad43 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1783,7 +1783,8 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::BITCAST: // If this is an FP->Int bitcast and if the sign bit is the only // thing demanded, turn this into a FGETSIGN. - if (!Op.getOperand(0).getValueType().isVector() && + if (!Op.getValueType().isVector() && + !Op.getOperand(0).getValueType().isVector() && NewMask == APInt::getSignBit(Op.getValueType().getSizeInBits()) && Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); -- cgit v1.1 From ffab7d0c4f581297e8bc1515eeb34e1b12b650c4 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Wed, 9 Nov 2011 22:45:04 +0000 Subject: Fix typo in comment. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144236 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/DIBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Analysis/DIBuilder.cpp b/lib/Analysis/DIBuilder.cpp index bfa429d..ed3e8f4 100644 --- a/lib/Analysis/DIBuilder.cpp +++ b/lib/Analysis/DIBuilder.cpp @@ -189,7 +189,7 @@ DIType DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, return DIType(MDNode::get(VMContext, Elts)); } -/// createQaulifiedType - Create debugging information entry for a qualified +/// createQualifiedType - Create debugging information entry for a qualified /// type, e.g. 'const int'. DIType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { // Qualified types are encoded in DIDerivedType format. -- cgit v1.1 From 5ccb0825ed1bdf6271ef451b8239e86d4ff635b1 Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Wed, 9 Nov 2011 23:07:35 +0000 Subject: DeadStoreElimination can now trim the size of a store if the end of the store is dead. Currently checks alignment and killing stores on a power of 2 boundary as this is likely to trim the size of the earlier store without breaking large vector stores into scalar ones. Fixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144239 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/DeadStoreElimination.cpp | 143 +++++++++++++++++++------ 1 file changed, 110 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index f114418..03a557e 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -241,6 +241,24 @@ static bool isRemovable(Instruction *I) { } } + +/// isShortenable - Returns true if this instruction can be safely shortened in +/// length. 
+static bool isShortenable(Instruction *I) { + // Don't shorten stores for now + if (isa<StoreInst>(I)) + return false; + + IntrinsicInst *II = cast<IntrinsicInst>(I); + switch (II->getIntrinsicID()) { + default: return false; + case Intrinsic::memset: + case Intrinsic::memcpy: + // Do shorten memory intrinsics. + return true; + } +} + /// getStoredPointerOperand - Return the pointer that is being written to. static Value *getStoredPointerOperand(Instruction *I) { if (StoreInst *SI = dyn_cast<StoreInst>(I)) @@ -295,11 +313,24 @@ static bool isObjectPointerWithTrustworthySize(const Value *V) { return false; } -/// isCompleteOverwrite - Return true if a store to the 'Later' location +namespace { + enum OverwriteResult + { + OverwriteComplete, + OverwriteEnd, + OverwriteUnknown + }; +} + +/// isOverwrite - Return 'OverwriteComplete' if a store to the 'Later' location /// completely overwrites a store to the 'Earlier' location. -static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, - const AliasAnalysis::Location &Earlier, - AliasAnalysis &AA) { +/// 'OverwriteEnd' if the end of the 'Earlier' location is completely +/// overwritten by 'Later', or 'OverwriteUnknown' if nothing can be determined +static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, + const AliasAnalysis::Location &Earlier, + AliasAnalysis &AA, + int64_t& EarlierOff, + int64_t& LaterOff) { const Value *P1 = Earlier.Ptr->stripPointerCasts(); const Value *P2 = Later.Ptr->stripPointerCasts(); @@ -313,23 +344,24 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, // If we have no TargetData information around, then the size of the store // is inferrable from the pointee type. If they are the same type, then // we know that the store is safe.
- if (AA.getTargetData() == 0) - return Later.Ptr->getType() == Earlier.Ptr->getType(); - return false; + if (AA.getTargetData() == 0 && + Later.Ptr->getType() == Earlier.Ptr->getType()) + return OverwriteComplete; + + return OverwriteUnknown; } // Make sure that the Later size is >= the Earlier size. - if (Later.Size < Earlier.Size) - return false; - return true; + if (Later.Size >= Earlier.Size) + return OverwriteComplete; } // Otherwise, we have to have size information, and the later store has to be // larger than the earlier one. if (Later.Size == AliasAnalysis::UnknownSize || Earlier.Size == AliasAnalysis::UnknownSize || - Later.Size <= Earlier.Size || AA.getTargetData() == 0) - return false; + AA.getTargetData() == 0) + return OverwriteUnknown; // Check to see if the later store is to the entire object (either a global, // an alloca, or a byval argument). If so, then it clearly overwrites any @@ -342,26 +374,27 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, // If we can't resolve the same pointers to the same object, then we can't // analyze them at all. if (UO1 != UO2) - return false; + return OverwriteUnknown; // If the "Later" store is to a recognizable object, get its size. if (isObjectPointerWithTrustworthySize(UO2)) { uint64_t ObjectSize = TD.getTypeAllocSize(cast<PointerType>(UO2->getType())->getElementType()); if (ObjectSize == Later.Size) - return true; + return OverwriteComplete; } // Okay, we have stores to two completely different pointers. Try to // decompose the pointer into a "base + constant_offset" form. If the base // pointers are equal, then we can reason about the two stores. - int64_t EarlierOff = 0, LaterOff = 0; + EarlierOff = 0; + LaterOff = 0; const Value *BP1 = GetPointerBaseWithConstantOffset(P1, EarlierOff, TD); const Value *BP2 = GetPointerBaseWithConstantOffset(P2, LaterOff, TD); // If the base pointers still differ, we have two completely different stores.
if (BP1 != BP2) - return false; + return OverwriteUnknown; // The later store completely overlaps the earlier store if: // @@ -379,11 +412,25 @@ static bool isCompleteOverwrite(const AliasAnalysis::Location &Later, // // We have to be careful here as *Off is signed while *.Size is unsigned. if (EarlierOff >= LaterOff && + Later.Size > Earlier.Size && uint64_t(EarlierOff - LaterOff) + Earlier.Size <= Later.Size) - return true; + return OverwriteComplete; + + // The other interesting case is if the later store overwrites the end of + // the earlier store + // + // |--earlier--| + // |-- later --| + // + // In this case we may want to trim the size of earlier to avoid generating + // writes to addresses which will definitely be overwritten later + if (LaterOff > EarlierOff && + LaterOff < int64_t(EarlierOff + Earlier.Size) && + LaterOff + Later.Size >= EarlierOff + Earlier.Size) + return OverwriteEnd; // Otherwise, they don't completely overlap. - return false; + return OverwriteUnknown; } /// isPossibleSelfRead - If 'Inst' might be a self read (i.e. a noop copy of a @@ -507,22 +554,52 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { // If we find a write that is a) removable (i.e., non-volatile), b) is // completely obliterated by the store to 'Loc', and c) which we know that // 'Inst' doesn't load from, then we can remove it. - if (isRemovable(DepWrite) && isCompleteOverwrite(Loc, DepLoc, *AA) && + if (isRemovable(DepWrite) && !isPossibleSelfRead(Inst, Loc, DepWrite, *AA)) { - DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " - << *DepWrite << "\n KILLER: " << *Inst << '\n'); - - // Delete the store and now-dead instructions that feed it. - DeleteDeadInstruction(DepWrite, *MD); - ++NumFastStores; - MadeChange = true; - - // DeleteDeadInstruction can delete the current instruction in loop - // cases, reset BBI. 
- BBI = Inst; - if (BBI != BB.begin()) - --BBI; - break; + int64_t InstWriteOffset, DepWriteOffset; + OverwriteResult OR = isOverwrite(Loc, DepLoc, *AA, + DepWriteOffset, InstWriteOffset); + if (OR == OverwriteComplete) { + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " + << *DepWrite << "\n KILLER: " << *Inst << '\n'); + + // Delete the store and now-dead instructions that feed it. + DeleteDeadInstruction(DepWrite, *MD); + ++NumFastStores; + MadeChange = true; + + // DeleteDeadInstruction can delete the current instruction in loop + // cases, reset BBI. + BBI = Inst; + if (BBI != BB.begin()) + --BBI; + break; + } else if (OR == OverwriteEnd && isShortenable(DepWrite)) { + // TODO: base this on the target vector size so that if the earlier + // store was too small to get vector writes anyway then its likely + // a good idea to shorten it + // Power of 2 vector writes are probably always a bad idea to optimize + // as any store/memset/memcpy is likely using vector instructions so + // shortening it to not vector size is likely to be slower + MemIntrinsic* DepIntrinsic = cast<MemIntrinsic>(DepWrite); + unsigned DepWriteAlign = DepIntrinsic->getAlignment(); + if (llvm::isPowerOf2_64(InstWriteOffset) || + ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { + + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " + << *DepWrite << "\n KILLER (offset " + << InstWriteOffset << ", " + << DepLoc.Size << ")" + << *Inst << '\n'); + + Value* DepWriteLength = DepIntrinsic->getLength(); + Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(), + InstWriteOffset - + DepWriteOffset); + DepIntrinsic->setLength(TrimmedLength); + MadeChange = true; + } + } } // If this is a may-aliased store that is clobbering the store value, we -- cgit v1.1 From 14e809c87210877a675977d247ff4453db82d9b2 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Wed, 9 Nov 2011 23:36:02 +0000 Subject: Make sure we correctly unroll conversions between v2f64 and v2i32 on ARM.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144241 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index c51e7ae..6f2b3b8 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -110,7 +110,12 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::SETCC, VT.getSimpleVT(), Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, VT.getSimpleVT(), Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT.getSimpleVT(), Custom); - if (ElemTy != MVT::i32) { + if (ElemTy == MVT::i32) { + setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Custom); + setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Custom); + setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Custom); + setOperationAction(ISD::FP_TO_UINT, VT.getSimpleVT(), Custom); + } else { setOperationAction(ISD::SINT_TO_FP, VT.getSimpleVT(), Expand); setOperationAction(ISD::UINT_TO_FP, VT.getSimpleVT(), Expand); setOperationAction(ISD::FP_TO_SINT, VT.getSimpleVT(), Expand); @@ -3018,7 +3023,20 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { } } +static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + assert(VT.getVectorElementType() == MVT::i32 && "Unexpected custom lowering"); + + if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) + return Op; + return DAG.UnrollVectorOp(Op.getNode()); +} + static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { + EVT VT = Op.getValueType(); + if (VT.isVector()) + return LowerVectorFP_TO_INT(Op, DAG); + DebugLoc dl = Op.getDebugLoc(); unsigned Opc; @@ -3040,6 +3058,12 @@ static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); DebugLoc dl = 
Op.getDebugLoc(); + if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { + if (VT.getVectorElementType() == MVT::f32) + return Op; + return DAG.UnrollVectorOp(Op.getNode()); + } + assert(Op.getOperand(0).getValueType() == MVT::v4i16 && "Invalid type for custom lowering!"); if (VT != MVT::v4f32) -- cgit v1.1 From 3c5d6e4df495316c0d2e0a7bca5ec7a88aa400a5 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Wed, 9 Nov 2011 23:44:23 +0000 Subject: Thumb2 assembly parsing STMDB w/ optional .w suffix. rdar://10422955 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144242 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrThumb2.td | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index d065107..0a28226 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -3971,6 +3971,12 @@ def : t2InstAlias<"push${p} $regs", (t2STMDB_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p}.w $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; def : t2InstAlias<"pop${p} $regs", (t2LDMIA_UPD SP, pred:$p, reglist:$regs)>; +// STMDB/STMDB_UPD aliases w/ the optional .w suffix +def : t2InstAlias<"stmdb${p}.w $Rn, $regs", + (t2STMDB GPR:$Rn, pred:$p, reglist:$regs)>; +def : t2InstAlias<"stmdb${p}.w $Rn!, $regs", + (t2STMDB_UPD GPR:$Rn, pred:$p, reglist:$regs)>; + // LDMDB/LDMDB_UPD aliases w/ the optional .w suffix def : t2InstAlias<"ldmdb${p}.w $Rn, $regs", (t2LDMDB GPR:$Rn, pred:$p, reglist:$regs)>; -- cgit v1.1 From c27f6725b9ff019ca0bfc317669ed1db9d95bad5 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 10 Nov 2011 00:02:33 +0000 Subject: Tidy up. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144244 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 70316fd..af98af8 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4990,18 +4990,6 @@ def : MnemonicAlias<"uqsubaddx", "uqsax">; // USAX == USUBADDX def : MnemonicAlias<"usubaddx", "usax">; -// LDRSBT/LDRHT/LDRSHT post-index offset if optional. -// Note that the write-back output register is a dummy operand for MC (it's -// only meaningful for codegen), so we just pass zero here. -// FIXME: tblgen not cooperating with argument conversions. -//def : InstAlias<"ldrsbt${p} $Rt, $addr", -// (LDRSBTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0,pred:$p)>; -//def : InstAlias<"ldrht${p} $Rt, $addr", -// (LDRHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>; -//def : InstAlias<"ldrsht${p} $Rt, $addr", -// (LDRSHTi GPR:$Rt, GPR:$Rt, addr_offset_none:$addr, 0, pred:$p)>; - - // "mov Rd, so_imm_not" can be handled via "mvn" in assembly, just like // for isel. def : ARMInstAlias<"mov${s}${p} $Rd, $imm", -- cgit v1.1 From 17afb0664861507c9bd968f1678d8cca69b2851d Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Thu, 10 Nov 2011 00:17:03 +0000 Subject: Strip old implicit operands after foldMemoryOperand. The TII.foldMemoryOperand hook preserves implicit operands from the original instruction. This is not what we want when those implicit operands refer to the register being spilled. Implicit operands referring to other registers are preserved. This fixes PR11347. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144247 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/InlineSpiller.cpp | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 726af46..d1e3f1a 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -1017,14 +1017,18 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, const SmallVectorImpl<unsigned> &Ops, MachineInstr *LoadMI) { bool WasCopy = MI->isCopy(); + unsigned ImpReg = 0; + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector<unsigned, 8> FoldOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { unsigned Idx = Ops[i]; MachineOperand &MO = MI->getOperand(Idx); - if (MO.isImplicit()) + if (MO.isImplicit()) { + ImpReg = MO.getReg(); continue; + } // FIXME: Teach targets to deal with subregs. if (MO.getSubReg()) return false; @@ -1045,7 +1049,20 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, if (!LoadMI) VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); - DEBUG(dbgs() << "\tfolded: " << *FoldMI); + + // TII.foldMemoryOperand may have left some implicit operands on the + // instruction. Strip them. + if (ImpReg) + for (unsigned i = FoldMI->getNumOperands(); i; --i) { + MachineOperand &MO = FoldMI->getOperand(i - 1); + if (!MO.isReg() || !MO.isImplicit()) + break; + if (MO.getReg() == ImpReg) + FoldMI->RemoveOperand(i - 1); + } + + DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t' + << *FoldMI); if (!WasCopy) ++NumFolded; else if (Ops.front() == 0) -- cgit v1.1 From c352caf168094c83f05a8010ca14c2e643dbf618 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Thu, 10 Nov 2011 00:49:51 +0000 Subject: llvm-build: Add an explicit component type to represent targets. - Gives us a place to hang target specific metadata (like whether the target has a JIT).
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144250 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/LLVMBuild.txt | 3 ++- lib/Target/CBackend/LLVMBuild.txt | 2 +- lib/Target/CellSPU/LLVMBuild.txt | 2 +- lib/Target/CppBackend/LLVMBuild.txt | 2 +- lib/Target/MBlaze/LLVMBuild.txt | 2 +- lib/Target/MSP430/LLVMBuild.txt | 2 +- lib/Target/Mips/LLVMBuild.txt | 3 ++- lib/Target/PTX/LLVMBuild.txt | 2 +- lib/Target/PowerPC/LLVMBuild.txt | 3 ++- lib/Target/Sparc/LLVMBuild.txt | 2 +- lib/Target/X86/LLVMBuild.txt | 3 ++- lib/Target/XCore/LLVMBuild.txt | 2 +- 12 files changed, 16 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt index a7f209c..cfac6ac 100644 --- a/lib/Target/ARM/LLVMBuild.txt +++ b/lib/Target/ARM/LLVMBuild.txt @@ -16,10 +16,11 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = ARM parent = Target add_to_library_groups = all-targets +has_jit = 1 [component_1] type = Library diff --git a/lib/Target/CBackend/LLVMBuild.txt b/lib/Target/CBackend/LLVMBuild.txt index 1bc5ea1..7cd420b 100644 --- a/lib/Target/CBackend/LLVMBuild.txt +++ b/lib/Target/CBackend/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = CBackend parent = Target add_to_library_groups = all-targets diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt index 03e592c..e8db9f3 100644 --- a/lib/Target/CellSPU/LLVMBuild.txt +++ b/lib/Target/CellSPU/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = CellSPU parent = Target add_to_library_groups = all-targets diff --git a/lib/Target/CppBackend/LLVMBuild.txt b/lib/Target/CppBackend/LLVMBuild.txt index 
9602f57..447795f 100644 --- a/lib/Target/CppBackend/LLVMBuild.txt +++ b/lib/Target/CppBackend/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = CppBackend parent = Target add_to_library_groups = all-targets diff --git a/lib/Target/MBlaze/LLVMBuild.txt b/lib/Target/MBlaze/LLVMBuild.txt index 7bea268..b953e3d 100644 --- a/lib/Target/MBlaze/LLVMBuild.txt +++ b/lib/Target/MBlaze/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = MBlaze parent = Target add_to_library_groups = all-targets diff --git a/lib/Target/MSP430/LLVMBuild.txt b/lib/Target/MSP430/LLVMBuild.txt index 3081146..8b0b1f6 100644 --- a/lib/Target/MSP430/LLVMBuild.txt +++ b/lib/Target/MSP430/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = MSP430 parent = Target add_to_library_groups = all-targets diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt index b9d9abf..bcec4a9 100644 --- a/lib/Target/Mips/LLVMBuild.txt +++ b/lib/Target/Mips/LLVMBuild.txt @@ -16,10 +16,11 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = Mips parent = Target add_to_library_groups = all-targets +has_jit = 1 [component_1] type = Library diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt index 2d24524..27119c2 100644 --- a/lib/Target/PTX/LLVMBuild.txt +++ b/lib/Target/PTX/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = PTX parent = Target add_to_library_groups = all-targets diff --git 
a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt index 4a93587..3c439f3 100644 --- a/lib/Target/PowerPC/LLVMBuild.txt +++ b/lib/Target/PowerPC/LLVMBuild.txt @@ -16,10 +16,11 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = PowerPC parent = Target add_to_library_groups = all-targets +has_jit = 1 [component_1] type = Library diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt index 21cd91f..f59cc2e 100644 --- a/lib/Target/Sparc/LLVMBuild.txt +++ b/lib/Target/Sparc/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = Sparc parent = Target add_to_library_groups = all-targets diff --git a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt index b0958f3..7f48a9e 100644 --- a/lib/Target/X86/LLVMBuild.txt +++ b/lib/Target/X86/LLVMBuild.txt @@ -16,10 +16,11 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = X86 parent = Target add_to_library_groups = all-targets +has_jit = 1 [component_1] type = Library diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt index 321f25f..41943c3 100644 --- a/lib/Target/XCore/LLVMBuild.txt +++ b/lib/Target/XCore/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [component_0] -type = LibraryGroup +type = TargetGroup name = XCore parent = Target add_to_library_groups = all-targets -- cgit v1.1 From 6852b69e15c4b1078e23d6a5abd023ce48d4576a Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Thu, 10 Nov 2011 00:49:55 +0000 Subject: llvm-build: Change CBackend and CppBackend to not use library_name. 
This will change the generated library .a file name once we fully switch over, but simplifies how we treat these targets without requiring more special casing (since their library group name and the codegen library name currently map to the same "llvm-config" style component name). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144251 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CBackend/LLVMBuild.txt | 1 - lib/Target/CppBackend/LLVMBuild.txt | 1 - 2 files changed, 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/CBackend/LLVMBuild.txt b/lib/Target/CBackend/LLVMBuild.txt index 7cd420b..0b1f885 100644 --- a/lib/Target/CBackend/LLVMBuild.txt +++ b/lib/Target/CBackend/LLVMBuild.txt @@ -25,7 +25,6 @@ add_to_library_groups = all-targets type = Library name = CBackendCodeGen parent = CBackend -library_name = CBackend required_libraries = Analysis CBackendInfo CodeGen Core MC Scalar Support Target TransformUtils add_to_library_groups = CBackend diff --git a/lib/Target/CppBackend/LLVMBuild.txt b/lib/Target/CppBackend/LLVMBuild.txt index 447795f..79ad4c1 100644 --- a/lib/Target/CppBackend/LLVMBuild.txt +++ b/lib/Target/CppBackend/LLVMBuild.txt @@ -25,7 +25,6 @@ add_to_library_groups = all-targets type = Library name = CppBackendCodeGen parent = CppBackend -library_name = CppBackend required_libraries = Core CppBackendInfo Support Target add_to_library_groups = CppBackend -- cgit v1.1 From affc6cf9d2b2b74532ce82027ac4524d1e29a658 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Thu, 10 Nov 2011 00:50:07 +0000 Subject: llvm-build: Add --native-target and --enable-targets options, and add logic to handle defining the "magic" target related components (like native, nativecodegen, and engine). - We still require these components to be in the project (currently in lib/Target) so that we have a place to document them and hopefully make it more obvious that they are "magic". 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144253 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/LLVMBuild.txt | 1 - lib/Target/CBackend/LLVMBuild.txt | 1 - lib/Target/CellSPU/LLVMBuild.txt | 1 - lib/Target/CppBackend/LLVMBuild.txt | 1 - lib/Target/LLVMBuild.txt | 20 ++++++++++++++++---- lib/Target/MBlaze/LLVMBuild.txt | 1 - lib/Target/MSP430/LLVMBuild.txt | 1 - lib/Target/Mips/LLVMBuild.txt | 1 - lib/Target/PTX/LLVMBuild.txt | 1 - lib/Target/PowerPC/LLVMBuild.txt | 1 - lib/Target/Sparc/LLVMBuild.txt | 1 - lib/Target/X86/LLVMBuild.txt | 1 - lib/Target/XCore/LLVMBuild.txt | 1 - 13 files changed, 16 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt index cfac6ac..79cae91 100644 --- a/lib/Target/ARM/LLVMBuild.txt +++ b/lib/Target/ARM/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = ARM parent = Target -add_to_library_groups = all-targets has_jit = 1 [component_1] diff --git a/lib/Target/CBackend/LLVMBuild.txt b/lib/Target/CBackend/LLVMBuild.txt index 0b1f885..851ded9 100644 --- a/lib/Target/CBackend/LLVMBuild.txt +++ b/lib/Target/CBackend/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = CBackend parent = Target -add_to_library_groups = all-targets [component_1] type = Library diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt index e8db9f3..a2127dd 100644 --- a/lib/Target/CellSPU/LLVMBuild.txt +++ b/lib/Target/CellSPU/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = CellSPU parent = Target -add_to_library_groups = all-targets [component_1] type = Library diff --git a/lib/Target/CppBackend/LLVMBuild.txt b/lib/Target/CppBackend/LLVMBuild.txt index 79ad4c1..77e31c7 100644 --- a/lib/Target/CppBackend/LLVMBuild.txt +++ b/lib/Target/CppBackend/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = CppBackend parent = Target -add_to_library_groups = all-targets [component_1] type = Library diff --git 
a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 073a76a..09dadd6 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -21,20 +21,32 @@ name = Target parent = Libraries required_libraries = Core MC Support +; This is a convenient group we define (and expect targets to add to) which +; makes it easy for tools to include every target. [component_1] type = LibraryGroup name = all-targets parent = Libraries +; This is a special group whose required libraries are extended (by llvm-build) +; with the configured native target, if any. [component_2] type = LibraryGroup -name = native +name = Native parent = Libraries -required_libraries = X86 +; This is a special group whose required libraries are extended (by llvm-build) +; with the configured native code generator, if any. [component_3] type = LibraryGroup -name = nativecodegen +name = NativeCodeGen +parent = Libraries + +; This is a special group whose required libraries are extended (by llvm-build) +; with the best execution engine (the native JIT, if available, or the +; interpreter). 
+[component_4] +type = LibraryGroup +name = Engine parent = Libraries -required_libraries = X86CodeGen diff --git a/lib/Target/MBlaze/LLVMBuild.txt b/lib/Target/MBlaze/LLVMBuild.txt index b953e3d..fa89552 100644 --- a/lib/Target/MBlaze/LLVMBuild.txt +++ b/lib/Target/MBlaze/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = MBlaze parent = Target -add_to_library_groups = all-targets [component_1] type = Library diff --git a/lib/Target/MSP430/LLVMBuild.txt b/lib/Target/MSP430/LLVMBuild.txt index 8b0b1f6..9ade110 100644 --- a/lib/Target/MSP430/LLVMBuild.txt +++ b/lib/Target/MSP430/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = MSP430 parent = Target -add_to_library_groups = all-targets [component_1] type = Library diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt index bcec4a9..65c7d7f 100644 --- a/lib/Target/Mips/LLVMBuild.txt +++ b/lib/Target/Mips/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = Mips parent = Target -add_to_library_groups = all-targets has_jit = 1 [component_1] diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt index 27119c2..180e7ce 100644 --- a/lib/Target/PTX/LLVMBuild.txt +++ b/lib/Target/PTX/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = PTX parent = Target -add_to_library_groups = all-targets [component_1] type = Library diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt index 3c439f3..ce8b2e9 100644 --- a/lib/Target/PowerPC/LLVMBuild.txt +++ b/lib/Target/PowerPC/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = PowerPC parent = Target -add_to_library_groups = all-targets has_jit = 1 [component_1] diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt index f59cc2e..d99057f 100644 --- a/lib/Target/Sparc/LLVMBuild.txt +++ b/lib/Target/Sparc/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = Sparc parent = Target -add_to_library_groups = all-targets [component_1] type = Library diff --git 
a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt index 7f48a9e..814e81b 100644 --- a/lib/Target/X86/LLVMBuild.txt +++ b/lib/Target/X86/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = X86 parent = Target -add_to_library_groups = all-targets has_jit = 1 [component_1] diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt index 41943c3..f1b7574 100644 --- a/lib/Target/XCore/LLVMBuild.txt +++ b/lib/Target/XCore/LLVMBuild.txt @@ -19,7 +19,6 @@ type = TargetGroup name = XCore parent = Target -add_to_library_groups = all-targets [component_1] type = Library -- cgit v1.1 From 83337302595a6b85523be8522c2b6105c8e159d1 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Thu, 10 Nov 2011 01:16:48 +0000 Subject: build/Make & CMake: Pass the appropriate --native-target and --enable-targets options to llvm-build, so the all-targets etc. components are defined properly. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144255 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/LLVMBuild.txt | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 09dadd6..60f5230 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -21,8 +21,9 @@ name = Target parent = Libraries required_libraries = Core MC Support -; This is a convenient group we define (and expect targets to add to) which -; makes it easy for tools to include every target. +; This is a special group whose required libraries are extended (by llvm-build) +; with every built target, which makes it easy for tools to include every +; target. [component_1] type = LibraryGroup name = all-targets -- cgit v1.1 From 6cba97c5557ac51d8ae09b9f5cbad6891124db4d Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 10 Nov 2011 01:30:39 +0000 Subject: For immediate encodings of icmp, zero or sign extend first. Then determine if the value is negative and flip the sign accordingly. 
rdar://10422026 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144258 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 44c88aa..4c47ff9 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1216,7 +1216,6 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // Check to see if the 2nd operand is a constant that we can encode directly // in the compare. - uint64_t Imm; int EncodedImm = 0; bool EncodeImm = false; bool isNegativeImm = false; @@ -1224,10 +1223,11 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { const APInt &CIVal = ConstInt->getValue(); - - isNegativeImm = CIVal.isNegative(); - Imm = (isNegativeImm) ? (-CIVal).getZExtValue() : CIVal.getZExtValue(); - EncodedImm = (int)Imm; + EncodedImm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); + if (EncodedImm < 0) { + isNegativeImm = true; + EncodedImm = -EncodedImm; + } EncodeImm = isThumb2 ? (ARM_AM::getT2SOImmVal(EncodedImm) != -1) : (ARM_AM::getSOImmVal(EncodedImm) != -1); } -- cgit v1.1 From 4dbe96e22ff4989577cf4854f717b9d972094f87 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 10 Nov 2011 06:54:20 +0000 Subject: AVX2: Add variable shift from memory. Note: These patterns only work in some cases because many times the load sd node is bitcasted from a load node of a different type.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144266 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index ff4f749..91c84dd 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7692,6 +7692,7 @@ defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", memopv4i32, memopv8i32, let Predicates = [HasAVX2] in { + def : Pat<(v4i32 (shl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), (VPSLLVDrr VR128:$src1, VR128:$src2)>; def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), @@ -7702,7 +7703,6 @@ let Predicates = [HasAVX2] in { (VPSRLVQrr VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (sra (v4i32 VR128:$src1), (v4i32 VR128:$src2))), (VPSRAVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i32 (shl (v8i32 VR256:$src1), (v8i32 VR256:$src2))), (VPSLLVDYrr VR256:$src1, VR256:$src2)>; def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (v4i64 VR256:$src2))), @@ -7713,6 +7713,29 @@ let Predicates = [HasAVX2] in { (VPSRLVQYrr VR256:$src1, VR256:$src2)>; def : Pat<(v8i32 (sra (v8i32 VR256:$src1), (v8i32 VR256:$src2))), (VPSRAVDYrr VR256:$src1, VR256:$src2)>; + + def : Pat<(v4i32 (shl (v4i32 VR128:$src1),(loadv4i32 addr:$src2))), + (VPSLLVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (shl (v4i32 VR128:$src1),(loadv2i64 addr:$src2))), + (VPSLLVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (shl (v2i64 VR128:$src1),(loadv2i64 addr:$src2))), + (VPSLLVQrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (srl (v4i32 VR128:$src1),(loadv4i32 addr:$src2))), + (VPSRLVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (srl (v2i64 VR128:$src1),(loadv2i64 addr:$src2))), + (VPSRLVQrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (sra (v4i32 VR128:$src1),(loadv4i32 addr:$src2))), + (VPSRAVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i32 (shl (v8i32 VR256:$src1),(loadv8i32 
addr:$src2))), + (VPSLLVDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (shl (v4i64 VR256:$src1),(loadv4i64 addr:$src2))), + (VPSLLVQYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (srl (v8i32 VR256:$src1),(loadv8i32 addr:$src2))), + (VPSRLVDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (srl (v4i64 VR256:$src1),(loadv4i64 addr:$src2))), + (VPSRLVQYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (sra (v8i32 VR256:$src1),(loadv8i32 addr:$src2))), + (VPSRAVDYrm VR256:$src1, addr:$src2)>; } -- cgit v1.1 From 623a7e146bd86747dc46a6f8bb9993fc217d6b78 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 10 Nov 2011 07:43:16 +0000 Subject: Use a bigger hammer to fix PR11314 by disabling the "forcing two-address instruction lower optimization" in the pre-RA scheduler. The optimization, or rather the hack, was done before MI use-list was available. Now we should be able to implement it in a better way, perhaps in the two-address pass until a MI scheduler is available. Now that the scheduler has to backtrack to handle call sequences, adding artificial scheduling constraints is just not safe. Furthermore, the hack is not taking all the other scheduling decisions into consideration so it's just as likely to pessimize code. So I view disabling this optimization as goodness regardless of PR11314.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144267 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index cab303d..7938a37 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -89,6 +89,9 @@ static cl::opt DisableSchedCriticalPath( static cl::opt DisableSchedHeight( "disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp")); +static cl::opt Disable2AddrHack( + "disable-2addr-hack", cl::Hidden, cl::init(true), + cl::desc("Disable scheduler's two-address hack")); static cl::opt MaxReorderWindow( "max-sched-reorder", cl::Hidden, cl::init(6), @@ -2628,7 +2631,8 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { void RegReductionPQBase::initNodes(std::vector &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); + if (!Disable2AddrHack) + AddPseudoTwoAddrDeps(); // Reroute edges to nodes with multiple uses. if (!TracksRegPressure) PrescheduleNodesWithMultipleUses(); -- cgit v1.1 From 977665c24a4f2c0d05774deef607b6a20bc265f2 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Thu, 10 Nov 2011 15:35:14 +0000 Subject: build: Rename CBackend and CppBackend libraries to have CodeGen suffix, for consistency with other targets. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144292 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/CBackend/CMakeLists.txt | 4 ++-- lib/Target/CBackend/Makefile | 2 +- lib/Target/CppBackend/CMakeLists.txt | 4 ++-- lib/Target/CppBackend/Makefile | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/CBackend/CMakeLists.txt b/lib/Target/CBackend/CMakeLists.txt index 96ae49f..edf8ee7 100644 --- a/lib/Target/CBackend/CMakeLists.txt +++ b/lib/Target/CBackend/CMakeLists.txt @@ -1,8 +1,8 @@ -add_llvm_target(CBackend +add_llvm_target(CBackendCodeGen CBackend.cpp ) -add_llvm_library_dependencies(LLVMCBackend +add_llvm_library_dependencies(LLVMCBackendCodeGen LLVMAnalysis LLVMCBackendInfo LLVMCodeGen diff --git a/lib/Target/CBackend/Makefile b/lib/Target/CBackend/Makefile index 621948a..bac3474 100644 --- a/lib/Target/CBackend/Makefile +++ b/lib/Target/CBackend/Makefile @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../.. -LIBRARYNAME = LLVMCBackend +LIBRARYNAME = LLVMCBackendCodeGen DIRS = TargetInfo include $(LEVEL)/Makefile.common diff --git a/lib/Target/CppBackend/CMakeLists.txt b/lib/Target/CppBackend/CMakeLists.txt index 95b6058..53f6868 100644 --- a/lib/Target/CppBackend/CMakeLists.txt +++ b/lib/Target/CppBackend/CMakeLists.txt @@ -1,8 +1,8 @@ -add_llvm_target(CppBackend +add_llvm_target(CppBackendCodeGen CPPBackend.cpp ) -add_llvm_library_dependencies(LLVMCppBackend +add_llvm_library_dependencies(LLVMCppBackendCodeGen LLVMCore LLVMCppBackendInfo LLVMSupport diff --git a/lib/Target/CppBackend/Makefile b/lib/Target/CppBackend/Makefile index d75f4e8..efc7463 100644 --- a/lib/Target/CppBackend/Makefile +++ b/lib/Target/CppBackend/Makefile @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../../.. 
-LIBRARYNAME = LLVMCppBackend +LIBRARYNAME = LLVMCppBackendCodeGen DIRS = TargetInfo include $(LEVEL)/Makefile.common -- cgit v1.1 From 71810ab7c0ecd6927dde1eee0c73169642f3764d Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 10 Nov 2011 16:44:55 +0000 Subject: ARM assembly parsing for ASR(immediate). Start of rdar://9704684 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144293 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 20 +++++++++++++------- lib/Target/ARM/ARMInstrInfo.td | 9 +++++++++ lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index f9969b9..c5bf607 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -293,21 +293,27 @@ class InstThumb +class AsmPseudoInst : InstTemplate { - let OutOperandList = (ops); + let OutOperandList = (outs); let InOperandList = iops; let Pattern = []; let isCodeGenOnly = 0; // So we get asm matcher for it. + let AsmString = asm; let isPseudo = 1; } -class ARMAsmPseudo : AsmPseudoInst, Requires<[IsARM]>; -class tAsmPseudo : AsmPseudoInst, Requires<[IsThumb]>; -class t2AsmPseudo : AsmPseudoInst, Requires<[IsThumb2]>; -class VFP2AsmPseudo : AsmPseudoInst, Requires<[HasVFP2]>; -class NEONAsmPseudo : AsmPseudoInst, Requires<[HasNEON]>; +class ARMAsmPseudo : AsmPseudoInst, + Requires<[IsARM]>; +class tAsmPseudo : AsmPseudoInst, + Requires<[IsThumb]>; +class t2AsmPseudo : AsmPseudoInst, + Requires<[IsThumb2]>; +class VFP2AsmPseudo : AsmPseudoInst, + Requires<[HasVFP2]>; +class NEONAsmPseudo : AsmPseudoInst, + Requires<[HasNEON]>; // Pseudo instructions for the code generator. 
class PseudoInst pattern> diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index af98af8..359053c 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -4994,3 +4994,12 @@ def : MnemonicAlias<"usubaddx", "usax">; // for isel. def : ARMInstAlias<"mov${s}${p} $Rd, $imm", (MVNi rGPR:$Rd, so_imm_not:$imm, pred:$p, cc_out:$s)>; + +// The shifter forms of the MOV instruction are aliased to the ASR, LSL, +// LSR, ROR, and RRX instructions. +// FIXME: We need C++ parser hooks to map the alias to the MOV +// encoding. It seems we should be able to do that sort of thing +// in tblgen, but it could get ugly. +def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm", + (ins GPR:$Rd, GPR:$Rm, imm1_32:$imm, pred:$p, + cc_out:$s)>; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index cb0c97b..e68ecec 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4541,6 +4541,21 @@ void ARMAsmParser:: processInstruction(MCInst &Inst, const SmallVectorImpl &Operands) { switch (Inst.getOpcode()) { + // Handle the MOV complex aliases. + case ARM::ASRi: { + unsigned Amt = Inst.getOperand(2).getImm() + 1; + unsigned ShiftOp = ARM_AM::getSORegOpc(ARM_AM::asr, Amt); + MCInst TmpInst; + TmpInst.setOpcode(ARM::MOVsi); + TmpInst.addOperand(Inst.getOperand(0)); // Rd + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(MCOperand::CreateImm(ShiftOp)); // Shift value and ty + TmpInst.addOperand(Inst.getOperand(3)); // CondCode + TmpInst.addOperand(Inst.getOperand(4)); + TmpInst.addOperand(Inst.getOperand(5)); // cc_out + Inst = TmpInst; + break; + } case ARM::LDMIA_UPD: // If this is a load of a single register via a 'pop', then we should use // a post-indexed LDR instruction instead, per the ARM ARM. 
-- cgit v1.1 From ee10ff89a2934636570cb17b756bf31b2a38aab5 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 10 Nov 2011 19:18:01 +0000 Subject: ARM assembly parsing for LSR/LSL/ROR(immediate). More of rdar://9704684 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144301 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrInfo.td | 19 +++++++++++++++- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 37 ++++++++++++++++++++++++++----- 2 files changed, 50 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 359053c..af1f490 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -544,6 +544,14 @@ def imm0_31 : Operand, ImmLeaf, ImmLeaf= 0 && Imm < 32; +}]> { + let ParserMatchClass = Imm0_32AsmOperand; +} + /// imm0_255 predicate - Immediate in the range [0,255]. def Imm0_255AsmOperand : AsmOperandClass { let Name = "Imm0_255"; } def imm0_255 : Operand, ImmLeaf= 0 && Imm < 256; }]> { @@ -5001,5 +5009,14 @@ def : ARMInstAlias<"mov${s}${p} $Rd, $imm", // encoding. It seems we should be able to do that sort of thing // in tblgen, but it could get ugly. 
def ASRi : ARMAsmPseudo<"asr${s}${p} $Rd, $Rm, $imm", - (ins GPR:$Rd, GPR:$Rm, imm1_32:$imm, pred:$p, + (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p, + cc_out:$s)>; +def LSRi : ARMAsmPseudo<"lsr${s}${p} $Rd, $Rm, $imm", + (ins GPR:$Rd, GPR:$Rm, imm0_32:$imm, pred:$p, + cc_out:$s)>; +def LSLi : ARMAsmPseudo<"lsl${s}${p} $Rd, $Rm, $imm", + (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p, + cc_out:$s)>; +def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm", + (ins GPR:$Rd, GPR:$Rm, imm0_31:$imm, pred:$p, cc_out:$s)>; diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index e68ecec..83e7aac 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -602,6 +602,14 @@ public: int64_t Value = CE->getValue(); return Value > 0 && Value < 33; } + bool isImm0_32() const { + if (Kind != k_Immediate) + return false; + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < 33; + } bool isImm0_65535() const { if (Kind != k_Immediate) return false; @@ -1217,6 +1225,11 @@ public: Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); } + void addImm0_32Operands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + addExpr(Inst, getImm()); + } + void addImm0_65535Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); addExpr(Inst, getImm()); @@ -4542,14 +4555,28 @@ processInstruction(MCInst &Inst, const SmallVectorImpl &Operands) { switch (Inst.getOpcode()) { // Handle the MOV complex aliases. 
- case ARM::ASRi: { - unsigned Amt = Inst.getOperand(2).getImm() + 1; - unsigned ShiftOp = ARM_AM::getSORegOpc(ARM_AM::asr, Amt); + case ARM::ASRi: + case ARM::LSRi: + case ARM::LSLi: + case ARM::RORi: { + ARM_AM::ShiftOpc ShiftTy; + unsigned Amt = Inst.getOperand(2).getImm(); + switch(Inst.getOpcode()) { + default: llvm_unreachable("unexpected opcode!"); + case ARM::ASRi: ShiftTy = ARM_AM::asr; break; + case ARM::LSRi: ShiftTy = ARM_AM::lsr; break; + case ARM::LSLi: ShiftTy = ARM_AM::lsl; break; + case ARM::RORi: ShiftTy = ARM_AM::ror; break; + } + // A shift by zero is a plain MOVr, not a MOVsi. + unsigned Opc = Amt == 0 ? ARM::MOVr : ARM::MOVsi; + unsigned Shifter = ARM_AM::getSORegOpc(ShiftTy, Amt); MCInst TmpInst; - TmpInst.setOpcode(ARM::MOVsi); + TmpInst.setOpcode(Opc); TmpInst.addOperand(Inst.getOperand(0)); // Rd TmpInst.addOperand(Inst.getOperand(1)); // Rn - TmpInst.addOperand(MCOperand::CreateImm(ShiftOp)); // Shift value and ty + if (Opc == ARM::MOVsi) + TmpInst.addOperand(MCOperand::CreateImm(Shifter)); // Shift value and ty TmpInst.addOperand(Inst.getOperand(3)); // CondCode TmpInst.addOperand(Inst.getOperand(4)); TmpInst.addOperand(Inst.getOperand(5)); // cc_out -- cgit v1.1 From 0ffe2b4dd6f26fa19827f85bf9e4a766539a859c Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 10 Nov 2011 19:25:34 +0000 Subject: Rework adding function names to the dwarf accelerator tables, allow multiple dies per function and support C++ basenames. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144304 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 8 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 11 ++- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 138 +++++++++++++++-------------- 3 files changed, 86 insertions(+), 71 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 7c93dbf..a3a2488 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -75,9 +75,15 @@ void DwarfAccelTable::ComputeBucketCount(void) { void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) { // Create the individual hash data outputs. - for (StringMap::const_iterator + for (StringMap::iterator EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) { struct HashData *Entry = new HashData((*EI).getKeyData()); + + // Unique the entries. + std::sort((*EI).second.begin(), (*EI).second.end()); + (*EI).second.erase(std::unique((*EI).second.begin(), (*EI).second.end()), + (*EI).second.end()); + for (DIEArray::const_iterator DI = (*EI).second.begin(), DE = (*EI).second.end(); DI != DE; ++DI) diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 1cbe3a0..453e898 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -62,7 +62,7 @@ class CompileUnit { /// AccelNames - A map of names for the name accelerator table. 
/// - StringMap AccelNames; + StringMap > AccelNames; StringMap > AccelObjC; StringMap AccelNamespace; StringMap AccelTypes; @@ -84,7 +84,9 @@ public: DIE* getCUDie() const { return CUDie.get(); } const StringMap &getGlobalTypes() const { return GlobalTypes; } - const StringMap &getAccelNames() const { return AccelNames; } + const StringMap > &getAccelNames() const { + return AccelNames; + } const StringMap > &getAccelObjC() const { return AccelObjC; } @@ -101,7 +103,10 @@ public: /// addAccelName - Add a new name to the name accelerator table. - void addAccelName(StringRef Name, DIE *Die) { AccelNames[Name] = Die; } + void addAccelName(StringRef Name, DIE *Die) { + std::vector &DIEs = AccelNames[Name]; + DIEs.push_back(Die); + } void addAccelObjC(StringRef Name, DIE *Die) { std::vector &DIEs = AccelObjC[Name]; DIEs.push_back(Die); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index edd4e45..3ca2318 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -198,6 +198,63 @@ static StringRef getRealLinkageName(StringRef LinkageName) { return LinkageName; } +static bool isObjCClass(StringRef Name) { + return Name.startswith("+") || Name.startswith("-"); +} + +static bool hasObjCCategory(StringRef Name) { + if (!isObjCClass(Name)) return false; + + size_t pos = Name.find(')'); + if (pos != std::string::npos) { + if (Name[pos+1] != ' ') return false; + return true; + } + return false; +} + +static void getObjCClassCategory(StringRef In, StringRef &Class, + StringRef &Category) { + if (!hasObjCCategory(In)) { + Class = In.slice(In.find('[') + 1, In.find(' ')); + Category = ""; + return; + } + + Class = In.slice(In.find('[') + 1, In.find('(')); + Category = In.slice(In.find('[') + 1, In.find(' ')); + return; +} + +static StringRef getObjCMethodName(StringRef In) { + return In.slice(In.find(' ') + 1, In.find(']')); +} + +// Add the various names to the Dwarf accelerator table names. 
+static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, + DIE* Die) { + if (!SP.isDefinition()) return; + + TheCU->addAccelName(SP.getName(), Die); + + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) + TheCU->addAccelName(SP.getLinkageName(), Die); + + // If this is an Objective-C selector name add it to the ObjC accelerator + // too. + if (isObjCClass(SP.getName())) { + StringRef Class, Category; + getObjCClassCategory(SP.getName(), Class, Category); + TheCU->addAccelObjC(Class, Die); + if (Category != "") + TheCU->addAccelObjC(Category, Die); + // Also add the base method name to the name table. + TheCU->addAccelName(getObjCMethodName(SP.getName()), Die); + } +} + /// updateSubprogramScopeDIE - Find DIE for the given subprogram and /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. /// If there are global variables in this scope then create and insert @@ -257,6 +314,10 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, MachineLocation Location(RI->getFrameRegister(*Asm->MF)); SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); + // Add name to the name table, we do this here because we're guaranteed + // to have concrete versions of our DW_TAG_subprogram nodes. + addSubprogramNames(SPCU, SP, SPDie); + return SPDie; } @@ -384,6 +445,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return ScopeDIE; } + + /// constructScopeDIE - Construct a DIE for this scope. 
DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (!Scope || !Scope->getScopeNode()) @@ -439,19 +502,9 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { ScopeDIE->addChild(*I); if (DS.isSubprogram()) - TheCU->addPubTypes(DISubprogram(DS)); - - if (DS.isSubprogram() && !Scope->isAbstractScope()) { - DISubprogram SP = DISubprogram(DS); - TheCU->addAccelName(SP.getName(), ScopeDIE); + TheCU->addPubTypes(DISubprogram(DS)); - // If the linkage name is different than the name, go ahead and output - // that as well into the name table. - if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) - TheCU->addAccelName(SP.getLinkageName(), ScopeDIE); - } - - return ScopeDIE; + return ScopeDIE; } /// GetOrCreateSourceID - Look up the source id with the given directory and @@ -531,38 +584,6 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { return NewCU; } -static bool isObjCClass(StringRef Name) { - return Name.startswith("+") || Name.startswith("-"); -} - -static bool hasObjCCategory(StringRef Name) { - if (!isObjCClass(Name)) return false; - - size_t pos = Name.find(')'); - if (pos != std::string::npos) { - if (Name[pos+1] != ' ') return false; - return true; - } - return false; -} - -static void getObjCClassCategory(StringRef In, StringRef &Class, - StringRef &Category) { - if (!hasObjCCategory(In)) { - Class = In.slice(In.find('[') + 1, In.find(' ')); - Category = ""; - return; - } - - Class = In.slice(In.find('[') + 1, In.find('(')); - Category = In.slice(In.find('[') + 1, In.find(' ')); - return; -} - -static StringRef getObjCMethodName(StringRef In) { - return In.slice(In.find(' ') + 1, In.find(']')); -} - /// construct SubprogramDIE - Construct subprogram DIE. void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { @@ -597,25 +618,6 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, // Add to context owner. 
TheCU->addToContextOwner(SubprogramDie, SP.getContext()); - // Add to Accel Names - TheCU->addAccelName(SP.getName(), SubprogramDie); - - // If the linkage name is different than the name, go ahead and output - // that as well into the name table. - if (SP.getLinkageName() != "" && SP.getName() != SP.getLinkageName()) - TheCU->addAccelName(SP.getLinkageName(), SubprogramDie); - - // If this is an Objective-C selector name add it to the ObjC accelerator too. - if (isObjCClass(SP.getName())) { - StringRef Class, Category; - getObjCClassCategory(SP.getName(), Class, Category); - TheCU->addAccelObjC(Class, SubprogramDie); - if (Category != "") - TheCU->addAccelObjC(Category, SubprogramDie); - // Also add the base method name to the name table. - TheCU->addAccelName(getObjCMethodName(SP.getName()), SubprogramDie); - } - return; } @@ -1763,12 +1765,14 @@ void DwarfDebug::emitAccelNames() { for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; - const StringMap &Names = TheCU->getAccelNames(); - for (StringMap::const_iterator + const StringMap > &Names = TheCU->getAccelNames(); + for (StringMap >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; - AT.AddName(Name, Entity); + std::vector Entities = GI->second; + for (std::vector::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); } } -- cgit v1.1 From 1b3f9198ab3880be34b6252423b9e388b5cd6a5e Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 10 Nov 2011 19:52:58 +0000 Subject: Move type handling to make sure we get all created types that aren't forward decls and have names into the dwarf accelerator types table. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144306 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index a68da04..d440a74 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -605,7 +605,11 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { assert(Ty.isDerivedType() && "Unknown kind of DIType"); constructTypeDIE(*TyDIE, DIDerivedType(Ty)); } - + // If this is a named finished type then include it in the list of types + // for the accelerator tables. + if (!Ty.getName().empty() && !Ty.isForwardDecl()) + addAccelType(Ty.getName(), TyDIE); + addToContextOwner(TyDIE, Ty.getContext()); return TyDIE; } @@ -634,12 +638,6 @@ void CompileUnit::addType(DIE *Entity, DIType Ty) { // If this is a complete composite type then include it in the // list of global types. addGlobalType(Ty); - - // If this is a named finished type then include it in the list of types - // for the accelerator tables. - if (!Ty.getName().empty() && !Ty.isForwardDecl()) - if (DIEEntry *Entry = getDIEEntry(Ty)) - AccelTypes[Ty.getName()] = Entry->getEntry(); } /// addGlobalType - Add a new global type to the compile unit. 
-- cgit v1.1 From c7e5a6a2c69ee2552242da2a70775acd7d8819ae Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Thu, 10 Nov 2011 20:22:08 +0000 Subject: Fixed bug in DeadStoreElimination commit r144239 Size of data being pointed to wasn't always being checked so some small writes were killing big writes Fixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144312 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/DeadStoreElimination.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 03a557e..8f5f157 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -380,7 +380,7 @@ static OverwriteResult isOverwrite(const AliasAnalysis::Location &Later, if (isObjectPointerWithTrustworthySize(UO2)) { uint64_t ObjectSize = TD.getTypeAllocSize(cast(UO2->getType())->getElementType()); - if (ObjectSize == Later.Size) + if (ObjectSize == Later.Size && ObjectSize >= Earlier.Size) return OverwriteComplete; } -- cgit v1.1 From d475f8612b1c7959dbf50242c8fa9d4aea1ee1a9 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 10 Nov 2011 20:48:53 +0000 Subject: ARM .thumb_func directive for quoted symbol names. Use the getIdentifier() method of the token, not getString(), otherwise we keep the quotes as part of the symbol name, which we don't want. 
rdar://10428015 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144315 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 83e7aac..f142e68 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4966,17 +4966,17 @@ bool ARMAsmParser::parseDirectiveThumbFunc(SMLoc L) { const AsmToken &Tok = Parser.getTok(); if (Tok.isNot(AsmToken::Identifier) && Tok.isNot(AsmToken::String)) return Error(L, "unexpected token in .thumb_func directive"); - Name = Tok.getString(); + Name = Tok.getIdentifier(); Parser.Lex(); // Consume the identifier token. } - if (getLexer().isNot(AsmToken::EndOfStatement)) + if (getLexer().isNot(AsmToken::EndOfStatement)) return Error(L, "unexpected token in directive"); Parser.Lex(); // FIXME: assuming function name will be the line following .thumb_func if (!isMachO) { - Name = Parser.getTok().getString(); + Name = Parser.getTok().getIdentifier(); } // Mark symbol as a thumb symbol. -- cgit v1.1 From 16455ce1a4063348209e94f52afde653ded5eeb5 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Thu, 10 Nov 2011 21:09:49 +0000 Subject: When in ARM mode, LDRH/STRH require special handling of negative offsets. For correctness, disable this for now. 
rdar://10418009 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144316 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4c47ff9..6b2c1f3 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -852,7 +852,8 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); else // ARM i16 integer loads/stores handle +/-imm8 offsets. - if (Addr.Offset > 255 || Addr.Offset < -255) + // FIXME: Negative offsets require special handling. + if (Addr.Offset > 255 || Addr.Offset < 0) needsLowering = true; break; case MVT::i1: -- cgit v1.1 From 8bd36eafca6992754006cbace5ba471884631d08 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Thu, 10 Nov 2011 21:47:55 +0000 Subject: Make types and namespaces take multiple DIEs for the accelerator tables as well. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144319 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 18 ++++++++++++------ lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 20 ++++++++++++-------- 2 files changed, 24 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 453e898..07a7723 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -64,8 +64,8 @@ class CompileUnit { /// StringMap > AccelNames; StringMap > AccelObjC; - StringMap AccelNamespace; - StringMap AccelTypes; + StringMap > AccelNamespace; + StringMap > AccelTypes; /// DIEBlocks - A list of all the DIEBlocks in use. 
std::vector DIEBlocks; @@ -90,8 +90,12 @@ public: const StringMap > &getAccelObjC() const { return AccelObjC; } - const StringMap &getAccelNamespace() const { return AccelNamespace; } - const StringMap &getAccelTypes() const { return AccelTypes; } + const StringMap > &getAccelNamespace() const { + return AccelNamespace; + } + const StringMap > &getAccelTypes() const { + return AccelTypes; + } /// hasContent - Return true if this compile unit has something to write out. /// @@ -112,10 +116,12 @@ public: DIEs.push_back(Die); } void addAccelNamespace(StringRef Name, DIE *Die) { - AccelNamespace[Name] = Die; + std::vector &DIEs = AccelNamespace[Name]; + DIEs.push_back(Die); } void addAccelType(StringRef Name, DIE *Die) { - AccelTypes[Name] = Die; + std::vector &DIEs = AccelTypes[Name]; + DIEs.push_back(Die); } /// getDIE - Returns the debug information entry map slot for the diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 3ca2318..5d05a3c 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -1823,12 +1823,14 @@ void DwarfDebug::emitAccelNamespaces() { for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; - const StringMap &Names = TheCU->getAccelNamespace(); - for (StringMap::const_iterator + const StringMap > &Names = TheCU->getAccelNamespace(); + for (StringMap >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; - AT.AddName(Name, Entity); + std::vector Entities = GI->second; + for (std::vector::const_iterator DI = Entities.begin(), + DE = Entities.end(); DI != DE; ++DI) + AT.AddName(Name, (*DI)); } } @@ -1849,12 +1851,14 @@ void DwarfDebug::emitAccelTypes() { for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; - const StringMap &Names = TheCU->getAccelTypes(); - for 
(StringMap::const_iterator + const StringMap > &Names = TheCU->getAccelTypes(); + for (StringMap >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); - DIE *Entity = GI->second; - AT.AddName(Name, Entity); + std::vector Entities = GI->second; + for (std::vector::const_iterator DI = Entities.begin(), + DE= Entities.end(); DI !=DE; ++DI) + AT.AddName(Name, (*DI)); } } -- cgit v1.1 From 1325ab92560443958d380bdffa630b4f96fba58f Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Thu, 10 Nov 2011 22:00:34 +0000 Subject: build/MCDisassembler: Fix required libraries list of MCDisassembler to use all-targets instead of an explicit list. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144320 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDisassembler/LLVMBuild.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCDisassembler/LLVMBuild.txt b/lib/MC/MCDisassembler/LLVMBuild.txt index 33f95af..7b8c7ff 100644 --- a/lib/MC/MCDisassembler/LLVMBuild.txt +++ b/lib/MC/MCDisassembler/LLVMBuild.txt @@ -19,5 +19,7 @@ type = Library name = MCDisassembler parent = MC -required_libraries = ARMAsmParser ARMDesc ARMDisassembler ARMInfo CBackendInfo CellSPUDesc CellSPUInfo CppBackendInfo MBlazeAsmParser MBlazeDesc MBlazeDisassembler MBlazeInfo MC MCParser MSP430Desc MSP430Info MipsDesc MipsInfo PTXDesc PTXInfo PowerPCDesc PowerPCInfo SparcDesc SparcInfo Support X86AsmParser X86Desc X86Disassembler X86Info XCoreDesc XCoreInfo +; FIXME: This is really horrible, MCDisassembler should not in and of its own +; accord depend on every target. +required_libraries = all-targets MC MCParser Support -- cgit v1.1 From eaf4221cef44f5e5094b60099f6e52a8e431bb49 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Thu, 10 Nov 2011 22:00:37 +0000 Subject: build/MBlazeDisassembler: Some compilers may generate an MBlaze disassembler that depends on MBlazeCodeGen.
This is a layering violation that should really be fixed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144321 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MBlaze/Disassembler/LLVMBuild.txt | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt index c5c4f80..b2b3a3a 100644 --- a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt +++ b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt @@ -19,6 +19,12 @@ type = Library name = MBlazeDisassembler parent = MBlaze -required_libraries = MBlazeDesc MBlazeInfo MC Support +; Strictly speaking, we probably shouldn't have a dependency on +; MBlazeCodeGen. However, given the current factoring we end up including +; MBlazeGenRegisterInfo.inc in the disassembler. Those generated headers end up +; referencing external variables through GPRRegClass, SPRRegClass, and +; CRCRegClass. These aren't actually used, but some compilers may generate +; references to them. +required_libraries = MBlazeCodeGen MBlazeDesc MBlazeInfo MC Support add_to_library_groups = MBlaze -- cgit v1.1 From 1b332860aef0121cf4591f4377a7201ce0ef8366 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 10 Nov 2011 22:10:12 +0000 Subject: Thumb MUL assembly parsing for 3-operand form. Get the source register that isn't tied to the destination register correct, even when the assembly source operand order is backwards. 
rdar://10428630 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144322 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index f142e68..d887053 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -3415,13 +3415,15 @@ cvtThumbMultiply(MCInst &Inst, unsigned Opcode, } ((ARMOperand*)Operands[3])->addRegOperands(Inst, 1); ((ARMOperand*)Operands[1])->addCCOutOperands(Inst, 1); - ((ARMOperand*)Operands[4])->addRegOperands(Inst, 1); - // If we have a three-operand form, use that, else the second source operand - // is just the destination operand again. - if (Operands.size() == 6) - ((ARMOperand*)Operands[5])->addRegOperands(Inst, 1); - else - Inst.addOperand(Inst.getOperand(0)); + // If we have a three-operand form, make sure to set Rn to be the operand + // that isn't the same as Rd. + unsigned RegOp = 4; + if (Operands.size() == 6 && + ((ARMOperand*)Operands[4])->getReg() == + ((ARMOperand*)Operands[3])->getReg()) + RegOp = 5; + ((ARMOperand*)Operands[RegOp])->addRegOperands(Inst, 1); + Inst.addOperand(Inst.getOperand(0)); ((ARMOperand*)Operands[2])->addCondCodeOperands(Inst, 2); return true; -- cgit v1.1 From 01b55b4a808d04cbff7b62f55eaeb62019340bc0 Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Thu, 10 Nov 2011 22:34:29 +0000 Subject: Check in getOrCreateSubprogramDIE if a declaration exists and if so output it first. This is a more general fix to pr11300. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144324 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 13 ++++++++++++- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 12 ------------ 2 files changed, 12 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index d440a74..657763a 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -934,6 +934,12 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { if (SPDie) return SPDie; + DISubprogram SPDecl = SP.getFunctionDeclaration(); + DIE *DeclDie = NULL; + if (SPDecl.isSubprogram()) { + DeclDie = getOrCreateSubprogramDIE(SPDecl); + } + SPDie = new DIE(dwarf::DW_TAG_subprogram); // DW_TAG_inlined_subroutine may refer to this DIE. @@ -952,8 +958,13 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // If this DIE is going to refer declaration info using AT_specification // then there is no need to add other attributes. - if (SP.getFunctionDeclaration().isSubprogram()) + if (DeclDie) { + // Refer function declaration directly. + addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, + DeclDie); + return SPDie; + } // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 5d05a3c..581f04b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -598,20 +598,8 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, // class type. 
return; - DISubprogram SPDecl = SP.getFunctionDeclaration(); - DIE *DeclDie = NULL; - if (SPDecl.isSubprogram()) { - DeclDie = TheCU->getOrCreateSubprogramDIE(SPDecl); - } - DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP); - if (DeclDie) { - // Refer function declaration directly. - TheCU->addDIEEntry(SubprogramDie, dwarf::DW_AT_specification, - dwarf::DW_FORM_ref4, DeclDie); - } - // Add to map. TheCU->insertDIE(N, SubprogramDie); -- cgit v1.1 From fae02597bb90f4334079580441b8e5876be4a3d2 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 10 Nov 2011 23:01:27 +0000 Subject: Thumb1 diagnostics for reglist on PUSH/POP fix. Was not checking the first register in the register list. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144329 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index d887053..0cc2e5a 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4528,14 +4528,14 @@ validateInstruction(MCInst &Inst, } case ARM::tPOP: { bool listContainsBase; - if (checkLowRegisterList(Inst, 3, 0, ARM::PC, listContainsBase)) + if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase)) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or pc"); break; } case ARM::tPUSH: { bool listContainsBase; - if (checkLowRegisterList(Inst, 3, 0, ARM::LR, listContainsBase)) + if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase)) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or lr"); break; -- cgit v1.1 From 5402637ff283d7397513d5c1699cdf2274c47313 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 10 Nov 2011 23:17:11 +0000 Subject: Thumb2 parsing for push/pop w/ hi registers in the reglist. rdar://10130228. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144331 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 34 +++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 0cc2e5a..546e90f 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4526,16 +4526,21 @@ validateInstruction(MCInst &Inst, "in register list"); break; } + // Like for ldm/stm, push and pop have hi-reg handling version in Thumb2, + // so only issue a diagnostic for thumb1. The instructions will be + // switched to the t2 encodings in processInstruction() if necessary. case ARM::tPOP: { bool listContainsBase; - if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase)) + if (checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase) && + !isThumbTwo()) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or pc"); break; } case ARM::tPUSH: { bool listContainsBase; - if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase)) + if (checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase) && + !isThumbTwo()) return Error(Operands[2]->getStartLoc(), "registers must be in range r0-r7 or lr"); break; @@ -4691,6 +4696,31 @@ processInstruction(MCInst &Inst, } break; } + case ARM::tPOP: { + bool listContainsBase; + // If the register list contains any high registers, we need to use + // the 32-bit encoding instead if we're in Thumb2. Otherwise, this + // should have generated an error in validateInstruction(). + if (!checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase)) + return; + assert (isThumbTwo()); + Inst.setOpcode(ARM::t2LDMIA_UPD); + // Add the base register and writeback operands. 
+ Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); + Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); + break; + } + case ARM::tPUSH: { + bool listContainsBase; + if (!checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase)) + return; + assert (isThumbTwo()); + Inst.setOpcode(ARM::t2STMDB_UPD); + // Add the base register and writeback operands. + Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); + Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); + break; + } case ARM::t2MOVi: { // If we can use the 16-bit encoding and the user didn't explicitly // request the 32-bit variant, transform it here. -- cgit v1.1 From 83ec87755ed4d07f6650d6727fb762052bd0041c Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 10 Nov 2011 23:42:14 +0000 Subject: ARM let processInstruction() transforms chain. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144337 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 52 ++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 15 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 546e90f..dec9251 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -208,7 +208,7 @@ class ARMAsmParser : public MCTargetAsmParser { bool validateInstruction(MCInst &Inst, const SmallVectorImpl &Ops); - void processInstruction(MCInst &Inst, + bool processInstruction(MCInst &Inst, const SmallVectorImpl &Ops); bool shouldOmitCCOutOperand(StringRef Mnemonic, SmallVectorImpl &Operands); @@ -4557,7 +4557,7 @@ validateInstruction(MCInst &Inst, return false; } -void ARMAsmParser:: +bool ARMAsmParser:: processInstruction(MCInst &Inst, const SmallVectorImpl &Operands) { switch (Inst.getOpcode()) { @@ -4588,7 +4588,7 @@ processInstruction(MCInst &Inst, TmpInst.addOperand(Inst.getOperand(4)); TmpInst.addOperand(Inst.getOperand(5)); // cc_out Inst =
TmpInst; - break; + return true; } case ARM::LDMIA_UPD: // If this is a load of a single register via a 'pop', then we should use @@ -4605,6 +4605,7 @@ processInstruction(MCInst &Inst, TmpInst.addOperand(Inst.getOperand(2)); // CondCode TmpInst.addOperand(Inst.getOperand(3)); Inst = TmpInst; + return true; } break; case ARM::STMDB_UPD: @@ -4628,36 +4629,48 @@ processInstruction(MCInst &Inst, // explicitly specified. From the ARM ARM: "Encoding T1 is preferred // to encoding T2 if is specified and encoding T2 is preferred // to encoding T1 if is omitted." - if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) + if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { Inst.setOpcode(ARM::tADDi3); + return true; + } break; case ARM::tSUBi8: // If the immediate is in the range 0-7, we want tADDi3 iff Rd was // explicitly specified. From the ARM ARM: "Encoding T1 is preferred // to encoding T2 if is specified and encoding T2 is preferred // to encoding T1 if is omitted." - if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) + if (Inst.getOperand(3).getImm() < 8 && Operands.size() == 6) { Inst.setOpcode(ARM::tSUBi3); + return true; + } break; case ARM::tB: // A Thumb conditional branch outside of an IT block is a tBcc. - if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) + if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) { Inst.setOpcode(ARM::tBcc); + return true; + } break; case ARM::t2B: // A Thumb2 conditional branch outside of an IT block is a t2Bcc. - if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()) + if (Inst.getOperand(1).getImm() != ARMCC::AL && !inITBlock()){ Inst.setOpcode(ARM::t2Bcc); + return true; + } break; case ARM::t2Bcc: // If the conditional is AL or we're in an IT block, we really want t2B. 
- if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock()) + if (Inst.getOperand(1).getImm() == ARMCC::AL || inITBlock()) { Inst.setOpcode(ARM::t2B); + return true; + } break; case ARM::tBcc: // If the conditional is AL, we really want tB. - if (Inst.getOperand(1).getImm() == ARMCC::AL) + if (Inst.getOperand(1).getImm() == ARMCC::AL) { Inst.setOpcode(ARM::tB); + return true; + } break; case ARM::tLDMIA: { // If the register list contains any high registers, or if the writeback @@ -4680,6 +4693,7 @@ processInstruction(MCInst &Inst, if (hasWritebackToken) Inst.insert(Inst.begin(), MCOperand::CreateReg(Inst.getOperand(0).getReg())); + return true; } break; } @@ -4693,6 +4707,7 @@ processInstruction(MCInst &Inst, // 16-bit encoding isn't sufficient. Switch to the 32-bit version. assert (isThumbTwo()); Inst.setOpcode(ARM::t2STMIA_UPD); + return true; } break; } @@ -4702,24 +4717,24 @@ processInstruction(MCInst &Inst, // the 32-bit encoding instead if we're in Thumb2. Otherwise, this // should have generated an error in validateInstruction(). if (!checkLowRegisterList(Inst, 2, 0, ARM::PC, listContainsBase)) - return; + return false; assert (isThumbTwo()); Inst.setOpcode(ARM::t2LDMIA_UPD); // Add the base register and writeback operands. Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); - break; + return true; } case ARM::tPUSH: { bool listContainsBase; if (!checkLowRegisterList(Inst, 2, 0, ARM::LR, listContainsBase)) - return; + return false; assert (isThumbTwo()); Inst.setOpcode(ARM::t2STMDB_UPD); // Add the base register and writeback operands. 
Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); Inst.insert(Inst.begin(), MCOperand::CreateReg(ARM::SP)); - break; + return true; } case ARM::t2MOVi: { // If we can use the 16-bit encoding and the user didn't explicitly @@ -4740,6 +4755,7 @@ processInstruction(MCInst &Inst, TmpInst.addOperand(Inst.getOperand(2)); TmpInst.addOperand(Inst.getOperand(3)); Inst = TmpInst; + return true; } break; } @@ -4760,6 +4776,7 @@ processInstruction(MCInst &Inst, TmpInst.addOperand(Inst.getOperand(2)); TmpInst.addOperand(Inst.getOperand(3)); Inst = TmpInst; + return true; } break; } @@ -4790,6 +4807,7 @@ processInstruction(MCInst &Inst, TmpInst.addOperand(Inst.getOperand(3)); TmpInst.addOperand(Inst.getOperand(4)); Inst = TmpInst; + return true; } break; } @@ -4822,6 +4840,7 @@ processInstruction(MCInst &Inst, break; } } + return false; } unsigned ARMAsmParser::checkTargetMatchPredicate(MCInst &Inst) { @@ -4887,8 +4906,11 @@ MatchAndEmitInstruction(SMLoc IDLoc, } // Some instructions need post-processing to, for example, tweak which - // encoding is selected. - processInstruction(Inst, Operands); + // encoding is selected. Loop on it while changes happen so the + // individual transformations can chain off each other. E.g., + // tPOP(r8)->t2LDMIA_UPD(sp,r8)->t2STR_POST(sp,r8) + while (processInstruction(Inst, Operands)) + ; // Only move forward at the very end so that everything in validate // and process gets a consistent answer about whether we're in an IT -- cgit v1.1 From 0352b4679e9289ded6b2d73a76a017e0d97fe70d Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Thu, 10 Nov 2011 23:58:34 +0000 Subject: Thumb2 ldm/stm updating w/ one register in the list are LDR/STR. 
rdar://10429490 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144338 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 32 +++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index dec9251..e782975 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4590,6 +4590,38 @@ processInstruction(MCInst &Inst, Inst = TmpInst; return true; } + case ARM::t2LDMIA_UPD: { + // If this is a load of a single register, then we should use + // a post-indexed LDR instruction instead, per the ARM ARM. + if (Inst.getNumOperands() != 5) + return false; + MCInst TmpInst; + TmpInst.setOpcode(ARM::t2LDR_POST); + TmpInst.addOperand(Inst.getOperand(4)); // Rt + TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(MCOperand::CreateImm(4)); + TmpInst.addOperand(Inst.getOperand(2)); // CondCode + TmpInst.addOperand(Inst.getOperand(3)); + Inst = TmpInst; + return true; + } + case ARM::t2STMDB_UPD: { + // If this is a store of a single register, then we should use + // a pre-indexed STR instruction instead, per the ARM ARM. + if (Inst.getNumOperands() != 5) + return false; + MCInst TmpInst; + TmpInst.setOpcode(ARM::t2STR_PRE); + TmpInst.addOperand(Inst.getOperand(0)); // Rn_wb + TmpInst.addOperand(Inst.getOperand(4)); // Rt + TmpInst.addOperand(Inst.getOperand(1)); // Rn + TmpInst.addOperand(MCOperand::CreateImm(-4)); + TmpInst.addOperand(Inst.getOperand(2)); // CondCode + TmpInst.addOperand(Inst.getOperand(3)); + Inst = TmpInst; + return true; + } case ARM::LDMIA_UPD: // If this is a load of a single register via a 'pop', then we should use // a post-indexed LDR instruction instead, per the ARM ARM. 
-- cgit v1.1 From 5ed5506f18fcc0a277c863f7a21b39f58e892ca5 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 11 Nov 2011 00:23:56 +0000 Subject: LLVMBuild: Add explicit information on whether targets define an assembly printer, assembly parser, or disassembler. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144344 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/LLVMBuild.txt | 3 +++ lib/Target/CellSPU/LLVMBuild.txt | 1 + lib/Target/MBlaze/LLVMBuild.txt | 3 +++ lib/Target/MSP430/LLVMBuild.txt | 1 + lib/Target/Mips/LLVMBuild.txt | 1 + lib/Target/PTX/LLVMBuild.txt | 1 + lib/Target/PowerPC/LLVMBuild.txt | 1 + lib/Target/Sparc/LLVMBuild.txt | 1 + lib/Target/X86/LLVMBuild.txt | 3 +++ lib/Target/XCore/LLVMBuild.txt | 1 + 10 files changed, 16 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/LLVMBuild.txt b/lib/Target/ARM/LLVMBuild.txt index 79cae91..9082539 100644 --- a/lib/Target/ARM/LLVMBuild.txt +++ b/lib/Target/ARM/LLVMBuild.txt @@ -19,6 +19,9 @@ type = TargetGroup name = ARM parent = Target +has_asmparser = 1 +has_asmprinter = 1 +has_disassembler = 1 has_jit = 1 [component_1] diff --git a/lib/Target/CellSPU/LLVMBuild.txt b/lib/Target/CellSPU/LLVMBuild.txt index a2127dd..4ae26b2 100644 --- a/lib/Target/CellSPU/LLVMBuild.txt +++ b/lib/Target/CellSPU/LLVMBuild.txt @@ -19,6 +19,7 @@ type = TargetGroup name = CellSPU parent = Target +has_asmprinter = 1 [component_1] type = Library diff --git a/lib/Target/MBlaze/LLVMBuild.txt b/lib/Target/MBlaze/LLVMBuild.txt index fa89552..f1a3f5d 100644 --- a/lib/Target/MBlaze/LLVMBuild.txt +++ b/lib/Target/MBlaze/LLVMBuild.txt @@ -19,6 +19,9 @@ type = TargetGroup name = MBlaze parent = Target +has_asmparser = 1 +has_asmprinter = 1 +has_disassembler = 1 [component_1] type = Library diff --git a/lib/Target/MSP430/LLVMBuild.txt b/lib/Target/MSP430/LLVMBuild.txt index 9ade110..024312b 100644 --- a/lib/Target/MSP430/LLVMBuild.txt +++ b/lib/Target/MSP430/LLVMBuild.txt @@ -19,6 +19,7 @@ type = TargetGroup 
name = MSP430 parent = Target +has_asmprinter = 1 [component_1] type = Library diff --git a/lib/Target/Mips/LLVMBuild.txt b/lib/Target/Mips/LLVMBuild.txt index 65c7d7f..e733b52 100644 --- a/lib/Target/Mips/LLVMBuild.txt +++ b/lib/Target/Mips/LLVMBuild.txt @@ -19,6 +19,7 @@ type = TargetGroup name = Mips parent = Target +has_asmprinter = 1 has_jit = 1 [component_1] diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt index 180e7ce..27807e6 100644 --- a/lib/Target/PTX/LLVMBuild.txt +++ b/lib/Target/PTX/LLVMBuild.txt @@ -19,6 +19,7 @@ type = TargetGroup name = PTX parent = Target +has_asmprinter = 1 [component_1] type = Library diff --git a/lib/Target/PowerPC/LLVMBuild.txt b/lib/Target/PowerPC/LLVMBuild.txt index ce8b2e9..5baa988 100644 --- a/lib/Target/PowerPC/LLVMBuild.txt +++ b/lib/Target/PowerPC/LLVMBuild.txt @@ -19,6 +19,7 @@ type = TargetGroup name = PowerPC parent = Target +has_asmprinter = 1 has_jit = 1 [component_1] diff --git a/lib/Target/Sparc/LLVMBuild.txt b/lib/Target/Sparc/LLVMBuild.txt index d99057f..38c797f 100644 --- a/lib/Target/Sparc/LLVMBuild.txt +++ b/lib/Target/Sparc/LLVMBuild.txt @@ -19,6 +19,7 @@ type = TargetGroup name = Sparc parent = Target +has_asmprinter = 1 [component_1] type = Library diff --git a/lib/Target/X86/LLVMBuild.txt b/lib/Target/X86/LLVMBuild.txt index 814e81b..514566c 100644 --- a/lib/Target/X86/LLVMBuild.txt +++ b/lib/Target/X86/LLVMBuild.txt @@ -19,6 +19,9 @@ type = TargetGroup name = X86 parent = Target +has_asmparser = 1 +has_asmprinter = 1 +has_disassembler = 1 has_jit = 1 [component_1] diff --git a/lib/Target/XCore/LLVMBuild.txt b/lib/Target/XCore/LLVMBuild.txt index f1b7574..1f7e2d5 100644 --- a/lib/Target/XCore/LLVMBuild.txt +++ b/lib/Target/XCore/LLVMBuild.txt @@ -19,6 +19,7 @@ type = TargetGroup name = XCore parent = Target +has_asmprinter = 1 [component_1] type = Library -- cgit v1.1 From 7b809e08b906755d71994a20479cbe781db9614d Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 11 Nov 2011 
00:28:42 +0000 Subject: Disable compact unwind generation until I can solve the codegen problems. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144346 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index c274b9d..0225afa 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -1011,7 +1011,8 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, // Emit the compact unwind info if available. // FIXME: This emits both the compact unwind and the old CIE/FDE // information. Only one of those is needed. - if (IsEH && MOFI->getCompactUnwindSection()) + // FIXME: Disable. This is causing failures in the test suite. + if (false && IsEH && MOFI->getCompactUnwindSection()) for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); if (Frame.CompactUnwindEncoding) -- cgit v1.1 From 4e89d97e3a40dcbbf07648512f0e95133867a74f Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 11 Nov 2011 00:36:21 +0000 Subject: Add support for using MVN to materialize negative constants. rdar://10412592 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144348 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 6b2c1f3..030fab1 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -552,16 +552,30 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { // do so now. const ConstantInt *CI = cast(C); if (Subtarget->hasV6T2Ops() && isUInt<16>(CI->getZExtValue())) { - EVT SrcVT = MVT::i32; unsigned Opc = isThumb2 ? 
ARM::t2MOVi16 : ARM::MOVi16; - unsigned ImmReg = createResultReg(TLI.getRegClassFor(SrcVT)); + unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ImmReg) .addImm(CI->getZExtValue())); return ImmReg; } - // For now 32-bit only. + // Use MVN to emit negative constants. + if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) { + unsigned Imm = (unsigned)~(CI->getSExtValue()); + bool EncodeImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : + (ARM_AM::getSOImmVal(Imm) != -1); + if (EncodeImm) { + unsigned Opc = isThumb2 ? ARM::t2MVNi : ARM::MVNi; + unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(Opc), ImmReg) + .addImm(Imm)); + return ImmReg; + } + } + + // Load from constant pool. For now 32-bit only. if (VT != MVT::i32) return false; -- cgit v1.1 From c739577d3c2e5ee47baaf8b4ba259718ec2db4cc Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 11 Nov 2011 00:59:14 +0000 Subject: If we have to reset the calculation of the compact encoding, then also reset the "saved register" index. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144350 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86FrameLowering.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index ece90cb..819d242 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -518,6 +518,7 @@ uint32_t X86FrameLowering::getCompactUnwindEncoding(MachineFunction &MF) const { CFAOffset = 0; memset(SavedRegs, 0, sizeof(SavedRegs)); + SavedRegIdx = 6; InstrOffset += MoveInstrSize; } else if (Opc == SubtractInstr) { if (StackAdjust) -- cgit v1.1 From cf3b89f9a83e46494fba73dd7754df03e95b2b15 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Fri, 11 Nov 2011 01:03:50 +0000 Subject: Reenable compact unwinding now that is fixed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144351 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index 0225afa..b68fcaf 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -1012,7 +1012,7 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, // FIXME: This emits both the compact unwind and the old CIE/FDE // information. Only one of those is needed. // FIXME: Disable. This is causing failures in the test suite. - if (false && IsEH && MOFI->getCompactUnwindSection()) + if (IsEH && MOFI->getCompactUnwindSection()) for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); if (Frame.CompactUnwindEncoding) -- cgit v1.1 From b80f778bd315e5c37b987c3203c6d40bd9c3bfe6 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 11 Nov 2011 01:16:15 +0000 Subject: Get rid of an optimization in SCCP which appears to have many issues. 
Specifically, it doesn't handle many cases involving undef correctly, and it is missing other checks which lead to it trying to re-mark a value marked as a constant with a different value. It also appears to trigger very rarely. Fixes PR11357. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144352 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/SCCP.cpp | 168 +---------------------------------------- 1 file changed, 1 insertion(+), 167 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp index 196a847..f6762ad 100644 --- a/lib/Transforms/Scalar/SCCP.cpp +++ b/lib/Transforms/Scalar/SCCP.cpp @@ -201,10 +201,6 @@ class SCCPSolver : public InstVisitor { SmallVector BBWorkList; // The BasicBlock work list - /// UsersOfOverdefinedPHIs - Keep track of any users of PHI nodes that are not - /// overdefined, despite the fact that the PHI node is overdefined. - std::multimap UsersOfOverdefinedPHIs; - /// KnownFeasibleEdges - Entries in this set are edges which have already had /// PHI nodes retriggered. typedef std::pair Edge; @@ -466,33 +462,6 @@ private: if (BBExecutable.count(I->getParent())) // Inst is executable? visit(*I); } - - /// RemoveFromOverdefinedPHIs - If I has any entries in the - /// UsersOfOverdefinedPHIs map for PN, remove them now. - void RemoveFromOverdefinedPHIs(Instruction *I, PHINode *PN) { - if (UsersOfOverdefinedPHIs.empty()) return; - typedef std::multimap::iterator ItTy; - std::pair Range = UsersOfOverdefinedPHIs.equal_range(PN); - for (ItTy It = Range.first, E = Range.second; It != E;) { - if (It->second == I) - UsersOfOverdefinedPHIs.erase(It++); - else - ++It; - } - } - - /// InsertInOverdefinedPHIs - Insert an entry in the UsersOfOverdefinedPHIS - /// map for I and PN, but if one is there already, do not create another. - /// (Duplicate entries do not break anything directly, but can lead to - /// exponential growth of the table in rare cases.) 
- void InsertInOverdefinedPHIs(Instruction *I, PHINode *PN) { - typedef std::multimap::iterator ItTy; - std::pair Range = UsersOfOverdefinedPHIs.equal_range(PN); - for (ItTy J = Range.first, E = Range.second; J != E; ++J) - if (J->second == I) - return; - UsersOfOverdefinedPHIs.insert(std::make_pair(PN, I)); - } private: friend class InstVisitor; @@ -700,23 +669,8 @@ void SCCPSolver::visitPHINode(PHINode &PN) { if (PN.getType()->isStructTy()) return markAnythingOverdefined(&PN); - if (getValueState(&PN).isOverdefined()) { - // There may be instructions using this PHI node that are not overdefined - // themselves. If so, make sure that they know that the PHI node operand - // changed. - typedef std::multimap::iterator ItTy; - std::pair Range = UsersOfOverdefinedPHIs.equal_range(&PN); - - if (Range.first == Range.second) - return; - - SmallVector Users; - for (ItTy I = Range.first, E = Range.second; I != E; ++I) - Users.push_back(I->second); - while (!Users.empty()) - visit(Users.pop_back_val()); + if (getValueState(&PN).isOverdefined()) return; // Quick exit - } // Super-extra-high-degree PHI nodes are unlikely to ever be marked constant, // and slow us down a lot. Just mark them overdefined. @@ -959,64 +913,6 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) { } - // If both operands are PHI nodes, it is possible that this instruction has - // a constant value, despite the fact that the PHI node doesn't. Check for - // this condition now. - if (PHINode *PN1 = dyn_cast(I.getOperand(0))) - if (PHINode *PN2 = dyn_cast(I.getOperand(1))) - if (PN1->getParent() == PN2->getParent()) { - // Since the two PHI nodes are in the same basic block, they must have - // entries for the same predecessors. Walk the predecessor list, and - // if all of the incoming values are constants, and the result of - // evaluating this expression with all incoming value pairs is the - // same, then this expression is a constant even though the PHI node - // is not a constant! 
- LatticeVal Result; - for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) { - LatticeVal In1 = getValueState(PN1->getIncomingValue(i)); - BasicBlock *InBlock = PN1->getIncomingBlock(i); - LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock)); - - if (In1.isOverdefined() || In2.isOverdefined()) { - Result.markOverdefined(); - break; // Cannot fold this operation over the PHI nodes! - } - - if (In1.isConstant() && In2.isConstant()) { - Constant *V = ConstantExpr::get(I.getOpcode(), In1.getConstant(), - In2.getConstant()); - if (Result.isUndefined()) - Result.markConstant(V); - else if (Result.isConstant() && Result.getConstant() != V) { - Result.markOverdefined(); - break; - } - } - } - - // If we found a constant value here, then we know the instruction is - // constant despite the fact that the PHI nodes are overdefined. - if (Result.isConstant()) { - markConstant(IV, &I, Result.getConstant()); - // Remember that this instruction is virtually using the PHI node - // operands. - InsertInOverdefinedPHIs(&I, PN1); - InsertInOverdefinedPHIs(&I, PN2); - return; - } - - if (Result.isUndefined()) - return; - - // Okay, this really is overdefined now. Since we might have - // speculatively thought that this was not overdefined before, and - // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs, - // make sure to clean out any entries that we put there, for - // efficiency. - RemoveFromOverdefinedPHIs(&I, PN1); - RemoveFromOverdefinedPHIs(&I, PN2); - } - markOverdefined(&I); } @@ -1037,68 +933,6 @@ void SCCPSolver::visitCmpInst(CmpInst &I) { if (!V1State.isOverdefined() && !V2State.isOverdefined()) return; - // If something is overdefined, use some tricks to avoid ending up and over - // defined if we can. - - // If both operands are PHI nodes, it is possible that this instruction has - // a constant value, despite the fact that the PHI node doesn't. Check for - // this condition now. 
- if (PHINode *PN1 = dyn_cast(I.getOperand(0))) - if (PHINode *PN2 = dyn_cast(I.getOperand(1))) - if (PN1->getParent() == PN2->getParent()) { - // Since the two PHI nodes are in the same basic block, they must have - // entries for the same predecessors. Walk the predecessor list, and - // if all of the incoming values are constants, and the result of - // evaluating this expression with all incoming value pairs is the - // same, then this expression is a constant even though the PHI node - // is not a constant! - LatticeVal Result; - for (unsigned i = 0, e = PN1->getNumIncomingValues(); i != e; ++i) { - LatticeVal In1 = getValueState(PN1->getIncomingValue(i)); - BasicBlock *InBlock = PN1->getIncomingBlock(i); - LatticeVal In2 =getValueState(PN2->getIncomingValueForBlock(InBlock)); - - if (In1.isOverdefined() || In2.isOverdefined()) { - Result.markOverdefined(); - break; // Cannot fold this operation over the PHI nodes! - } - - if (In1.isConstant() && In2.isConstant()) { - Constant *V = ConstantExpr::getCompare(I.getPredicate(), - In1.getConstant(), - In2.getConstant()); - if (Result.isUndefined()) - Result.markConstant(V); - else if (Result.isConstant() && Result.getConstant() != V) { - Result.markOverdefined(); - break; - } - } - } - - // If we found a constant value here, then we know the instruction is - // constant despite the fact that the PHI nodes are overdefined. - if (Result.isConstant()) { - markConstant(&I, Result.getConstant()); - // Remember that this instruction is virtually using the PHI node - // operands. - InsertInOverdefinedPHIs(&I, PN1); - InsertInOverdefinedPHIs(&I, PN2); - return; - } - - if (Result.isUndefined()) - return; - - // Okay, this really is overdefined now. Since we might have - // speculatively thought that this was not overdefined before, and - // added ourselves to the UsersOfOverdefinedPHIs list for the PHIs, - // make sure to clean out any entries that we put there, for - // efficiency. 
- RemoveFromOverdefinedPHIs(&I, PN1); - RemoveFromOverdefinedPHIs(&I, PN2); - } - markOverdefined(&I); } -- cgit v1.1 From d117fbb2311f6b05770ee680135eaac4eb6d16d1 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 11 Nov 2011 01:55:22 +0000 Subject: If we have a DIE with an AT_specification use that instead of the normal addr DIE when adding to the dwarf accelerator tables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144354 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 657763a..1e51709 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -1095,6 +1095,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addToContextOwner(VariableDIE, GVContext); // Add location. bool addToAccelTable = false; + DIE *VariableSpecDIE; if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); @@ -1106,7 +1107,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && !GVContext.isFile() && !isSubprogramContext(GVContext)) { // Create specification DIE. - DIE *VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); + VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); @@ -1135,13 +1136,15 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } - if (addToAccelTable) - addAccelName(GV.getName(), VariableDIE); + if (addToAccelTable) { + DIE *AddrDIE = VariableSpecDIE ? 
VariableSpecDIE : VariableDIE; + addAccelName(GV.getName(), AddrDIE); - // If the linkage name is different than the name, go ahead and output - // that as well into the name table. - if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) - addAccelName(GV.getLinkageName(), VariableDIE); + // If the linkage name is different than the name, go ahead and output + // that as well into the name table. + if (GV.getLinkageName() != "" && GV.getName() != GV.getLinkageName()) + addAccelName(GV.getLinkageName(), AddrDIE); + } return; } -- cgit v1.1 From 646abbfa30f881b5183b62e77a185fc48d9d82bd Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 11 Nov 2011 02:38:59 +0000 Subject: When loading a value, treat an i1 as an i8. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144356 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 030fab1..c2f0a71 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -955,6 +955,7 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) { switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; + case MVT::i1: case MVT::i8: Opc = isThumb2 ? ARM::t2LDRBi12 : ARM::LDRBi12; RC = ARM::GPRRegisterClass; -- cgit v1.1 From d61c34ba30888c49f4f223422f30b018a41594da Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 11 Nov 2011 03:16:32 +0000 Subject: Initialize variable. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144360 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 1e51709..159c096 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -19,6 +19,7 @@ #include "llvm/GlobalVariable.h" #include "llvm/Instructions.h" #include "llvm/Analysis/DIBuilder.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameLowering.h" @@ -1095,7 +1096,7 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addToContextOwner(VariableDIE, GVContext); // Add location. bool addToAccelTable = false; - DIE *VariableSpecDIE; + DIE *VariableSpecDIE = NULL; if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); -- cgit v1.1 From 15f58c56e9a4150abeea04469c9105edb8acad99 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 11 Nov 2011 03:16:38 +0000 Subject: Make sure to expand SIGN_EXTEND_INREG for NEON vectors. PR11319, round 3. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144361 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 6f2b3b8..b55ef70 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -127,6 +127,7 @@ void ARMTargetLowering::addTypeForNEON(EVT VT, EVT PromotedLdStVT, setOperationAction(ISD::EXTRACT_SUBVECTOR, VT.getSimpleVT(), Legal); setOperationAction(ISD::SELECT, VT.getSimpleVT(), Expand); setOperationAction(ISD::SELECT_CC, VT.getSimpleVT(), Expand); + setOperationAction(ISD::SIGN_EXTEND_INREG, VT.getSimpleVT(), Expand); if (VT.isInteger()) { setOperationAction(ISD::SHL, VT.getSimpleVT(), Custom); setOperationAction(ISD::SRA, VT.getSimpleVT(), Custom); -- cgit v1.1 From d5cf5a631fb398d375982fed74196dcd0f7ff952 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 11 Nov 2011 03:58:36 +0000 Subject: Fix printing of MCSymbolRefExpr. Needs three closing parentheses for VK_Mips_GPOFF_HI/LO.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144366 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp index 4a815f3..f544d39 100644 --- a/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp +++ b/lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp @@ -118,7 +118,10 @@ static void printExpr(const MCExpr *Expr, raw_ostream &OS) { OS << Offset; } - if (Kind != MCSymbolRefExpr::VK_None) + if ((Kind == MCSymbolRefExpr::VK_Mips_GPOFF_HI) || + (Kind == MCSymbolRefExpr::VK_Mips_GPOFF_LO)) + OS << ")))"; + else if (Kind != MCSymbolRefExpr::VK_None) OS << ')'; } -- cgit v1.1 From a1fa08f66a5e92ccf5bc0b565c045be14108dae4 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 11 Nov 2011 04:00:29 +0000 Subject: Emit Mips64's sequence of instructions that set global register in prologue. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144367 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsFrameLowering.cpp | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 71f3116..68adfe6 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -149,6 +149,11 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned NewReg = 0; int NewImm = 0; bool ATUsed; + unsigned GP = STI.isABI_N64() ? Mips::GP_64 : Mips::GP; + unsigned T9 = STI.isABI_N64() ? Mips::T9_64 : Mips::T9; + unsigned ADDu = STI.isABI_N64() ? Mips::DADDu : Mips::ADDu; + unsigned ADDiu = STI.isABI_N64() ? Mips::DADDiu : Mips::ADDiu; + unsigned LUi = STI.isABI_N64() ? Mips::LUi64 : Mips::LUi; // First, compute final stack size. unsigned RegSize = STI.isGP32bit() ? 
4 : 8; @@ -165,10 +170,25 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { BuildMI(MBB, MBBI, dl, TII.get(Mips::NOREORDER)); - // TODO: check need from GP here. + // Emit instructions that set $gp using the value of $t9. + // O32 uses the directive .cpload while N32/64 requires three instructions to + // do this. + // TODO: Do not emit these instructions if no instructions use $gp. if (isPIC && STI.isABI_O32()) BuildMI(MBB, MBBI, dl, TII.get(Mips::CPLOAD)) .addReg(RegInfo->getPICCallReg()); + else if (STI.isABI_N64() || (isPIC && STI.isABI_N32())) { + // lui $28,%hi(%neg(%gp_rel(fname))) + // addu $28,$28,$25 + // addiu $28,$28,%lo(%neg(%gp_rel(fname))) + const GlobalValue *FName = MF.getFunction(); + BuildMI(MBB, MBBI, dl, TII.get(LUi), GP) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_HI); + BuildMI(MBB, MBBI, dl, TII.get(ADDu), GP).addReg(GP).addReg(T9); + BuildMI(MBB, MBBI, dl, TII.get(ADDiu), GP).addReg(GP) + .addGlobalAddress(FName, 0, MipsII::MO_GPOFF_LO); + } + BuildMI(MBB, MBBI, dl, TII.get(Mips::NOMACRO)); // No need to allocate space on the stack. -- cgit v1.1 From 642b1097131ec8c700318f01fe2504d8d5ef4adc Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 11 Nov 2011 04:03:54 +0000 Subject: 64-bit versions of jal, jalr and bal. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144368 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 26 ++++++++++++++++++++++++++ lib/Target/Mips/MipsInstrInfo.td | 1 + 2 files changed, 27 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 9a769e8..9eddbc9 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -51,6 +51,30 @@ class shift_rotate_imm64_32 func, bits<5> isRotate, string instr_asm, shift_rotate_imm; +// Jump and Link (Call) +let isCall=1, hasDelaySlot=1, + // All calls clobber the non-callee saved registers...
+ Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, + K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in { + class JumpLink64 op, string instr_asm>: + FJ; + + class JumpLinkReg64 op, bits<6> func, string instr_asm>: + FR { + let rt = 0; + let rd = 31; + let shamt = 0; + } + + class BranchLink64: + FI<0x1, (outs), (ins CPU64Regs:$rs, brtarget:$imm16, variable_ops), + !strconcat(instr_asm, "\t$rs, $imm16"), [], IIBranch>; +} + // Mul, Div class Mult64 func, string instr_asm, InstrItinClass itin>: Mult; @@ -123,6 +147,8 @@ defm ULD : LoadM64<0x37, "uld", load_u, 1>; defm USD : StoreM64<0x3f, "usd", store_u, 1>; /// Jump and Branch Instructions +def JAL64 : JumpLink64<0x03, "jal">; +def JALR64 : JumpLinkReg64<0x00, 0x09, "jalr">; def BEQ64 : CBranch<0x04, "beq", seteq, CPU64Regs>; def BNE64 : CBranch<0x05, "bne", setne, CPU64Regs>; def BGEZ64 : CBranchZero<0x01, 1, "bgez", setge, CPU64Regs>; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 1cc3841..61e6bf3 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -140,6 +140,7 @@ def NotN64 : Predicate<"!Subtarget.isABI_N64()">; // Instruction operand types def brtarget : Operand; def calltarget : Operand; +def calltarget64: Operand; def simm16 : Operand; def simm16_64 : Operand; def shamt : Operand; -- cgit v1.1 From c742e4fc9016b4987dbd06af4670d7759392d08d Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 11 Nov 2011 04:06:38 +0000 Subject: Add 64-bit versions of LEA_ADDiu and DynAlloc. Modify LowerDYNAMIC_STACKALLOC. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144370 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 9 +++++++++ lib/Target/Mips/MipsISelLowering.cpp | 10 +++++----- lib/Target/Mips/MipsInstrInfo.td | 20 +++++++++++++------- 3 files changed, 27 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 9eddbc9..06b3d05 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -171,6 +171,12 @@ def MFLO64 : MoveFromLOHI<0x12, "mflo", CPU64Regs, [LO64]>; def DCLZ : CountLeading0<0x24, "dclz", CPU64Regs>; def DCLO : CountLeading1<0x25, "dclo", CPU64Regs>; +def LEA_ADDiu64 : EffectiveAddress<"addiu\t$rt, $addr", CPU64Regs, mem_ea_64>; + +let Uses = [SP_64] in +def DynAlloc64 : EffectiveAddress<"daddiu\t$rt, $addr", CPU64Regs, mem_ea_64>, + Requires<[IsN64]>; + //===----------------------------------------------------------------------===// // Arbitrary patterns that map to one or more instructions //===----------------------------------------------------------------------===// @@ -200,6 +206,9 @@ defm : SetgtPats; defm : SetgePats; defm : SetgeImmPats; +// select MipsDynAlloc +def : Pat<(MipsDynAlloc addr:$f), (DynAlloc64 addr:$f)>, Requires<[IsN64]>; + // truncate def : Pat<(i32 (trunc CPU64Regs:$src)), (SLL (EXTRACT_SUBREG CPU64Regs:$src, sub_32), 0)>, Requires<[IsN64]>; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 50aa78f..e322367 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1295,6 +1295,7 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); MipsFunctionInfo *MipsFI = MF.getInfo(); + unsigned SP = IsN64 ? 
Mips::SP_64 : Mips::SP; assert(getTargetMachine().getFrameLowering()->getStackAlignment() >= cast(Op.getOperand(2).getNode())->getZExtValue() && @@ -1306,20 +1307,19 @@ LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const DebugLoc dl = Op.getDebugLoc(); // Get a reference from Mips stack pointer - SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, Mips::SP, MVT::i32); + SDValue StackPointer = DAG.getCopyFromReg(Chain, dl, SP, getPointerTy()); // Subtract the dynamic size from the actual stack size to // obtain the new stack size. - SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, StackPointer, Size); + SDValue Sub = DAG.getNode(ISD::SUB, dl, getPointerTy(), StackPointer, Size); // The Sub result contains the new stack start address, so it // must be placed in the stack pointer register. - Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, Mips::SP, Sub, - SDValue()); + Chain = DAG.getCopyToReg(StackPointer.getValue(1), dl, SP, Sub, SDValue()); // This node always has two return values: a new stack pointer // value and a chain - SDVTList VTLs = DAG.getVTList(MVT::i32, MVT::Other); + SDVTList VTLs = DAG.getVTList(getPointerTy(), MVT::Other); SDValue Ptr = DAG.getFrameIndex(MipsFI->getDynAllocFI(), getPointerTy()); SDValue Ops[] = { Chain, Ptr, Chain.getValue(1) }; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index 61e6bf3..b4dc7aa 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -39,8 +39,8 @@ def SDT_MipsDivRem : SDTypeProfile<0, 2, def SDT_MipsThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>; -def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, i32>, - SDTCisVT<1, iPTR>]>; +def SDT_MipsDynAlloc : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, + SDTCisSameAs<0, 1>]>; def SDT_Sync : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>; def SDT_Ext : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<0, 1>, @@ -168,6 +168,12 @@ def mem_ea : Operand { let EncoderMethod = "getMemEncoding"; } 
+def mem_ea_64 : Operand { + let PrintMethod = "printMemOperandEA"; + let MIOperandInfo = (ops CPU64Regs, simm16_64); + let EncoderMethod = "getMemEncoding"; +} + // size operand of ext instruction def size_ext : Operand { let EncoderMethod = "getSizeExtEncoding"; @@ -526,9 +532,9 @@ class MoveToLOHI func, string instr_asm, RegisterClass RC, let Defs = DefRegs; } -class EffectiveAddress : - FMem<0x09, (outs CPURegs:$rt), (ins mem_ea:$addr), - instr_asm, [(set CPURegs:$rt, addr:$addr)], IIAlu>; +class EffectiveAddress : + FMem<0x09, (outs RC:$rt), (ins Mem:$addr), + instr_asm, [(set RC:$rt, addr:$addr)], IIAlu>; // Count Leading Ones/Zeros in Word class CountLeading0 func, string instr_asm, RegisterClass RC>: @@ -799,13 +805,13 @@ let addr=0 in // instructions. The same not happens for stack address copies, so an // add op with mem ComplexPattern is used and the stack address copy // can be matched. It's similar to Sparc LEA_ADDRi -def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr">; +def LEA_ADDiu : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>; // DynAlloc node points to dynamically allocated stack space. // $sp is added to the list of implicitly used registers to prevent dead code // elimination from removing instructions that modify $sp. let Uses = [SP] in -def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr">; +def DynAlloc : EffectiveAddress<"addiu\t$rt, $addr", CPURegs, mem_ea>; // MADD*/MSUB* def MADD : MArithR<0, "madd", MipsMAdd, 1>; -- cgit v1.1 From 46ac4399b13d46baa9e6280d540c468d8feba8ad Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 11 Nov 2011 04:11:56 +0000 Subject: Modify LowerFRAMEADDR. Use 64-bit register FP_64 when ABI is N64. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144371 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index e322367..1a85d54 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1658,7 +1658,8 @@ LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); - SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Mips::FP, VT); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, + IsN64 ? Mips::FP_64 : Mips::FP, VT); return FrameAddr; } -- cgit v1.1 From 59068067cb37322c50463102bbd6929df34c039e Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 11 Nov 2011 04:14:30 +0000 Subject: 64-bit atomic instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144372 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 28 ++++++++ lib/Target/Mips/MipsISelLowering.cpp | 125 +++++++++++++++++++++++++++++------ lib/Target/Mips/MipsInstrInfo.td | 105 +++++++++++++++++------------ 3 files changed, 194 insertions(+), 64 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 06b3d05..608c271 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -81,6 +81,28 @@ class Mult64 func, string instr_asm, InstrItinClass itin>: class Div64 func, string instr_asm, InstrItinClass itin>: Div; +multiclass Atomic2Ops64 { + def #NAME# : Atomic2Ops, Requires<[NotN64]>; + def _P8 : Atomic2Ops, Requires<[IsN64]>; +} + +multiclass AtomicCmpSwap64 { + def #NAME# : AtomicCmpSwap, Requires<[NotN64]>; + def _P8 : AtomicCmpSwap, + Requires<[IsN64]>; +} + +let usesCustomInserter = 1, Predicates = [HasMips64] in 
{ + defm ATOMIC_LOAD_ADD_I64 : Atomic2Ops64; + defm ATOMIC_LOAD_SUB_I64 : Atomic2Ops64; + defm ATOMIC_LOAD_AND_I64 : Atomic2Ops64; + defm ATOMIC_LOAD_OR_I64 : Atomic2Ops64; + defm ATOMIC_LOAD_XOR_I64 : Atomic2Ops64; + defm ATOMIC_LOAD_NAND_I64 : Atomic2Ops64; + defm ATOMIC_SWAP_I64 : Atomic2Ops64; + defm ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap64; +} + //===----------------------------------------------------------------------===// // Instruction definition //===----------------------------------------------------------------------===// @@ -146,6 +168,12 @@ defm USW64 : StoreM64<0x2b, "usw", truncstorei32_u, 1>; defm ULD : LoadM64<0x37, "uld", load_u, 1>; defm USD : StoreM64<0x3f, "usd", store_u, 1>; +/// Load-linked, Store-conditional +def LLD : LLBase<0x34, "lld", CPU64Regs, mem>, Requires<[NotN64]>; +def LLD_P8 : LLBase<0x34, "lld", CPU64Regs, mem64>, Requires<[IsN64]>; +def SCD : SCBase<0x3c, "scd", CPU64Regs, mem>, Requires<[NotN64]>; +def SCD_P8 : SCBase<0x3c, "scd", CPU64Regs, mem64>, Requires<[IsN64]>; + /// Jump and Branch Instructions def JAL64 : JumpLink64<0x03, "jal">; def JALR64 : JumpLinkReg64<0x00, 0x09, "jalr">; diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 1a85d54..d916edd 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -794,60 +794,108 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, assert(false && "Unexpected instr type to insert"); return NULL; case Mips::ATOMIC_LOAD_ADD_I8: + case Mips::ATOMIC_LOAD_ADD_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I16: + case Mips::ATOMIC_LOAD_ADD_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, Mips::ADDu); case Mips::ATOMIC_LOAD_ADD_I32: + case Mips::ATOMIC_LOAD_ADD_I32_P8: return EmitAtomicBinary(MI, BB, 4, Mips::ADDu); + case Mips::ATOMIC_LOAD_ADD_I64: + case Mips::ATOMIC_LOAD_ADD_I64_P8: + return EmitAtomicBinary(MI, BB, 8, Mips::DADDu); case 
Mips::ATOMIC_LOAD_AND_I8: + case Mips::ATOMIC_LOAD_AND_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::AND); case Mips::ATOMIC_LOAD_AND_I16: + case Mips::ATOMIC_LOAD_AND_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, Mips::AND); case Mips::ATOMIC_LOAD_AND_I32: + case Mips::ATOMIC_LOAD_AND_I32_P8: return EmitAtomicBinary(MI, BB, 4, Mips::AND); + case Mips::ATOMIC_LOAD_AND_I64: + case Mips::ATOMIC_LOAD_AND_I64_P8: + return EmitAtomicBinary(MI, BB, 8, Mips::AND64); case Mips::ATOMIC_LOAD_OR_I8: + case Mips::ATOMIC_LOAD_OR_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::OR); case Mips::ATOMIC_LOAD_OR_I16: + case Mips::ATOMIC_LOAD_OR_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, Mips::OR); case Mips::ATOMIC_LOAD_OR_I32: + case Mips::ATOMIC_LOAD_OR_I32_P8: return EmitAtomicBinary(MI, BB, 4, Mips::OR); + case Mips::ATOMIC_LOAD_OR_I64: + case Mips::ATOMIC_LOAD_OR_I64_P8: + return EmitAtomicBinary(MI, BB, 8, Mips::OR64); case Mips::ATOMIC_LOAD_XOR_I8: + case Mips::ATOMIC_LOAD_XOR_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I16: + case Mips::ATOMIC_LOAD_XOR_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, Mips::XOR); case Mips::ATOMIC_LOAD_XOR_I32: + case Mips::ATOMIC_LOAD_XOR_I32_P8: return EmitAtomicBinary(MI, BB, 4, Mips::XOR); + case Mips::ATOMIC_LOAD_XOR_I64: + case Mips::ATOMIC_LOAD_XOR_I64_P8: + return EmitAtomicBinary(MI, BB, 8, Mips::XOR64); case Mips::ATOMIC_LOAD_NAND_I8: + case Mips::ATOMIC_LOAD_NAND_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, 0, true); case Mips::ATOMIC_LOAD_NAND_I16: + case Mips::ATOMIC_LOAD_NAND_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, 0, true); case Mips::ATOMIC_LOAD_NAND_I32: + case Mips::ATOMIC_LOAD_NAND_I32_P8: return EmitAtomicBinary(MI, BB, 4, 0, true); + case Mips::ATOMIC_LOAD_NAND_I64: + case Mips::ATOMIC_LOAD_NAND_I64_P8: + return EmitAtomicBinary(MI, BB, 8, 0, true); case Mips::ATOMIC_LOAD_SUB_I8: + case Mips::ATOMIC_LOAD_SUB_I8_P8: return
EmitAtomicBinaryPartword(MI, BB, 1, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I16: + case Mips::ATOMIC_LOAD_SUB_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, Mips::SUBu); case Mips::ATOMIC_LOAD_SUB_I32: + case Mips::ATOMIC_LOAD_SUB_I32_P8: return EmitAtomicBinary(MI, BB, 4, Mips::SUBu); + case Mips::ATOMIC_LOAD_SUB_I64: + case Mips::ATOMIC_LOAD_SUB_I64_P8: + return EmitAtomicBinary(MI, BB, 8, Mips::DSUBu); case Mips::ATOMIC_SWAP_I8: + case Mips::ATOMIC_SWAP_I8_P8: return EmitAtomicBinaryPartword(MI, BB, 1, 0); case Mips::ATOMIC_SWAP_I16: + case Mips::ATOMIC_SWAP_I16_P8: return EmitAtomicBinaryPartword(MI, BB, 2, 0); case Mips::ATOMIC_SWAP_I32: + case Mips::ATOMIC_SWAP_I32_P8: return EmitAtomicBinary(MI, BB, 4, 0); + case Mips::ATOMIC_SWAP_I64: + case Mips::ATOMIC_SWAP_I64_P8: + return EmitAtomicBinary(MI, BB, 8, 0); case Mips::ATOMIC_CMP_SWAP_I8: + case Mips::ATOMIC_CMP_SWAP_I8_P8: return EmitAtomicCmpSwapPartword(MI, BB, 1); case Mips::ATOMIC_CMP_SWAP_I16: + case Mips::ATOMIC_CMP_SWAP_I16_P8: return EmitAtomicCmpSwapPartword(MI, BB, 2); case Mips::ATOMIC_CMP_SWAP_I32: + case Mips::ATOMIC_CMP_SWAP_I32_P8: return EmitAtomicCmpSwap(MI, BB, 4); + case Mips::ATOMIC_CMP_SWAP_I64: + case Mips::ATOMIC_CMP_SWAP_I64_P8: + return EmitAtomicCmpSwap(MI, BB, 8); } } @@ -857,13 +905,31 @@ MachineBasicBlock * MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, unsigned BinOpcode, bool Nand) const { - assert(Size == 4 && "Unsupported size for EmitAtomicBinary."); + assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicBinary."); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); - const TargetRegisterClass *RC = getRegClassFor(MVT::i32); + const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + unsigned LL, SC, AND, NOR, ZERO, BEQ; + + if (Size == 4) { + LL = IsN64 ? 
Mips::LL_P8 : Mips::LL; + SC = IsN64 ? Mips::SC_P8 : Mips::SC; + AND = Mips::AND; + NOR = Mips::NOR; + ZERO = Mips::ZERO; + BEQ = Mips::BEQ; + } + else { + LL = IsN64 ? Mips::LLD_P8 : Mips::LLD; + SC = IsN64 ? Mips::SCD_P8 : Mips::SCD; + AND = Mips::AND64; + NOR = Mips::NOR64; + ZERO = Mips::ZERO_64; + BEQ = Mips::BEQ64; + } unsigned OldVal = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); @@ -901,23 +967,20 @@ MipsTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, // sc success, storeval, 0(ptr) // beq success, $0, loopMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(LL), OldVal).addReg(Ptr).addImm(0); if (Nand) { // and andres, oldval, incr // nor storeval, $0, andres - BuildMI(BB, dl, TII->get(Mips::AND), AndRes).addReg(OldVal).addReg(Incr); - BuildMI(BB, dl, TII->get(Mips::NOR), StoreVal) - .addReg(Mips::ZERO).addReg(AndRes); + BuildMI(BB, dl, TII->get(AND), AndRes).addReg(OldVal).addReg(Incr); + BuildMI(BB, dl, TII->get(NOR), StoreVal).addReg(ZERO).addReg(AndRes); } else if (BinOpcode) { // storeval, oldval, incr BuildMI(BB, dl, TII->get(BinOpcode), StoreVal).addReg(OldVal).addReg(Incr); } else { StoreVal = Incr; } - BuildMI(BB, dl, TII->get(Mips::SC), Success) - .addReg(StoreVal).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); + BuildMI(BB, dl, TII->get(SC), Success).addReg(StoreVal).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(BEQ)).addReg(Success).addReg(ZERO).addMBB(loopMBB); MI->eraseFromParent(); // The instruction is gone now. @@ -937,6 +1000,8 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL; + unsigned SC = IsN64 ? 
Mips::SC_P8 : Mips::SC; unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); @@ -1028,7 +1093,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, // beq success,$0,loopMBB BB = loopMBB; - BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); if (Nand) { // and andres, oldval, incr2 // nor binopres, $0, andres @@ -1051,7 +1116,7 @@ MipsTargetLowering::EmitAtomicBinaryPartword(MachineInstr *MI, .addReg(OldVal).addReg(Mask2); BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) .addReg(MaskedOldVal0).addReg(NewVal); - BuildMI(BB, dl, TII->get(Mips::SC), Success) + BuildMI(BB, dl, TII->get(SC), Success) .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) .addReg(Success).addReg(Mips::ZERO).addMBB(loopMBB); @@ -1082,13 +1147,29 @@ MachineBasicBlock * MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size) const { - assert(Size == 4 && "Unsupported size for EmitAtomicCmpSwap."); + assert((Size == 4 || Size == 8) && "Unsupported size for EmitAtomicCmpSwap."); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &RegInfo = MF->getRegInfo(); - const TargetRegisterClass *RC = getRegClassFor(MVT::i32); + const TargetRegisterClass *RC = getRegClassFor(MVT::getIntegerVT(Size * 8)); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + unsigned LL, SC, ZERO, BNE, BEQ; + + if (Size == 4) { + LL = IsN64 ? Mips::LL_P8 : Mips::LL; + SC = IsN64 ? Mips::SC_P8 : Mips::SC; + ZERO = Mips::ZERO; + BNE = Mips::BNE; + BEQ = Mips::BEQ; + } + else { + LL = IsN64 ? Mips::LLD_P8 : Mips::LLD; + SC = IsN64 ? 
Mips::SCD_P8 : Mips::SCD; + ZERO = Mips::ZERO_64; + BNE = Mips::BNE64; + BEQ = Mips::BEQ64; + } unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); @@ -1127,18 +1208,18 @@ MipsTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, // ll dest, 0(ptr) // bne dest, oldval, exitMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(Mips::LL), Dest).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::BNE)) + BuildMI(BB, dl, TII->get(LL), Dest).addReg(Ptr).addImm(0); + BuildMI(BB, dl, TII->get(BNE)) .addReg(Dest).addReg(OldVal).addMBB(exitMBB); // loop2MBB: // sc success, newval, 0(ptr) // beq success, $0, loop1MBB BB = loop2MBB; - BuildMI(BB, dl, TII->get(Mips::SC), Success) + BuildMI(BB, dl, TII->get(SC), Success) .addReg(NewVal).addReg(Ptr).addImm(0); - BuildMI(BB, dl, TII->get(Mips::BEQ)) - .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); + BuildMI(BB, dl, TII->get(BEQ)) + .addReg(Success).addReg(ZERO).addMBB(loop1MBB); MI->eraseFromParent(); // The instruction is gone now. @@ -1157,6 +1238,8 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, const TargetRegisterClass *RC = getRegClassFor(MVT::i32); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); + unsigned LL = IsN64 ? Mips::LL_P8 : Mips::LL; + unsigned SC = IsN64 ? 
Mips::SC_P8 : Mips::SC; unsigned Dest = MI->getOperand(0).getReg(); unsigned Ptr = MI->getOperand(1).getReg(); @@ -1247,7 +1330,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, // and maskedoldval0,oldval,mask // bne maskedoldval0,shiftedcmpval,sinkMBB BB = loop1MBB; - BuildMI(BB, dl, TII->get(Mips::LL), OldVal).addReg(AlignedAddr).addImm(0); + BuildMI(BB, dl, TII->get(LL), OldVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::AND), MaskedOldVal0) .addReg(OldVal).addReg(Mask); BuildMI(BB, dl, TII->get(Mips::BNE)) @@ -1263,7 +1346,7 @@ MipsTargetLowering::EmitAtomicCmpSwapPartword(MachineInstr *MI, .addReg(OldVal).addReg(Mask2); BuildMI(BB, dl, TII->get(Mips::OR), StoreVal) .addReg(MaskedOldVal1).addReg(ShiftedNewVal); - BuildMI(BB, dl, TII->get(Mips::SC), Success) + BuildMI(BB, dl, TII->get(SC), Success) .addReg(StoreVal).addReg(AlignedAddr).addImm(0); BuildMI(BB, dl, TII->get(Mips::BEQ)) .addReg(Success).addReg(Mips::ZERO).addMBB(loop1MBB); diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index b4dc7aa..a58ce99 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -594,20 +594,41 @@ class ExtIns _funct, string instr_asm, dag outs, dag ins, } // Atomic instructions with 2 source operands (ATOMIC_SWAP & ATOMIC_LOAD_*). -class Atomic2Ops : - MipsPseudo<(outs CPURegs:$dst), (ins CPURegs:$ptr, CPURegs:$incr), +class Atomic2Ops : + MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$incr), !strconcat("atomic_", Opstr, "\t$dst, $ptr, $incr"), - [(set CPURegs:$dst, - (Op CPURegs:$ptr, CPURegs:$incr))]>; + [(set DRC:$dst, (Op PRC:$ptr, DRC:$incr))]>; + +multiclass Atomic2Ops32 { + def #NAME# : Atomic2Ops, Requires<[NotN64]>; + def _P8 : Atomic2Ops, Requires<[IsN64]>; +} // Atomic Compare & Swap. 
-class AtomicCmpSwap : - MipsPseudo<(outs CPURegs:$dst), - (ins CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap), - !strconcat("atomic_cmp_swap_", Width, - "\t$dst, $ptr, $cmp, $swap"), - [(set CPURegs:$dst, - (Op CPURegs:$ptr, CPURegs:$cmp, CPURegs:$swap))]>; +class AtomicCmpSwap : + MipsPseudo<(outs DRC:$dst), (ins PRC:$ptr, DRC:$cmp, DRC:$swap), + !strconcat("atomic_cmp_swap_", Width, "\t$dst, $ptr, $cmp, $swap"), + [(set DRC:$dst, (Op PRC:$ptr, DRC:$cmp, DRC:$swap))]>; + +multiclass AtomicCmpSwap32 { + def #NAME# : AtomicCmpSwap, Requires<[NotN64]>; + def _P8 : AtomicCmpSwap, Requires<[IsN64]>; +} + +class LLBase Opc, string opstring, RegisterClass RC, Operand Mem> : + FMem { + let mayLoad = 1; +} + +class SCBase Opc, string opstring, RegisterClass RC, Operand Mem> : + FMem { + let mayStore = 1; + let Constraints = "$rt = $dst"; +} //===----------------------------------------------------------------------===// // Pseudo instructions @@ -643,32 +664,32 @@ def CPLOAD : MipsPseudo<(outs), (ins CPURegs:$picreg), ".cpload\t$picreg", []>; def CPRESTORE : MipsPseudo<(outs), (ins i32imm:$loc), ".cprestore\t$loc", []>; let usesCustomInserter = 1 in { - def ATOMIC_LOAD_ADD_I8 : Atomic2Ops; - def ATOMIC_LOAD_ADD_I16 : Atomic2Ops; - def ATOMIC_LOAD_ADD_I32 : Atomic2Ops; - def ATOMIC_LOAD_SUB_I8 : Atomic2Ops; - def ATOMIC_LOAD_SUB_I16 : Atomic2Ops; - def ATOMIC_LOAD_SUB_I32 : Atomic2Ops; - def ATOMIC_LOAD_AND_I8 : Atomic2Ops; - def ATOMIC_LOAD_AND_I16 : Atomic2Ops; - def ATOMIC_LOAD_AND_I32 : Atomic2Ops; - def ATOMIC_LOAD_OR_I8 : Atomic2Ops; - def ATOMIC_LOAD_OR_I16 : Atomic2Ops; - def ATOMIC_LOAD_OR_I32 : Atomic2Ops; - def ATOMIC_LOAD_XOR_I8 : Atomic2Ops; - def ATOMIC_LOAD_XOR_I16 : Atomic2Ops; - def ATOMIC_LOAD_XOR_I32 : Atomic2Ops; - def ATOMIC_LOAD_NAND_I8 : Atomic2Ops; - def ATOMIC_LOAD_NAND_I16 : Atomic2Ops; - def ATOMIC_LOAD_NAND_I32 : Atomic2Ops; - - def ATOMIC_SWAP_I8 : Atomic2Ops; - def ATOMIC_SWAP_I16 : Atomic2Ops; - def ATOMIC_SWAP_I32 : Atomic2Ops; - - def 
ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap; - def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap; - def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap; + defm ATOMIC_LOAD_ADD_I8 : Atomic2Ops32; + defm ATOMIC_LOAD_ADD_I16 : Atomic2Ops32; + defm ATOMIC_LOAD_ADD_I32 : Atomic2Ops32; + defm ATOMIC_LOAD_SUB_I8 : Atomic2Ops32; + defm ATOMIC_LOAD_SUB_I16 : Atomic2Ops32; + defm ATOMIC_LOAD_SUB_I32 : Atomic2Ops32; + defm ATOMIC_LOAD_AND_I8 : Atomic2Ops32; + defm ATOMIC_LOAD_AND_I16 : Atomic2Ops32; + defm ATOMIC_LOAD_AND_I32 : Atomic2Ops32; + defm ATOMIC_LOAD_OR_I8 : Atomic2Ops32; + defm ATOMIC_LOAD_OR_I16 : Atomic2Ops32; + defm ATOMIC_LOAD_OR_I32 : Atomic2Ops32; + defm ATOMIC_LOAD_XOR_I8 : Atomic2Ops32; + defm ATOMIC_LOAD_XOR_I16 : Atomic2Ops32; + defm ATOMIC_LOAD_XOR_I32 : Atomic2Ops32; + defm ATOMIC_LOAD_NAND_I8 : Atomic2Ops32; + defm ATOMIC_LOAD_NAND_I16 : Atomic2Ops32; + defm ATOMIC_LOAD_NAND_I32 : Atomic2Ops32; + + defm ATOMIC_SWAP_I8 : Atomic2Ops32; + defm ATOMIC_SWAP_I16 : Atomic2Ops32; + defm ATOMIC_SWAP_I32 : Atomic2Ops32; + + defm ATOMIC_CMP_SWAP_I8 : AtomicCmpSwap32; + defm ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap32; + defm ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap32; } //===----------------------------------------------------------------------===// @@ -745,12 +766,10 @@ def SYNC : MipsInst<(outs), (ins i32imm:$stype), "sync $stype", } /// Load-linked, Store-conditional -let mayLoad = 1 in - def LL : FMem<0x30, (outs CPURegs:$rt), (ins mem:$addr), - "ll\t$rt, $addr", [], IILoad>; -let mayStore = 1, Constraints = "$rt = $dst" in - def SC : FMem<0x38, (outs CPURegs:$dst), (ins CPURegs:$rt, mem:$addr), - "sc\t$rt, $addr", [], IIStore>; +def LL : LLBase<0x30, "ll", CPURegs, mem>, Requires<[NotN64]>; +def LL_P8 : LLBase<0x30, "ll", CPURegs, mem64>, Requires<[IsN64]>; +def SC : SCBase<0x38, "sc", CPURegs, mem>, Requires<[NotN64]>; +def SC_P8 : SCBase<0x38, "sc", CPURegs, mem64>, Requires<[IsN64]>; /// Jump and Branch Instructions def J : JumpFJ<0x02, "j">; -- cgit v1.1 From 
e184fec550ea249d00e058cfba34ec6913951895 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 11 Nov 2011 04:18:21 +0000 Subject: Do not try to detect DAG combine patterns for integer multiply-add/sub if value type is not i32. MIPS does not have 64-bit integer multiply-add/sub instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144373 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index d916edd..2b0e90b 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -395,7 +395,8 @@ static SDValue PerformADDECombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->hasMips32() && SelectMadd(N, &DAG)) + if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && + SelectMadd(N, &DAG)) return SDValue(N, 0); return SDValue(); @@ -407,7 +408,8 @@ static SDValue PerformSUBECombine(SDNode *N, SelectionDAG& DAG, if (DCI.isBeforeLegalize()) return SDValue(); - if (Subtarget->hasMips32() && SelectMsub(N, &DAG)) + if (Subtarget->hasMips32() && N->getValueType(0) == MVT::i32 && + SelectMsub(N, &DAG)) return SDValue(N, 0); return SDValue(); -- cgit v1.1 From a07d3fc693e2a3ac7c9ed2a59f62b21ab33d9fd4 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 11 Nov 2011 06:20:39 +0000 Subject: Add support for using immediates with select instructions. 
rdar://10412592 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144376 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 48 +++++++++++++++++++++++++++++++++++------- 1 file changed, 40 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index c2f0a71..de12db7 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -1491,17 +1491,49 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { if (CondReg == 0) return false; unsigned Op1Reg = getRegForValue(I->getOperand(1)); if (Op1Reg == 0) return false; - unsigned Op2Reg = getRegForValue(I->getOperand(2)); - if (Op2Reg == 0) return false; - unsigned CmpOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; + // Check to see if we can use an immediate in the conditional move. + int Imm = 0; + bool UseImm = false; + bool isNegativeImm = false; + if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(I->getOperand(2))) { + assert (VT == MVT::i32 && "Expecting an i32."); + Imm = (int)ConstInt->getValue().getZExtValue(); + if (Imm < 0) { + isNegativeImm = true; + Imm = ~Imm; + } + UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : + (ARM_AM::getSOImmVal(Imm) != -1); + } + + unsigned Op2Reg; + if (!UseImm) { + Op2Reg = getRegForValue(I->getOperand(2)); + if (Op2Reg == 0) return false; + } + + unsigned CmpOpc = isThumb2 ? ARM::t2CMPri : ARM::CMPri; AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) - .addReg(CondReg).addImm(1)); + .addReg(CondReg).addImm(0)); + + unsigned MovCCOpc; + if (!UseImm) { + MovCCOpc = isThumb2 ? ARM::t2MOVCCr : ARM::MOVCCr; + } else { + if (!isNegativeImm) { + MovCCOpc = isThumb2 ? ARM::t2MOVCCi : ARM::MOVCCi; + } else { + MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi; + } + } unsigned ResultReg = createResultReg(RC); - unsigned MovCCOpc = isThumb2 ?
ARM::t2MOVCCr : ARM::MOVCCr; - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) - .addReg(Op1Reg).addReg(Op2Reg) - .addImm(ARMCC::EQ).addReg(ARM::CPSR); + if (!UseImm) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) + .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR); + else + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) + .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR); UpdateValueMap(I, ResultReg); return true; } -- cgit v1.1 From 1c47de87c74c3834c5cfab8a7e0fa67b1805f927 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 11 Nov 2011 06:27:41 +0000 Subject: Rename variables to avoid confusion. No functionality change intended. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144377 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index de12db7..da2ca3e 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -563,9 +563,9 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, EVT VT) { // Use MVN to emit negative constants. if (VT == MVT::i32 && Subtarget->hasV6T2Ops() && CI->isNegative()) { unsigned Imm = (unsigned)~(CI->getSExtValue()); - bool EncodeImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : + bool UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : (ARM_AM::getSOImmVal(Imm) != -1); - if (EncodeImm) { + if (UseImm) { unsigned Opc = isThumb2 ?
ARM::t2MVNi : ARM::MVNi; unsigned ImmReg = createResultReg(TLI.getRegClassFor(MVT::i32)); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -1232,25 +1232,25 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // Check to see if the 2nd operand is a constant that we can encode directly // in the compare. - int EncodedImm = 0; - bool EncodeImm = false; + int Imm = 0; + bool UseImm = false; bool isNegativeImm = false; if (const ConstantInt *ConstInt = dyn_cast<ConstantInt>(Src2Value)) { if (SrcVT == MVT::i32 || SrcVT == MVT::i16 || SrcVT == MVT::i8 || SrcVT == MVT::i1) { const APInt &CIVal = ConstInt->getValue(); - EncodedImm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); - if (EncodedImm < 0) { + Imm = (isZExt) ? (int)CIVal.getZExtValue() : (int)CIVal.getSExtValue(); + if (Imm < 0) { isNegativeImm = true; - EncodedImm = -EncodedImm; + Imm = -Imm; } - EncodeImm = isThumb2 ? (ARM_AM::getT2SOImmVal(EncodedImm) != -1) : - (ARM_AM::getSOImmVal(EncodedImm) != -1); + UseImm = isThumb2 ? (ARM_AM::getT2SOImmVal(Imm) != -1) : + (ARM_AM::getSOImmVal(Imm) != -1); } } else if (const ConstantFP *ConstFP = dyn_cast<ConstantFP>(Src2Value)) { if (SrcVT == MVT::f32 || SrcVT == MVT::f64) if (ConstFP->isZero() && !ConstFP->isNegative()) - EncodeImm = true; + UseImm = true; } unsigned CmpOpc; @@ -1261,11 +1261,11 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // TODO: Verify compares. case MVT::f32: isICmp = false; - CmpOpc = EncodeImm ? ARM::VCMPEZS : ARM::VCMPES; + CmpOpc = UseImm ? ARM::VCMPEZS : ARM::VCMPES; break; case MVT::f64: isICmp = false; - CmpOpc = EncodeImm ? ARM::VCMPEZD : ARM::VCMPED; + CmpOpc = UseImm ? ARM::VCMPEZD : ARM::VCMPED; break; case MVT::i1: case MVT::i8: @@ -1274,12 +1274,12 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // Intentional fall-through.
case MVT::i32: if (isThumb2) { - if (!EncodeImm) + if (!UseImm) CmpOpc = ARM::t2CMPrr; else CmpOpc = isNegativeImm ? ARM::t2CMNzri : ARM::t2CMPri; } else { - if (!EncodeImm) + if (!UseImm) CmpOpc = ARM::CMPrr; else CmpOpc = isNegativeImm ? ARM::CMNzri : ARM::CMPri; @@ -1291,7 +1291,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, if (SrcReg1 == 0) return false; unsigned SrcReg2; - if (!EncodeImm) { + if (!UseImm) { SrcReg2 = getRegForValue(Src2Value); if (SrcReg2 == 0) return false; } @@ -1302,14 +1302,14 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, ResultReg = ARMEmitIntExt(SrcVT, SrcReg1, MVT::i32, isZExt); if (ResultReg == 0) return false; SrcReg1 = ResultReg; - if (!EncodeImm) { + if (!UseImm) { ResultReg = ARMEmitIntExt(SrcVT, SrcReg2, MVT::i32, isZExt); if (ResultReg == 0) return false; SrcReg2 = ResultReg; } } - if (!EncodeImm) { + if (!UseImm) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) .addReg(SrcReg1).addReg(SrcReg2)); @@ -1320,7 +1320,7 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0. if (isICmp) - MIB.addImm(EncodedImm); + MIB.addImm(Imm); AddOptionalDefs(MIB); } -- cgit v1.1 From 46154eb6fd7d0dc908eda5dd52fe16d893e8e008 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 11 Nov 2011 07:39:23 +0000 Subject: Add lowering for AVX2 shift instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144380 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 185 ++++++++++++++++++++++++++----------- lib/Target/X86/X86InstrSSE.td | 99 ++++++++------------ 2 files changed, 167 insertions(+), 117 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 93f7de8..e77b1df 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1050,21 +1050,9 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v4i64, Custom); setOperationAction(ISD::MUL, MVT::v8i32, Legal); setOperationAction(ISD::MUL, MVT::v16i16, Legal); + // Don't lower v32i8 because there is no 128-bit byte mul setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); - - setOperationAction(ISD::SHL, MVT::v4i32, Legal); - setOperationAction(ISD::SHL, MVT::v2i64, Legal); - setOperationAction(ISD::SRL, MVT::v4i32, Legal); - setOperationAction(ISD::SRL, MVT::v2i64, Legal); - setOperationAction(ISD::SRA, MVT::v4i32, Legal); - - setOperationAction(ISD::SHL, MVT::v8i32, Legal); - setOperationAction(ISD::SHL, MVT::v4i64, Legal); - setOperationAction(ISD::SRL, MVT::v8i32, Legal); - setOperationAction(ISD::SRL, MVT::v4i64, Legal); - setOperationAction(ISD::SRA, MVT::v8i32, Legal); - // Don't lower v32i8 because there is no 128-bit byte mul } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); setOperationAction(ISD::ADD, MVT::v8i32, Custom); @@ -10130,47 +10118,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { if (!Subtarget->hasXMMInt()) return SDValue(); - // Decompose 256-bit shifts into smaller 128-bit shifts. 
- if (VT.getSizeInBits() == 256) { - int NumElems = VT.getVectorNumElements(); - MVT EltVT = VT.getVectorElementType().getSimpleVT(); - EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); - - // Extract the two vectors - SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl); - SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32), - DAG, dl); - - // Recreate the shift amount vectors - SDValue Amt1, Amt2; - if (Amt.getOpcode() == ISD::BUILD_VECTOR) { - // Constant shift amount - SmallVector Amt1Csts; - SmallVector Amt2Csts; - for (int i = 0; i < NumElems/2; ++i) - Amt1Csts.push_back(Amt->getOperand(i)); - for (int i = NumElems/2; i < NumElems; ++i) - Amt2Csts.push_back(Amt->getOperand(i)); - - Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, - &Amt1Csts[0], NumElems/2); - Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, - &Amt2Csts[0], NumElems/2); - } else { - // Variable shift amount - Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl); - Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32), - DAG, dl); - } - - // Issue new vector shifts for the smaller types - V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1); - V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2); - - // Concatenate the result back - return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2); - } - // Optimize shl/srl/sra with constant shift amount. 
if (isSplatVector(Amt.getNode())) { SDValue SclrAmt = Amt->getOperand(0); @@ -10259,9 +10206,97 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); return Res; } + + if (Subtarget->hasAVX2()) { + if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v4i64 && Op.getOpcode() == ISD::SRL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v8i32 && Op.getOpcode() == ISD::SRA) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + + if (VT == MVT::v16i16 && Op.getOpcode() == ISD::SRA) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32), + R, DAG.getConstant(ShiftAmt, MVT::i32)); + } } } + // AVX2 variable shifts + if 
(Subtarget->hasAVX2()) { + if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psllv_d, MVT::i32), + R, Amt); + if (VT == MVT::v8i32 && Op->getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psllv_d_256, MVT::i32), + R, Amt); + if (VT == MVT::v2i64 && Op->getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psllv_q, MVT::i32), + R, Amt); + if (VT == MVT::v4i64 && Op->getOpcode() == ISD::SHL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psllv_q_256, MVT::i32), + R, Amt); + + if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SRL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrlv_d, MVT::i32), + R, Amt); + if (VT == MVT::v8i32 && Op->getOpcode() == ISD::SRL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrlv_d_256, MVT::i32), + R, Amt); + if (VT == MVT::v2i64 && Op->getOpcode() == ISD::SRL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrlv_q, MVT::i32), + R, Amt); + if (VT == MVT::v4i64 && Op->getOpcode() == ISD::SRL) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrlv_q_256, MVT::i32), + R, Amt); + + if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SRA) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrav_d, MVT::i32), + R, Amt); + if (VT == MVT::v8i32 && Op->getOpcode() == ISD::SRA) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrav_d_256, MVT::i32), + R, Amt); + } + // Lower SHL with variable shift amount. 
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, @@ -10328,6 +10363,48 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { R, DAG.getNode(ISD::ADD, dl, VT, R, R)); return R; } + + // Decompose 256-bit shifts into smaller 128-bit shifts. + if (VT.getSizeInBits() == 256) { + int NumElems = VT.getVectorNumElements(); + MVT EltVT = VT.getVectorElementType().getSimpleVT(); + EVT NewVT = MVT::getVectorVT(EltVT, NumElems/2); + + // Extract the two vectors + SDValue V1 = Extract128BitVector(R, DAG.getConstant(0, MVT::i32), DAG, dl); + SDValue V2 = Extract128BitVector(R, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + + // Recreate the shift amount vectors + SDValue Amt1, Amt2; + if (Amt.getOpcode() == ISD::BUILD_VECTOR) { + // Constant shift amount + SmallVector Amt1Csts; + SmallVector Amt2Csts; + for (int i = 0; i < NumElems/2; ++i) + Amt1Csts.push_back(Amt->getOperand(i)); + for (int i = NumElems/2; i < NumElems; ++i) + Amt2Csts.push_back(Amt->getOperand(i)); + + Amt1 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, + &Amt1Csts[0], NumElems/2); + Amt2 = DAG.getNode(ISD::BUILD_VECTOR, dl, NewVT, + &Amt2Csts[0], NumElems/2); + } else { + // Variable shift amount + Amt1 = Extract128BitVector(Amt, DAG.getConstant(0, MVT::i32), DAG, dl); + Amt2 = Extract128BitVector(Amt, DAG.getConstant(NumElems/2, MVT::i32), + DAG, dl); + } + + // Issue new vector shifts for the smaller types + V1 = DAG.getNode(Op.getOpcode(), dl, NewVT, V1, Amt1); + V2 = DAG.getNode(Op.getOpcode(), dl, NewVT, V2, Amt2); + + // Concatenate the result back + return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, V1, V2); + } + return SDValue(); } diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 91c84dd..10f527c 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7655,7 +7655,6 @@ defm VPMASKMOVQ : avx2_pmovmask<"vpmaskmovq", // Variable Bit Shifts // multiclass 
avx2_var_shift opc, string OpcodeStr, - PatFrag pf128, PatFrag pf256, Intrinsic Int128, Intrinsic Int256> { def rr : AVX28I opc, string OpcodeStr, def rm : AVX28I, + [(set VR128:$dst, + (Int128 VR128:$src1, (bitconvert (memopv2i64 addr:$src2))))]>, VEX_4V; def Yrr : AVX28I opc, string OpcodeStr, def Yrm : AVX28I, + [(set VR256:$dst, + (Int256 VR256:$src1, (bitconvert (memopv4i64 addr:$src2))))]>, VEX_4V; } -defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", memopv4i32, memopv8i32, - int_x86_avx2_psllv_d, int_x86_avx2_psllv_d_256>; -defm VPSLLVQ : avx2_var_shift<0x47, "vpsllvq", memopv2i64, memopv4i64, - int_x86_avx2_psllv_q, int_x86_avx2_psllv_q_256>, - VEX_W; -defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", memopv4i32, memopv8i32, - int_x86_avx2_psrlv_d, int_x86_avx2_psrlv_d_256>; -defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", memopv2i64, memopv4i64, - int_x86_avx2_psrlv_q, int_x86_avx2_psrlv_q_256>, - VEX_W; -defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", memopv4i32, memopv8i32, - int_x86_avx2_psrav_d, int_x86_avx2_psrav_d_256>; - - -let Predicates = [HasAVX2] in { - - def : Pat<(v4i32 (shl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), - (VPSLLVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), - (VPSLLVQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (srl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), - (VPSRLVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), - (VPSRLVQrr VR128:$src1, VR128:$src2)>; - def : Pat<(v4i32 (sra (v4i32 VR128:$src1), (v4i32 VR128:$src2))), - (VPSRAVDrr VR128:$src1, VR128:$src2)>; - def : Pat<(v8i32 (shl (v8i32 VR256:$src1), (v8i32 VR256:$src2))), - (VPSLLVDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (v4i64 VR256:$src2))), - (VPSLLVQYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (srl (v8i32 VR256:$src1), (v8i32 VR256:$src2))), - (VPSRLVDYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v4i64 (srl (v4i64 
VR256:$src1), (v4i64 VR256:$src2))), - (VPSRLVQYrr VR256:$src1, VR256:$src2)>; - def : Pat<(v8i32 (sra (v8i32 VR256:$src1), (v8i32 VR256:$src2))), - (VPSRAVDYrr VR256:$src1, VR256:$src2)>; - - def : Pat<(v4i32 (shl (v4i32 VR128:$src1),(loadv4i32 addr:$src2))), - (VPSLLVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (shl (v4i32 VR128:$src1),(loadv2i64 addr:$src2))), - (VPSLLVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (shl (v2i64 VR128:$src1),(loadv2i64 addr:$src2))), - (VPSLLVQrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (srl (v4i32 VR128:$src1),(loadv4i32 addr:$src2))), - (VPSRLVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v2i64 (srl (v2i64 VR128:$src1),(loadv2i64 addr:$src2))), - (VPSRLVQrm VR128:$src1, addr:$src2)>; - def : Pat<(v4i32 (sra (v4i32 VR128:$src1),(loadv4i32 addr:$src2))), - (VPSRAVDrm VR128:$src1, addr:$src2)>; - def : Pat<(v8i32 (shl (v8i32 VR256:$src1),(loadv8i32 addr:$src2))), - (VPSLLVDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (shl (v4i64 VR256:$src1),(loadv4i64 addr:$src2))), - (VPSLLVQYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (srl (v8i32 VR256:$src1),(loadv8i32 addr:$src2))), - (VPSRLVDYrm VR256:$src1, addr:$src2)>; - def : Pat<(v4i64 (srl (v4i64 VR256:$src1),(loadv4i64 addr:$src2))), - (VPSRLVQYrm VR256:$src1, addr:$src2)>; - def : Pat<(v8i32 (sra (v8i32 VR256:$src1),(loadv8i32 addr:$src2))), - (VPSRAVDYrm VR256:$src1, addr:$src2)>; +multiclass avx2_var_shift_i64 opc, string OpcodeStr, + Intrinsic Int128, Intrinsic Int256> { + def rr : AVX28I, VEX_4V; + def rm : AVX28I, + VEX_4V; + def Yrr : AVX28I, VEX_4V; + def Yrm : AVX28I, + VEX_4V; } - +defm VPSLLVD : avx2_var_shift<0x47, "vpsllvd", int_x86_avx2_psllv_d, + int_x86_avx2_psllv_d_256>; +defm VPSLLVQ : avx2_var_shift_i64<0x47, "vpsllvq", int_x86_avx2_psllv_q, + int_x86_avx2_psllv_q_256>, VEX_W; +defm VPSRLVD : avx2_var_shift<0x45, "vpsrlvd", int_x86_avx2_psrlv_d, + int_x86_avx2_psrlv_d_256>; +defm VPSRLVQ : avx2_var_shift_i64<0x45, "vpsrlvq", int_x86_avx2_psrlv_q, 
+ int_x86_avx2_psrlv_q_256>, VEX_W; +defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", int_x86_avx2_psrav_d, + int_x86_avx2_psrav_d_256>; -- cgit v1.1 From 7fb12ef5a62ae5eef35374d00f62efe9fbdc0d2e Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 11 Nov 2011 12:39:35 +0000 Subject: Remove the unnecessary dependency on libMBlazeCodeGen from libMBlazeDisassembler. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144383 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/MBlaze/Disassembler/LLVMBuild.txt | 8 +------- lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp | 3 +-- 2 files changed, 2 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt index b2b3a3a..c5c4f80 100644 --- a/lib/Target/MBlaze/Disassembler/LLVMBuild.txt +++ b/lib/Target/MBlaze/Disassembler/LLVMBuild.txt @@ -19,12 +19,6 @@ type = Library name = MBlazeDisassembler parent = MBlaze -; Strictly speaking, we probably shouldn't have a dependency on -; MBlazeCodeGen. However, given the current factoring we end up including -; MBlazeGenRegisterInfo.inc in the disassembler. Those generated headers end up -; referencing external variables through GPRRegClass, SPRRegClass, and -; CRCRegClass. These aren't actually used, but some compilers may generate -; references to them. 
-required_libraries = MBlazeCodeGen MBlazeDesc MBlazeInfo MC Support +required_libraries = MBlazeDesc MBlazeInfo MC Support add_to_library_groups = MBlaze diff --git a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp index c3a3833..3087317 100644 --- a/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp +++ b/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp @@ -13,13 +13,12 @@ //===----------------------------------------------------------------------===// #include "MBlaze.h" -#include "MBlazeInstrInfo.h" #include "MBlazeDisassembler.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCDisassembler.h" -#include "llvm/MC/MCDisassembler.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" -- cgit v1.1 From eea66f63d98771a2772f5173debf954a81f3f782 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 11 Nov 2011 12:39:41 +0000 Subject: Remove the unnecessary dependency on libARMCodeGen from libARMDisassembler. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144384 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 6927d2d..840f50b 100644 --- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -10,13 +10,13 @@ #define DEBUG_TYPE "arm-disassembler" #include "ARM.h" -#include "ARMRegisterInfo.h" #include "ARMSubtarget.h" #include "MCTargetDesc/ARMAddressingModes.h" #include "MCTargetDesc/ARMMCExpr.h" #include "MCTargetDesc/ARMBaseInfo.h" #include "llvm/MC/EDInstInfo.h" #include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDisassembler.h" -- cgit v1.1 From 178051fbae2b224ecc5aa20e39b7cee3ab38e760 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 11 Nov 2011 13:20:40 +0000 Subject: Clients are responsible for initializing the targets, remove it from the disassembler API. This will break users of the LLVMCreateDisasm API (not that I know of any). They have to call the LLVMInitializeAll* functions from llvm-c/Target.h themselves now. edis' C API in all its horribleness should be unaffected. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144385 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDisassembler/Disassembler.cpp | 7 ------- lib/MC/MCDisassembler/EDDisassembler.cpp | 16 ---------------- lib/MC/MCDisassembler/EDDisassembler.h | 5 ----- lib/MC/MCDisassembler/LLVMBuild.txt | 5 +---- 4 files changed, 1 insertion(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/MC/MCDisassembler/Disassembler.cpp b/lib/MC/MCDisassembler/Disassembler.cpp index 16e66dc..f156760 100644 --- a/lib/MC/MCDisassembler/Disassembler.cpp +++ b/lib/MC/MCDisassembler/Disassembler.cpp @@ -18,7 +18,6 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" namespace llvm { class Target; @@ -35,12 +34,6 @@ using namespace llvm; LLVMDisasmContextRef LLVMCreateDisasm(const char *TripleName, void *DisInfo, int TagType, LLVMOpInfoCallback GetOpInfo, LLVMSymbolLookupCallback SymbolLookUp) { - // Initialize targets and assembly printers/parsers. - llvm::InitializeAllTargetInfos(); - llvm::InitializeAllTargetMCs(); - llvm::InitializeAllAsmParsers(); - llvm::InitializeAllDisassemblers(); - // Get the target. 
std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); diff --git a/lib/MC/MCDisassembler/EDDisassembler.cpp b/lib/MC/MCDisassembler/EDDisassembler.cpp index 5633cb1..3540334 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.cpp +++ b/lib/MC/MCDisassembler/EDDisassembler.cpp @@ -34,10 +34,8 @@ #include "llvm/Support/MemoryObject.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" -#include "llvm/Support/TargetSelect.h" using namespace llvm; -bool EDDisassembler::sInitialized = false; EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers; struct TripleMap { @@ -98,20 +96,6 @@ static int getLLVMSyntaxVariant(Triple::ArchType arch, } } -void EDDisassembler::initialize() { - if (sInitialized) - return; - - sInitialized = true; - - InitializeAllTargetInfos(); - InitializeAllTargetMCs(); - InitializeAllAsmParsers(); - InitializeAllDisassemblers(); -} - -#undef BRINGUP_TARGET - EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch, AssemblySyntax syntax) { CPUKey key; diff --git a/lib/MC/MCDisassembler/EDDisassembler.h b/lib/MC/MCDisassembler/EDDisassembler.h index 38c2203..97c2d1f 100644 --- a/lib/MC/MCDisassembler/EDDisassembler.h +++ b/lib/MC/MCDisassembler/EDDisassembler.h @@ -94,8 +94,6 @@ struct EDDisassembler { typedef std::map DisassemblerMap_t; - /// True if the disassembler registry has been initialized; false if not - static bool sInitialized; /// A map from disassembler specifications to disassemblers. Populated /// lazily. 
static DisassemblerMap_t sDisassemblers; @@ -116,9 +114,6 @@ struct EDDisassembler { static EDDisassembler *getDisassembler(llvm::StringRef str, AssemblySyntax syntax); - /// initialize - Initializes the disassembler registry and the LLVM backend - static void initialize(); - //////////////////////// // Per-object members // //////////////////////// diff --git a/lib/MC/MCDisassembler/LLVMBuild.txt b/lib/MC/MCDisassembler/LLVMBuild.txt index 7b8c7ff..d73c6ad 100644 --- a/lib/MC/MCDisassembler/LLVMBuild.txt +++ b/lib/MC/MCDisassembler/LLVMBuild.txt @@ -19,7 +19,4 @@ type = Library name = MCDisassembler parent = MC -; FIXME: This is really horrible, MCDisassembler should not in and of its own -; accord depending on every target. -required_libraries = all-targets MC MCParser Support - +required_libraries = MC MCParser Support -- cgit v1.1 From b812ee6d7841a617df15aa5d03c8994f223af860 Mon Sep 17 00:00:00 2001 From: Dan Bailey Date: Fri, 11 Nov 2011 14:45:06 +0000 Subject: add rules in tablegen for PTX COPY_ADDRESS of frameindex git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144387 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PTX/PTXInstrInfo.td | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/Target/PTX/PTXInstrInfo.td b/lib/Target/PTX/PTXInstrInfo.td index fbddac5..bcd5bcf 100644 --- a/lib/Target/PTX/PTXInstrInfo.td +++ b/lib/Target/PTX/PTXInstrInfo.td @@ -680,6 +680,12 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in { def MOVaddr64 : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "mov.u64\t$d, $a", [(set RegI64:$d, (PTXcopyaddress tglobaladdr:$a))]>; + def MOVframe32 + : InstPTX<(outs RegI32:$d), (ins i32imm:$a), "cvta.local.u32\t$d, $a", + [(set RegI32:$d, (PTXcopyaddress frameindex:$a))]>; + def MOVframe64 + : InstPTX<(outs RegI64:$d), (ins i64imm:$a), "cvta.local.u64\t$d, $a", + [(set RegI64:$d, (PTXcopyaddress frameindex:$a))]>; } // PTX cvt instructions -- cgit v1.1 From 96e6458903ab0799542365cac98653c207984162 Mon 
Sep 17 00:00:00 2001 From: Dan Bailey Date: Fri, 11 Nov 2011 14:45:12 +0000 Subject: allow non-device function calls in PTX when natively handling device-side printf git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144388 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp | 18 ++++- lib/Target/PTX/PTXAsmPrinter.cpp | 30 ++++++++ lib/Target/PTX/PTXAsmPrinter.h | 2 +- lib/Target/PTX/PTXISelLowering.cpp | 102 +++++++++++++++++++++----- 4 files changed, 129 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp index aabb404..2f6c92d 100644 --- a/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp +++ b/lib/Target/PTX/InstPrinter/PTXInstPrinter.cpp @@ -96,9 +96,23 @@ void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) { O << "), "; } - O << *(MI->getOperand(Index++).getExpr()) << ", ("; - + const MCExpr* Expr = MI->getOperand(Index++).getExpr(); unsigned NumArgs = MI->getOperand(Index++).getImm(); + + // if the function call is to printf or puts, change to vprintf + if (const MCSymbolRefExpr *SymRefExpr = dyn_cast(Expr)) { + const MCSymbol &Sym = SymRefExpr->getSymbol(); + if (Sym.getName() == "printf" || Sym.getName() == "puts") { + O << "vprintf"; + } else { + O << Sym.getName(); + } + } else { + O << *Expr; + } + + O << ", ("; + if (NumArgs > 0) { printOperand(MI, Index++, O); for (unsigned i = 1; i < NumArgs; ++i) { diff --git a/lib/Target/PTX/PTXAsmPrinter.cpp b/lib/Target/PTX/PTXAsmPrinter.cpp index 45a6afc..bdf238b 100644 --- a/lib/Target/PTX/PTXAsmPrinter.cpp +++ b/lib/Target/PTX/PTXAsmPrinter.cpp @@ -165,6 +165,11 @@ void PTXAsmPrinter::EmitStartOfAsmFile(Module &M) OutStreamer.AddBlankLine(); + // declare external functions + for (Module::const_iterator i = M.begin(), e = M.end(); + i != e; ++i) + EmitFunctionDeclaration(i); + // declare global variables for (Module::const_global_iterator i = 
M.global_begin(), e = M.global_end(); i != e; ++i) @@ -454,6 +459,31 @@ void PTXAsmPrinter::EmitFunctionEntryLabel() { OutStreamer.EmitRawText(os.str()); } +void PTXAsmPrinter::EmitFunctionDeclaration(const Function* func) +{ + const PTXSubtarget& ST = TM.getSubtarget(); + + std::string decl = ""; + + // hard-coded emission of extern vprintf function + + if (func->getName() == "printf" || func->getName() == "puts") { + decl += ".extern .func (.param .b32 __param_1) vprintf (.param .b"; + if (ST.is64Bit()) + decl += "64"; + else + decl += "32"; + decl += " __param_2, .param .b"; + if (ST.is64Bit()) + decl += "64"; + else + decl += "32"; + decl += " __param_3)\n"; + } + + OutStreamer.EmitRawText(Twine(decl)); +} + unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName, StringRef DirName) { // If FE did not provide a file name, then assume stdin. diff --git a/lib/Target/PTX/PTXAsmPrinter.h b/lib/Target/PTX/PTXAsmPrinter.h index 538c080..d5ea4db 100644 --- a/lib/Target/PTX/PTXAsmPrinter.h +++ b/lib/Target/PTX/PTXAsmPrinter.h @@ -47,7 +47,7 @@ public: private: void EmitVariableDeclaration(const GlobalVariable *gv); - void EmitFunctionDeclaration(); + void EmitFunctionDeclaration(const Function* func); StringMap SourceIdMap; }; // class PTXAsmPrinter diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index 3307d91..7f55871 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" @@ -352,40 +353,101 @@ PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee, SmallVectorImpl &InVals) const { MachineFunction& MF = DAG.getMachineFunction(); - PTXMachineFunctionInfo *MFI = 
MF.getInfo(); - PTXParamManager &PM = MFI->getParamManager(); - + PTXMachineFunctionInfo *PTXMFI = MF.getInfo(); + PTXParamManager &PM = PTXMFI->getParamManager(); + MachineFrameInfo *MFI = MF.getFrameInfo(); + assert(getTargetMachine().getSubtarget().callsAreHandled() && "Calls are not handled for the target device"); + // Identify the callee function + const GlobalValue *GV = cast(Callee)->getGlobal(); + const Function *function = cast(GV); + + // allow non-device calls only for printf + bool isPrintf = function->getName() == "printf" || function->getName() == "puts"; + + assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) && + "PTX function calls must be to PTX device functions"); + + unsigned outSize = isPrintf ? 2 : Outs.size(); + std::vector Ops; // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs] - Ops.resize(Outs.size() + Ins.size() + 4); + Ops.resize(outSize + Ins.size() + 4); Ops[0] = Chain; // Identify the callee function - const GlobalValue *GV = cast(Callee)->getGlobal(); - assert(cast(GV)->getCallingConv() == CallingConv::PTX_Device && - "PTX function calls must be to PTX device functions"); Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy()); Ops[Ins.size()+2] = Callee; - // Generate STORE_PARAM nodes for each function argument. In PTX, function - // arguments are explicitly stored into .param variables and passed as - // arguments. There is no register/stack-based calling convention in PTX. 
- Ops[Ins.size()+3] = DAG.getTargetConstant(OutVals.size(), MVT::i32); - for (unsigned i = 0; i != OutVals.size(); ++i) { - unsigned Size = OutVals[i].getValueType().getSizeInBits(); - unsigned Param = PM.addLocalParam(Size); - const std::string &ParamName = PM.getParamName(Param); - SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), - MVT::Other); + // #Outs + Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32); + + if (isPrintf) { + // first argument is the address of the global string variable in memory + unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits()); + SDValue ParamValue0 = DAG.getTargetExternalSymbol(PM.getParamName(Param0).c_str(), + MVT::Other); Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, - ParamValue, OutVals[i]); - Ops[i+Ins.size()+4] = ParamValue; - } + ParamValue0, OutVals[0]); + Ops[Ins.size()+4] = ParamValue0; + + // alignment is the maximum size of all the arguments + unsigned alignment = 0; + for (unsigned i = 1; i < OutVals.size(); ++i) { + alignment = std::max(alignment, + OutVals[i].getValueType().getSizeInBits()); + } + + // size is the alignment multiplied by the number of arguments + unsigned size = alignment * (OutVals.size() - 1); + + // second argument is the address of the stack object (unless no arguments) + unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits()); + SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(), + MVT::Other); + Ops[Ins.size()+5] = ParamValue1; + + if (size > 0) + { + // create a local stack object to store the arguments + unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false); + SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy()); + + // store each of the arguments to the stack in turn + for (unsigned int i = 1; i != OutVals.size(); i++) { + SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex, DAG.getTargetConstant((i - 1) * 8, getPointerTy())); + 
Chain = DAG.getStore(Chain, dl, OutVals[i], FrameAddr, + MachinePointerInfo(), + false, false, 0); + } + // copy the address of the local frame index to get the address in non-local space + SDValue genericAddr = DAG.getNode(PTXISD::COPY_ADDRESS, dl, getPointerTy(), FrameIndex); + + // store this address in the second argument + Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue1, genericAddr); + } + } + else + { + // Generate STORE_PARAM nodes for each function argument. In PTX, function + // arguments are explicitly stored into .param variables and passed as + // arguments. There is no register/stack-based calling convention in PTX. + for (unsigned i = 0; i != OutVals.size(); ++i) { + unsigned Size = OutVals[i].getValueType().getSizeInBits(); + unsigned Param = PM.addLocalParam(Size); + const std::string &ParamName = PM.getParamName(Param); + SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(), + MVT::Other); + Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, + ParamValue, OutVals[i]); + Ops[i+Ins.size()+4] = ParamValue; + } + } + std::vector InParams; // Generate list of .param variables to hold the return value(s). -- cgit v1.1 From 900b16b99d39de70e74b343ffa0d57154ff30dd0 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Fri, 11 Nov 2011 17:34:14 +0000 Subject: Remove FIXME comment that should have been removed with r144351. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144392 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index b68fcaf..c274b9d 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -1011,7 +1011,6 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, // Emit the compact unwind info if available. // FIXME: This emits both the compact unwind and the old CIE/FDE // information. Only one of those is needed. - // FIXME: Disable. 
This is causing failures in the test suite. if (IsEH && MOFI->getCompactUnwindSection()) for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); -- cgit v1.1 From 7b8c2f8587763e0a8ce48f9b7b67287930129c8d Mon Sep 17 00:00:00 2001 From: Nicolas Geoffray Date: Fri, 11 Nov 2011 18:32:52 +0000 Subject: Add a custom safepoint method, in order for language implementers to decide which machine instruction gets to be a safepoint. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144399 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/GCStrategy.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 766c6ee..9349797 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -97,6 +97,7 @@ GCStrategy::GCStrategy() : CustomReadBarriers(false), CustomWriteBarriers(false), CustomRoots(false), + CustomSafePoints(false), InitRoots(true), UsesMetadata(false) {} @@ -116,6 +117,14 @@ bool GCStrategy::performCustomLowering(Function &F) { return 0; } + +bool GCStrategy::findCustomSafePoints(GCFunctionInfo& FI, MachineFunction &F) { + dbgs() << "gc " << getName() << " must override findCustomSafePoints.\n"; + llvm_unreachable(0); + return 0; +} + + GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) { GCFunctionInfo *FI = new GCFunctionInfo(F, *this); Functions.push_back(FI); @@ -405,9 +414,13 @@ bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) { // Find the size of the stack frame. FI->setFrameSize(MF.getFrameInfo()->getStackSize()); - + // Find all safe points. - FindSafePoints(MF); + if (FI->getStrategy().customSafePoints()) { + FI->getStrategy().findCustomSafePoints(*FI, MF); + } else { + FindSafePoints(MF); + } // Find the stack offsets for all roots. 
FindStackOffsets(MF); -- cgit v1.1 From ce485e7f70faed6d19daafff91bb20509403d432 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 11 Nov 2011 21:27:40 +0000 Subject: ARM allow Q registers in vldm/vstm register lists. rdar://9672822 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144407 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 72 +++++++++++++++++++------------ 1 file changed, 45 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index e782975..10d7bf2 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -2410,6 +2410,29 @@ static unsigned getNextRegister(unsigned Reg) { } } +// Return the low-subreg of a given Q register. +static unsigned getDRegFromQReg(unsigned QReg) { + switch (QReg) { + default: llvm_unreachable("expected a Q register!"); + case ARM::Q0: return ARM::D0; + case ARM::Q1: return ARM::D2; + case ARM::Q2: return ARM::D4; + case ARM::Q3: return ARM::D6; + case ARM::Q4: return ARM::D8; + case ARM::Q5: return ARM::D10; + case ARM::Q6: return ARM::D12; + case ARM::Q7: return ARM::D14; + case ARM::Q8: return ARM::D16; + case ARM::Q9: return ARM::D19; + case ARM::Q10: return ARM::D20; + case ARM::Q11: return ARM::D22; + case ARM::Q12: return ARM::D24; + case ARM::Q13: return ARM::D26; + case ARM::Q14: return ARM::D28; + case ARM::Q15: return ARM::D30; + } +} + /// Parse a register list. bool ARMAsmParser:: parseRegisterList(SmallVectorImpl &Operands) { @@ -2425,6 +2448,16 @@ parseRegisterList(SmallVectorImpl &Operands) { if (Reg == -1) return Error(RegLoc, "register expected"); + // The reglist instructions have at most 16 registers, so reserve + // space for that many. + SmallVector, 16> Registers; + + // Allow Q regs and just interpret them as the two D sub-registers. 
+ if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + Reg = getDRegFromQReg(Reg); + Registers.push_back(std::pair(Reg, RegLoc)); + ++Reg; + } const MCRegisterClass *RC; if (ARMMCRegisterClasses[ARM::GPRRegClassID].contains(Reg)) RC = &ARMMCRegisterClasses[ARM::GPRRegClassID]; @@ -2435,10 +2468,7 @@ parseRegisterList(SmallVectorImpl &Operands) { else return Error(RegLoc, "invalid register in register list"); - // The reglist instructions have at most 16 registers, so reserve - // space for that many. - SmallVector, 16> Registers; - // Store the first register. + // Store the register. Registers.push_back(std::pair(Reg, RegLoc)); // This starts immediately after the first register token in the list, @@ -2452,6 +2482,9 @@ parseRegisterList(SmallVectorImpl &Operands) { int EndReg = tryParseRegister(); if (EndReg == -1) return Error(EndLoc, "register expected"); + // Allow Q regs and just interpret them as the two D sub-registers. + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(EndReg)) + EndReg = getDRegFromQReg(EndReg) + 1; // If the register is the same as the start reg, there's nothing // more to do. if (Reg == EndReg) @@ -2476,6 +2509,12 @@ parseRegisterList(SmallVectorImpl &Operands) { Reg = tryParseRegister(); if (Reg == -1) return Error(RegLoc, "register expected"); + // Allow Q regs and just interpret them as the two D sub-registers. + bool isQReg = false; + if (ARMMCRegisterClasses[ARM::QPRRegClassID].contains(Reg)) { + Reg = getDRegFromQReg(Reg); + isQReg = true; + } // The register must be in the same register class as the first. 
if (!RC->contains(Reg)) return Error(RegLoc, "invalid register in register list"); @@ -2489,6 +2528,8 @@ parseRegisterList(SmallVectorImpl &Operands) { Reg != OldReg + 1) return Error(RegLoc, "non-contiguous register range"); Registers.push_back(std::pair(Reg, RegLoc)); + if (isQReg) + Registers.push_back(std::pair(++Reg, RegLoc)); } SMLoc E = Parser.getTok().getLoc(); @@ -2500,29 +2541,6 @@ parseRegisterList(SmallVectorImpl &Operands) { return false; } -// Return the low-subreg of a given Q register. -static unsigned getDRegFromQReg(unsigned QReg) { - switch (QReg) { - default: llvm_unreachable("expected a Q register!"); - case ARM::Q0: return ARM::D0; - case ARM::Q1: return ARM::D2; - case ARM::Q2: return ARM::D4; - case ARM::Q3: return ARM::D6; - case ARM::Q4: return ARM::D8; - case ARM::Q5: return ARM::D10; - case ARM::Q6: return ARM::D12; - case ARM::Q7: return ARM::D14; - case ARM::Q8: return ARM::D16; - case ARM::Q9: return ARM::D19; - case ARM::Q10: return ARM::D20; - case ARM::Q11: return ARM::D22; - case ARM::Q12: return ARM::D24; - case ARM::Q13: return ARM::D26; - case ARM::Q14: return ARM::D28; - case ARM::Q15: return ARM::D30; - } -} - // parse a vector register list ARMAsmParser::OperandMatchResultTy ARMAsmParser:: parseVectorList(SmallVectorImpl &Operands) { -- cgit v1.1 From 95bc85e4eefdfc1aabfde85daf752f05d2a60701 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 11 Nov 2011 22:18:09 +0000 Subject: Preserve MachineMemOperands in ARMLoadStoreOptimizer. Fixes PR8113. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144409 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 4c3be89..c8728f4 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -32,6 +32,7 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" @@ -1504,6 +1505,23 @@ static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base, return AddedRegPressure.size() <= MemRegs.size() * 2; } + +/// Copy Op0 and Op1 operands into a new array assigned to MI. +static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0, + MachineInstr *Op1) { + assert(MI->memoperands_empty() && "expected a new machineinstr"); + size_t numMemRefs = (Op0->memoperands_end() - Op0->memoperands_begin()) + + (Op1->memoperands_end() - Op1->memoperands_begin()); + + MachineFunction *MF = MI->getParent()->getParent(); + MachineSDNode::mmo_iterator MemBegin = MF->allocateMemRefsArray(numMemRefs); + MachineSDNode::mmo_iterator MemEnd = + std::copy(Op0->memoperands_begin(), Op0->memoperands_end(), MemBegin); + MemEnd = + std::copy(Op1->memoperands_begin(), Op1->memoperands_end(), MemEnd); + MI->setMemRefs(MemBegin, MemEnd); +} + bool ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl, @@ -1698,6 +1716,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, if (!isT2) MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); + concatenateMemOperands(MIB, Op0, Op1); + DEBUG(dbgs() << "Formed " << *MIB << "\n"); ++NumLDRDFormed; } else { MachineInstrBuilder MIB = 
BuildMI(*MBB, InsertPos, dl, MCID) @@ -1710,6 +1730,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, if (!isT2) MIB.addReg(0); MIB.addImm(Offset).addImm(Pred).addReg(PredReg); + concatenateMemOperands(MIB, Op0, Op1); + DEBUG(dbgs() << "Formed " << *MIB << "\n"); ++NumSTRDFormed; } MBB->erase(Op0); -- cgit v1.1 From c3937b97c00a857dff3528895e71ecfbc7ff3a28 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 11 Nov 2011 22:30:06 +0000 Subject: Nuke no longer accurate comment. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144411 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 3 --- 1 file changed, 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 10d7bf2..af7fe33 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4283,9 +4283,6 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, Next = Name.find('.', Start + 1); StringRef ExtraToken = Name.slice(Start, Next); - // For now, we're only parsing Thumb1 (for the most part), so - // just ignore ".n" qualifiers. We'll use them to restrict - // matching when we do Thumb2. if (ExtraToken != ".n") { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc)); -- cgit v1.1 From 47b92f3d8362518596d57269dc53d985bc13323a Mon Sep 17 00:00:00 2001 From: Bruno Cardoso Lopes Date: Fri, 11 Nov 2011 22:58:42 +0000 Subject: Mips MC object code emission improvements: "With this patch we can now generate runnable Mips code through LLVM direct object emission. We have run numerous simple programs, both C and C++ and with -O0 and -O3 from the output. The code is not production ready, but quite useful for experimentation." 
Patch and message by Jack Carter git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144414 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 147 ++++++++++++-- lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 82 ++++++++ lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 213 ++++++++++++++++++++- lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 5 +- lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 5 +- lib/Target/Mips/Makefile | 2 +- lib/Target/Mips/MipsAsmPrinter.cpp | 1 + lib/Target/Mips/MipsCodeEmitter.cpp | 57 ++++-- lib/Target/Mips/MipsFrameLowering.cpp | 1 + lib/Target/Mips/MipsISelLowering.cpp | 1 + lib/Target/Mips/MipsInstrInfo.h | 80 -------- lib/Target/Mips/MipsInstrInfo.td | 10 +- lib/Target/Mips/MipsMCInstLower.cpp | 1 + 13 files changed, 475 insertions(+), 130 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp index f190ec4..4f017d0 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp @@ -1,5 +1,21 @@ +//===-- MipsASMBackend.cpp - ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the MipsAsmBackend and MipsELFObjectWriter classes. 
+// +//===----------------------------------------------------------------------===// +// + +#include "MipsFixupKinds.h" #include "MCTargetDesc/MipsMCTargetDesc.h" #include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCELFObjectWriter.h" @@ -8,7 +24,6 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/Object/MachOFormat.h" #include "llvm/Support/ELF.h" @@ -16,7 +31,50 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; +static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) { + + // Add/subtract and shift + switch (Kind) { + default: + break; + case Mips::fixup_Mips_PC16: + // So far we are only using this type for branches. + // For branches we start 1 instruction after the branch + // so the displacement will be one instruction size less. + Value -= 4; + // The displacement is then divided by 4 to give us an 18 bit + // address range. + Value >>= 2; + break; + case Mips::fixup_Mips_26: + // So far we are only using this type for jumps. + // The displacement is then divided by 4 to give us an 28 bit + // address range. 
+ Value >>= 2; + break; + } + + // Mask off value for placement as an operand + switch (Kind) { + default: + break; + case FK_Data_4: + Value &= 0xffffffff; + break; + case Mips::fixup_Mips_26: + Value &= 0x03ffffff; + break; + case Mips::fixup_Mips_LO16: + case Mips::fixup_Mips_PC16: + Value &= 0x0000ffff; + break; + } + + return Value; +} + namespace { + class MipsELFObjectWriter : public MCELFObjectTargetWriter { public: MipsELFObjectWriter(bool is64Bit, Triple::OSType OSType, uint16_t EMachine, @@ -27,18 +85,75 @@ public: class MipsAsmBackend : public MCAsmBackend { public: - MipsAsmBackend(const Target &T) - : MCAsmBackend() {} - - unsigned getNumFixupKinds() const { - return 1; //tbd - } + MipsAsmBackend(const Target &T) : MCAsmBackend() {} /// ApplyFixup - Apply the \arg Value for given \arg Fixup into the provided /// data fragment, at the offset specified by the fixup and following the /// fixup kind as appropriate. void ApplyFixup(const MCFixup &Fixup, char *Data, unsigned DataSize, uint64_t Value) const { + unsigned Kind = (unsigned)Fixup.getKind(); + Value = adjustFixupValue(Kind, Value); + + if (!Value) + return; // Doesn't change encoding. + + unsigned Offset = Fixup.getOffset(); + switch (Kind) { + default: + llvm_unreachable("Unknown fixup kind!"); + case Mips::fixup_Mips_GOT16: // This will be fixed up at link time + break; + case FK_Data_4: + case Mips::fixup_Mips_26: + case Mips::fixup_Mips_LO16: + case Mips::fixup_Mips_PC16: + // For each byte of the fragment that the fixup touches, mask in + // the fixup value. The Value has been "split up" into the appropriate + // bitfields above. 
+ for (unsigned i = 0; i != 4; ++i) // FIXME - Need to support 2 and 8 bytes + Data[Offset + i] |= uint8_t((Value >> (i * 8)) & 0xff); + break; + } + } + + unsigned getNumFixupKinds() const { return Mips::NumTargetFixupKinds; } + + const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const { + const static MCFixupKindInfo Infos[Mips::NumTargetFixupKinds] = { + // This table *must* be in the order that the fixup_* kinds a + // MipsFixupKinds.h. + // + // name offset bits flags + { "fixup_Mips_NONE", 0, 0, 0 }, + { "fixup_Mips_16", 0, 16, 0 }, + { "fixup_Mips_32", 0, 32, 0 }, + { "fixup_Mips_REL32", 0, 32, 0 }, + { "fixup_Mips_26", 0, 26, 0 }, + { "fixup_Mips_HI16", 0, 16, 0 }, + { "fixup_Mips_LO16", 0, 16, 0 }, + { "fixup_Mips_GPREL16", 0, 16, 0 }, + { "fixup_Mips_LITERAL", 0, 16, 0 }, + { "fixup_Mips_GOT16", 0, 16, 0 }, + { "fixup_Mips_PC16", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "fixup_Mips_CALL16", 0, 16, 0 }, + { "fixup_Mips_GPREL32", 0, 32, 0 }, + { "fixup_Mips_SHIFT5", 6, 5, 0 }, + { "fixup_Mips_SHIFT6", 6, 5, 0 }, + { "fixup_Mips_64", 0, 64, 0 }, + { "fixup_Mips_TLSGD", 0, 16, 0 }, + { "fixup_Mips_GOTTPREL", 0, 16, 0 }, + { "fixup_Mips_TPREL_HI", 0, 16, 0 }, + { "fixup_Mips_TPREL_LO", 0, 16, 0 }, + { "fixup_Mips_Branch_PCRel", 0, 16, MCFixupKindInfo::FKF_IsPCRel } + }; + + if (Kind < FirstTargetFixupKind) + return MCAsmBackend::getFixupKindInfo(Kind); + + assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() && + "Invalid kind!"); + return Infos[Kind - FirstTargetFixupKind]; } /// @name Target Relaxation Interfaces @@ -52,24 +167,24 @@ public: return false; } - /// RelaxInstruction - Relax the instruction in the given fragment to the next - /// wider instruction. + /// RelaxInstruction - Relax the instruction in the given fragment + /// to the next wider instruction. /// - /// \param Inst - The instruction to relax, which may be the same as the - /// output. 
+ /// \param Inst - The instruction to relax, which may be the same + /// as the output. /// \parm Res [output] - On return, the relaxed instruction. void RelaxInstruction(const MCInst &Inst, MCInst &Res) const { } /// @} - /// WriteNopData - Write an (optimal) nop sequence of Count bytes to the given - /// output. If the target cannot generate such a sequence, it should return an - /// error. + /// WriteNopData - Write an (optimal) nop sequence of Count bytes + /// to the given output. If the target cannot generate such a sequence, + /// it should return an error. /// /// \return - True on success. bool WriteNopData(uint64_t Count, MCObjectWriter *OW) const { - return false; + return true; } }; @@ -106,7 +221,7 @@ public: return new MipsELFObjectWriter(false, OSType, ELF::EM_MIPS, false); } }; -} +} // namespace MCAsmBackend *llvm::createMipsAsmBackend(const Target &T, StringRef TT) { Triple TheTriple(TT); diff --git a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h index f7a6fa9..cebfde0 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h +++ b/lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h @@ -19,6 +19,88 @@ #include "llvm/Support/ErrorHandling.h" namespace llvm { + +/// MipsII - This namespace holds all of the target specific flags that +/// instruction info tracks. +/// +namespace MipsII { + /// Target Operand Flag enum. + enum TOF { + //===------------------------------------------------------------------===// + // Mips Specific MachineOperand flags. + + MO_NO_FLAG, + + /// MO_GOT - Represents the offset into the global offset table at which + /// the address the relocation entry symbol resides during execution. + MO_GOT, + + /// MO_GOT_CALL - Represents the offset into the global offset table at + /// which the address of a call site relocation entry symbol resides + /// during execution. This is different from the above since this flag + /// can only be present in call instructions. 
+ MO_GOT_CALL, + + /// MO_GPREL - Represents the offset from the current gp value to be used + /// for the relocatable object file being produced. + MO_GPREL, + + /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol + /// address. + MO_ABS_HI, + MO_ABS_LO, + + /// MO_TLSGD - Represents the offset into the global offset table at which + // the module ID and TSL block offset reside during execution (General + // Dynamic TLS). + MO_TLSGD, + + /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial + // Exec TLS). + MO_GOTTPREL, + + /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from + // the thread pointer (Local Exec TLS). + MO_TPREL_HI, + MO_TPREL_LO, + + // N32/64 Flags. + MO_GPOFF_HI, + MO_GPOFF_LO, + MO_GOT_DISP, + MO_GOT_PAGE, + MO_GOT_OFST + }; + + enum { + //===------------------------------------------------------------------===// + // Instruction encodings. These are the standard/most common forms for + // Mips instructions. + // + + // Pseudo - This represents an instruction that is a pseudo instruction + // or one that has not been implemented yet. It is illegal to code generate + // it, but tolerated for intermediate implementation stages. + Pseudo = 0, + + /// FrmR - This form is for instructions of the format R. + FrmR = 1, + /// FrmI - This form is for instructions of the format I. + FrmI = 2, + /// FrmJ - This form is for instructions of the format J. + FrmJ = 3, + /// FrmFR - This form is for instructions of the format FR. + FrmFR = 4, + /// FrmFI - This form is for instructions of the format FI. + FrmFI = 5, + /// FrmOther - This form is for instructions that have no specific format. + FrmOther = 6, + + FormMask = 15 + }; +} + + /// getMipsRegisterNumbering - Given the enum value for some register, /// return the number that it corresponds to. 
inline static unsigned getMipsRegisterNumbering(unsigned RegEnum) diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp index d66de23..1115fec 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp @@ -12,16 +12,18 @@ //===----------------------------------------------------------------------===// // #define DEBUG_TYPE "mccodeemitter" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "MCTargetDesc/MipsFixupKinds.h" +#include "MCTargetDesc/MipsMCTargetDesc.h" +#include "llvm/ADT/APFloat.h" +#include "llvm/ADT/Statistic.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/raw_ostream.h" -#include "MCTargetDesc/MipsMCTargetDesc.h" using namespace llvm; @@ -31,22 +33,217 @@ class MipsMCCodeEmitter : public MCCodeEmitter { void operator=(const MipsMCCodeEmitter &); // DO NOT IMPLEMENT const MCInstrInfo &MCII; const MCSubtargetInfo &STI; + MCContext &Ctx; public: MipsMCCodeEmitter(const MCInstrInfo &mcii, const MCSubtargetInfo &sti, - MCContext &ctx) - : MCII(mcii), STI(sti) {} + MCContext &ctx) : MCII(mcii), STI(sti) , Ctx(ctx) {} ~MipsMCCodeEmitter() {} - void EncodeInstruction(const MCInst &MI, raw_ostream &OS, - SmallVectorImpl &Fixups) const { + void EmitByte(unsigned char C, raw_ostream &OS) const { + OS << (char)C; + } + + void EmitInstruction(uint64_t Val, unsigned Size, raw_ostream &OS) const { + // Output the instruction encoding in little endian byte order. 
+ for (unsigned i = 0; i != Size; ++i) { + EmitByte(Val & 255, OS); + Val >>= 8; + } } + + void EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups) const; + + // getBinaryCodeForInstr - TableGen'erated function for getting the + // binary encoding for an instruction. + unsigned getBinaryCodeForInstr(const MCInst &MI, + SmallVectorImpl &Fixups) const; + + // getBranchJumpOpValue - Return binary encoding of the jump + // target operand. If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const; + + // getBranchTargetOpValue - Return binary encoding of the branch + // target operand. If the machine operand requires relocation, + // record the relocation and return zero. + unsigned getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const; + + // getMachineOpValue - Return binary encoding of operand. If the machin + // operand requires relocation, record the relocation and return zero. + unsigned getMachineOpValue(const MCInst &MI,const MCOperand &MO, + SmallVectorImpl &Fixups) const; + + unsigned getMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const; + unsigned getSizeExtEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const; + unsigned getSizeInsEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const; + }; // class MipsMCCodeEmitter } // namespace MCCodeEmitter *llvm::createMipsMCCodeEmitter(const MCInstrInfo &MCII, const MCSubtargetInfo &STI, - MCContext &Ctx) { + MCContext &Ctx) +{ return new MipsMCCodeEmitter(MCII, STI, Ctx); } + +/// EncodeInstruction - Emit the instruction. 
+/// Size the instruction (currently only 4 bytes +void MipsMCCodeEmitter:: +EncodeInstruction(const MCInst &MI, raw_ostream &OS, + SmallVectorImpl &Fixups) const +{ + uint32_t Binary = getBinaryCodeForInstr(MI, Fixups); + + // Check for unimplemented opcodes. + // Unfortunately in MIPS both NOT and SLL will come in with Binary == 0 + // so we have to special check for them. + unsigned Opcode = MI.getOpcode(); + if ((Opcode != Mips::NOP) && (Opcode != Mips::SLL) && !Binary) + llvm_unreachable("unimplemented opcode in EncodeInstruction()"); + + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + // Pseudo instructions don't get encoded and shouldn't be here + // in the first place! + if ((TSFlags & MipsII::FormMask) == MipsII::Pseudo) + llvm_unreachable("Pseudo opcode found in EncodeInstruction()"); + + // For now all instructions are 4 bytes + int Size = 4; // FIXME: Have Desc.getSize() return the correct value! + + EmitInstruction(Binary, Size, OS); +} + +/// getBranchTargetOpValue - Return binary encoding of the branch +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. +unsigned MipsMCCodeEmitter:: +getBranchTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpNo); + assert(MO.isExpr() && "getBranchTargetOpValue expects only expressions"); + + const MCExpr *Expr = MO.getExpr(); + Fixups.push_back(MCFixup::Create(0, Expr, + MCFixupKind(Mips::fixup_Mips_PC16))); + return 0; +} + +/// getJumpTargetOpValue - Return binary encoding of the jump +/// target operand. If the machine operand requires relocation, +/// record the relocation and return zero. 
+unsigned MipsMCCodeEmitter:: +getJumpTargetOpValue(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const { + + const MCOperand &MO = MI.getOperand(OpNo); + assert(MO.isExpr() && "getJumpTargetOpValue expects only expressions"); + + const MCExpr *Expr = MO.getExpr(); + Fixups.push_back(MCFixup::Create(0, Expr, + MCFixupKind(Mips::fixup_Mips_26))); + return 0; +} + +/// getMachineOpValue - Return binary encoding of operand. If the machine +/// operand requires relocation, record the relocation and return zero. +unsigned MipsMCCodeEmitter:: +getMachineOpValue(const MCInst &MI, const MCOperand &MO, + SmallVectorImpl &Fixups) const { + if (MO.isReg()) { + unsigned Reg = MO.getReg(); + unsigned RegNo = getMipsRegisterNumbering(Reg); + return RegNo; + } else if (MO.isImm()) { + return static_cast(MO.getImm()); + } else if (MO.isFPImm()) { + return static_cast(APFloat(MO.getFPImm()) + .bitcastToAPInt().getHiBits(32).getLimitedValue()); + } else if (MO.isExpr()) { + const MCExpr *Expr = MO.getExpr(); + MCExpr::ExprKind Kind = Expr->getKind(); + if (Kind == MCExpr::SymbolRef) { + Mips::Fixups FixupKind = Mips::fixup_Mips_NONE; + MCSymbolRefExpr::VariantKind SymRefKind = + cast(Expr)->getKind(); + switch(SymRefKind) { + case MCSymbolRefExpr::VK_Mips_GPREL: + FixupKind = Mips::fixup_Mips_GPREL16; + break; + case MCSymbolRefExpr::VK_Mips_GOT_CALL: + FixupKind = Mips::fixup_Mips_CALL16; + break; + case MCSymbolRefExpr::VK_Mips_GOT: + FixupKind = Mips::fixup_Mips_GOT16; + break; + case MCSymbolRefExpr::VK_Mips_ABS_HI: + FixupKind = Mips::fixup_Mips_HI16; + break; + case MCSymbolRefExpr::VK_Mips_ABS_LO: + FixupKind = Mips::fixup_Mips_LO16; + break; + case MCSymbolRefExpr::VK_Mips_TLSGD: + FixupKind = Mips::fixup_Mips_TLSGD; + break; + case MCSymbolRefExpr::VK_Mips_GOTTPREL: + FixupKind = Mips::fixup_Mips_GOTTPREL; + break; + case MCSymbolRefExpr::VK_Mips_TPREL_HI: + FixupKind = Mips::fixup_Mips_TPREL_HI; + break; + case MCSymbolRefExpr::VK_Mips_TPREL_LO: + FixupKind = 
Mips::fixup_Mips_TPREL_LO; + break; + default: + return 0; + } // switch + Fixups.push_back(MCFixup::Create(0, Expr, MCFixupKind(FixupKind))); + } // if SymbolRef + // All of the information is in the fixup. + return 0; + } + llvm_unreachable("Unable to encode MCOperand!"); + // Not reached + return 0; +} + +/// getMemEncoding - Return binary encoding of memory related operand. +/// If the offset operand requires relocation, record the relocation. +unsigned +MipsMCCodeEmitter::getMemEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const { + // Base register is encoded in bits 20-16, offset is encoded in bits 15-0. + assert(MI.getOperand(OpNo).isReg()); + unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo),Fixups) << 16; + unsigned OffBits = getMachineOpValue(MI, MI.getOperand(OpNo+1), Fixups); + + return (OffBits & 0xFFFF) | RegBits; +} + +unsigned +MipsMCCodeEmitter::getSizeExtEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const { + // FIXME: implement + return 0; +} + +unsigned +MipsMCCodeEmitter::getSizeInsEncoding(const MCInst &MI, unsigned OpNo, + SmallVectorImpl &Fixups) const { + // FIXME: implement + return 0; +} + +#include "MipsGenMCCodeEmitter.inc" + diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp index 1f9e3dd..e6040e4 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#include "MipsMCTargetDesc.h" #include "MipsMCAsmInfo.h" +#include "MipsMCTargetDesc.h" #include "InstPrinter/MipsInstPrinter.h" #include "llvm/MC/MachineLocation.h" #include "llvm/MC/MCCodeGenInfo.h" @@ -140,6 +140,9 @@ extern "C" void LLVMInitializeMipsTargetMC() { TargetRegistry::RegisterMCAsmBackend(TheMips64Target, createMipsAsmBackend); TargetRegistry::RegisterMCAsmBackend(TheMips64elTarget, 
createMipsAsmBackend); + TargetRegistry::RegisterMCCodeEmitter(TheMipsTarget, createMipsMCCodeEmitter); + TargetRegistry::RegisterMCCodeEmitter(TheMipselTarget, createMipsMCCodeEmitter); + // Register the MC subtarget info. TargetRegistry::RegisterMCSubtargetInfo(TheMipsTarget, createMipsMCSubtargetInfo); diff --git a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h index 7a0042a..fc43d2d 100644 --- a/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h +++ b/lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.h @@ -16,12 +16,14 @@ namespace llvm { class MCAsmBackend; -class MCInstrInfo; class MCCodeEmitter; class MCContext; +class MCInstrInfo; +class MCObjectWriter; class MCSubtargetInfo; class StringRef; class Target; +class raw_ostream; extern Target TheMipsTarget; extern Target TheMipselTarget; @@ -33,6 +35,7 @@ MCCodeEmitter *createMipsMCCodeEmitter(const MCInstrInfo &MCII, MCContext &Ctx); MCAsmBackend *createMipsAsmBackend(const Target &T, StringRef TT); + } // End llvm namespace // Defines symbolic names for Mips registers. 
This defines a mapping from diff --git a/lib/Target/Mips/Makefile b/lib/Target/Mips/Makefile index d72693c..94f7c18 100644 --- a/lib/Target/Mips/Makefile +++ b/lib/Target/Mips/Makefile @@ -15,7 +15,7 @@ TARGET = Mips BUILT_SOURCES = MipsGenRegisterInfo.inc MipsGenInstrInfo.inc \ MipsGenAsmWriter.inc MipsGenCodeEmitter.inc \ MipsGenDAGISel.inc MipsGenCallingConv.inc \ - MipsGenSubtargetInfo.inc + MipsGenSubtargetInfo.inc MipsGenMCCodeEmitter.inc DIRS = InstPrinter TargetInfo MCTargetDesc diff --git a/lib/Target/Mips/MipsAsmPrinter.cpp b/lib/Target/Mips/MipsAsmPrinter.cpp index d7b7f06..186a5e3 100644 --- a/lib/Target/Mips/MipsAsmPrinter.cpp +++ b/lib/Target/Mips/MipsAsmPrinter.cpp @@ -20,6 +20,7 @@ #include "MipsMCInstLower.h" #include "MipsMCSymbolRefExpr.h" #include "InstPrinter/MipsInstPrinter.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" diff --git a/lib/Target/Mips/MipsCodeEmitter.cpp b/lib/Target/Mips/MipsCodeEmitter.cpp index dc4ecd6..a8f29ae 100644 --- a/lib/Target/Mips/MipsCodeEmitter.cpp +++ b/lib/Target/Mips/MipsCodeEmitter.cpp @@ -18,18 +18,20 @@ #include "MipsRelocations.h" #include "MipsSubtarget.h" #include "MipsTargetMachine.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/PassManager.h" +#include "MCTargetDesc/MipsBaseInfo.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Constants.h" +#include "llvm/DerivedTypes.h" +#include "llvm/Function.h" +#include "llvm/PassManager.h" #include "llvm/Support/Debug.h" #include 
"llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -37,8 +39,6 @@ #include #endif -#include "llvm/CodeGen/MachineOperand.h" - using namespace llvm; STATISTIC(NumEmitted, "Number of machine instructions emitted"); @@ -66,9 +66,9 @@ class MipsCodeEmitter : public MachineFunctionPass { public: MipsCodeEmitter(TargetMachine &tm, JITCodeEmitter &mce) : MachineFunctionPass(ID), JTI(0), - II((const MipsInstrInfo *) tm.getInstrInfo()), - TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), - IsPIC(TM.getRelocationModel() == Reloc::PIC_) { + II((const MipsInstrInfo *) tm.getInstrInfo()), + TD(tm.getTargetData()), TM(tm), MCE(mce), MCPEs(0), MJTEs(0), + IsPIC(TM.getRelocationModel() == Reloc::PIC_) { } bool runOnMachineFunction(MachineFunction &MF); @@ -91,7 +91,7 @@ class MipsCodeEmitter : public MachineFunctionPass { /// Routines that handle operands which add machine relocations which are /// fixed up by the relocation stage. void emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub) const; + bool MayNeedFarStub) const; void emitExternalSymbolAddress(const char *ES, unsigned Reloc) const; void emitConstPoolAddress(unsigned CPI, unsigned Reloc) const; void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const; @@ -105,6 +105,9 @@ class MipsCodeEmitter : public MachineFunctionPass { unsigned getRelocation(const MachineInstr &MI, const MachineOperand &MO) const; + unsigned getJumpTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; + + unsigned getBranchTargetOpValue(const MachineInstr &MI, unsigned OpNo) const; unsigned getMemEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeExtEncoding(const MachineInstr &MI, unsigned OpNo) const; unsigned getSizeInsEncoding(const MachineInstr &MI, unsigned OpNo) const; @@ -165,23 +168,34 @@ unsigned MipsCodeEmitter::getRelocation(const MachineInstr &MI, return Mips::reloc_mips_lo; } +unsigned MipsCodeEmitter::getJumpTargetOpValue(const MachineInstr 
&MI, + unsigned OpNo) const { + // FIXME: implement + return 0; +} + +unsigned MipsCodeEmitter::getBranchTargetOpValue(const MachineInstr &MI, + unsigned OpNo) const { + // FIXME: implement + return 0; +} + unsigned MipsCodeEmitter::getMemEncoding(const MachineInstr &MI, - unsigned OpNo) const { + unsigned OpNo) const { // Base register is encoded in bits 20-16, offset is encoded in bits 15-0. assert(MI.getOperand(OpNo).isReg()); unsigned RegBits = getMachineOpValue(MI, MI.getOperand(OpNo)) << 16; - return - (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits; + return (getMachineOpValue(MI, MI.getOperand(OpNo+1)) & 0xFFFF) | RegBits; } unsigned MipsCodeEmitter::getSizeExtEncoding(const MachineInstr &MI, - unsigned OpNo) const { + unsigned OpNo) const { // size is encoded as size-1. return getMachineOpValue(MI, MI.getOperand(OpNo)) - 1; } unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI, - unsigned OpNo) const { + unsigned OpNo) const { // size is encoded as pos+size-1. return getMachineOpValue(MI, MI.getOperand(OpNo-1)) + getMachineOpValue(MI, MI.getOperand(OpNo)) - 1; @@ -190,7 +204,7 @@ unsigned MipsCodeEmitter::getSizeInsEncoding(const MachineInstr &MI, /// getMachineOpValue - Return binary encoding of operand. If the machine /// operand requires relocation, record the relocation and return zero. 
unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, - const MachineOperand &MO) const { + const MachineOperand &MO) const { if (MO.isReg()) return MipsRegisterInfo::getRegisterNumbering(MO.getReg()); else if (MO.isImm()) @@ -217,9 +231,10 @@ unsigned MipsCodeEmitter::getMachineOpValue(const MachineInstr &MI, } void MipsCodeEmitter::emitGlobalAddress(const GlobalValue *GV, unsigned Reloc, - bool MayNeedFarStub) const { + bool MayNeedFarStub) const { MCE.addRelocation(MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc, - const_cast(GV), 0, MayNeedFarStub)); + const_cast(GV), 0, + MayNeedFarStub)); } void MipsCodeEmitter::emitGlobalAddressUnaligned(const GlobalValue *GV, @@ -248,7 +263,7 @@ emitJumpTableAddress(unsigned JTIndex, unsigned Reloc) const { } void MipsCodeEmitter::emitMachineBasicBlock(MachineBasicBlock *BB, - unsigned Reloc) const { + unsigned Reloc) const { MCE.addRelocation(MachineRelocation::getBB(MCE.getCurrentPCOffset(), Reloc, BB)); } @@ -395,7 +410,7 @@ void MipsCodeEmitter::emitWordLE(unsigned Word) { /// createMipsJITCodeEmitterPass - Return a pass that emits the collected Mips /// code to the specified MCE object. 
FunctionPass *llvm::createMipsJITCodeEmitterPass(MipsTargetMachine &TM, - JITCodeEmitter &JCE) { + JITCodeEmitter &JCE) { return new MipsCodeEmitter(TM, JCE); } diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 68adfe6..07de251 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -14,6 +14,7 @@ #include "MipsFrameLowering.h" #include "MipsInstrInfo.h" #include "MipsMachineFunction.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/Function.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 2b0e90b..96ec588 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -24,6 +24,7 @@ #include "llvm/Intrinsics.h" #include "llvm/CallingConv.h" #include "InstPrinter/MipsInstPrinter.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h index 271d248..8fa3052 100644 --- a/lib/Target/Mips/MipsInstrInfo.h +++ b/lib/Target/Mips/MipsInstrInfo.h @@ -30,86 +30,6 @@ namespace Mips { unsigned GetOppositeBranchOpc(unsigned Opc); } -/// MipsII - This namespace holds all of the target specific flags that -/// instruction info tracks. -/// -namespace MipsII { - /// Target Operand Flag enum. - enum TOF { - //===------------------------------------------------------------------===// - // Mips Specific MachineOperand flags. - - MO_NO_FLAG, - - /// MO_GOT - Represents the offset into the global offset table at which - /// the address the relocation entry symbol resides during execution. 
- MO_GOT, - - /// MO_GOT_CALL - Represents the offset into the global offset table at - /// which the address of a call site relocation entry symbol resides - /// during execution. This is different from the above since this flag - /// can only be present in call instructions. - MO_GOT_CALL, - - /// MO_GPREL - Represents the offset from the current gp value to be used - /// for the relocatable object file being produced. - MO_GPREL, - - /// MO_ABS_HI/LO - Represents the hi or low part of an absolute symbol - /// address. - MO_ABS_HI, - MO_ABS_LO, - - /// MO_TLSGD - Represents the offset into the global offset table at which - // the module ID and TSL block offset reside during execution (General - // Dynamic TLS). - MO_TLSGD, - - /// MO_GOTTPREL - Represents the offset from the thread pointer (Initial - // Exec TLS). - MO_GOTTPREL, - - /// MO_TPREL_HI/LO - Represents the hi and low part of the offset from - // the thread pointer (Local Exec TLS). - MO_TPREL_HI, - MO_TPREL_LO, - - // N32/64 Flags. - MO_GPOFF_HI, - MO_GPOFF_LO, - MO_GOT_DISP, - MO_GOT_PAGE, - MO_GOT_OFST - }; - - enum { - //===------------------------------------------------------------------===// - // Instruction encodings. These are the standard/most common forms for - // Mips instructions. - // - - // Pseudo - This represents an instruction that is a pseudo instruction - // or one that has not been implemented yet. It is illegal to code generate - // it, but tolerated for intermediate implementation stages. - Pseudo = 0, - - /// FrmR - This form is for instructions of the format R. - FrmR = 1, - /// FrmI - This form is for instructions of the format I. - FrmI = 2, - /// FrmJ - This form is for instructions of the format J. - FrmJ = 3, - /// FrmFR - This form is for instructions of the format FR. - FrmFR = 4, - /// FrmFI - This form is for instructions of the format FI. - FrmFI = 5, - /// FrmOther - This form is for instructions that have no specific format. 
- FrmOther = 6, - - FormMask = 15 - }; -} - class MipsInstrInfo : public MipsGenInstrInfo { MipsTargetMachine &TM; bool IsN64; diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index a58ce99..5dca9b6 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -138,7 +138,13 @@ def NotN64 : Predicate<"!Subtarget.isABI_N64()">; //===----------------------------------------------------------------------===// // Instruction operand types -def brtarget : Operand; +def jmptarget : Operand { + let EncoderMethod = "getJumpTargetOpValue"; +} +def brtarget : Operand { + let EncoderMethod = "getBranchTargetOpValue"; + let OperandType = "OPERAND_PCREL"; +} def calltarget : Operand; def calltarget64: Operand; def simm16 : Operand; @@ -449,7 +455,7 @@ class SetCC_I op, string instr_asm, PatFrag cond_op, Operand Od, // Unconditional branch let isBranch=1, isTerminator=1, isBarrier=1, hasDelaySlot = 1 in class JumpFJ op, string instr_asm>: - FJ; let isBranch=1, isTerminator=1, isBarrier=1, rd=0, hasDelaySlot = 1 in diff --git a/lib/Target/Mips/MipsMCInstLower.cpp b/lib/Target/Mips/MipsMCInstLower.cpp index 6c0e4f6..1fab52c 100644 --- a/lib/Target/Mips/MipsMCInstLower.cpp +++ b/lib/Target/Mips/MipsMCInstLower.cpp @@ -15,6 +15,7 @@ #include "MipsAsmPrinter.h" #include "MipsInstrInfo.h" #include "MipsMCInstLower.h" +#include "MCTargetDesc/MipsBaseInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineOperand.h" -- cgit v1.1 From f779757ceb202a995d90a2a17442255c70be061a Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 11 Nov 2011 22:59:16 +0000 Subject: Target/LLVMBuild: Order components alphabetically. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144415 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/LLVMBuild.txt | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Target/LLVMBuild.txt b/lib/Target/LLVMBuild.txt index 60f5230..358cbc8 100644 --- a/lib/Target/LLVMBuild.txt +++ b/lib/Target/LLVMBuild.txt @@ -15,39 +15,39 @@ ; ;===------------------------------------------------------------------------===; -[component_0] -type = Library -name = Target -parent = Libraries -required_libraries = Core MC Support - ; This is a special group whose required libraries are extended (by llvm-build) -; with every built target, which makes it easy for tools to include every -; target. -[component_1] +; with the best execution engine (the native JIT, if available, or the +; interpreter). +[component_0] type = LibraryGroup -name = all-targets +name = Engine parent = Libraries ; This is a special group whose required libraries are extended (by llvm-build) ; with the configured native target, if any. -[component_2] +[component_1] type = LibraryGroup name = Native parent = Libraries ; This is a special group whose required libraries are extended (by llvm-build) ; with the configured native code generator, if any. -[component_3] +[component_2] type = LibraryGroup name = NativeCodeGen parent = Libraries +; The component for the actual target library itself. +[component_3] +type = Library +name = Target +parent = Libraries +required_libraries = Core MC Support + ; This is a special group whose required libraries are extended (by llvm-build) -; with the best execution engine (the native JIT, if available, or the -; interpreter). +; with every built target, which makes it easy for tools to include every +; target. 
[component_4] type = LibraryGroup -name = Engine +name = all-targets parent = Libraries - -- cgit v1.1 From 8396893fa54d35c59f7538ea9589d496a6275903 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 11 Nov 2011 22:59:23 +0000 Subject: LLVMBuild: Alphabetize required_libraries lists. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144416 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/IPO/LLVMBuild.txt | 2 +- lib/Transforms/Utils/LLVMBuild.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/IPO/LLVMBuild.txt b/lib/Transforms/IPO/LLVMBuild.txt index bf5d5f4..884faca 100644 --- a/lib/Transforms/IPO/LLVMBuild.txt +++ b/lib/Transforms/IPO/LLVMBuild.txt @@ -20,5 +20,5 @@ type = Library name = IPO parent = Transforms library_name = ipo -required_libraries = Analysis Core InstCombine Scalar Support Target TransformUtils IPA +required_libraries = Analysis Core IPA InstCombine Scalar Support Target TransformUtils diff --git a/lib/Transforms/Utils/LLVMBuild.txt b/lib/Transforms/Utils/LLVMBuild.txt index 6ba983c..dea7b02 100644 --- a/lib/Transforms/Utils/LLVMBuild.txt +++ b/lib/Transforms/Utils/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = TransformUtils parent = Transforms -required_libraries = Analysis Core Support Target IPA +required_libraries = Analysis Core IPA Support Target -- cgit v1.1 From 7aef99b677452724100145c81f76f32e494cc5a7 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 11 Nov 2011 23:08:10 +0000 Subject: ARM vldm and vstm VFP instructions can take a data type suffix. It's ignored by the assembler when present, but is legal syntax. Other instructions have something similar, but for some mnemonics it's only sometimes not significant, so this quick check in the parser will need refactored into something more robust soon-ish. This gets some basics working in the meantime. 
Partial for rdar://10435264 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144422 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/Record.cpp | 2 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/TableGen/Record.cpp b/lib/TableGen/Record.cpp index 92559d1..7c2ee22 100644 --- a/lib/TableGen/Record.cpp +++ b/lib/TableGen/Record.cpp @@ -1699,7 +1699,7 @@ void Record::checkName() { assert(TypedName && "Record name is not typed!"); RecTy *Type = TypedName->getType(); if (dynamic_cast(Type) == 0) { - llvm_unreachable("Record name is not a string!"); + throw "Record name is not a string!"; } } diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index af7fe33..501a1c8 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -4179,6 +4179,22 @@ bool ARMAsmParser::shouldOmitCCOutOperand(StringRef Mnemonic, return false; } +static bool isDataTypeToken(StringRef Tok) { + return Tok == ".8" || Tok == ".16" || Tok == ".32" || Tok == ".64" || + Tok == ".i8" || Tok == ".i16" || Tok == ".i32" || Tok == ".i64" || + Tok == ".u8" || Tok == ".u16" || Tok == ".u32" || Tok == ".u64" || + Tok == ".s8" || Tok == ".s16" || Tok == ".s32" || Tok == ".s64" || + Tok == ".p8" || Tok == ".p16" || Tok == ".f32" || Tok == ".f64" || + Tok == ".f" || Tok == ".d"; +} + +// FIXME: This bit should probably be handled via an explicit match class +// in the .td files that matches the suffix instead of having it be +// a literal string token the way it is now. +static bool doesIgnoreDataTypeSuffix(StringRef Mnemonic, StringRef DT) { + return Mnemonic.startswith("vldm") || Mnemonic.startswith("vstm"); +} + /// Parse an arm instruction mnemonic followed by its operands. 
bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, SmallVectorImpl &Operands) { @@ -4283,6 +4299,12 @@ bool ARMAsmParser::ParseInstruction(StringRef Name, SMLoc NameLoc, Next = Name.find('.', Start + 1); StringRef ExtraToken = Name.slice(Start, Next); + // Some NEON instructions have an optional datatype suffix that is + // completely ignored. Check for that. + if (isDataTypeToken(ExtraToken) && + doesIgnoreDataTypeSuffix(Mnemonic, ExtraToken)) + continue; + if (ExtraToken != ".n") { SMLoc Loc = SMLoc::getFromPointer(NameLoc.getPointer() + Start); Operands.push_back(ARMOperand::CreateToken(ExtraToken, Loc)); -- cgit v1.1 From b12f97534c551f44bb3579f808cbdfbc893e54f5 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Fri, 11 Nov 2011 23:12:56 +0000 Subject: CMake: Fix CMake build for new Mips tblgen file. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144423 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/Mips/CMakeLists.txt b/lib/Target/Mips/CMakeLists.txt index e81ba6f..53656d4d 100644 --- a/lib/Target/Mips/CMakeLists.txt +++ b/lib/Target/Mips/CMakeLists.txt @@ -3,6 +3,7 @@ set(LLVM_TARGET_DEFINITIONS Mips.td) tablegen(LLVM MipsGenRegisterInfo.inc -gen-register-info) tablegen(LLVM MipsGenInstrInfo.inc -gen-instr-info) tablegen(LLVM MipsGenCodeEmitter.inc -gen-emitter) +tablegen(LLVM MipsGenMCCodeEmitter.inc -gen-emitter -mc-emitter) tablegen(LLVM MipsGenAsmWriter.inc -gen-asm-writer) tablegen(LLVM MipsGenDAGISel.inc -gen-dag-isel) tablegen(LLVM MipsGenCallingConv.inc -gen-callingconv) -- cgit v1.1 From 11add26ec2bdf5109f0ff2ee19d237664687b914 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Fri, 11 Nov 2011 23:31:03 +0000 Subject: Add support in fast-isel for selecting memset/memcpy/memmove intrinsics. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144426 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 70 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index da2ca3e..4bf55fb 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -164,7 +164,8 @@ class ARMFastISel : public FastISel { bool SelectFPToSI(const Instruction *I); bool SelectSDiv(const Instruction *I); bool SelectSRem(const Instruction *I); - bool SelectCall(const Instruction *I); + bool SelectCall(const Instruction *I, const char *IntrMemName); + bool SelectIntrinsicCall(const IntrinsicInst &I); bool SelectSelect(const Instruction *I); bool SelectRet(const Instruction *I); bool SelectTrunc(const Instruction *I); @@ -1997,12 +1998,13 @@ bool ARMFastISel::ARMEmitLibcall(const Instruction *I, RTLIB::Libcall Call) { return true; } -bool ARMFastISel::SelectCall(const Instruction *I) { +bool ARMFastISel::SelectCall(const Instruction *I, + const char *IntrMemName = 0) { const CallInst *CI = cast(I); const Value *Callee = CI->getCalledValue(); - // Can't handle inline asm or worry about intrinsics yet. - if (isa(Callee) || isa(CI)) return false; + // Can't handle inline asm. + if (isa(Callee)) return false; // Only handle global variable Callees. const GlobalValue *GV = dyn_cast(Callee); @@ -2044,8 +2046,12 @@ bool ARMFastISel::SelectCall(const Instruction *I) { ArgFlags.reserve(CS.arg_size()); for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end(); i != e; ++i) { - unsigned Arg = getRegForValue(*i); + // If we're lowering a memory intrinsic instead of a regular call, skip the + // last two arguments, which shouldn't be passed to the underlying function. 
+ if (IntrMemName && e-i <= 2) + break; + unsigned Arg = getRegForValue(*i); if (Arg == 0) return false; ISD::ArgFlagsTy Flags; @@ -2090,14 +2096,16 @@ bool ARMFastISel::SelectCall(const Instruction *I) { if(isThumb2) // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc))) - .addGlobalAddress(GV, 0, 0); + TII.get(CallOpc))); else // Explicitly adding the predicate here. MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc)) - .addGlobalAddress(GV, 0, 0)); - + TII.get(CallOpc))); + if (!IntrMemName) + MIB.addGlobalAddress(GV, 0, 0); + else + MIB.addExternalSymbol(IntrMemName, 0); + // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) MIB.addReg(RegArgs[i]); @@ -2112,6 +2120,46 @@ bool ARMFastISel::SelectCall(const Instruction *I) { return true; } +bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { + // FIXME: Handle more intrinsics. + switch (I.getIntrinsicID()) { + default: return false; + case Intrinsic::memcpy: + case Intrinsic::memmove: { + // FIXME: Small memcpy/memmove's are common enough that we want to do them + // without a call if possible. + const MemTransferInst &MTI = cast(I); + // Don't handle volatile. + if (MTI.isVolatile()) + return false; + + if (!MTI.getLength()->getType()->isIntegerTy(32)) + return false; + + if (MTI.getSourceAddressSpace() > 255 || MTI.getDestAddressSpace() > 255) + return false; + + const char *IntrMemName = isa(I) ? "memcpy" : "memmove"; + return SelectCall(&I, IntrMemName); + } + case Intrinsic::memset: { + const MemSetInst &MSI = cast(I); + // Don't handle volatile. 
+ if (MSI.isVolatile()) + return false; + + if (!MSI.getLength()->getType()->isIntegerTy(32)) + return false; + + if (MSI.getDestAddressSpace() > 255) + return false; + + return SelectCall(&I, "memset"); + } + } + return false; +} + bool ARMFastISel::SelectTrunc(const Instruction *I) { // The high bits for a type smaller than the register size are assumed to be // undefined. @@ -2235,6 +2283,8 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { case Instruction::SRem: return SelectSRem(I); case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast(I)) + return SelectIntrinsicCall(*II); return SelectCall(I); case Instruction::Select: return SelectSelect(I); -- cgit v1.1 From c7352f8ca0fc716c38cb3d81e63e943d47d578b3 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 11 Nov 2011 23:34:43 +0000 Subject: ARM optional size suffix for VLDR/VSTR syntax. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144427 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrVFP.td | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index e746cf2..22a464e 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1163,3 +1163,12 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; +// The size suffix is optional for VLDR/VSTR +def : VFP2InstAlias<"vldr$p $Dd, $addr", + (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vldr$p $Sd, $addr", + (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vstr$p $Dd, $addr", + (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; +def : VFP2InstAlias<"vstr$p $Sd, $addr", + (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -- cgit v1.1 From 10a630dea6d03e8cfc9575c81a996e1644b0d660 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 11 Nov 2011 23:45:47 +0000 Subject: ARM assembly parsing for VST1 two-register encoding. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144430 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d3c4486b..7106473 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1238,12 +1238,11 @@ class VST1D<bits<4> op7_4, string Dt> let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2, - "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins VecListTwoD:$Vd, addrmode6:$Rn), + IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDInstruction"; } def VST1d8 : VST1D<{0,0,0,?}, "8">; -- cgit v1.1 From 4d06138d53b5e3248eccd04b2b31277fce66f260 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Fri, 11 Nov 2011 23:51:31 +0000 Subject: Oops. Missed the isel half of this. revert while I sort that out. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144431 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 7106473..d3c4486b 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1238,11 +1238,12 @@ class VST1D<bits<4> op7_4, string Dt> let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q<bits<4> op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins VecListTwoD:$Vd, addrmode6:$Rn), - IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { + : NLdSt<0,0b00,0b1010,op7_4, (outs), + (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2, + "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVSTInstruction"; } def VST1d8 : VST1D<{0,0,0,?}, "8">; -- cgit v1.1 From 0e3642a58746a26f50709179a2bcb25f335e2dac Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Fri, 11 Nov 2011 23:58:27 +0000 Subject: Some cleanup and bulletproofing for node replacement in LegalizeDAG. To maintain LegalizeDAG invariants, whenever a node is replaced, we must attempt to delete it, and if it still has uses after it is replaced (which can happen in rare cases due to CSE), we must revisit it. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144432 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 116 ++++++++++++++++--------------- 1 file changed, 59 insertions(+), 57 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 0e864fe..0bca55f 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -134,14 +134,39 @@ private: void ExpandNode(SDNode *Node); void PromoteNode(SDNode *Node); - // DAGUpdateListener implementation. - virtual void NodeDeleted(SDNode *N, SDNode *E) { + void ForgetNode(SDNode *N) { LegalizedNodes.erase(N); if (LegalizePosition == SelectionDAG::allnodes_iterator(N)) ++LegalizePosition; } +public: + // DAGUpdateListener implementation. + virtual void NodeDeleted(SDNode *N, SDNode *E) { + ForgetNode(N); + } virtual void NodeUpdated(SDNode *N) {} + + // Node replacement helpers + void ReplacedNode(SDNode *N) { + if (N->use_empty()) { + DAG.RemoveDeadNode(N, this); + } else { + ForgetNode(N); + } + } + void ReplaceNode(SDNode *Old, SDNode *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } + void ReplaceNode(SDValue Old, SDValue New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old.getNode()); + } + void ReplaceNode(SDNode *Old, const SDValue *New) { + DAG.ReplaceAllUsesWith(Old, New, this); + ReplacedNode(Old); + } }; } @@ -267,7 +292,7 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { /// ExpandUnalignedStore - Expands an unaligned store to 2 half-size stores. 
static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, const TargetLowering &TLI, - SelectionDAG::DAGUpdateListener *DUL) { + SelectionDAGLegalize *DAGLegalize) { SDValue Chain = ST->getChain(); SDValue Ptr = ST->getBasePtr(); SDValue Val = ST->getValue(); @@ -284,8 +309,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val); Result = DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(), ST->isVolatile(), ST->isNonTemporal(), Alignment); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - DAG.RemoveDeadNode(ST, DUL); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); return; } // Do a (aligned) store to a stack slot, then copy from the stack slot @@ -349,8 +373,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0], Stores.size()); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - DAG.RemoveDeadNode(ST, DUL); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); return; } assert(ST->getMemoryVT().isInteger() && @@ -382,8 +405,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2); - DAG.ReplaceAllUsesWith(SDValue(ST, 0), Result, DUL); - DAG.RemoveDeadNode(ST, DUL); + DAGLegalize->ReplaceNode(SDValue(ST, 0), Result); } /// ExpandUnalignedLoad - Expands an unaligned load to 2 half-size loads. 
@@ -824,7 +846,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.ReplaceAllUsesWith(Node, NewNode, this); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) DAG.TransferDbgValues(SDValue(Node, i), SDValue(NewNode, i)); - DAG.RemoveDeadNode(Node, this); + ReplacedNode(Node); Node = NewNode; } switch (Action) { @@ -846,7 +868,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.ReplaceAllUsesWith(Node, ResultVals.data(), this); for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) DAG.TransferDbgValues(SDValue(Node, i), ResultVals[i]); - DAG.RemoveDeadNode(Node, this); + ReplacedNode(Node); } return; } @@ -881,7 +903,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp2, LD->getOffset()); Tmp3 = SDValue(Node, 0); Tmp4 = SDValue(Node, 1); @@ -920,10 +941,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; } } - // Since loads produce two values, make sure to remember that we - // legalized both of them. - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); + if (Tmp4.getNode() != Node) { + assert(Tmp3.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp3); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp4); + ReplacedNode(Node); + } return; } @@ -1058,8 +1081,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Node = DAG.UpdateNodeOperands(Node, - Tmp1, Tmp2, LD->getOffset()); Tmp1 = SDValue(Node, 0); Tmp2 = SDValue(Node, 1); @@ -1135,8 +1156,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // Since loads produce two values, make sure to remember that we legalized // both of them. 
- DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); - DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); + if (Tmp2.getNode() != Node) { + assert(Tmp1.getNode() != Node && "Load must be completely replaced"); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 0), Tmp1); + DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2); + ReplacedNode(Node); + } break; } case ISD::STORE: { @@ -1149,17 +1174,12 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { if (!ST->isTruncatingStore()) { if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) { - DAG.ReplaceAllUsesWith(ST, OptStore, this); - DAG.RemoveDeadNode(ST, this); + ReplaceNode(ST, OptStore); break; } { Tmp3 = ST->getValue(); - Node = DAG.UpdateNodeOperands(Node, - Tmp1, Tmp3, Tmp2, - ST->getOffset()); - EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { default: assert(0 && "This action is not supported yet!"); @@ -1176,10 +1196,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { break; case TargetLowering::Custom: Tmp1 = TLI.LowerOperation(SDValue(Node, 0), DAG); - if (Tmp1.getNode()) { - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Tmp1, this); - DAG.RemoveDeadNode(Node, this); - } + if (Tmp1.getNode()) + ReplaceNode(SDValue(Node, 0), Tmp1); break; case TargetLowering::Promote: { assert(VT.isVector() && "Unknown legal promote case!"); @@ -1189,8 +1207,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), Result); break; } } @@ -1212,8 +1229,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SDValue Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), NVT, isVolatile, isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), Result); 
} else if (StWidth & (StWidth - 1)) { // If not storing a power-of-2 number of bits, expand as two stores. assert(!StVT.isVector() && "Unsupported truncstore!"); @@ -1268,14 +1284,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { // The order of the stores doesn't matter. SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), Result); } else { - if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || - Tmp2 != ST->getBasePtr()) - Node = DAG.UpdateNodeOperands(Node, Tmp1, Tmp3, Tmp2, - ST->getOffset()); - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: assert(0 && "This action is not supported yet!"); case TargetLowering::Legal: @@ -1289,10 +1299,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { } break; case TargetLowering::Custom: - DAG.ReplaceAllUsesWith(SDValue(Node, 0), - TLI.LowerOperation(SDValue(Node, 0), DAG), - this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), + TLI.LowerOperation(SDValue(Node, 0), DAG)); break; case TargetLowering::Expand: assert(!StVT.isVector() && @@ -1304,8 +1312,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { SDValue Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(), isVolatile, isNonTemporal, Alignment); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), Result); break; } } @@ -3376,8 +3383,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue Result = DAG.getNode(ISD::BUILD_VECTOR, dl, Node->getValueType(0), &Scalars[0], Scalars.size()); - DAG.ReplaceAllUsesWith(SDValue(Node, 0), Result, this); - DAG.RemoveDeadNode(Node, this); + ReplaceNode(SDValue(Node, 0), Result); break; } case ISD::GLOBAL_OFFSET_TABLE: @@ -3394,10 +3400,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } // Replace the original 
node with the legalized result. - if (!Results.empty()) { - DAG.ReplaceAllUsesWith(Node, Results.data(), this); - DAG.RemoveDeadNode(Node, this); - } + if (!Results.empty()) + ReplaceNode(Node, Results.data()); } void SelectionDAGLegalize::PromoteNode(SDNode *Node) { @@ -3531,10 +3535,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { } // Replace the original node with the legalized result. - if (!Results.empty()) { - DAG.ReplaceAllUsesWith(Node, Results.data(), this); - DAG.RemoveDeadNode(Node, this); - } + if (!Results.empty()) + ReplaceNode(Node, Results.data()); } // SelectionDAG::Legalize - This is the entry point for the file. -- cgit v1.1 From 742c4bac07e2800275a69259296fba7c3e3f651b Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Sat, 12 Nov 2011 00:31:53 +0000 Subject: Re-apply 144430, this time with the associated isel and disassembler bits. Original commit msg: 'ARM assembly parsing for VST1 two-register encoding.' git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144437 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 8 ++++---- lib/Target/ARM/ARMInstrNEON.td | 5 ++--- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 4 ---- 3 files changed, 6 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp index 5f7b8b2..fb7d96a 100644 --- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -272,16 +272,16 @@ static const NEONLdStTableEntry NEONLdStTable[] = { { ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,true}, { ARM::VST1d64TPseudo_UPD, ARM::VST1d64T_UPD, false, true, true, SingleSpc, 3, 1 ,true}, -{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,true}, +{ ARM::VST1q16Pseudo, ARM::VST1q16, false, false, false, SingleSpc, 2, 4 ,false}, { ARM::VST1q16PseudoWB_fixed, ARM::VST1q16wb_fixed, false, true, false, SingleSpc, 2, 4 
,false}, { ARM::VST1q16PseudoWB_register, ARM::VST1q16wb_register, false, true, true, SingleSpc, 2, 4 ,false}, -{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, false, SingleSpc, 2, 2 ,true}, +{ ARM::VST1q32Pseudo, ARM::VST1q32, false, false, false, SingleSpc, 2, 2 ,false}, { ARM::VST1q32PseudoWB_fixed, ARM::VST1q32wb_fixed, false, true, false, SingleSpc, 2, 2 ,false}, { ARM::VST1q32PseudoWB_register, ARM::VST1q32wb_register, false, true, true, SingleSpc, 2, 2 ,false}, -{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, false, SingleSpc, 2, 1 ,true}, +{ ARM::VST1q64Pseudo, ARM::VST1q64, false, false, false, SingleSpc, 2, 1 ,false}, { ARM::VST1q64PseudoWB_fixed, ARM::VST1q64wb_fixed, false, true, false, SingleSpc, 2, 1 ,false}, { ARM::VST1q64PseudoWB_register, ARM::VST1q64wb_register, false, true, true, SingleSpc, 2, 1 ,false}, -{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, false, SingleSpc, 2, 8 ,true}, +{ ARM::VST1q8Pseudo, ARM::VST1q8, false, false, false, SingleSpc, 2, 8 ,false}, { ARM::VST1q8PseudoWB_fixed, ARM::VST1q8wb_fixed, false, true, false, SingleSpc, 2, 8 ,false}, { ARM::VST1q8PseudoWB_register, ARM::VST1q8wb_register, false, true, true, SingleSpc, 2, 8 ,false}, diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d3c4486b..07403c1 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1238,9 +1238,8 @@ class VST1D op7_4, string Dt> let DecoderMethod = "DecodeVSTInstruction"; } class VST1Q op7_4, string Dt> - : NLdSt<0,0b00,0b1010,op7_4, (outs), - (ins addrmode6:$Rn, DPR:$Vd, DPR:$src2), IIC_VST1x2, - "vst1", Dt, "\\{$Vd, $src2\\}, $Rn", "", []> { + : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListTwoD:$Vd), + IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; let DecoderMethod = "DecodeVSTInstruction"; diff --git a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp index 840f50b..0b9b5d0 100644 
--- a/lib/Target/ARM/Disassembler/ARMDisassembler.cpp +++ b/lib/Target/ARM/Disassembler/ARMDisassembler.cpp @@ -2267,10 +2267,6 @@ static DecodeStatus DecodeVSTInstruction(llvm::MCInst &Inst, unsigned Insn, // Second input register switch (Inst.getOpcode()) { - case ARM::VST1q8: - case ARM::VST1q16: - case ARM::VST1q32: - case ARM::VST1q64: case ARM::VST1d8T: case ARM::VST1d16T: case ARM::VST1d32T: -- cgit v1.1 From 501852423d34578bc41a745681783b04124dd0db Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Sat, 12 Nov 2011 00:35:34 +0000 Subject: Don't try to form pre/post-indexed loads/stores until after LegalizeDAG runs. Fixes PR11029. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144438 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 14 +++++++------- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 8b28ea9..4384db8 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -279,7 +279,7 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(Unrestricted), + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} /// Run - runs the dag combiner on all nodes in the work list @@ -944,8 +944,8 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { void DAGCombiner::Run(CombineLevel AtLevel) { // set the instance variables, so that the various visit routines may use it. Level = AtLevel; - LegalOperations = Level >= NoIllegalOperations; - LegalTypes = Level >= NoIllegalTypes; + LegalOperations = Level >= AfterLegalizeVectorOps; + LegalTypes = Level >= AfterLegalizeTypes; // Add all the dag nodes to the worklist. 
WorkList.reserve(DAG.allnodes_size()); @@ -5471,7 +5471,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // fold (sint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5496,7 +5496,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // fold (uint_to_fp c1) -> c1fp if (N0C && OpVT != MVT::ppcf128 && // ...but only if the target supports immediate floating-point values - (Level == llvm::Unrestricted || + (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); @@ -5875,7 +5875,7 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { /// the add / subtract in and all of its other uses are redirected to the /// new load / store. bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; @@ -6007,7 +6007,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { /// load / store effectively and all of its uses are redirected to the /// new load / store. bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { - if (!LegalOperations) + if (Level < AfterLegalizeDAG) return false; bool isLoad = true; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 2964bd3..5cbce3f 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -487,7 +487,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in pre-legalize mode. 
{ NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); - CurDAG->Combine(Unrestricted, *AA, OptLevel); + CurDAG->Combine(BeforeLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized lowered selection DAG: BB#" << BlockNumber @@ -515,7 +515,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize types", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeTypes, *AA, OptLevel); } DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber @@ -540,7 +540,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { { NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeVectorOps, *AA, OptLevel); } DEBUG(dbgs() << "Optimized vector-legalized selection DAG: BB#" @@ -562,7 +562,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { // Run the DAG combiner in post-legalize mode. { NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); + CurDAG->Combine(AfterLegalizeDAG, *AA, OptLevel); } DEBUG(dbgs() << "Optimized legalized selection DAG: BB#" << BlockNumber -- cgit v1.1 From 9588c10b69121d9746b09e868fcc8879cbd98e3a Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Sat, 12 Nov 2011 00:58:43 +0000 Subject: ARM refactor simple immediate asm operand render methods. These immediate operands all use the same simple logic for rendering to MCInst, so have them share the method for doing so. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144439 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 4 +-- lib/Target/ARM/ARMInstrInfo.td | 35 +++++++++--------- lib/Target/ARM/ARMInstrThumb2.td | 2 +- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 60 ------------------------------- 4 files changed, 22 insertions(+), 79 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index c5bf607..12cb464 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -174,7 +174,7 @@ def s_cc_out : OptionalDefOperand { // ARM special operands for disassembly only. // -def SetEndAsmOperand : AsmOperandClass { +def SetEndAsmOperand : ImmAsmOperand { let Name = "SetEndImm"; let ParserMethod = "parseSetEndImm"; } @@ -820,7 +820,7 @@ class AMiscA1I opcod, bits<4> opc7_4, dag oops, dag iops, } // PKH instructions -def PKHLSLAsmOperand : AsmOperandClass { +def PKHLSLAsmOperand : ImmAsmOperand { let Name = "PKHLSLImm"; let ParserMethod = "parsePKHLSLImm"; } diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index af1f490..770703c 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -284,14 +284,6 @@ def lo16AllZero : PatLeaf<(i32 imm), [{ return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0; }], hi16>; -/// imm0_65535 - An immediate is in the range [0.65535]. -def Imm0_65535AsmOperand: AsmOperandClass { let Name = "Imm0_65535"; } -def imm0_65535 : Operand, ImmLeaf= 0 && Imm < 65536; -}]> { - let ParserMatchClass = Imm0_65535AsmOperand; -} - class BinOpWithFlagFrag : PatFrag<(ops node:$LHS, node:$RHS, node:$FLAG), res>; class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; @@ -326,6 +318,9 @@ def fsub_mlx : PatFrag<(ops node:$lhs, node:$rhs),(fsub node:$lhs, node:$rhs),[{ // Operand Definitions. // +// Immediate operands with a shared generic asm render method. 
+class ImmAsmOperand : AsmOperandClass { let RenderMethod = "addImmOperands"; } + // Branch target. // FIXME: rename brtarget to t2_brtarget def brtarget : Operand { @@ -496,7 +491,7 @@ def shift_so_reg_imm : Operand, // reg reg imm // so_imm - Match a 32-bit shifter_operand immediate operand, which is an // 8-bit immediate rotated by an arbitrary number of bits. -def SOImmAsmOperand: AsmOperandClass { let Name = "ARMSOImm"; } +def SOImmAsmOperand: ImmAsmOperand { let Name = "ARMSOImm"; } def so_imm : Operand, ImmLeaf { @@ -521,7 +516,7 @@ def arm_i32imm : PatLeaf<(imm), [{ }]>; /// imm0_7 predicate - Immediate in the range [0,7]. -def Imm0_7AsmOperand: AsmOperandClass { let Name = "Imm0_7"; } +def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } def imm0_7 : Operand, ImmLeaf= 0 && Imm < 8; }]> { @@ -529,7 +524,7 @@ def imm0_7 : Operand, ImmLeaf, ImmLeaf= 0 && Imm < 16; }]> { @@ -537,7 +532,7 @@ def imm0_15 : Operand, ImmLeaf, ImmLeaf= 0 && Imm < 32; }]> { @@ -545,7 +540,7 @@ def imm0_31 : Operand, ImmLeaf, ImmLeaf= 0 && Imm < 32; }]> { @@ -553,25 +548,33 @@ def imm0_32 : Operand, ImmLeaf, ImmLeaf= 0 && Imm < 256; }]> { let ParserMatchClass = Imm0_255AsmOperand; } +/// imm0_65535 - An immediate is in the range [0.65535]. +def Imm0_65535AsmOperand: ImmAsmOperand { let Name = "Imm0_65535"; } +def imm0_65535 : Operand, ImmLeaf= 0 && Imm < 65536; +}]> { + let ParserMatchClass = Imm0_65535AsmOperand; +} + // imm0_65535_expr - For movt/movw - 16-bit immediate that can also reference // a relocatable expression. // // FIXME: This really needs a Thumb version separate from the ARM version. // While the range is the same, and can thus use the same match class, // the encoding is different so it should have a different encoder method. 
-def Imm0_65535ExprAsmOperand: AsmOperandClass { let Name = "Imm0_65535Expr"; } +def Imm0_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm0_65535Expr"; } def imm0_65535_expr : Operand { let EncoderMethod = "getHiLo16ImmOpValue"; let ParserMatchClass = Imm0_65535ExprAsmOperand; } /// imm24b - True if the 32-bit immediate is encodable in 24 bits. -def Imm24bitAsmOperand: AsmOperandClass { let Name = "Imm24bit"; } +def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; } def imm24b : Operand, ImmLeaf= 0 && Imm <= 0xffffff; }]> { diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 0a28226..03077c0 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -65,7 +65,7 @@ def t2_so_imm_neg_XFORM : SDNodeXForm, ImmLeaf { diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 501a1c8..6974b11 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -1189,26 +1189,6 @@ public: Inst.addOperand(MCOperand::CreateImm(CE->getValue() / 4)); } - void addImm0_255Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_7Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_15Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_31Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addImm1_16Operands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The constant encodes as the immediate-1, and we store in the instruction @@ -1225,26 +1205,6 @@ public: Inst.addOperand(MCOperand::CreateImm(CE->getValue() - 1)); } - void 
addImm0_32Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_65535Operands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm0_65535ExprOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addImm24bitOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addImmThumbSROperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The constant encodes as the immediate, except for 32, which encodes as @@ -1254,11 +1214,6 @@ public: Inst.addOperand(MCOperand::CreateImm((Imm == 32 ? 0 : Imm))); } - void addPKHLSLImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addPKHASRImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // An ASR value of 32 encodes as 0, so that's how we want to add it to @@ -1268,16 +1223,6 @@ public: Inst.addOperand(MCOperand::CreateImm(Val == 32 ? 
0 : Val)); } - void addARMSOImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - - void addT2SOImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addT2SOImmNotOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); // The operand is actually a t2_so_imm, but we have its bitwise @@ -1294,11 +1239,6 @@ public: Inst.addOperand(MCOperand::CreateImm(~CE->getValue())); } - void addSetEndImmOperands(MCInst &Inst, unsigned N) const { - assert(N == 1 && "Invalid number of operands!"); - addExpr(Inst, getImm()); - } - void addMemBarrierOptOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); Inst.addOperand(MCOperand::CreateImm(unsigned(getMemBarrierOpt()))); -- cgit v1.1 From d1ac3a47f2c2993e8bbda0a9f1c843e9efd0f00f Mon Sep 17 00:00:00 2001 From: Rafael Espindola Date: Sat, 12 Nov 2011 01:57:54 +0000 Subject: The dwarf standard says that the only differences between an out-of-line instance and a concrete inlined instance are the use of DW_TAG_subprogram instead of DW_TAG_inlined_subroutine and who owns the tree. We were also omitting DW_AT_inline from the abstract roots. To fix this, make sure we mark abstract instance roots with DW_AT_inline even when we have only out-of-line instances referring to them with DW_AT_abstract_origin. FileCheck is not a very good tool for tests like this; maybe we should add a -verify mode to llvm-dwarfdump.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144441 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 581f04b..dc46a58 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -772,6 +772,13 @@ void DwarfDebug::endModule() { DIE *ISP = *AI; FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); } + for (DenseMap::iterator AI = AbstractSPDies.begin(), + AE = AbstractSPDies.end(); AI != AE; ++AI) { + DIE *ISP = AI->second; + if (InlinedSubprogramDIEs.count(ISP)) + continue; + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + } // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. -- cgit v1.1 From 75f69e3a3dff78cb89ded1d6c96ccb65603a82d9 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Sat, 12 Nov 2011 02:07:50 +0000 Subject: Make sure scalarrepl picks the correct alloca when it rewrites a bitcast. Fixes PR11353. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144442 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Scalar/ScalarReplAggregates.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp index b89f730..c12f403 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp @@ -1875,8 +1875,14 @@ void SROA::RewriteBitCast(BitCastInst *BC, AllocaInst *AI, uint64_t Offset, return; // The bitcast references the original alloca. Replace its uses with - // references to the first new element alloca. 
- Instruction *Val = NewElts[0]; + // references to the alloca containing offset zero (which is normally at + // index zero, but might not be in cases involving structs with elements + // of size zero). + Type *T = AI->getAllocatedType(); + uint64_t EltOffset = 0; + Type *IdxTy; + uint64_t Idx = FindElementAndOffset(T, EltOffset, IdxTy); + Instruction *Val = NewElts[Idx]; if (Val->getType() != BC->getDestTy()) { Val = new BitCastInst(Val, BC->getDestTy(), "", BC); Val->takeName(BC); @@ -2160,6 +2166,8 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst, } unsigned EltSize = TD->getTypeAllocSize(EltTy); + if (!EltSize) + continue; IRBuilder<> Builder(MI); -- cgit v1.1 From b8ebca83f4dff04ba21cc97673003f0bd35a2e49 Mon Sep 17 00:00:00 2001 From: Daniel Dunbar Date: Sat, 12 Nov 2011 02:10:57 +0000 Subject: build: Attempt to rectify inconsistencies between CMake and LLVMBuild versions of explicit dependencies. - The hope is that we have a tool/test to verify these are accurate (and tight) soon. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144444 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/JIT/CMakeLists.txt | 3 ++- lib/ExecutionEngine/JIT/LLVMBuild.txt | 2 +- lib/MC/LLVMBuild.txt | 2 +- lib/MC/MCDisassembler/CMakeLists.txt | 17 ----------------- lib/Object/LLVMBuild.txt | 2 +- lib/Target/ARM/Disassembler/LLVMBuild.txt | 2 +- lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 2 +- lib/Target/ARM/TargetInfo/LLVMBuild.txt | 2 +- lib/Target/CBackend/TargetInfo/LLVMBuild.txt | 2 +- lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt | 2 +- lib/Target/CellSPU/TargetInfo/LLVMBuild.txt | 2 +- lib/Target/CppBackend/TargetInfo/CMakeLists.txt | 1 + lib/Target/CppBackend/TargetInfo/LLVMBuild.txt | 2 +- lib/Target/MBlaze/Disassembler/CMakeLists.txt | 1 - lib/Target/MBlaze/TargetInfo/LLVMBuild.txt | 2 +- lib/Target/MSP430/MCTargetDesc/CMakeLists.txt | 2 ++ lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt | 2 +- lib/Target/MSP430/TargetInfo/LLVMBuild.txt | 2 +- lib/Target/Mips/TargetInfo/LLVMBuild.txt | 2 +- lib/Target/PTX/CMakeLists.txt | 1 + lib/Target/PTX/LLVMBuild.txt | 2 +- lib/Target/PTX/MCTargetDesc/CMakeLists.txt | 2 +- lib/Target/PTX/TargetInfo/LLVMBuild.txt | 2 +- lib/Target/PowerPC/TargetInfo/LLVMBuild.txt | 2 +- lib/Target/Sparc/TargetInfo/LLVMBuild.txt | 2 +- lib/Target/X86/CMakeLists.txt | 2 ++ lib/Target/X86/MCTargetDesc/CMakeLists.txt | 1 - lib/Target/X86/TargetInfo/LLVMBuild.txt | 2 +- lib/Target/XCore/MCTargetDesc/LLVMBuild.txt | 2 +- lib/Target/XCore/TargetInfo/LLVMBuild.txt | 2 +- lib/Transforms/IPO/CMakeLists.txt | 1 + 31 files changed, 31 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt index 92aa76a..813ccce 100644 --- a/lib/ExecutionEngine/JIT/CMakeLists.txt +++ b/lib/ExecutionEngine/JIT/CMakeLists.txt @@ -12,10 +12,11 @@ add_llvm_library(LLVMJIT ) add_llvm_library_dependencies(LLVMJIT + LLVMCodeGen LLVMCore LLVMExecutionEngine + LLVMMC 
LLVMRuntimeDyld LLVMSupport LLVMTarget - LLVMCodeGen ) diff --git a/lib/ExecutionEngine/JIT/LLVMBuild.txt b/lib/ExecutionEngine/JIT/LLVMBuild.txt index b974713..21cb300 100644 --- a/lib/ExecutionEngine/JIT/LLVMBuild.txt +++ b/lib/ExecutionEngine/JIT/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = JIT parent = ExecutionEngine -required_libraries = CodeGen Core ExecutionEngine MC Support Target +required_libraries = CodeGen Core ExecutionEngine MC RuntimeDyld Support Target diff --git a/lib/MC/LLVMBuild.txt b/lib/MC/LLVMBuild.txt index acc8dff..8ad66b6 100644 --- a/lib/MC/LLVMBuild.txt +++ b/lib/MC/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = MC parent = Libraries -required_libraries = Support +required_libraries = Object Support diff --git a/lib/MC/MCDisassembler/CMakeLists.txt b/lib/MC/MCDisassembler/CMakeLists.txt index 4debb28..5cf5f1b 100644 --- a/lib/MC/MCDisassembler/CMakeLists.txt +++ b/lib/MC/MCDisassembler/CMakeLists.txt @@ -10,21 +10,4 @@ add_llvm_library_dependencies(LLVMMCDisassembler LLVMMC LLVMMCParser LLVMSupport - LLVMTarget ) - -foreach(t ${LLVM_TARGETS_TO_BUILD}) - set(td ${LLVM_MAIN_SRC_DIR}/lib/Target/${t}) - if(EXISTS ${td}/TargetInfo/CMakeLists.txt) - add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Info") - endif() - if(EXISTS ${td}/MCTargetDesc/CMakeLists.txt) - add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Desc") - endif() - if(EXISTS ${td}/AsmParser/CMakeLists.txt) - add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}AsmParser") - endif() - if(EXISTS ${td}/Disassembler/CMakeLists.txt) - add_llvm_library_dependencies(LLVMMCDisassembler "LLVM${t}Disassembler") - endif() -endforeach(t) diff --git a/lib/Object/LLVMBuild.txt b/lib/Object/LLVMBuild.txt index 44c0083..20fbb85 100644 --- a/lib/Object/LLVMBuild.txt +++ b/lib/Object/LLVMBuild.txt @@ -19,5 +19,5 @@ type = Library name = Object parent = Libraries -required_libraries = Support +required_libraries = Core Support diff --git 
a/lib/Target/ARM/Disassembler/LLVMBuild.txt b/lib/Target/ARM/Disassembler/LLVMBuild.txt index dff57b4..baa9bc3 100644 --- a/lib/Target/ARM/Disassembler/LLVMBuild.txt +++ b/lib/Target/ARM/Disassembler/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = ARMDisassembler parent = ARM -required_libraries = ARMDesc ARMInfo MC Support +required_libraries = ARMCodeGen ARMDesc ARMInfo MC Support add_to_library_groups = ARM diff --git a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt index adc37cb..f529314 100644 --- a/lib/Target/ARM/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/ARM/MCTargetDesc/CMakeLists.txt @@ -12,8 +12,8 @@ add_dependencies(LLVMARMDesc ARMCommonTableGen) include_directories(${CMAKE_CURRENT_SOURCE_DIR}/.. ${CMAKE_CURRENT_BINARY_DIR}/..) add_llvm_library_dependencies(LLVMARMDesc - LLVMARMInfo LLVMARMAsmPrinter + LLVMARMInfo LLVMMC LLVMSupport ) diff --git a/lib/Target/ARM/TargetInfo/LLVMBuild.txt b/lib/Target/ARM/TargetInfo/LLVMBuild.txt index 7d7504f..046c1fc 100644 --- a/lib/Target/ARM/TargetInfo/LLVMBuild.txt +++ b/lib/Target/ARM/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = ARMInfo parent = ARM -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = ARM diff --git a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt index 943fe2d..35752b7 100644 --- a/lib/Target/CBackend/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CBackend/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = CBackendInfo parent = CBackend -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = CBackend diff --git a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt index b5147ae..abc44a2 100644 --- a/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = CellSPUDesc parent 
= CellSPU -required_libraries = CellSPUInfo MC Support +required_libraries = CellSPUInfo MC add_to_library_groups = CellSPU diff --git a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt index 7525359..0710cc3 100644 --- a/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CellSPU/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = CellSPUInfo parent = CellSPU -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = CellSPU diff --git a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt index 7165d8f..738b215 100644 --- a/lib/Target/CppBackend/TargetInfo/CMakeLists.txt +++ b/lib/Target/CppBackend/TargetInfo/CMakeLists.txt @@ -6,5 +6,6 @@ add_llvm_library(LLVMCppBackendInfo add_llvm_library_dependencies(LLVMCppBackendInfo LLVMMC + LLVMSupport LLVMTarget ) diff --git a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt index b130fee..67a23ba 100644 --- a/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt +++ b/lib/Target/CppBackend/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = CppBackendInfo parent = CppBackend -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = CppBackend diff --git a/lib/Target/MBlaze/Disassembler/CMakeLists.txt b/lib/Target/MBlaze/Disassembler/CMakeLists.txt index 112c64c..e0a53ee 100644 --- a/lib/Target/MBlaze/Disassembler/CMakeLists.txt +++ b/lib/Target/MBlaze/Disassembler/CMakeLists.txt @@ -14,7 +14,6 @@ set_property( endif() add_llvm_library_dependencies(LLVMMBlazeDisassembler - LLVMMBlazeCodeGen LLVMMBlazeDesc LLVMMBlazeInfo LLVMMC diff --git a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt index 488c2c7..938a1d9 100644 --- a/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt +++ b/lib/Target/MBlaze/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library 
name = MBlazeInfo parent = MBlaze -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = MBlaze diff --git a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt index 04bd03e..c2dd448 100644 --- a/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/MSP430/MCTargetDesc/CMakeLists.txt @@ -7,6 +7,8 @@ add_llvm_library_dependencies(LLVMMSP430Desc LLVMMC LLVMMSP430AsmPrinter LLVMMSP430Info + LLVMSupport + LLVMTarget ) add_dependencies(LLVMMSP430Desc MSP430CommonTableGen) diff --git a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt index 5d41082..1890e9d 100644 --- a/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/MSP430/MCTargetDesc/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = MSP430Desc parent = MSP430 -required_libraries = MC MSP430AsmPrinter MSP430Info Support +required_libraries = MC MSP430AsmPrinter MSP430Info Support Target add_to_library_groups = MSP430 diff --git a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt index 3bcc826..a745ea8 100644 --- a/lib/Target/MSP430/TargetInfo/LLVMBuild.txt +++ b/lib/Target/MSP430/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = MSP430Info parent = MSP430 -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = MSP430 diff --git a/lib/Target/Mips/TargetInfo/LLVMBuild.txt b/lib/Target/Mips/TargetInfo/LLVMBuild.txt index e8035af..90ae260 100644 --- a/lib/Target/Mips/TargetInfo/LLVMBuild.txt +++ b/lib/Target/Mips/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = MipsInfo parent = Mips -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = Mips diff --git a/lib/Target/PTX/CMakeLists.txt b/lib/Target/PTX/CMakeLists.txt index 46a458c..6709c1b 100644 --- a/lib/Target/PTX/CMakeLists.txt +++ b/lib/Target/PTX/CMakeLists.txt @@ 
-36,6 +36,7 @@ add_llvm_library_dependencies(LLVMPTXCodeGen LLVMSelectionDAG LLVMSupport LLVMTarget + LLVMTransformUtils ) add_subdirectory(TargetInfo) diff --git a/lib/Target/PTX/LLVMBuild.txt b/lib/Target/PTX/LLVMBuild.txt index 27807e6..22c70de 100644 --- a/lib/Target/PTX/LLVMBuild.txt +++ b/lib/Target/PTX/LLVMBuild.txt @@ -25,6 +25,6 @@ has_asmprinter = 1 type = Library name = PTXCodeGen parent = PTX -required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo Scalar SelectionDAG Support Target TransformUtils +required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils add_to_library_groups = PTX diff --git a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt index 811ef4b..94dbcee 100644 --- a/lib/Target/PTX/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/PTX/MCTargetDesc/CMakeLists.txt @@ -5,8 +5,8 @@ add_llvm_library(LLVMPTXDesc add_llvm_library_dependencies(LLVMPTXDesc LLVMMC - LLVMPTXInfo LLVMPTXAsmPrinter + LLVMPTXInfo LLVMSupport ) diff --git a/lib/Target/PTX/TargetInfo/LLVMBuild.txt b/lib/Target/PTX/TargetInfo/LLVMBuild.txt index f35c237..8e5285a 100644 --- a/lib/Target/PTX/TargetInfo/LLVMBuild.txt +++ b/lib/Target/PTX/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = PTXInfo parent = PTX -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = PTX diff --git a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt index 1f5d3e7..f51b417 100644 --- a/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt +++ b/lib/Target/PowerPC/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = PowerPCInfo parent = PowerPC -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = PowerPC diff --git a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt index 22f4e1f..81c9032 100644 --- 
a/lib/Target/Sparc/TargetInfo/LLVMBuild.txt +++ b/lib/Target/Sparc/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = SparcInfo parent = Sparc -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = Sparc diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index b590199..4542d4b 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -62,6 +62,8 @@ add_llvm_library_dependencies(LLVMX86CodeGen LLVMTarget LLVMX86AsmPrinter LLVMX86Desc + LLVMX86Info + LLVMX86Utils ) add_subdirectory(AsmParser) diff --git a/lib/Target/X86/MCTargetDesc/CMakeLists.txt b/lib/Target/X86/MCTargetDesc/CMakeLists.txt index 8721912..264e791 100644 --- a/lib/Target/X86/MCTargetDesc/CMakeLists.txt +++ b/lib/Target/X86/MCTargetDesc/CMakeLists.txt @@ -10,7 +10,6 @@ add_llvm_library_dependencies(LLVMX86Desc LLVMMC LLVMSupport LLVMX86AsmPrinter - LLVMX86AsmPrinter LLVMX86Info ) diff --git a/lib/Target/X86/TargetInfo/LLVMBuild.txt b/lib/Target/X86/TargetInfo/LLVMBuild.txt index 6b2635b..ee015bd 100644 --- a/lib/Target/X86/TargetInfo/LLVMBuild.txt +++ b/lib/Target/X86/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = X86Info parent = X86 -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = X86 diff --git a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt index 7f4a433..628afb5 100644 --- a/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt +++ b/lib/Target/XCore/MCTargetDesc/LLVMBuild.txt @@ -19,6 +19,6 @@ type = Library name = XCoreDesc parent = XCore -required_libraries = MC Support XCoreInfo +required_libraries = MC XCoreInfo add_to_library_groups = XCore diff --git a/lib/Target/XCore/TargetInfo/LLVMBuild.txt b/lib/Target/XCore/TargetInfo/LLVMBuild.txt index 1d1b722..d0b8e54 100644 --- a/lib/Target/XCore/TargetInfo/LLVMBuild.txt +++ b/lib/Target/XCore/TargetInfo/LLVMBuild.txt @@ -19,6 +19,6 @@ type = 
Library name = XCoreInfo parent = XCore -required_libraries = MC Support +required_libraries = MC Support Target add_to_library_groups = XCore diff --git a/lib/Transforms/IPO/CMakeLists.txt b/lib/Transforms/IPO/CMakeLists.txt index 4d8dbc2..8fa66fc 100644 --- a/lib/Transforms/IPO/CMakeLists.txt +++ b/lib/Transforms/IPO/CMakeLists.txt @@ -24,6 +24,7 @@ add_llvm_library(LLVMipo add_llvm_library_dependencies(LLVMipo LLVMAnalysis LLVMCore + LLVMInstCombine LLVMScalarOpts LLVMSupport LLVMTarget -- cgit v1.1 From 2c5d65202e690bd46f69aa142342c0d61b7ac42a Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 12 Nov 2011 02:20:46 +0000 Subject: Function for handling byval arguments. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144447 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsCallingConv.td | 5 +++-- lib/Target/Mips/MipsISelLowering.cpp | 42 ++++++++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index 0ae4ef6..b8a863b 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -35,8 +35,9 @@ def RetCC_MipsO32 : CallingConv<[ //===----------------------------------------------------------------------===// def CC_MipsN : CallingConv<[ - // FIXME: Handle byval, complex and float double parameters. - + // Handles byval parameters. + CCIfByVal>, + // Promote i8/i16/i32 arguments to i64. 
CCIfType<[i8, i16, i32], CCPromoteToType>, diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 96ec588..e57a057 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1772,8 +1772,6 @@ SDValue MipsTargetLowering::LowerATOMIC_FENCE(SDValue Op, // Calling Convention Implementation //===----------------------------------------------------------------------===// -#include "MipsGenCallingConv.inc" - //===----------------------------------------------------------------------===// // TODO: Implement a generic logic using tblgen that can support this. // Mips O32 ABI rules: @@ -1880,6 +1878,46 @@ static bool CC_MipsO32(unsigned ValNo, MVT ValVT, return false; // CC must always match } +static const unsigned Mips64IntRegs[8] = + {Mips::A0_64, Mips::A1_64, Mips::A2_64, Mips::A3_64, + Mips::T0_64, Mips::T1_64, Mips::T2_64, Mips::T3_64}; +static const unsigned Mips64DPRegs[8] = + {Mips::D12_64, Mips::D13_64, Mips::D14_64, Mips::D15_64, + Mips::D16_64, Mips::D17_64, Mips::D18_64, Mips::D19_64}; + +static bool CC_Mips64Byval(unsigned ValNo, MVT ValVT, MVT LocVT, + CCValAssign::LocInfo LocInfo, + ISD::ArgFlagsTy ArgFlags, CCState &State) { + unsigned Align = std::max(ArgFlags.getByValAlign(), (unsigned)8); + unsigned Size = (ArgFlags.getByValSize() + 7) / 8 * 8; + unsigned FirstIdx = State.getFirstUnallocated(Mips64IntRegs, 8); + + assert(Align <= 16 && "Cannot handle alignments larger than 16."); + + // If byval is 16-byte aligned, the first arg register must be even. + if ((Align == 16) && (FirstIdx % 2)) { + State.AllocateReg(Mips64IntRegs[FirstIdx], Mips64DPRegs[FirstIdx]); + ++FirstIdx; + } + + // Mark the registers allocated. + for (unsigned I = FirstIdx; Size && (I < 8); Size -= 8, ++I) + State.AllocateReg(Mips64IntRegs[I], Mips64DPRegs[I]); + + // Allocate space on caller's stack. 
+ unsigned Offset = State.AllocateStack(Size, Align); + + if (FirstIdx < 8) + State.addLoc(CCValAssign::getReg(ValNo, ValVT, Mips64IntRegs[FirstIdx], + LocVT, LocInfo)); + else + State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); + + return true; +} + +#include "MipsGenCallingConv.inc" + //===----------------------------------------------------------------------===// // Call Calling Convention Implementation //===----------------------------------------------------------------------===// -- cgit v1.1 From deab22a556195467f1c367c9623d276ace35c3cc Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 12 Nov 2011 02:25:00 +0000 Subject: 64-bit arbitrary immediate pattern. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144448 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 608c271..51b9bf9 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -215,6 +215,10 @@ def : Pat<(i64 immSExt16:$in), def : Pat<(i64 immZExt16:$in), (ORi64 ZERO_64, imm:$in)>; +// Arbitrary immediates +def : Pat<(i64 imm:$imm), + (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>; + // zextloadi32_u def : Pat<(zextloadi32_u addr:$a), (DSRL32 (DSLL32 (ULW64_P8 addr:$a), 0), 0)>, Requires<[IsN64]>; -- cgit v1.1 From 3a5257de21216125bddaa0e5f87c00d32e054cd0 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 12 Nov 2011 02:29:58 +0000 Subject: Implement Mips64's handling of byval arguments in LowerFormalArguments. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144449 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 78 +++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index e57a057..9ac2a6d 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2357,6 +2357,46 @@ static void ReadByValArg(MachineFunction &MF, SDValue Chain, DebugLoc dl, } } +// Create frame object on stack and copy registers used for byval passing to it. +static unsigned +CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, + std::vector& OutChains, SelectionDAG &DAG, + const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + MachineFrameInfo *MFI, bool IsRegLoc, + SmallVectorImpl &InVals, MipsFunctionInfo *MipsFI, + EVT PtrTy) { + const unsigned *Reg = Mips64IntRegs + 8; + int FOOffset; // Frame object offset from virtual frame pointer. + + if (IsRegLoc) { + Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, VA.getLocReg()); + FOOffset = (Reg - Mips64IntRegs) * 8 - 8 * 8; + MipsFI->setRegSaveAreaSize(-FOOffset); + } + else + FOOffset = VA.getLocMemOffset(); + + // Create frame object. + unsigned NumRegs = (Flags.getByValSize() + 7) / 8; + unsigned LastFI = MFI->CreateFixedObject(NumRegs * 8, FOOffset, true); + SDValue FIN = DAG.getFrameIndex(LastFI, PtrTy); + InVals.push_back(FIN); + + // Copy arg registers. 
+ for (unsigned I = 0; (Reg != Mips64IntRegs + 8) && (I < NumRegs); + ++Reg, ++I) { + unsigned VReg = AddLiveIn(MF, *Reg, Mips::CPU64RegsRegisterClass); + SDValue StorePtr = DAG.getNode(ISD::ADD, dl, PtrTy, FIN, + DAG.getConstant(I * 8, PtrTy)); + SDValue Store = DAG.getStore(Chain, dl, DAG.getRegister(VReg, MVT::i64), + StorePtr, MachinePointerInfo(), false, + false, 0); + OutChains.push_back(Store); + } + + return LastFI; +} + /// LowerFormalArguments - transform physical registers into virtual registers /// and generate load operations for arguments places on the stack. SDValue @@ -2392,9 +2432,28 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { CCValAssign &VA = ArgLocs[i]; EVT ValVT = VA.getValVT(); + ISD::ArgFlagsTy Flags = Ins[i].Flags; + bool IsRegLoc = VA.isRegLoc(); + + if (Flags.isByVal()) { + assert(Flags.getByValSize() && + "ByVal args of size 0 should have been ignored by front-end."); + if (IsO32) { + unsigned NumWords = (Flags.getByValSize() + 3) / 4; + LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(), + true); + SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); + InVals.push_back(FIN); + ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags); + } else // N32/64 + LastFI = CopyMips64ByValRegs(MF, Chain, dl, OutChains, DAG, VA, Flags, + MFI, IsRegLoc, InVals, MipsFI, + getPointerTy()); + continue; + } // Arguments stored on registers - if (VA.isRegLoc()) { + if (IsRegLoc) { EVT RegVT = VA.getLocVT(); unsigned ArgReg = VA.getLocReg(); TargetRegisterClass *RC = 0; @@ -2450,23 +2509,6 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, // sanity check assert(VA.isMemLoc()); - ISD::ArgFlagsTy Flags = Ins[i].Flags; - - if (Flags.isByVal()) { - assert(IsO32 && - "No support for ByVal args by ABIs other than O32 yet."); - assert(Flags.getByValSize() && - "ByVal args of size 0 should have been ignored by front-end."); - unsigned NumWords = 
(Flags.getByValSize() + 3) / 4; - LastFI = MFI->CreateFixedObject(NumWords * 4, VA.getLocMemOffset(), - true); - SDValue FIN = DAG.getFrameIndex(LastFI, getPointerTy()); - InVals.push_back(FIN); - ReadByValArg(MF, Chain, dl, OutChains, DAG, NumWords, FIN, VA, Flags); - - continue; - } - // The stack pointer offset is relative to the caller stack frame. LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); -- cgit v1.1 From afe153c53f525b5599e79f847e2a0723905b6518 Mon Sep 17 00:00:00 2001 From: Sean Callanan Date: Sat, 12 Nov 2011 02:31:32 +0000 Subject: Fixed the MCJIT so that it can emit not only instance methods but also class methods for Objective-C. Clang emits Objective-C method names with '\1' at the beginning, and the JIT has pre-existing logic to try prepending a '\1' when searching a module for an instance method (that is, a method whose name begins with '-'). I simply extended it to do the same thing when it encountered a class method (a method whose name begins with '+'). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144451 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h index c17a397..58f9100 100644 --- a/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h +++ b/lib/ExecutionEngine/MCJIT/MCJITMemoryManager.h @@ -39,9 +39,9 @@ public: if (Name[0] == '_') ++Name; Function *F = M->getFunction(Name); // Some ObjC names have a prefixed \01 in the IR. If we failed to find - // the symbol and it's of the ObjC conventions (starts with "-"), try - // prepending a \01 and see if we can find it that way. - if (!F && Name[0] == '-') + // the symbol and it's of the ObjC conventions (starts with "-" or + // "+"), try prepending a \01 and see if we can find it that way. 
+ if (!F && (Name[0] == '-' || Name[0] == '+')) F = M->getFunction((Twine("\1") + Name).str()); assert(F && "No matching function in JIT IR Module!"); return JMM->startFunctionBody(F, Size); @@ -56,9 +56,9 @@ public: if (Name[0] == '_') ++Name; Function *F = M->getFunction(Name); // Some ObjC names have a prefixed \01 in the IR. If we failed to find - // the symbol and it's of the ObjC conventions (starts with "-"), try - // prepending a \01 and see if we can find it that way. - if (!F && Name[0] == '-') + // the symbol and it's of the ObjC conventions (starts with "-" or + // "+"), try prepending a \01 and see if we can find it that way. + if (!F && (Name[0] == '-' || Name[0] == '+')) F = M->getFunction((Twine("\1") + Name).str()); assert(F && "No matching function in JIT IR Module!"); JMM->endFunctionBody(F, FunctionStart, FunctionEnd); -- cgit v1.1 From 6df3e7b1620372cf89b31eb5ff007fc4d1e721cf Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 12 Nov 2011 02:34:50 +0000 Subject: Implement Mips64's handling of byval arguments in LowerCall. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144452 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 112 +++++++++++++++++++++++++++++++---- 1 file changed, 100 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 9ac2a6d..f30a779 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2026,6 +2026,90 @@ WriteByValArg(SDValue& ByValChain, SDValue Chain, DebugLoc dl, MachinePointerInfo(0), MachinePointerInfo(0)); } +// Copy Mips64 byVal arg to registers and stack. 
+void static +PassByValArg64(SDValue& ByValChain, SDValue Chain, DebugLoc dl, + SmallVector, 16>& RegsToPass, + SmallVector& MemOpChains, int& LastFI, + MachineFrameInfo *MFI, SelectionDAG &DAG, SDValue Arg, + const CCValAssign &VA, const ISD::ArgFlagsTy& Flags, + EVT PtrTy, bool isLittle) { + unsigned ByValSize = Flags.getByValSize(); + unsigned Alignment = std::min(Flags.getByValAlign(), (unsigned)8); + bool IsRegLoc = VA.isRegLoc(); + unsigned Offset = 0; // Offset in # of bytes from the beginning of struct. + unsigned LocMemOffset = 0; + + if (!IsRegLoc) + LocMemOffset = VA.getLocMemOffset(); + else { + const unsigned *Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, + VA.getLocReg()); + const unsigned *RegEnd = Mips64IntRegs + 8; + + // Copy double words to registers. + for (; (Reg != RegEnd) && (ByValSize >= Offset + 8); ++Reg, Offset += 8) { + SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + SDValue LoadVal = DAG.getLoad(MVT::i64, dl, Chain, LoadPtr, + MachinePointerInfo(), false, false, false, + Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + RegsToPass.push_back(std::make_pair(*Reg, LoadVal)); + } + + // If there is an argument register available, copy the remainder of the + // byval argument with sub-doubleword loads and shifts. + if ((Reg != RegEnd) && (ByValSize != Offset)) { + assert((ByValSize < Offset + 8) && + "Size of the remainder should be smaller than 8-byte."); + SDValue Val; + for (unsigned LoadSize = 4; Offset < ByValSize; LoadSize /= 2) { + unsigned RemSize = ByValSize - Offset; + + if (RemSize < LoadSize) + continue; + + SDValue LoadPtr = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + SDValue LoadVal = + DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i64, Chain, LoadPtr, + MachinePointerInfo(), MVT::getIntegerVT(LoadSize * 8), + false, false, Alignment); + MemOpChains.push_back(LoadVal.getValue(1)); + + // Offset in number of bits from double word boundary. 
+ unsigned OffsetDW = (Offset % 8) * 8; + unsigned Shamt = isLittle ? OffsetDW : 64 - (OffsetDW + LoadSize * 8); + SDValue Shift = DAG.getNode(ISD::SHL, dl, MVT::i64, LoadVal, + DAG.getConstant(Shamt, MVT::i32)); + + Val = Val.getNode() ? DAG.getNode(ISD::OR, dl, MVT::i64, Val, Shift) : + Shift; + Offset += LoadSize; + Alignment = std::min(Alignment, LoadSize); + } + + RegsToPass.push_back(std::make_pair(*Reg, Val)); + return; + } + } + + unsigned MemCpySize = ByValSize - Offset; + if (MemCpySize) { + // Create a fixed object on stack at offset LocMemOffset and copy + // remainder of byval arg to it with memcpy. + SDValue Src = DAG.getNode(ISD::ADD, dl, PtrTy, Arg, + DAG.getConstant(Offset, PtrTy)); + LastFI = MFI->CreateFixedObject(MemCpySize, LocMemOffset, true); + SDValue Dst = DAG.getFrameIndex(LastFI, PtrTy); + ByValChain = DAG.getMemcpy(ByValChain, dl, Dst, Src, + DAG.getConstant(MemCpySize, PtrTy), Alignment, + /*isVolatile=*/false, /*AlwaysInline=*/false, + MachinePointerInfo(0), MachinePointerInfo(0)); + } +} + /// LowerCall - functions arguments are copied from virtual regs to /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted. /// TODO: isTailCall. @@ -2112,6 +2196,22 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, SDValue Arg = OutVals[i]; CCValAssign &VA = ArgLocs[i]; MVT ValVT = VA.getValVT(), LocVT = VA.getLocVT(); + ISD::ArgFlagsTy Flags = Outs[i].Flags; + + // ByVal Arg. + if (Flags.isByVal()) { + assert(Flags.getByValSize() && + "ByVal args of size 0 should have been ignored by front-end."); + if (IsO32) + WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + MFI, DAG, Arg, VA, Flags, getPointerTy(), + Subtarget->isLittle()); + else + PassByValArg64(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, + MFI, DAG, Arg, VA, Flags, getPointerTy(), + Subtarget->isLittle()); + continue; + } // Promote the value if needed. 
switch (VA.getLocInfo()) { @@ -2157,18 +2257,6 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, // Register can't get to this point... assert(VA.isMemLoc()); - // ByVal Arg. - ISD::ArgFlagsTy Flags = Outs[i].Flags; - if (Flags.isByVal()) { - assert(IsO32 && - "No support for ByVal args by ABIs other than O32 yet."); - assert(Flags.getByValSize() && - "ByVal args of size 0 should have been ignored by front-end."); - WriteByValArg(ByValChain, Chain, dl, RegsToPass, MemOpChains, LastFI, MFI, - DAG, Arg, VA, Flags, getPointerTy(), Subtarget->isLittle()); - continue; - } - // Create the frame index object for this incoming parameter LastFI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8, VA.getLocMemOffset(), true); -- cgit v1.1 From 7386612a1635090acf010aaa4c7f68b181fcfe65 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Sat, 12 Nov 2011 02:38:12 +0000 Subject: Fix typo. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144453 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index f30a779..1ba138f 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -820,7 +820,7 @@ MipsTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, return EmitAtomicBinary(MI, BB, 4, Mips::AND); case Mips::ATOMIC_LOAD_AND_I64: case Mips::ATOMIC_LOAD_AND_I64_P8: - return EmitAtomicBinary(MI, BB, 48, Mips::AND64); + return EmitAtomicBinary(MI, BB, 8, Mips::AND64); case Mips::ATOMIC_LOAD_OR_I8: case Mips::ATOMIC_LOAD_OR_I8_P8: -- cgit v1.1 From d7ecff49d9190262da48a9f5815c01138fd54ebc Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Sat, 12 Nov 2011 03:09:12 +0000 Subject: Don't try to loop on iterators that are potentially invalidated inside the loop. Fixes PR11361! 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144454 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/ScalarEvolution.cpp | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'lib') diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index b940d93..ac00259 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -4853,10 +4853,18 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN, // Also evaluate the other PHI nodes. However, we don't get to stop if we // cease to be able to evaluate one of them or if they stop evolving, // because that doesn't necessarily prevent us from computing PN. + SmallVector, 8> PHIsToCompute; for (DenseMap::const_iterator I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ PHINode *PHI = dyn_cast(I->first); if (!PHI || PHI == PN || PHI->getParent() != Header) continue; + PHIsToCompute.push_back(std::make_pair(PHI, I->second)); + } + // We use two distinct loops because EvaluateExpression may invalidate any + // iterators into CurrentIterVals. + for (SmallVectorImpl >::const_iterator + I = PHIsToCompute.begin(), E = PHIsToCompute.end(); I != E; ++I) { + PHINode *PHI = I->first; Constant *&NextPHI = NextIterVals[PHI]; if (!NextPHI) { // Not already computed. Value *BEValue = PHI->getIncomingValue(SecondIsBackedge); @@ -4928,10 +4936,20 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, // Update all the PHI nodes for the next iteration. DenseMap NextIterVals; + + // Create a list of which PHIs we need to compute. We want to do this before + // calling EvaluateExpression on them because that may invalidate iterators + // into CurrentIterVals. 
+ SmallVector PHIsToCompute; for (DenseMap::const_iterator I = CurrentIterVals.begin(), E = CurrentIterVals.end(); I != E; ++I){ PHINode *PHI = dyn_cast(I->first); if (!PHI || PHI->getParent() != Header) continue; + PHIsToCompute.push_back(PHI); + } + for (SmallVectorImpl::const_iterator I = PHIsToCompute.begin(), + E = PHIsToCompute.end(); I != E; ++I) { + PHINode *PHI = *I; Constant *&NextPHI = NextIterVals[PHI]; if (NextPHI) continue; // Already computed! -- cgit v1.1 From 7be5dfd1a164707fbfc9bb49de23d68b6e15df44 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 12 Nov 2011 09:58:49 +0000 Subject: Add more AVX2 shift lowering support. Move AVX2 variable shift to use patterns instead of custom lowering code. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144457 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 166 +++++++++++++++++++++++-------------- lib/Target/X86/X86InstrSSE.td | 49 +++++++++++ 2 files changed, 153 insertions(+), 62 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index e77b1df..f1c80a2 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -924,10 +924,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // FIXME: Do we need to handle scalar-to-vector here? setOperationAction(ISD::MUL, MVT::v4i32, Legal); - // Can turn SHL into an integer multiply. 
- setOperationAction(ISD::SHL, MVT::v4i32, Custom); - setOperationAction(ISD::SHL, MVT::v16i8, Custom); - setOperationAction(ISD::VSELECT, MVT::v2f64, Legal); setOperationAction(ISD::VSELECT, MVT::v2i64, Legal); setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); @@ -955,18 +951,32 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) } if (Subtarget->hasXMMInt()) { - setOperationAction(ISD::SRL, MVT::v2i64, Custom); - setOperationAction(ISD::SRL, MVT::v4i32, Custom); - setOperationAction(ISD::SRL, MVT::v16i8, Custom); setOperationAction(ISD::SRL, MVT::v8i16, Custom); + setOperationAction(ISD::SRL, MVT::v16i8, Custom); - setOperationAction(ISD::SHL, MVT::v2i64, Custom); - setOperationAction(ISD::SHL, MVT::v4i32, Custom); setOperationAction(ISD::SHL, MVT::v8i16, Custom); + setOperationAction(ISD::SHL, MVT::v16i8, Custom); - setOperationAction(ISD::SRA, MVT::v4i32, Custom); setOperationAction(ISD::SRA, MVT::v8i16, Custom); setOperationAction(ISD::SRA, MVT::v16i8, Custom); + + if (Subtarget->hasAVX2()) { + setOperationAction(ISD::SRL, MVT::v2i64, Legal); + setOperationAction(ISD::SRL, MVT::v4i32, Legal); + + setOperationAction(ISD::SHL, MVT::v2i64, Legal); + setOperationAction(ISD::SHL, MVT::v4i32, Legal); + + setOperationAction(ISD::SRA, MVT::v4i32, Legal); + } else { + setOperationAction(ISD::SRL, MVT::v2i64, Custom); + setOperationAction(ISD::SRL, MVT::v4i32, Custom); + + setOperationAction(ISD::SHL, MVT::v2i64, Custom); + setOperationAction(ISD::SHL, MVT::v4i32, Custom); + + setOperationAction(ISD::SRA, MVT::v4i32, Custom); + } } if (Subtarget->hasSSE42() || Subtarget->hasAVX()) @@ -1009,18 +1019,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i8, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i16, Custom); - setOperationAction(ISD::SRL, MVT::v4i64, Custom); - setOperationAction(ISD::SRL, MVT::v8i32, Custom); setOperationAction(ISD::SRL, MVT::v16i16, Custom); 
setOperationAction(ISD::SRL, MVT::v32i8, Custom); - setOperationAction(ISD::SHL, MVT::v4i64, Custom); - setOperationAction(ISD::SHL, MVT::v8i32, Custom); setOperationAction(ISD::SHL, MVT::v16i16, Custom); setOperationAction(ISD::SHL, MVT::v32i8, Custom); - setOperationAction(ISD::SRA, MVT::v8i32, Custom); setOperationAction(ISD::SRA, MVT::v16i16, Custom); + setOperationAction(ISD::SRA, MVT::v32i8, Custom); setOperationAction(ISD::SETCC, MVT::v32i8, Custom); setOperationAction(ISD::SETCC, MVT::v16i16, Custom); @@ -1053,6 +1059,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) // Don't lower v32i8 because there is no 128-bit byte mul setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); + + setOperationAction(ISD::SRL, MVT::v4i64, Legal); + setOperationAction(ISD::SRL, MVT::v8i32, Legal); + + setOperationAction(ISD::SHL, MVT::v4i64, Legal); + setOperationAction(ISD::SHL, MVT::v8i32, Legal); + + setOperationAction(ISD::SRA, MVT::v8i32, Legal); } else { setOperationAction(ISD::ADD, MVT::v4i64, Custom); setOperationAction(ISD::ADD, MVT::v8i32, Custom); @@ -1068,6 +1082,14 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::MUL, MVT::v8i32, Custom); setOperationAction(ISD::MUL, MVT::v16i16, Custom); // Don't lower v32i8 because there is no 128-bit byte mul + + setOperationAction(ISD::SRL, MVT::v4i64, Custom); + setOperationAction(ISD::SRL, MVT::v8i32, Custom); + + setOperationAction(ISD::SHL, MVT::v4i64, Custom); + setOperationAction(ISD::SHL, MVT::v8i32, Custom); + + setOperationAction(ISD::SRA, MVT::v8i32, Custom); } // Custom lower several nodes for 256-bit types. @@ -9510,6 +9532,14 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const // Fix vector shift instructions where the last operand is a non-immediate // i32 value. 
+ case Intrinsic::x86_avx2_pslli_w: + case Intrinsic::x86_avx2_pslli_d: + case Intrinsic::x86_avx2_pslli_q: + case Intrinsic::x86_avx2_psrli_w: + case Intrinsic::x86_avx2_psrli_d: + case Intrinsic::x86_avx2_psrli_q: + case Intrinsic::x86_avx2_psrai_w: + case Intrinsic::x86_avx2_psrai_d: case Intrinsic::x86_sse2_pslli_w: case Intrinsic::x86_sse2_pslli_d: case Intrinsic::x86_sse2_pslli_q: @@ -9557,6 +9587,30 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const case Intrinsic::x86_sse2_psrai_d: NewIntNo = Intrinsic::x86_sse2_psra_d; break; + case Intrinsic::x86_avx2_pslli_w: + NewIntNo = Intrinsic::x86_avx2_psll_w; + break; + case Intrinsic::x86_avx2_pslli_d: + NewIntNo = Intrinsic::x86_avx2_psll_d; + break; + case Intrinsic::x86_avx2_pslli_q: + NewIntNo = Intrinsic::x86_avx2_psll_q; + break; + case Intrinsic::x86_avx2_psrli_w: + NewIntNo = Intrinsic::x86_avx2_psrl_w; + break; + case Intrinsic::x86_avx2_psrli_d: + NewIntNo = Intrinsic::x86_avx2_psrl_d; + break; + case Intrinsic::x86_avx2_psrli_q: + NewIntNo = Intrinsic::x86_avx2_psrl_q; + break; + case Intrinsic::x86_avx2_psrai_w: + NewIntNo = Intrinsic::x86_avx2_psra_w; + break; + case Intrinsic::x86_avx2_psrai_d: + NewIntNo = Intrinsic::x86_avx2_psra_d; + break; default: { ShAmtVT = MVT::v2i32; switch (IntNo) { @@ -10251,52 +10305,6 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) const { } } - // AVX2 variable shifts - if (Subtarget->hasAVX2()) { - if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psllv_d, MVT::i32), - R, Amt); - if (VT == MVT::v8i32 && Op->getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psllv_d_256, MVT::i32), - R, Amt); - if (VT == MVT::v2i64 && Op->getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psllv_q, MVT::i32), 
- R, Amt); - if (VT == MVT::v4i64 && Op->getOpcode() == ISD::SHL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psllv_q_256, MVT::i32), - R, Amt); - - if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrlv_d, MVT::i32), - R, Amt); - if (VT == MVT::v8i32 && Op->getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrlv_d_256, MVT::i32), - R, Amt); - if (VT == MVT::v2i64 && Op->getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrlv_q, MVT::i32), - R, Amt); - if (VT == MVT::v4i64 && Op->getOpcode() == ISD::SRL) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrlv_q_256, MVT::i32), - R, Amt); - - if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SRA) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrav_d, MVT::i32), - R, Amt); - if (VT == MVT::v8i32 && Op->getOpcode() == ISD::SRA) - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, - DAG.getConstant(Intrinsic::x86_avx2_psrav_d_256, MVT::i32), - R, Amt); - } - // Lower SHL with variable shift amount. 
if (VT == MVT::v4i32 && Op->getOpcode() == ISD::SHL) { Op = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, VT, @@ -13464,7 +13472,9 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, if (!Subtarget->hasXMMInt()) return SDValue(); - if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16) + if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16 && + (!Subtarget->hasAVX2() || + (VT != MVT::v4i64 && VT != MVT::v8i32 && VT != MVT::v16i16))) return SDValue(); SDValue ShAmtOp = N->getOperand(1); @@ -13537,6 +13547,18 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Intrinsic::x86_sse2_pslli_w, MVT::i32), ValOp, BaseShAmt); + if (VT == MVT::v4i64) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_q, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v8i32) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_d, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v16i16) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_pslli_w, MVT::i32), + ValOp, BaseShAmt); break; case ISD::SRA: if (VT == MVT::v4i32) @@ -13547,6 +13569,14 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, DAG.getConstant(Intrinsic::x86_sse2_psrai_w, MVT::i32), ValOp, BaseShAmt); + if (VT == MVT::v8i32) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrai_d, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v16i16) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrai_w, MVT::i32), + ValOp, BaseShAmt); break; case ISD::SRL: if (VT == MVT::v2i64) @@ -13561,6 +13591,18 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG, return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, 
DAG.getConstant(Intrinsic::x86_sse2_psrli_w, MVT::i32), ValOp, BaseShAmt); + if (VT == MVT::v4i64) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_q, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v8i32) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_d, MVT::i32), + ValOp, BaseShAmt); + if (VT == MVT::v16i16) + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, + DAG.getConstant(Intrinsic::x86_avx2_psrli_w, MVT::i32), + ValOp, BaseShAmt); break; } return SDValue(); diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 10f527c..735a30f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -7713,3 +7713,52 @@ defm VPSRLVQ : avx2_var_shift_i64<0x45, "vpsrlvq", int_x86_avx2_psrlv_q, defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", int_x86_avx2_psrav_d, int_x86_avx2_psrav_d_256>; +let Predicates = [HasAVX2] in { + def : Pat<(v4i32 (shl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), + (VPSLLVDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), + (VPSLLVQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (srl (v4i32 VR128:$src1), (v4i32 VR128:$src2))), + (VPSRLVDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (v2i64 VR128:$src2))), + (VPSRLVQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (sra (v4i32 VR128:$src1), (v4i32 VR128:$src2))), + (VPSRAVDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v8i32 (shl (v8i32 VR256:$src1), (v8i32 VR256:$src2))), + (VPSLLVDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (v4i64 VR256:$src2))), + (VPSLLVQYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (srl (v8i32 VR256:$src1), (v8i32 VR256:$src2))), + (VPSRLVDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (v4i64 VR256:$src2))), + (VPSRLVQYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (sra (v8i32 
VR256:$src1), (v8i32 VR256:$src2))), + (VPSRAVDYrr VR256:$src1, VR256:$src2)>; + + def : Pat<(v4i32 (shl (v4i32 VR128:$src1), + (v4i32 (bitconvert (memopv2i64 addr:$src2))))), + (VPSLLVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (shl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))), + (VPSLLVQrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (srl (v4i32 VR128:$src1), + (v4i32 (bitconvert (memopv2i64 addr:$src2))))), + (VPSRLVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v2i64 (srl (v2i64 VR128:$src1), (memopv2i64 addr:$src2))), + (VPSRLVQrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i32 (sra (v4i32 VR128:$src1), + (v4i32 (bitconvert (memopv2i64 addr:$src2))))), + (VPSRAVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i32 (shl (v8i32 VR256:$src1), + (v8i32 (bitconvert (memopv4i64 addr:$src2))))), + (VPSLLVDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (shl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))), + (VPSLLVQYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (srl (v8i32 VR256:$src1), + (v8i32 (bitconvert (memopv4i64 addr:$src2))))), + (VPSRLVDYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i64 (srl (v4i64 VR256:$src1), (memopv4i64 addr:$src2))), + (VPSRLVQYrm VR256:$src1, addr:$src2)>; + def : Pat<(v8i32 (sra (v8i32 VR256:$src1), + (v8i32 (bitconvert (memopv4i64 addr:$src2))))), + (VPSRAVDYrm VR256:$src1, addr:$src2)>; +} -- cgit v1.1 From 799c1ede712854f56ed85b74702e611b88b49436 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sat, 12 Nov 2011 22:39:45 +0000 Subject: Delete the linear scan register allocator. RegAllocGreedy has been the default for six months now. Deleting RegAllocLinearScan makes it possible to also delete VirtRegRewriter and clean up the spiller code. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144475 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CMakeLists.txt | 1 - lib/CodeGen/Passes.cpp | 5 - lib/CodeGen/RegAllocLinearScan.cpp | 1543 ------------------------------------ 3 files changed, 1549 deletions(-) delete mode 100644 lib/CodeGen/RegAllocLinearScan.cpp (limited to 'lib') diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 1bbe7a0..1c39cd2 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -70,7 +70,6 @@ add_llvm_library(LLVMCodeGen RegAllocBasic.cpp RegAllocFast.cpp RegAllocGreedy.cpp - RegAllocLinearScan.cpp RegAllocPBQP.cpp RegisterClassInfo.cpp RegisterCoalescer.cpp diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 315aedd..5f57088 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -55,11 +55,6 @@ FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { RegisterRegAlloc::setDefault(RegAlloc); } - // This forces linking of the linear scan register allocator, - // so -regalloc=linearscan still works in clang. - if (Ctor == createLinearScanRegisterAllocator) - return createLinearScanRegisterAllocator(); - if (Ctor != createDefaultRegisterAllocator) return Ctor(); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp deleted file mode 100644 index ce3fb90..0000000 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ /dev/null @@ -1,1543 +0,0 @@ -//===-- RegAllocLinearScan.cpp - Linear Scan register allocator -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a linear scan register allocator. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "regalloc" -#include "LiveDebugVariables.h" -#include "LiveRangeEdit.h" -#include "VirtRegMap.h" -#include "VirtRegRewriter.h" -#include "RegisterClassInfo.h" -#include "Spiller.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/CalcSpillWeights.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/EquivalenceClasses.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include - -using namespace llvm; - -STATISTIC(NumIters , "Number of iterations performed"); -STATISTIC(NumBacktracks, "Number of times we had to backtrack"); -STATISTIC(NumCoalesce, "Number of copies coalesced"); -STATISTIC(NumDowngrade, "Number of registers downgraded"); - -static cl::opt -NewHeuristic("new-spilling-heuristic", - cl::desc("Use new spilling heuristic"), - cl::init(false), cl::Hidden); - -static cl::opt -TrivCoalesceEnds("trivial-coalesce-ends", - cl::desc("Attempt trivial coalescing of interval ends"), - cl::init(false), cl::Hidden); - -static cl::opt -AvoidWAWHazard("avoid-waw-hazard", - cl::desc("Avoid write-write hazards for some register classes"), - cl::init(false), cl::Hidden); - -static RegisterRegAlloc -linearscanRegAlloc("linearscan", "linear scan register allocator", - 
createLinearScanRegisterAllocator); - -namespace { - // When we allocate a register, add it to a fixed-size queue of - // registers to skip in subsequent allocations. This trades a small - // amount of register pressure and increased spills for flexibility in - // the post-pass scheduler. - // - // Note that in a the number of registers used for reloading spills - // will be one greater than the value of this option. - // - // One big limitation of this is that it doesn't differentiate between - // different register classes. So on x86-64, if there is xmm register - // pressure, it can caused fewer GPRs to be held in the queue. - static cl::opt - NumRecentlyUsedRegs("linearscan-skip-count", - cl::desc("Number of registers for linearscan to remember" - "to skip."), - cl::init(0), - cl::Hidden); - - struct RALinScan : public MachineFunctionPass { - static char ID; - RALinScan() : MachineFunctionPass(ID) { - initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); - initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - initializeRegisterCoalescerPass( - *PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); - initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); - initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); - initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); - - // Initialize the queue to record recently-used registers. 
- if (NumRecentlyUsedRegs > 0) - RecentRegs.resize(NumRecentlyUsedRegs, 0); - RecentNext = RecentRegs.begin(); - avoidWAW_ = 0; - } - - typedef std::pair IntervalPtr; - typedef SmallVector IntervalPtrs; - private: - /// RelatedRegClasses - This structure is built the first time a function is - /// compiled, and keeps track of which register classes have registers that - /// belong to multiple classes or have aliases that are in other classes. - EquivalenceClasses RelatedRegClasses; - DenseMap OneClassForEachPhysReg; - - // NextReloadMap - For each register in the map, it maps to the another - // register which is defined by a reload from the same stack slot and - // both reloads are in the same basic block. - DenseMap NextReloadMap; - - // DowngradedRegs - A set of registers which are being "downgraded", i.e. - // un-favored for allocation. - SmallSet DowngradedRegs; - - // DowngradeMap - A map from virtual registers to physical registers being - // downgraded for the virtual registers. - DenseMap DowngradeMap; - - MachineFunction* mf_; - MachineRegisterInfo* mri_; - const TargetMachine* tm_; - const TargetRegisterInfo* tri_; - const TargetInstrInfo* tii_; - BitVector allocatableRegs_; - BitVector reservedRegs_; - LiveIntervals* li_; - MachineLoopInfo *loopInfo; - RegisterClassInfo RegClassInfo; - - /// handled_ - Intervals are added to the handled_ set in the order of their - /// start value. This is uses for backtracking. - std::vector handled_; - - /// fixed_ - Intervals that correspond to machine registers. - /// - IntervalPtrs fixed_; - - /// active_ - Intervals that are currently being processed, and which have a - /// live range active for the current point. - IntervalPtrs active_; - - /// inactive_ - Intervals that are currently being processed, but which have - /// a hold at the current point. - IntervalPtrs inactive_; - - typedef std::priority_queue, - greater_ptr > IntervalHeap; - IntervalHeap unhandled_; - - /// regUse_ - Tracks register usage. 
- SmallVector regUse_; - SmallVector regUseBackUp_; - - /// vrm_ - Tracks register assignments. - VirtRegMap* vrm_; - - std::auto_ptr rewriter_; - - std::auto_ptr spiller_; - - // The queue of recently-used registers. - SmallVector RecentRegs; - SmallVector::iterator RecentNext; - - // Last write-after-write register written. - unsigned avoidWAW_; - - // Record that we just picked this register. - void recordRecentlyUsed(unsigned reg) { - assert(reg != 0 && "Recently used register is NOREG!"); - if (!RecentRegs.empty()) { - *RecentNext++ = reg; - if (RecentNext == RecentRegs.end()) - RecentNext = RecentRegs.begin(); - } - } - - public: - virtual const char* getPassName() const { - return "Linear Scan Register Allocator"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - if (StrongPHIElim) - AU.addRequiredID(StrongPHIEliminationID); - // Make sure PassManager knows which analyses to make available - // to coalescing and which analyses coalescing invalidates. - AU.addRequiredTransitiveID(RegisterCoalescerPassID); - AU.addRequired(); - AU.addRequiredID(LiveStacksID); - AU.addPreservedID(LiveStacksID); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequiredID(MachineDominatorsID); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - /// runOnMachineFunction - register allocate the whole function - bool runOnMachineFunction(MachineFunction&); - - // Determine if we skip this register due to its being recently used. - bool isRecentlyUsed(unsigned reg) const { - return reg == avoidWAW_ || - std::find(RecentRegs.begin(), RecentRegs.end(), reg) != RecentRegs.end(); - } - - private: - /// linearScan - the linear scan algorithm - void linearScan(); - - /// initIntervalSets - initialize the interval sets. 
- /// - void initIntervalSets(); - - /// processActiveIntervals - expire old intervals and move non-overlapping - /// ones to the inactive list. - void processActiveIntervals(SlotIndex CurPoint); - - /// processInactiveIntervals - expire old intervals and move overlapping - /// ones to the active list. - void processInactiveIntervals(SlotIndex CurPoint); - - /// hasNextReloadInterval - Return the next liveinterval that's being - /// defined by a reload from the same SS as the specified one. - LiveInterval *hasNextReloadInterval(LiveInterval *cur); - - /// DowngradeRegister - Downgrade a register for allocation. - void DowngradeRegister(LiveInterval *li, unsigned Reg); - - /// UpgradeRegister - Upgrade a register for allocation. - void UpgradeRegister(unsigned Reg); - - /// assignRegOrStackSlotAtInterval - assign a register if one - /// is available, or spill. - void assignRegOrStackSlotAtInterval(LiveInterval* cur); - - void updateSpillWeights(std::vector &Weights, - unsigned reg, float weight, - const TargetRegisterClass *RC); - - /// findIntervalsToSpill - Determine the intervals to spill for the - /// specified interval. It's passed the physical registers whose spill - /// weight is the lowest among all the registers whose live intervals - /// conflict with the interval. - void findIntervalsToSpill(LiveInterval *cur, - std::vector > &Candidates, - unsigned NumCands, - SmallVector &SpillIntervals); - - /// attemptTrivialCoalescing - If a simple interval is defined by a copy, - /// try to allocate the definition to the same register as the source, - /// if the register is not defined during the life time of the interval. - /// This eliminates a copy, and is used to coalesce copies which were not - /// coalesced away before allocation either due to dest and src being in - /// different register classes or because the coalescer was overly - /// conservative. 
- unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg); - - /// - /// Register usage / availability tracking helpers. - /// - - void initRegUses() { - regUse_.resize(tri_->getNumRegs(), 0); - regUseBackUp_.resize(tri_->getNumRegs(), 0); - } - - void finalizeRegUses() { -#ifndef NDEBUG - // Verify all the registers are "freed". - bool Error = false; - for (unsigned i = 0, e = tri_->getNumRegs(); i != e; ++i) { - if (regUse_[i] != 0) { - dbgs() << tri_->getName(i) << " is still in use!\n"; - Error = true; - } - } - if (Error) - llvm_unreachable(0); -#endif - regUse_.clear(); - regUseBackUp_.clear(); - } - - void addRegUse(unsigned physReg) { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - ++regUse_[physReg]; - for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) - ++regUse_[*as]; - } - - void delRegUse(unsigned physReg) { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - assert(regUse_[physReg] != 0); - --regUse_[physReg]; - for (const unsigned* as = tri_->getAliasSet(physReg); *as; ++as) { - assert(regUse_[*as] != 0); - --regUse_[*as]; - } - } - - bool isRegAvail(unsigned physReg) const { - assert(TargetRegisterInfo::isPhysicalRegister(physReg) && - "should be physical register!"); - return regUse_[physReg] == 0; - } - - void backUpRegUses() { - regUseBackUp_ = regUse_; - } - - void restoreRegUses() { - regUse_ = regUseBackUp_; - } - - /// - /// Register handling helpers. - /// - - /// getFreePhysReg - return a free physical register for this virtual - /// register interval if we have one, otherwise return 0. - unsigned getFreePhysReg(LiveInterval* cur); - unsigned getFreePhysReg(LiveInterval* cur, - const TargetRegisterClass *RC, - unsigned MaxInactiveCount, - SmallVector &inactiveCounts, - bool SkipDGRegs); - - /// getFirstNonReservedPhysReg - return the first non-reserved physical - /// register in the register class. 
- unsigned getFirstNonReservedPhysReg(const TargetRegisterClass *RC) { - ArrayRef O = RegClassInfo.getOrder(RC); - assert(!O.empty() && "All registers reserved?!"); - return O.front(); - } - - void ComputeRelatedRegClasses(); - - template - void printIntervals(const char* const str, ItTy i, ItTy e) const { - DEBUG({ - if (str) - dbgs() << str << " intervals:\n"; - - for (; i != e; ++i) { - dbgs() << '\t' << *i->first << " -> "; - - unsigned reg = i->first->reg; - if (TargetRegisterInfo::isVirtualRegister(reg)) - reg = vrm_->getPhys(reg); - - dbgs() << tri_->getName(reg) << '\n'; - } - }); - } - }; - char RALinScan::ID = 0; -} - -INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) -INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights) -INITIALIZE_PASS_DEPENDENCY(LiveStacks) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_DEPENDENCY(VirtRegMap) -INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer) -INITIALIZE_AG_DEPENDENCY(AliasAnalysis) -INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc", - "Linear Scan Register Allocator", false, false) - -void RALinScan::ComputeRelatedRegClasses() { - // First pass, add all reg classes to the union, and determine at least one - // reg class that each register is in. - bool HasAliases = false; - for (TargetRegisterInfo::regclass_iterator RCI = tri_->regclass_begin(), - E = tri_->regclass_end(); RCI != E; ++RCI) { - RelatedRegClasses.insert(*RCI); - for (TargetRegisterClass::iterator I = (*RCI)->begin(), E = (*RCI)->end(); - I != E; ++I) { - HasAliases = HasAliases || *tri_->getAliasSet(*I) != 0; - - const TargetRegisterClass *&PRC = OneClassForEachPhysReg[*I]; - if (PRC) { - // Already processed this register. Just make sure we know that - // multiple register classes share a register. 
- RelatedRegClasses.unionSets(PRC, *RCI); - } else { - PRC = *RCI; - } - } - } - - // Second pass, now that we know conservatively what register classes each reg - // belongs to, add info about aliases. We don't need to do this for targets - // without register aliases. - if (HasAliases) - for (DenseMap::iterator - I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end(); - I != E; ++I) - for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) { - const TargetRegisterClass *AliasClass = - OneClassForEachPhysReg.lookup(*AS); - if (AliasClass) - RelatedRegClasses.unionSets(I->second, AliasClass); - } -} - -/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try -/// allocate the definition the same register as the source register if the -/// register is not defined during live time of the interval. If the interval is -/// killed by a copy, try to use the destination register. This eliminates a -/// copy. This is used to coalesce copies which were not coalesced away before -/// allocation either due to dest and src being in different register classes or -/// because the coalescer was overly conservative. -unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { - unsigned Preference = vrm_->getRegAllocPref(cur.reg); - if ((Preference && Preference == Reg) || !cur.containsOneValue()) - return Reg; - - // We cannot handle complicated live ranges. Simple linear stuff only. 
- if (cur.ranges.size() != 1) - return Reg; - - const LiveRange &range = cur.ranges.front(); - - VNInfo *vni = range.valno; - if (vni->isUnused() || !vni->def.isValid()) - return Reg; - - unsigned CandReg; - { - MachineInstr *CopyMI; - if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy()) - // Defined by a copy, try to extend SrcReg forward - CandReg = CopyMI->getOperand(1).getReg(); - else if (TrivCoalesceEnds && - (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) && - CopyMI->isCopy() && cur.reg == CopyMI->getOperand(1).getReg()) - // Only used by a copy, try to extend DstReg backwards - CandReg = CopyMI->getOperand(0).getReg(); - else - return Reg; - - // If the target of the copy is a sub-register then don't coalesce. - if(CopyMI->getOperand(0).getSubReg()) - return Reg; - } - - if (TargetRegisterInfo::isVirtualRegister(CandReg)) { - if (!vrm_->isAssignedReg(CandReg)) - return Reg; - CandReg = vrm_->getPhys(CandReg); - } - if (Reg == CandReg) - return Reg; - - const TargetRegisterClass *RC = mri_->getRegClass(cur.reg); - if (!RC->contains(CandReg)) - return Reg; - - if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg)) - return Reg; - - // Try to coalesce. - DEBUG(dbgs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg) - << '\n'); - vrm_->clearVirt(cur.reg); - vrm_->assignVirt2Phys(cur.reg, CandReg); - - ++NumCoalesce; - return CandReg; -} - -bool RALinScan::runOnMachineFunction(MachineFunction &fn) { - mf_ = &fn; - mri_ = &fn.getRegInfo(); - tm_ = &fn.getTarget(); - tri_ = tm_->getRegisterInfo(); - tii_ = tm_->getInstrInfo(); - allocatableRegs_ = tri_->getAllocatableSet(fn); - reservedRegs_ = tri_->getReservedRegs(fn); - li_ = &getAnalysis(); - loopInfo = &getAnalysis(); - RegClassInfo.runOnMachineFunction(fn); - - // We don't run the coalescer here because we have no reason to - // interact with it. If the coalescer requires interaction, it - // won't do anything. 
If it doesn't require interaction, we assume - // it was run as a separate pass. - - // If this is the first function compiled, compute the related reg classes. - if (RelatedRegClasses.empty()) - ComputeRelatedRegClasses(); - - // Also resize register usage trackers. - initRegUses(); - - vrm_ = &getAnalysis(); - if (!rewriter_.get()) rewriter_.reset(createVirtRegRewriter()); - - spiller_.reset(createSpiller(*this, *mf_, *vrm_)); - - initIntervalSets(); - - linearScan(); - - // Rewrite spill code and update the PhysRegsUsed set. - rewriter_->runOnMachineFunction(*mf_, *vrm_, li_); - - // Write out new DBG_VALUE instructions. - getAnalysis().emitDebugValues(vrm_); - - assert(unhandled_.empty() && "Unhandled live intervals remain!"); - - finalizeRegUses(); - - fixed_.clear(); - active_.clear(); - inactive_.clear(); - handled_.clear(); - NextReloadMap.clear(); - DowngradedRegs.clear(); - DowngradeMap.clear(); - spiller_.reset(0); - - return true; -} - -/// initIntervalSets - initialize the interval sets. 
-/// -void RALinScan::initIntervalSets() -{ - assert(unhandled_.empty() && fixed_.empty() && - active_.empty() && inactive_.empty() && - "interval sets should be empty on initialization"); - - handled_.reserve(li_->getNumIntervals()); - - for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - if (TargetRegisterInfo::isPhysicalRegister(i->second->reg)) { - if (!i->second->empty() && allocatableRegs_.test(i->second->reg)) { - mri_->setPhysRegUsed(i->second->reg); - fixed_.push_back(std::make_pair(i->second, i->second->begin())); - } - } else { - if (i->second->empty()) { - assignRegOrStackSlotAtInterval(i->second); - } - else - unhandled_.push(i->second); - } - } -} - -void RALinScan::linearScan() { - // linear scan algorithm - DEBUG({ - dbgs() << "********** LINEAR SCAN **********\n" - << "********** Function: " - << mf_->getFunction()->getName() << '\n'; - printIntervals("fixed", fixed_.begin(), fixed_.end()); - }); - - while (!unhandled_.empty()) { - // pick the interval with the earliest start point - LiveInterval* cur = unhandled_.top(); - unhandled_.pop(); - ++NumIters; - DEBUG(dbgs() << "\n*** CURRENT ***: " << *cur << '\n'); - - assert(!cur->empty() && "Empty interval in unhandled set."); - - processActiveIntervals(cur->beginIndex()); - processInactiveIntervals(cur->beginIndex()); - - assert(TargetRegisterInfo::isVirtualRegister(cur->reg) && - "Can only allocate virtual registers!"); - - // Allocating a virtual register. try to find a free - // physical register or spill an interval (possibly this one) in order to - // assign it one. 
- assignRegOrStackSlotAtInterval(cur); - - DEBUG({ - printIntervals("active", active_.begin(), active_.end()); - printIntervals("inactive", inactive_.begin(), inactive_.end()); - }); - } - - // Expire any remaining active intervals - while (!active_.empty()) { - IntervalPtr &IP = active_.back(); - unsigned reg = IP.first->reg; - DEBUG(dbgs() << "\tinterval " << *IP.first << " expired\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - active_.pop_back(); - } - - // Expire any remaining inactive intervals - DEBUG({ - for (IntervalPtrs::reverse_iterator - i = inactive_.rbegin(); i != inactive_.rend(); ++i) - dbgs() << "\tinterval " << *i->first << " expired\n"; - }); - inactive_.clear(); - - // Add live-ins to every BB except for entry. Also perform trivial coalescing. - MachineFunction::iterator EntryMBB = mf_->begin(); - SmallVector LiveInMBBs; - for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - LiveInterval &cur = *i->second; - unsigned Reg = 0; - bool isPhys = TargetRegisterInfo::isPhysicalRegister(cur.reg); - if (isPhys) - Reg = cur.reg; - else if (vrm_->isAssignedReg(cur.reg)) - Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg)); - if (!Reg) - continue; - // Ignore splited live intervals. 
- if (!isPhys && vrm_->getPreSplitReg(cur.reg)) - continue; - - for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); - I != E; ++I) { - const LiveRange &LR = *I; - if (li_->findLiveInMBBs(LR.start, LR.end, LiveInMBBs)) { - for (unsigned i = 0, e = LiveInMBBs.size(); i != e; ++i) - if (LiveInMBBs[i] != EntryMBB) { - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Adding a virtual register to livein set?"); - LiveInMBBs[i]->addLiveIn(Reg); - } - LiveInMBBs.clear(); - } - } - } - - DEBUG(dbgs() << *vrm_); - - // Look for physical registers that end up not being allocated even though - // register allocator had to spill other registers in its register class. - if (!vrm_->FindUnusedRegisters(li_)) - return; -} - -/// processActiveIntervals - expire old intervals and move non-overlapping ones -/// to the inactive list. -void RALinScan::processActiveIntervals(SlotIndex CurPoint) -{ - DEBUG(dbgs() << "\tprocessing active intervals:\n"); - - for (unsigned i = 0, e = active_.size(); i != e; ++i) { - LiveInterval *Interval = active_[i].first; - LiveInterval::iterator IntervalPos = active_[i].second; - unsigned reg = Interval->reg; - - IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); - - if (IntervalPos == Interval->end()) { // Remove expired intervals. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - - // Pop off the end of the list. - active_[i] = active_.back(); - active_.pop_back(); - --i; --e; - - } else if (IntervalPos->start > CurPoint) { - // Move inactive intervals to inactive list. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " inactive\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - delRegUse(reg); - // add to inactive. 
- inactive_.push_back(std::make_pair(Interval, IntervalPos)); - - // Pop off the end of the list. - active_[i] = active_.back(); - active_.pop_back(); - --i; --e; - } else { - // Otherwise, just update the iterator position. - active_[i].second = IntervalPos; - } - } -} - -/// processInactiveIntervals - expire old intervals and move overlapping -/// ones to the active list. -void RALinScan::processInactiveIntervals(SlotIndex CurPoint) -{ - DEBUG(dbgs() << "\tprocessing inactive intervals:\n"); - - for (unsigned i = 0, e = inactive_.size(); i != e; ++i) { - LiveInterval *Interval = inactive_[i].first; - LiveInterval::iterator IntervalPos = inactive_[i].second; - unsigned reg = Interval->reg; - - IntervalPos = Interval->advanceTo(IntervalPos, CurPoint); - - if (IntervalPos == Interval->end()) { // remove expired intervals. - DEBUG(dbgs() << "\t\tinterval " << *Interval << " expired\n"); - - // Pop off the end of the list. - inactive_[i] = inactive_.back(); - inactive_.pop_back(); - --i; --e; - } else if (IntervalPos->start <= CurPoint) { - // move re-activated intervals in active list - DEBUG(dbgs() << "\t\tinterval " << *Interval << " active\n"); - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - addRegUse(reg); - // add to active - active_.push_back(std::make_pair(Interval, IntervalPos)); - - // Pop off the end of the list. - inactive_[i] = inactive_.back(); - inactive_.pop_back(); - --i; --e; - } else { - // Otherwise, just update the iterator position. - inactive_[i].second = IntervalPos; - } - } -} - -/// updateSpillWeights - updates the spill weights of the specifed physical -/// register and its weight. 
-void RALinScan::updateSpillWeights(std::vector &Weights, - unsigned reg, float weight, - const TargetRegisterClass *RC) { - SmallSet Processed; - SmallSet SuperAdded; - SmallVector Supers; - Weights[reg] += weight; - Processed.insert(reg); - for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) { - Weights[*as] += weight; - Processed.insert(*as); - if (tri_->isSubRegister(*as, reg) && - SuperAdded.insert(*as) && - RC->contains(*as)) { - Supers.push_back(*as); - } - } - - // If the alias is a super-register, and the super-register is in the - // register class we are trying to allocate. Then add the weight to all - // sub-registers of the super-register even if they are not aliases. - // e.g. allocating for GR32, bh is not used, updating bl spill weight. - // bl should get the same spill weight otherwise it will be chosen - // as a spill candidate since spilling bh doesn't make ebx available. - for (unsigned i = 0, e = Supers.size(); i != e; ++i) { - for (const unsigned *sr = tri_->getSubRegisters(Supers[i]); *sr; ++sr) - if (!Processed.count(*sr)) - Weights[*sr] += weight; - } -} - -static -RALinScan::IntervalPtrs::iterator -FindIntervalInVector(RALinScan::IntervalPtrs &IP, LiveInterval *LI) { - for (RALinScan::IntervalPtrs::iterator I = IP.begin(), E = IP.end(); - I != E; ++I) - if (I->first == LI) return I; - return IP.end(); -} - -static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V, - SlotIndex Point){ - for (unsigned i = 0, e = V.size(); i != e; ++i) { - RALinScan::IntervalPtr &IP = V[i]; - LiveInterval::iterator I = std::upper_bound(IP.first->begin(), - IP.second, Point); - if (I != IP.first->begin()) --I; - IP.second = I; - } -} - -/// getConflictWeight - Return the number of conflicts between cur -/// live interval and defs and uses of Reg weighted by loop depthes. 
-static -float getConflictWeight(LiveInterval *cur, unsigned Reg, LiveIntervals *li_, - MachineRegisterInfo *mri_, - MachineLoopInfo *loopInfo) { - float Conflicts = 0; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(Reg), - E = mri_->reg_end(); I != E; ++I) { - MachineInstr *MI = &*I; - if (cur->liveAt(li_->getInstructionIndex(MI))) { - unsigned loopDepth = loopInfo->getLoopDepth(MI->getParent()); - Conflicts += std::pow(10.0f, (float)loopDepth); - } - } - return Conflicts; -} - -/// findIntervalsToSpill - Determine the intervals to spill for the -/// specified interval. It's passed the physical registers whose spill -/// weight is the lowest among all the registers whose live intervals -/// conflict with the interval. -void RALinScan::findIntervalsToSpill(LiveInterval *cur, - std::vector > &Candidates, - unsigned NumCands, - SmallVector &SpillIntervals) { - // We have figured out the *best* register to spill. But there are other - // registers that are pretty good as well (spill weight within 3%). Spill - // the one that has fewest defs and uses that conflict with cur. - float Conflicts[3] = { 0.0f, 0.0f, 0.0f }; - SmallVector SLIs[3]; - - DEBUG({ - dbgs() << "\tConsidering " << NumCands << " candidates: "; - for (unsigned i = 0; i != NumCands; ++i) - dbgs() << tri_->getName(Candidates[i].first) << " "; - dbgs() << "\n"; - }); - - // Calculate the number of conflicts of each candidate. 
- for (IntervalPtrs::iterator i = active_.begin(); i != active_.end(); ++i) { - unsigned Reg = i->first->reg; - unsigned PhysReg = vrm_->getPhys(Reg); - if (!cur->overlapsFrom(*i->first, i->second)) - continue; - for (unsigned j = 0; j < NumCands; ++j) { - unsigned Candidate = Candidates[j].first; - if (tri_->regsOverlap(PhysReg, Candidate)) { - if (NumCands > 1) - Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); - SLIs[j].push_back(i->first); - } - } - } - - for (IntervalPtrs::iterator i = inactive_.begin(); i != inactive_.end(); ++i){ - unsigned Reg = i->first->reg; - unsigned PhysReg = vrm_->getPhys(Reg); - if (!cur->overlapsFrom(*i->first, i->second-1)) - continue; - for (unsigned j = 0; j < NumCands; ++j) { - unsigned Candidate = Candidates[j].first; - if (tri_->regsOverlap(PhysReg, Candidate)) { - if (NumCands > 1) - Conflicts[j] += getConflictWeight(cur, Reg, li_, mri_, loopInfo); - SLIs[j].push_back(i->first); - } - } - } - - // Which is the best candidate? - unsigned BestCandidate = 0; - float MinConflicts = Conflicts[0]; - for (unsigned i = 1; i != NumCands; ++i) { - if (Conflicts[i] < MinConflicts) { - BestCandidate = i; - MinConflicts = Conflicts[i]; - } - } - - std::copy(SLIs[BestCandidate].begin(), SLIs[BestCandidate].end(), - std::back_inserter(SpillIntervals)); -} - -namespace { - struct WeightCompare { - private: - const RALinScan &Allocator; - - public: - WeightCompare(const RALinScan &Alloc) : Allocator(Alloc) {} - - typedef std::pair RegWeightPair; - bool operator()(const RegWeightPair &LHS, const RegWeightPair &RHS) const { - return LHS.second < RHS.second && !Allocator.isRecentlyUsed(LHS.first); - } - }; -} - -static bool weightsAreClose(float w1, float w2) { - if (!NewHeuristic) - return false; - - float diff = w1 - w2; - if (diff <= 0.02f) // Within 0.02f - return true; - return (diff / w2) <= 0.05f; // Within 5%. 
-} - -LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) { - DenseMap::iterator I = NextReloadMap.find(cur->reg); - if (I == NextReloadMap.end()) - return 0; - return &li_->getInterval(I->second); -} - -void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) { - for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) { - bool isNew = DowngradedRegs.insert(*AS); - (void)isNew; // Silence compiler warning. - assert(isNew && "Multiple reloads holding the same register?"); - DowngradeMap.insert(std::make_pair(li->reg, *AS)); - } - ++NumDowngrade; -} - -void RALinScan::UpgradeRegister(unsigned Reg) { - if (Reg) { - DowngradedRegs.erase(Reg); - for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) - DowngradedRegs.erase(*AS); - } -} - -namespace { - struct LISorter { - bool operator()(LiveInterval* A, LiveInterval* B) { - return A->beginIndex() < B->beginIndex(); - } - }; -} - -/// assignRegOrStackSlotAtInterval - assign a register if one is available, or -/// spill. -void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - DEBUG(dbgs() << "\tallocating current interval from " - << RC->getName() << ": "); - - // This is an implicitly defined live interval, just assign any register. - if (cur->empty()) { - unsigned physReg = vrm_->getRegAllocPref(cur->reg); - if (!physReg) - physReg = getFirstNonReservedPhysReg(RC); - DEBUG(dbgs() << tri_->getName(physReg) << '\n'); - // Note the register is not really in use. 
- vrm_->assignVirt2Phys(cur->reg, physReg); - return; - } - - backUpRegUses(); - - std::vector > SpillWeightsToAdd; - SlotIndex StartPosition = cur->beginIndex(); - const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - - // If start of this live interval is defined by a move instruction and its - // source is assigned a physical register that is compatible with the target - // register class, then we should try to assign it the same register. - // This can happen when the move is from a larger register class to a smaller - // one, e.g. X86::mov32to32_. These move instructions are not coalescable. - if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) { - VNInfo *vni = cur->begin()->valno; - if (!vni->isUnused() && vni->def.isValid()) { - MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); - if (CopyMI && CopyMI->isCopy()) { - unsigned DstSubReg = CopyMI->getOperand(0).getSubReg(); - unsigned SrcReg = CopyMI->getOperand(1).getReg(); - unsigned SrcSubReg = CopyMI->getOperand(1).getSubReg(); - unsigned Reg = 0; - if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) - Reg = SrcReg; - else if (vrm_->isAssignedReg(SrcReg)) - Reg = vrm_->getPhys(SrcReg); - if (Reg) { - if (SrcSubReg) - Reg = tri_->getSubReg(Reg, SrcSubReg); - if (DstSubReg) - Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); - if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) - mri_->setRegAllocationHint(cur->reg, 0, Reg); - } - } - } - } - - // For every interval in inactive we overlap with, mark the - // register as not free and update spill weights. - for (IntervalPtrs::const_iterator i = inactive_.begin(), - e = inactive_.end(); i != e; ++i) { - unsigned Reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(Reg) && - "Can only allocate virtual registers!"); - const TargetRegisterClass *RegRC = mri_->getRegClass(Reg); - // If this is not in a related reg class to the register we're allocating, - // don't check it. 
- if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - cur->overlapsFrom(*i->first, i->second-1)) { - Reg = vrm_->getPhys(Reg); - addRegUse(Reg); - SpillWeightsToAdd.push_back(std::make_pair(Reg, i->first->weight)); - } - } - - // Speculatively check to see if we can get a register right now. If not, - // we know we won't be able to by adding more constraints. If so, we can - // check to see if it is valid. Doing an exhaustive search of the fixed_ list - // is very bad (it contains all callee clobbered registers for any functions - // with a call), so we want to avoid doing that if possible. - unsigned physReg = getFreePhysReg(cur); - unsigned BestPhysReg = physReg; - if (physReg) { - // We got a register. However, if it's in the fixed_ list, we might - // conflict with it. Check to see if we conflict with it or any of its - // aliases. - SmallSet RegAliases; - for (const unsigned *AS = tri_->getAliasSet(physReg); *AS; ++AS) - RegAliases.insert(*AS); - - bool ConflictsWithFixed = false; - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - if (physReg == IP.first->reg || RegAliases.count(IP.first->reg)) { - // Okay, this reg is on the fixed list. Check to see if we actually - // conflict. - LiveInterval *I = IP.first; - if (I->endIndex() > StartPosition) { - LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); - IP.second = II; - if (II != I->begin() && II->start > StartPosition) - --II; - if (cur->overlapsFrom(*I, II)) { - ConflictsWithFixed = true; - break; - } - } - } - } - - // Okay, the register picked by our speculative getFreePhysReg call turned - // out to be in use. Actually add all of the conflicting fixed registers to - // regUse_ so we can do an accurate query. - if (ConflictsWithFixed) { - // For every interval in fixed we overlap with, mark the register as not - // free and update spill weights. 
- for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - LiveInterval *I = IP.first; - - const TargetRegisterClass *RegRC = OneClassForEachPhysReg[I->reg]; - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && - I->endIndex() > StartPosition) { - LiveInterval::iterator II = I->advanceTo(IP.second, StartPosition); - IP.second = II; - if (II != I->begin() && II->start > StartPosition) - --II; - if (cur->overlapsFrom(*I, II)) { - unsigned reg = I->reg; - addRegUse(reg); - SpillWeightsToAdd.push_back(std::make_pair(reg, I->weight)); - } - } - } - - // Using the newly updated regUse_ object, which includes conflicts in the - // future, see if there are any registers available. - physReg = getFreePhysReg(cur); - } - } - - // Restore the physical register tracker, removing information about the - // future. - restoreRegUses(); - - // If we find a free register, we are done: assign this virtual to - // the free physical register and add this interval to the active - // list. - if (physReg) { - DEBUG(dbgs() << tri_->getName(physReg) << '\n'); - assert(RC->contains(physReg) && "Invalid candidate"); - vrm_->assignVirt2Phys(cur->reg, physReg); - addRegUse(physReg); - active_.push_back(std::make_pair(cur, cur->begin())); - handled_.push_back(cur); - - // Remember physReg for avoiding a write-after-write hazard in the next - // instruction. - if (AvoidWAWHazard && - tri_->avoidWriteAfterWrite(mri_->getRegClass(cur->reg))) - avoidWAW_ = physReg; - - // "Upgrade" the physical register since it has been allocated. - UpgradeRegister(physReg); - if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { - // "Downgrade" physReg to try to keep physReg from being allocated until - // the next reload from the same SS is allocated. 
- mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg); - DowngradeRegister(cur, physReg); - } - return; - } - DEBUG(dbgs() << "no free registers\n"); - - // Compile the spill weights into an array that is better for scanning. - std::vector SpillWeights(tri_->getNumRegs(), 0.0f); - for (std::vector >::iterator - I = SpillWeightsToAdd.begin(), E = SpillWeightsToAdd.end(); I != E; ++I) - updateSpillWeights(SpillWeights, I->first, I->second, RC); - - // for each interval in active, update spill weights. - for (IntervalPtrs::const_iterator i = active_.begin(), e = active_.end(); - i != e; ++i) { - unsigned reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - reg = vrm_->getPhys(reg); - updateSpillWeights(SpillWeights, reg, i->first->weight, RC); - } - - DEBUG(dbgs() << "\tassigning stack slot at interval "<< *cur << ":\n"); - - // Find a register to spill. - float minWeight = HUGE_VALF; - unsigned minReg = 0; - - bool Found = false; - std::vector > RegsWeights; - ArrayRef Order = RegClassInfo.getOrder(RC); - if (!minReg || SpillWeights[minReg] == HUGE_VALF) - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned reg = Order[i]; - float regWeight = SpillWeights[reg]; - // Skip recently allocated registers and reserved registers. - if (minWeight > regWeight && !isRecentlyUsed(reg)) - Found = true; - RegsWeights.push_back(std::make_pair(reg, regWeight)); - } - - // If we didn't find a register that is spillable, try aliases? - if (!Found) { - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned reg = Order[i]; - // No need to worry about if the alias register size < regsize of RC. - // We are going to spill all registers that alias it anyway. - for (const unsigned* as = tri_->getAliasSet(reg); *as; ++as) - RegsWeights.push_back(std::make_pair(*as, SpillWeights[*as])); - } - } - - // Sort all potential spill candidates by weight. 
- std::sort(RegsWeights.begin(), RegsWeights.end(), WeightCompare(*this)); - minReg = RegsWeights[0].first; - minWeight = RegsWeights[0].second; - if (minWeight == HUGE_VALF) { - // All registers must have inf weight. Just grab one! - minReg = BestPhysReg ? BestPhysReg : getFirstNonReservedPhysReg(RC); - if (cur->weight == HUGE_VALF || - li_->getApproximateInstructionCount(*cur) == 0) { - // Spill a physical register around defs and uses. - if (li_->spillPhysRegAroundRegDefsUses(*cur, minReg, *vrm_)) { - // spillPhysRegAroundRegDefsUses may have invalidated iterator stored - // in fixed_. Reset them. - for (unsigned i = 0, e = fixed_.size(); i != e; ++i) { - IntervalPtr &IP = fixed_[i]; - LiveInterval *I = IP.first; - if (I->reg == minReg || tri_->isSubRegister(minReg, I->reg)) - IP.second = I->advanceTo(I->begin(), StartPosition); - } - - DowngradedRegs.clear(); - assignRegOrStackSlotAtInterval(cur); - } else { - assert(false && "Ran out of registers during register allocation!"); - report_fatal_error("Ran out of registers during register allocation!"); - } - return; - } - } - - // Find up to 3 registers to consider as spill candidates. - unsigned LastCandidate = RegsWeights.size() >= 3 ? 3 : 1; - while (LastCandidate > 1) { - if (weightsAreClose(RegsWeights[LastCandidate-1].second, minWeight)) - break; - --LastCandidate; - } - - DEBUG({ - dbgs() << "\t\tregister(s) with min weight(s): "; - - for (unsigned i = 0; i != LastCandidate; ++i) - dbgs() << tri_->getName(RegsWeights[i].first) - << " (" << RegsWeights[i].second << ")\n"; - }); - - // If the current has the minimum weight, we need to spill it and - // add any added intervals back to unhandled, and restart - // linearscan. 
- if (cur->weight != HUGE_VALF && cur->weight <= minWeight) { - DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); - SmallVector added; - LiveRangeEdit LRE(*cur, added); - spiller_->spill(LRE); - - std::sort(added.begin(), added.end(), LISorter()); - if (added.empty()) - return; // Early exit if all spills were folded. - - // Merge added with unhandled. Note that we have already sorted - // intervals returned by addIntervalsForSpills by their starting - // point. - // This also update the NextReloadMap. That is, it adds mapping from a - // register defined by a reload from SS to the next reload from SS in the - // same basic block. - MachineBasicBlock *LastReloadMBB = 0; - LiveInterval *LastReload = 0; - int LastReloadSS = VirtRegMap::NO_STACK_SLOT; - for (unsigned i = 0, e = added.size(); i != e; ++i) { - LiveInterval *ReloadLi = added[i]; - if (ReloadLi->weight == HUGE_VALF && - li_->getApproximateInstructionCount(*ReloadLi) == 0) { - SlotIndex ReloadIdx = ReloadLi->beginIndex(); - MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); - int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); - if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { - // Last reload of same SS is in the same MBB. We want to try to - // allocate both reloads the same register and make sure the reg - // isn't clobbered in between if at all possible. - assert(LastReload->beginIndex() < ReloadIdx); - NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); - } - LastReloadMBB = ReloadMBB; - LastReload = ReloadLi; - LastReloadSS = ReloadSS; - } - unhandled_.push(ReloadLi); - } - return; - } - - ++NumBacktracks; - - // Push the current interval back to unhandled since we are going - // to re-run at least this iteration. 
Since we didn't modify it it - // should go back right in the front of the list - unhandled_.push(cur); - - assert(TargetRegisterInfo::isPhysicalRegister(minReg) && - "did not choose a register to spill?"); - - // We spill all intervals aliasing the register with - // minimum weight, rollback to the interval with the earliest - // start point and let the linear scan algorithm run again - SmallVector spillIs; - - // Determine which intervals have to be spilled. - findIntervalsToSpill(cur, RegsWeights, LastCandidate, spillIs); - - // Set of spilled vregs (used later to rollback properly) - SmallSet spilled; - - // The earliest start of a Spilled interval indicates up to where - // in handled we need to roll back - assert(!spillIs.empty() && "No spill intervals?"); - SlotIndex earliestStart = spillIs[0]->beginIndex(); - - // Spill live intervals of virtual regs mapped to the physical register we - // want to clear (and its aliases). We only spill those that overlap with the - // current interval as the rest do not affect its allocation. we also keep - // track of the earliest start of all spilled live intervals since this will - // mark our rollback point. - SmallVector added; - while (!spillIs.empty()) { - LiveInterval *sli = spillIs.back(); - spillIs.pop_back(); - DEBUG(dbgs() << "\t\t\tspilling(a): " << *sli << '\n'); - if (sli->beginIndex() < earliestStart) - earliestStart = sli->beginIndex(); - LiveRangeEdit LRE(*sli, added, 0, &spillIs); - spiller_->spill(LRE); - spilled.insert(sli->reg); - } - - // Include any added intervals in earliestStart. - for (unsigned i = 0, e = added.size(); i != e; ++i) { - SlotIndex SI = added[i]->beginIndex(); - if (SI < earliestStart) - earliestStart = SI; - } - - DEBUG(dbgs() << "\t\trolling back to: " << earliestStart << '\n'); - - // Scan handled in reverse order up to the earliest start of a - // spilled live interval and undo each one, restoring the state of - // unhandled. 
- while (!handled_.empty()) { - LiveInterval* i = handled_.back(); - // If this interval starts before t we are done. - if (!i->empty() && i->beginIndex() < earliestStart) - break; - DEBUG(dbgs() << "\t\t\tundo changes for: " << *i << '\n'); - handled_.pop_back(); - - // When undoing a live interval allocation we must know if it is active or - // inactive to properly update regUse_ and the VirtRegMap. - IntervalPtrs::iterator it; - if ((it = FindIntervalInVector(active_, i)) != active_.end()) { - active_.erase(it); - assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); - if (!spilled.count(i->reg)) - unhandled_.push(i); - delRegUse(vrm_->getPhys(i->reg)); - vrm_->clearVirt(i->reg); - } else if ((it = FindIntervalInVector(inactive_, i)) != inactive_.end()) { - inactive_.erase(it); - assert(!TargetRegisterInfo::isPhysicalRegister(i->reg)); - if (!spilled.count(i->reg)) - unhandled_.push(i); - vrm_->clearVirt(i->reg); - } else { - assert(TargetRegisterInfo::isVirtualRegister(i->reg) && - "Can only allocate virtual registers!"); - vrm_->clearVirt(i->reg); - unhandled_.push(i); - } - - DenseMap::iterator ii = DowngradeMap.find(i->reg); - if (ii == DowngradeMap.end()) - // It interval has a preference, it must be defined by a copy. Clear the - // preference now since the source interval allocation may have been - // undone as well. - mri_->setRegAllocationHint(i->reg, 0, 0); - else { - UpgradeRegister(ii->second); - } - } - - // Rewind the iterators in the active, inactive, and fixed lists back to the - // point we reverted to. 
- RevertVectorIteratorsTo(active_, earliestStart); - RevertVectorIteratorsTo(inactive_, earliestStart); - RevertVectorIteratorsTo(fixed_, earliestStart); - - // Scan the rest and undo each interval that expired after t and - // insert it in active (the next iteration of the algorithm will - // put it in inactive if required) - for (unsigned i = 0, e = handled_.size(); i != e; ++i) { - LiveInterval *HI = handled_[i]; - if (!HI->expiredAt(earliestStart) && - HI->expiredAt(cur->beginIndex())) { - DEBUG(dbgs() << "\t\t\tundo changes for: " << *HI << '\n'); - active_.push_back(std::make_pair(HI, HI->begin())); - assert(!TargetRegisterInfo::isPhysicalRegister(HI->reg)); - addRegUse(vrm_->getPhys(HI->reg)); - } - } - - // Merge added with unhandled. - // This also update the NextReloadMap. That is, it adds mapping from a - // register defined by a reload from SS to the next reload from SS in the - // same basic block. - MachineBasicBlock *LastReloadMBB = 0; - LiveInterval *LastReload = 0; - int LastReloadSS = VirtRegMap::NO_STACK_SLOT; - std::sort(added.begin(), added.end(), LISorter()); - for (unsigned i = 0, e = added.size(); i != e; ++i) { - LiveInterval *ReloadLi = added[i]; - if (ReloadLi->weight == HUGE_VALF && - li_->getApproximateInstructionCount(*ReloadLi) == 0) { - SlotIndex ReloadIdx = ReloadLi->beginIndex(); - MachineBasicBlock *ReloadMBB = li_->getMBBFromIndex(ReloadIdx); - int ReloadSS = vrm_->getStackSlot(ReloadLi->reg); - if (LastReloadMBB == ReloadMBB && LastReloadSS == ReloadSS) { - // Last reload of same SS is in the same MBB. We want to try to - // allocate both reloads the same register and make sure the reg - // isn't clobbered in between if at all possible. 
- assert(LastReload->beginIndex() < ReloadIdx); - NextReloadMap.insert(std::make_pair(LastReload->reg, ReloadLi->reg)); - } - LastReloadMBB = ReloadMBB; - LastReload = ReloadLi; - LastReloadSS = ReloadSS; - } - unhandled_.push(ReloadLi); - } -} - -unsigned RALinScan::getFreePhysReg(LiveInterval* cur, - const TargetRegisterClass *RC, - unsigned MaxInactiveCount, - SmallVector &inactiveCounts, - bool SkipDGRegs) { - unsigned FreeReg = 0; - unsigned FreeRegInactiveCount = 0; - - std::pair Hint = mri_->getRegAllocationHint(cur->reg); - // Resolve second part of the hint (if possible) given the current allocation. - unsigned physReg = Hint.second; - if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg)) - physReg = vrm_->getPhys(physReg); - - ArrayRef Order; - if (Hint.first) - Order = tri_->getRawAllocationOrder(RC, Hint.first, physReg, *mf_); - else - Order = RegClassInfo.getOrder(RC); - - assert(!Order.empty() && "No allocatable register in this register class!"); - - // Scan for the first available register. - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned Reg = Order[i]; - // Ignore "downgraded" registers. - if (SkipDGRegs && DowngradedRegs.count(Reg)) - continue; - // Skip reserved registers. - if (reservedRegs_.test(Reg)) - continue; - // Skip recently allocated registers. - if (isRegAvail(Reg) && (!SkipDGRegs || !isRecentlyUsed(Reg))) { - FreeReg = Reg; - if (FreeReg < inactiveCounts.size()) - FreeRegInactiveCount = inactiveCounts[FreeReg]; - else - FreeRegInactiveCount = 0; - break; - } - } - - // If there are no free regs, or if this reg has the max inactive count, - // return this register. - if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) { - // Remember what register we picked so we can skip it next time. - if (FreeReg != 0) recordRecentlyUsed(FreeReg); - return FreeReg; - } - - // Continue scanning the registers, looking for the one with the highest - // inactive count. 
Alkis found that this reduced register pressure very - // slightly on X86 (in rev 1.94 of this file), though this should probably be - // reevaluated now. - for (unsigned i = 0; i != Order.size(); ++i) { - unsigned Reg = Order[i]; - // Ignore "downgraded" registers. - if (SkipDGRegs && DowngradedRegs.count(Reg)) - continue; - // Skip reserved registers. - if (reservedRegs_.test(Reg)) - continue; - if (isRegAvail(Reg) && Reg < inactiveCounts.size() && - FreeRegInactiveCount < inactiveCounts[Reg] && - (!SkipDGRegs || !isRecentlyUsed(Reg))) { - FreeReg = Reg; - FreeRegInactiveCount = inactiveCounts[Reg]; - if (FreeRegInactiveCount == MaxInactiveCount) - break; // We found the one with the max inactive count. - } - } - - // Remember what register we picked so we can skip it next time. - recordRecentlyUsed(FreeReg); - - return FreeReg; -} - -/// getFreePhysReg - return a free physical register for this virtual register -/// interval if we have one, otherwise return 0. -unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { - SmallVector inactiveCounts; - unsigned MaxInactiveCount = 0; - - const TargetRegisterClass *RC = mri_->getRegClass(cur->reg); - const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - - for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); - i != e; ++i) { - unsigned reg = i->first->reg; - assert(TargetRegisterInfo::isVirtualRegister(reg) && - "Can only allocate virtual registers!"); - - // If this is not in a related reg class to the register we're allocating, - // don't check it. - const TargetRegisterClass *RegRC = mri_->getRegClass(reg); - if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) { - reg = vrm_->getPhys(reg); - if (inactiveCounts.size() <= reg) - inactiveCounts.resize(reg+1); - ++inactiveCounts[reg]; - MaxInactiveCount = std::max(MaxInactiveCount, inactiveCounts[reg]); - } - } - - // If copy coalescer has assigned a "preferred" register, check if it's - // available first. 
- unsigned Preference = vrm_->getRegAllocPref(cur->reg); - if (Preference) { - DEBUG(dbgs() << "(preferred: " << tri_->getName(Preference) << ") "); - if (isRegAvail(Preference) && - RC->contains(Preference)) - return Preference; - } - - unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, - true); - if (FreeReg) - return FreeReg; - return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); -} - -FunctionPass* llvm::createLinearScanRegisterAllocator() { - return new RALinScan(); -} -- cgit v1.1 From cfa81014099254bd42f246a4d434dc2ca1463c6c Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sat, 12 Nov 2011 23:17:52 +0000 Subject: Switch PBQP to the modern InlineSpiller framework. It is worth noting that the old spiller would split live ranges around basic blocks. The new spiller doesn't do that. PBQP should do its own live range splitting with SplitEditor::splitSingleBlock() if desired. See RAGreedy::tryBlockSplit(). git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144476 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegAllocPBQP.cpp | 51 ++++++++++---------------------------------- 1 file changed, 11 insertions(+), 40 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 0d2cf2d..d4f69dc 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,7 +31,9 @@ #define DEBUG_TYPE "regalloc" +#include "LiveRangeEdit.h" #include "RenderMachineFunction.h" +#include "Spiller.h" #include "Splitter.h" #include "VirtRegMap.h" #include "VirtRegRewriter.h" @@ -132,6 +134,7 @@ private: MachineRegisterInfo *mri; RenderMachineFunction *rmf; + std::auto_ptr spiller; LiveIntervals *lis; LiveStacks *lss; VirtRegMap *vrm; @@ -141,10 +144,6 @@ private: /// \brief Finds the initial set of vreg intervals to allocate. void findVRegIntervalsToAlloc(); - /// \brief Adds a stack interval if the given live interval has been - /// spilled. 
Used to support stack slot coloring. - void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri); - /// \brief Given a solved PBQP problem maps this solution back to a register /// assignment. bool mapPBQPToRegAlloc(const PBQPRAProblem &problem, @@ -488,29 +487,6 @@ void RegAllocPBQP::findVRegIntervalsToAlloc() { } } -void RegAllocPBQP::addStackInterval(const LiveInterval *spilled, - MachineRegisterInfo* mri) { - int stackSlot = vrm->getStackSlot(spilled->reg); - - if (stackSlot == VirtRegMap::NO_STACK_SLOT) { - return; - } - - const TargetRegisterClass *RC = mri->getRegClass(spilled->reg); - LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC); - - VNInfo *vni; - if (stackInterval.getNumValNums() != 0) { - vni = stackInterval.getValNumInfo(0); - } else { - vni = stackInterval.getNextValue( - SlotIndex(), 0, lss->getVNInfoAllocator()); - } - - LiveInterval &rhsInterval = lis->getInterval(spilled->reg); - stackInterval.MergeRangesInAsValue(rhsInterval, vni); -} - bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, const PBQP::Solution &solution) { // Set to true if we have any spills @@ -535,22 +511,16 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, vrm->assignVirt2Phys(vreg, preg); } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); - const LiveInterval* spillInterval = &lis->getInterval(vreg); - double oldWeight = spillInterval->weight; - rmf->rememberUseDefs(spillInterval); - std::vector newSpills = - lis->addIntervalsForSpills(*spillInterval, 0, loopInfo, *vrm); - addStackInterval(spillInterval, mri); - rmf->rememberSpills(spillInterval, newSpills); - - (void) oldWeight; + SmallVector newSpills; + LiveRangeEdit LRE(lis->getInterval(vreg), newSpills); + spiller->spill(LRE); + DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: " - << oldWeight << ", New vregs: "); + << LRE.getParent().weight << ", New vregs: "); // Copy any newly inserted live intervals into the list 
of regs to // allocate. - for (std::vector::const_iterator - itr = newSpills.begin(), end = newSpills.end(); + for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); itr != end; ++itr) { assert(!(*itr)->empty() && "Empty spill range."); DEBUG(dbgs() << (*itr)->reg << " "); @@ -560,7 +530,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, DEBUG(dbgs() << ")\n"); // We need another round if spill intervals were added. - anotherRoundNeeded |= !newSpills.empty(); + anotherRoundNeeded |= !LRE.empty(); } else { assert(false && "Unknown allocation option."); } @@ -650,6 +620,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { rmf = &getAnalysis(); vrm = &getAnalysis(); + spiller.reset(createInlineSpiller(*this, MF, *vrm)); DEBUG(dbgs() << "PBQP Register Allocating for " << mf->getFunction()->getName() << "\n"); -- cgit v1.1 From 5d9b1091811106ebad0517a7e0c7936a95cb38ad Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sat, 12 Nov 2011 23:29:02 +0000 Subject: Delete the 'standard' spiller with used the old spilling framework. The current register allocators all use the inline spiller. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144477 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/Spiller.cpp | 47 ++--------------------------------------------- 1 file changed, 2 insertions(+), 45 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index b6bbcd7..6efdd5b 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -29,7 +29,7 @@ using namespace llvm; namespace { - enum SpillerName { trivial, standard, inline_ }; + enum SpillerName { trivial, inline_ }; } static cl::opt @@ -37,10 +37,9 @@ spillerOpt("spiller", cl::desc("Spiller to use: (default: standard)"), cl::Prefix, cl::values(clEnumVal(trivial, "trivial spiller"), - clEnumVal(standard, "default spiller"), clEnumValN(inline_, "inline", "inline spiller"), clEnumValEnd), - cl::init(standard)); + cl::init(trivial)); // Spiller virtual destructor implementation. Spiller::~Spiller() {} @@ -188,54 +187,12 @@ public: } // end anonymous namespace -namespace { - -/// Falls back on LiveIntervals::addIntervalsForSpills. -class StandardSpiller : public Spiller { -protected: - MachineFunction *mf; - LiveIntervals *lis; - LiveStacks *lss; - MachineLoopInfo *loopInfo; - VirtRegMap *vrm; -public: - StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf, - VirtRegMap &vrm) - : mf(&mf), - lis(&pass.getAnalysis()), - lss(&pass.getAnalysis()), - loopInfo(pass.getAnalysisIfAvailable()), - vrm(&vrm) {} - - /// Falls back on LiveIntervals::addIntervalsForSpills. - void spill(LiveRangeEdit &LRE) { - std::vector added = - lis->addIntervalsForSpills(LRE.getParent(), LRE.getUselessVRegs(), - loopInfo, *vrm); - LRE.getNewVRegs()->insert(LRE.getNewVRegs()->end(), - added.begin(), added.end()); - - // Update LiveStacks. 
- int SS = vrm->getStackSlot(LRE.getReg()); - if (SS == VirtRegMap::NO_STACK_SLOT) - return; - const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(LRE.getReg()); - LiveInterval &SI = lss->getOrCreateInterval(SS, RC); - if (!SI.hasAtLeastOneValue()) - SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator()); - SI.MergeRangesInAsValue(LRE.getParent(), SI.getValNumInfo(0)); - } -}; - -} // end anonymous namespace - llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass, MachineFunction &mf, VirtRegMap &vrm) { switch (spillerOpt) { default: assert(0 && "unknown spiller"); case trivial: return new TrivialSpiller(pass, mf, vrm); - case standard: return new StandardSpiller(pass, mf, vrm); case inline_: return createInlineSpiller(pass, mf, vrm); } } -- cgit v1.1 From f5eeaf221c0b8acdc1ffbd5cefe01b61474cb7e8 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sat, 12 Nov 2011 23:57:05 +0000 Subject: Delete the old spilling framework from LiveIntervalAnalysis. This is dead code, all register allocators use InlineSpiller. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144478 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 1193 ---------------------------------- 1 file changed, 1193 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index b1e202a..c902b88 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -52,8 +52,6 @@ static cl::opt DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); STATISTIC(numIntervals , "Number of original intervals"); -STATISTIC(numFolds , "Number of loads/stores folded into instructions"); -STATISTIC(numSplits , "Number of intervals split"); char LiveIntervals::ID = 0; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", @@ -149,103 +147,6 @@ void LiveIntervals::dumpInstrs() const { printInstrs(dbgs()); } -bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, - VirtRegMap &vrm, unsigned reg) { - // We don't handle fancy stuff crossing basic block boundaries - if (li.ranges.size() != 1) - return true; - const LiveRange &range = li.ranges.front(); - SlotIndex idx = range.start.getBaseIndex(); - SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex(); - - // Skip deleted instructions - MachineInstr *firstMI = getInstructionFromIndex(idx); - while (!firstMI && idx != end) { - idx = idx.getNextIndex(); - firstMI = getInstructionFromIndex(idx); - } - if (!firstMI) - return false; - - // Find last instruction in range - SlotIndex lastIdx = end.getPrevIndex(); - MachineInstr *lastMI = getInstructionFromIndex(lastIdx); - while (!lastMI && lastIdx != idx) { - lastIdx = lastIdx.getPrevIndex(); - lastMI = getInstructionFromIndex(lastIdx); - } - if (!lastMI) - return false; - - // Range cannot cross basic block boundaries or terminators - MachineBasicBlock *MBB = firstMI->getParent(); - if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator()) - return true; - - 
MachineBasicBlock::const_iterator E = lastMI; - ++E; - for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) { - const MachineInstr &MI = *I; - - // Allow copies to and from li.reg - if (MI.isCopy()) - if (MI.getOperand(0).getReg() == li.reg || - MI.getOperand(1).getReg() == li.reg) - continue; - - // Check for operands using reg - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand& mop = MI.getOperand(i); - if (!mop.isReg()) - continue; - unsigned PhysReg = mop.getReg(); - if (PhysReg == 0 || PhysReg == li.reg) - continue; - if (TargetRegisterInfo::isVirtualRegister(PhysReg)) { - if (!vrm.hasPhys(PhysReg)) - continue; - PhysReg = vrm.getPhys(PhysReg); - } - if (PhysReg && tri_->regsOverlap(PhysReg, reg)) - return true; - } - } - - // No conflicts found. - return false; -} - -bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg, - SmallPtrSet &JoinedCopies) { - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - for (SlotIndex index = I->start.getBaseIndex(), - end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - index != end; - index = index.getNextIndex()) { - MachineInstr *MI = getInstructionFromIndex(index); - if (!MI) - continue; // skip deleted instructions - - if (JoinedCopies.count(MI)) - continue; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned PhysReg = MO.getReg(); - if (PhysReg == 0 || PhysReg == Reg || - TargetRegisterInfo::isVirtualRegister(PhysReg)) - continue; - if (tri_->regsOverlap(Reg, PhysReg)) - return true; - } - } - } - - return false; -} - static bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { unsigned Reg = MI.getOperand(MOIdx).getReg(); @@ -1011,14 +912,6 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, return true; } -/// isReMaterializable - Returns true if the definition MI of the 
specified -/// val# of the specified interval is re-materializable. -bool LiveIntervals::isReMaterializable(const LiveInterval &li, - const VNInfo *ValNo, MachineInstr *MI) { - bool Dummy2; - return isReMaterializable(li, ValNo, MI, 0, Dummy2); -} - /// isReMaterializable - Returns true if every definition of MI of every /// val# of the specified interval is re-materializable. bool @@ -1044,107 +937,6 @@ LiveIntervals::isReMaterializable(const LiveInterval &li, return true; } -/// FilterFoldedOps - Filter out two-address use operands. Return -/// true if it finds any issue with the operands that ought to prevent -/// folding. -static bool FilterFoldedOps(MachineInstr *MI, - SmallVector &Ops, - unsigned &MRInfo, - SmallVector &FoldOps) { - MRInfo = 0; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) { - unsigned OpIdx = Ops[i]; - MachineOperand &MO = MI->getOperand(OpIdx); - // FIXME: fold subreg use. - if (MO.getSubReg()) - return true; - if (MO.isDef()) - MRInfo |= (unsigned)VirtRegMap::isMod; - else { - // Filter out two-address use operand(s). - if (MI->isRegTiedToDefOperand(OpIdx)) { - MRInfo = VirtRegMap::isModRef; - continue; - } - MRInfo |= (unsigned)VirtRegMap::isRef; - } - FoldOps.push_back(OpIdx); - } - return false; -} - - -/// tryFoldMemoryOperand - Attempts to fold either a spill / restore from -/// slot / to reg or any rematerialized load into ith operand of specified -/// MI. If it is successul, MI is updated with the newly created MI and -/// returns true. -bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, - VirtRegMap &vrm, MachineInstr *DefMI, - SlotIndex InstrIdx, - SmallVector &Ops, - bool isSS, int Slot, unsigned Reg) { - // If it is an implicit def instruction, just delete it. - if (MI->isImplicitDef()) { - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++numFolds; - return true; - } - - // Filter the list of operand indexes that are to be folded. 
Abort if - // any operand will prevent folding. - unsigned MRInfo = 0; - SmallVector FoldOps; - if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps)) - return false; - - // The only time it's safe to fold into a two address instruction is when - // it's folding reload and spill from / into a spill stack slot. - if (DefMI && (MRInfo & VirtRegMap::isMod)) - return false; - - MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot) - : tii_->foldMemoryOperand(MI, FoldOps, DefMI); - if (fmi) { - // Remember this instruction uses the spill slot. - if (isSS) vrm.addSpillSlotUse(Slot, fmi); - - // Attempt to fold the memory reference into the instruction. If - // we can do this, we don't need to insert spill code. - if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot)) - vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo); - vrm.transferSpillPts(MI, fmi); - vrm.transferRestorePts(MI, fmi); - vrm.transferEmergencySpills(MI, fmi); - ReplaceMachineInstrInMaps(MI, fmi); - MI->eraseFromParent(); - MI = fmi; - ++numFolds; - return true; - } - return false; -} - -/// canFoldMemoryOperand - Returns true if the specified load / store -/// folding is possible. -bool LiveIntervals::canFoldMemoryOperand(MachineInstr *MI, - SmallVector &Ops, - bool ReMat) const { - // Filter the list of operand indexes that are to be folded. Abort if - // any operand will prevent folding. - unsigned MRInfo = 0; - SmallVector FoldOps; - if (FilterFoldedOps(MI, Ops, MRInfo, FoldOps)) - return false; - - // It's only legal to remat for a use, not a def. - if (ReMat && (MRInfo & VirtRegMap::isMod)) - return false; - - return tii_->canFoldMemoryOperand(MI, FoldOps); -} - bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const { LiveInterval::Ranges::const_iterator itr = li.ranges.begin(); @@ -1164,554 +956,6 @@ bool LiveIntervals::intervalIsInOneMBB(const LiveInterval &li) const { return true; } -/// rewriteImplicitOps - Rewrite implicit use operands of MI (i.e. 
uses of -/// interval on to-be re-materialized operands of MI) with new register. -void LiveIntervals::rewriteImplicitOps(const LiveInterval &li, - MachineInstr *MI, unsigned NewVReg, - VirtRegMap &vrm) { - // There is an implicit use. That means one of the other operand is - // being remat'ed and the remat'ed instruction has li.reg as an - // use operand. Make sure we rewrite that as well. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (!vrm.isReMaterialized(Reg)) - continue; - MachineInstr *ReMatMI = vrm.getReMaterializedMI(Reg); - MachineOperand *UseMO = ReMatMI->findRegisterUseOperand(li.reg); - if (UseMO) - UseMO->setReg(NewVReg); - } -} - -/// rewriteInstructionForSpills, rewriteInstructionsForSpills - Helper functions -/// for addIntervalsForSpills to rewrite uses / defs for the given live range. -bool LiveIntervals:: -rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, - bool TrySplit, SlotIndex index, SlotIndex end, - MachineInstr *MI, - MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, - unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, - const TargetRegisterClass* rc, - SmallVector &ReMatIds, - const MachineLoopInfo *loopInfo, - unsigned &NewVReg, unsigned ImpUse, bool &HasDef, bool &HasUse, - DenseMap &MBBVRegsMap, - std::vector &NewLIs) { - bool CanFold = false; - RestartInstruction: - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& mop = MI->getOperand(i); - if (!mop.isReg()) - continue; - unsigned Reg = mop.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (Reg != li.reg) - continue; - - bool TryFold = !DefIsReMat; - bool FoldSS = true; // Default behavior unless it's a remat. 
- int FoldSlot = Slot; - if (DefIsReMat) { - // If this is the rematerializable definition MI itself and - // all of its uses are rematerialized, simply delete it. - if (MI == ReMatOrigDefMI && CanDelete) { - DEBUG(dbgs() << "\t\t\t\tErasing re-materializable def: " - << *MI << '\n'); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - break; - } - - // If def for this use can't be rematerialized, then try folding. - // If def is rematerializable and it's a load, also try folding. - TryFold = !ReMatDefMI || (ReMatDefMI && (MI == ReMatOrigDefMI || isLoad)); - if (isLoad) { - // Try fold loads (from stack slot, constant pool, etc.) into uses. - FoldSS = isLoadSS; - FoldSlot = LdSlot; - } - } - - // Scan all of the operands of this instruction rewriting operands - // to use NewVReg instead of li.reg as appropriate. We do this for - // two reasons: - // - // 1. If the instr reads the same spilled vreg multiple times, we - // want to reuse the NewVReg. - // 2. If the instr is a two-addr instruction, we are required to - // keep the src/dst regs pinned. - // - // Keep track of whether we replace a use and/or def so that we can - // create the spill interval with the appropriate range. - SmallVector Ops; - tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops); - - // Create a new virtual register for the spill interval. - // Create the new register now so we can map the fold instruction - // to the new register so when it is unfolded we get the correct - // answer. - bool CreatedNewVReg = false; - if (NewVReg == 0) { - NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - CreatedNewVReg = true; - - // The new virtual register should get the same allocation hints as the - // old one. 
- std::pair Hint = mri_->getRegAllocationHint(Reg); - if (Hint.first || Hint.second) - mri_->setRegAllocationHint(NewVReg, Hint.first, Hint.second); - } - - if (!TryFold) - CanFold = false; - else { - // Do not fold load / store here if we are splitting. We'll find an - // optimal point to insert a load / store later. - if (!TrySplit) { - if (tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, - Ops, FoldSS, FoldSlot, NewVReg)) { - // Folding the load/store can completely change the instruction in - // unpredictable ways, rescan it from the beginning. - - if (FoldSS) { - // We need to give the new vreg the same stack slot as the - // spilled interval. - vrm.assignVirt2StackSlot(NewVReg, FoldSlot); - } - - HasUse = false; - HasDef = false; - CanFold = false; - if (isNotInMIMap(MI)) - break; - goto RestartInstruction; - } - } else { - // We'll try to fold it later if it's profitable. - CanFold = canFoldMemoryOperand(MI, Ops, DefIsReMat); - } - } - - mop.setReg(NewVReg); - if (mop.isImplicit()) - rewriteImplicitOps(li, MI, NewVReg, vrm); - - // Reuse NewVReg for other reads. - bool HasEarlyClobber = false; - for (unsigned j = 0, e = Ops.size(); j != e; ++j) { - MachineOperand &mopj = MI->getOperand(Ops[j]); - mopj.setReg(NewVReg); - if (mopj.isImplicit()) - rewriteImplicitOps(li, MI, NewVReg, vrm); - if (mopj.isEarlyClobber()) - HasEarlyClobber = true; - } - - if (CreatedNewVReg) { - if (DefIsReMat) { - vrm.setVirtIsReMaterialized(NewVReg, ReMatDefMI); - if (ReMatIds[VNI->id] == VirtRegMap::MAX_STACK_SLOT) { - // Each valnum may have its own remat id. - ReMatIds[VNI->id] = vrm.assignVirtReMatId(NewVReg); - } else { - vrm.assignVirtReMatId(NewVReg, ReMatIds[VNI->id]); - } - if (!CanDelete || (HasUse && HasDef)) { - // If this is a two-addr instruction then its use operands are - // rematerializable but its def is not. It should be assigned a - // stack slot. 
- vrm.assignVirt2StackSlot(NewVReg, Slot); - } - } else { - vrm.assignVirt2StackSlot(NewVReg, Slot); - } - } else if (HasUse && HasDef && - vrm.getStackSlot(NewVReg) == VirtRegMap::NO_STACK_SLOT) { - // If this interval hasn't been assigned a stack slot (because earlier - // def is a deleted remat def), do it now. - assert(Slot != VirtRegMap::NO_STACK_SLOT); - vrm.assignVirt2StackSlot(NewVReg, Slot); - } - - // Re-matting an instruction with virtual register use. Add the - // register as an implicit use on the use MI. - if (DefIsReMat && ImpUse) - MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); - - // Create a new register interval for this spill / remat. - LiveInterval &nI = getOrCreateInterval(NewVReg); - if (CreatedNewVReg) { - NewLIs.push_back(&nI); - MBBVRegsMap.insert(std::make_pair(MI->getParent()->getNumber(), NewVReg)); - if (TrySplit) - vrm.setIsSplitFromReg(NewVReg, li.reg); - } - - if (HasUse) { - if (CreatedNewVReg) { - LiveRange LR(index.getLoadIndex(), index.getDefIndex(), - nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - } else { - // Extend the split live interval to this def / use. - SlotIndex End = index.getDefIndex(); - LiveRange LR(nI.ranges[nI.ranges.size()-1].end, End, - nI.getValNumInfo(nI.getNumValNums()-1)); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - } - } - if (HasDef) { - // An early clobber starts at the use slot, except for an early clobber - // tied to a use operand (yes, that is a thing). - LiveRange LR(HasEarlyClobber && !HasUse ? 
- index.getUseIndex() : index.getDefIndex(), - index.getStoreIndex(), - nI.getNextValue(SlotIndex(), 0, VNInfoAllocator)); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - } - - DEBUG({ - dbgs() << "\t\t\t\tAdded new interval: "; - nI.print(dbgs(), tri_); - dbgs() << '\n'; - }); - } - return CanFold; -} -bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, - const VNInfo *VNI, - MachineBasicBlock *MBB, - SlotIndex Idx) const { - return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB)); -} - -/// RewriteInfo - Keep track of machine instrs that will be rewritten -/// during spilling. -namespace { - struct RewriteInfo { - SlotIndex Index; - MachineInstr *MI; - RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {} - }; - - struct RewriteInfoCompare { - bool operator()(const RewriteInfo &LHS, const RewriteInfo &RHS) const { - return LHS.Index < RHS.Index; - } - }; -} - -void LiveIntervals:: -rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, - LiveInterval::Ranges::const_iterator &I, - MachineInstr *ReMatOrigDefMI, MachineInstr *ReMatDefMI, - unsigned Slot, int LdSlot, - bool isLoad, bool isLoadSS, bool DefIsReMat, bool CanDelete, - VirtRegMap &vrm, - const TargetRegisterClass* rc, - SmallVector &ReMatIds, - const MachineLoopInfo *loopInfo, - BitVector &SpillMBBs, - DenseMap > &SpillIdxes, - BitVector &RestoreMBBs, - DenseMap > &RestoreIdxes, - DenseMap &MBBVRegsMap, - std::vector &NewLIs) { - bool AllCanFold = true; - unsigned NewVReg = 0; - SlotIndex start = I->start.getBaseIndex(); - SlotIndex end = I->end.getPrevSlot().getBaseIndex().getNextIndex(); - - // First collect all the def / use in this live range that will be rewritten. - // Make sure they are sorted according to instruction index. 
- std::vector RewriteMIs; - for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg), - re = mri_->reg_end(); ri != re; ) { - MachineInstr *MI = &*ri; - MachineOperand &O = ri.getOperand(); - ++ri; - if (MI->isDebugValue()) { - // Modify DBG_VALUE now that the value is in a spill slot. - if (Slot != VirtRegMap::MAX_STACK_SLOT || isLoadSS) { - uint64_t Offset = MI->getOperand(1).getImm(); - const MDNode *MDPtr = MI->getOperand(2).getMetadata(); - DebugLoc DL = MI->getDebugLoc(); - int FI = isLoadSS ? LdSlot : (int)Slot; - if (MachineInstr *NewDV = tii_->emitFrameIndexDebugValue(*mf_, FI, - Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - ReplaceMachineInstrInMaps(MI, NewDV); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - continue; - } - } - - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - continue; - } - assert(!(O.isImplicit() && O.isUse()) && - "Spilling register that's used as implicit use?"); - SlotIndex index = getInstructionIndex(MI); - if (index < start || index >= end) - continue; - - if (O.isUndef()) - // Must be defined by an implicit def. It should not be spilled. Note, - // this is for correctness reason. e.g. - // 8 %reg1024 = IMPLICIT_DEF - // 12 %reg1024 = INSERT_SUBREG %reg1024, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - continue; - RewriteMIs.push_back(RewriteInfo(index, MI)); - } - std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare()); - - unsigned ImpUse = DefIsReMat ? 
getReMatImplicitUse(li, ReMatDefMI) : 0; - // Now rewrite the defs and uses. - for (unsigned i = 0, e = RewriteMIs.size(); i != e; ) { - RewriteInfo &rwi = RewriteMIs[i]; - ++i; - SlotIndex index = rwi.Index; - MachineInstr *MI = rwi.MI; - // If MI def and/or use the same register multiple times, then there - // are multiple entries. - while (i != e && RewriteMIs[i].MI == MI) { - assert(RewriteMIs[i].Index == index); - ++i; - } - MachineBasicBlock *MBB = MI->getParent(); - - if (ImpUse && MI != ReMatDefMI) { - // Re-matting an instruction with virtual register use. Prevent interval - // from being spilled. - getInterval(ImpUse).markNotSpillable(); - } - - unsigned MBBId = MBB->getNumber(); - unsigned ThisVReg = 0; - if (TrySplit) { - DenseMap::iterator NVI = MBBVRegsMap.find(MBBId); - if (NVI != MBBVRegsMap.end()) { - ThisVReg = NVI->second; - // One common case: - // x = use - // ... - // ... - // def = ... - // = use - // It's better to start a new interval to avoid artificially - // extend the new interval. - if (MI->readsWritesVirtualRegister(li.reg) == - std::make_pair(false,true)) { - MBBVRegsMap.erase(MBB->getNumber()); - ThisVReg = 0; - } - } - } - - bool IsNew = ThisVReg == 0; - if (IsNew) { - // This ends the previous live interval. If all of its def / use - // can be folded, give it a low spill weight. - if (NewVReg && TrySplit && AllCanFold) { - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.weight /= 10.0F; - } - AllCanFold = true; - } - NewVReg = ThisVReg; - - bool HasDef = false; - bool HasUse = false; - bool CanFold = rewriteInstructionForSpills(li, I->valno, TrySplit, - index, end, MI, ReMatOrigDefMI, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - CanDelete, vrm, rc, ReMatIds, loopInfo, NewVReg, - ImpUse, HasDef, HasUse, MBBVRegsMap, NewLIs); - if (!HasDef && !HasUse) - continue; - - AllCanFold &= CanFold; - - // Update weight of spill interval. 
- LiveInterval &nI = getOrCreateInterval(NewVReg); - if (!TrySplit) { - // The spill weight is now infinity as it cannot be spilled again. - nI.markNotSpillable(); - continue; - } - - // Keep track of the last def and first use in each MBB. - if (HasDef) { - if (MI != ReMatOrigDefMI || !CanDelete) { - bool HasKill = false; - if (!HasUse) - HasKill = anyKillInMBBAfterIdx(li, I->valno, MBB, index.getDefIndex()); - else { - // If this is a two-address code, then this index starts a new VNInfo. - const VNInfo *VNI = li.findDefinedVNInfoForRegInt(index.getDefIndex()); - if (VNI) - HasKill = anyKillInMBBAfterIdx(li, VNI, MBB, index.getDefIndex()); - } - DenseMap >::iterator SII = - SpillIdxes.find(MBBId); - if (!HasKill) { - if (SII == SpillIdxes.end()) { - std::vector S; - S.push_back(SRInfo(index, NewVReg, true)); - SpillIdxes.insert(std::make_pair(MBBId, S)); - } else if (SII->second.back().vreg != NewVReg) { - SII->second.push_back(SRInfo(index, NewVReg, true)); - } else if (index > SII->second.back().index) { - // If there is an earlier def and this is a two-address - // instruction, then it's not possible to fold the store (which - // would also fold the load). - SRInfo &Info = SII->second.back(); - Info.index = index; - Info.canFold = !HasUse; - } - SpillMBBs.set(MBBId); - } else if (SII != SpillIdxes.end() && - SII->second.back().vreg == NewVReg && - index > SII->second.back().index) { - // There is an earlier def that's not killed (must be two-address). - // The spill is no longer needed. - SII->second.pop_back(); - if (SII->second.empty()) { - SpillIdxes.erase(MBBId); - SpillMBBs.reset(MBBId); - } - } - } - } - - if (HasUse) { - DenseMap >::iterator SII = - SpillIdxes.find(MBBId); - if (SII != SpillIdxes.end() && - SII->second.back().vreg == NewVReg && - index > SII->second.back().index) - // Use(s) following the last def, it's not safe to fold the spill. 
- SII->second.back().canFold = false; - DenseMap >::iterator RII = - RestoreIdxes.find(MBBId); - if (RII != RestoreIdxes.end() && RII->second.back().vreg == NewVReg) - // If we are splitting live intervals, only fold if it's the first - // use and there isn't another use later in the MBB. - RII->second.back().canFold = false; - else if (IsNew) { - // Only need a reload if there isn't an earlier def / use. - if (RII == RestoreIdxes.end()) { - std::vector Infos; - Infos.push_back(SRInfo(index, NewVReg, true)); - RestoreIdxes.insert(std::make_pair(MBBId, Infos)); - } else { - RII->second.push_back(SRInfo(index, NewVReg, true)); - } - RestoreMBBs.set(MBBId); - } - } - - // Update spill weight. - unsigned loopDepth = loopInfo->getLoopDepth(MBB); - nI.weight += getSpillWeight(HasDef, HasUse, loopDepth); - } - - if (NewVReg && TrySplit && AllCanFold) { - // If all of its def / use can be folded, give it a low spill weight. - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.weight /= 10.0F; - } -} - -bool LiveIntervals::alsoFoldARestore(int Id, SlotIndex index, - unsigned vr, BitVector &RestoreMBBs, - DenseMap > &RestoreIdxes) { - if (!RestoreMBBs[Id]) - return false; - std::vector &Restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = Restores.size(); i != e; ++i) - if (Restores[i].index == index && - Restores[i].vreg == vr && - Restores[i].canFold) - return true; - return false; -} - -void LiveIntervals::eraseRestoreInfo(int Id, SlotIndex index, - unsigned vr, BitVector &RestoreMBBs, - DenseMap > &RestoreIdxes) { - if (!RestoreMBBs[Id]) - return; - std::vector &Restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = Restores.size(); i != e; ++i) - if (Restores[i].index == index && Restores[i].vreg) - Restores[i].index = SlotIndex(); -} - -/// handleSpilledImpDefs - Remove IMPLICIT_DEF instructions which are being -/// spilled and create empty intervals for their uses. 
-void -LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm, - const TargetRegisterClass* rc, - std::vector &NewLIs) { - for (MachineRegisterInfo::reg_iterator ri = mri_->reg_begin(li.reg), - re = mri_->reg_end(); ri != re; ) { - MachineOperand &O = ri.getOperand(); - MachineInstr *MI = &*ri; - ++ri; - if (MI->isDebugValue()) { - // Remove debug info for now. - O.setReg(0U); - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - continue; - } - if (O.isDef()) { - assert(MI->isImplicitDef() && - "Register def was not rewritten?"); - RemoveMachineInstrFromMaps(MI); - vrm.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - } else { - // This must be an use of an implicit_def so it's not part of the live - // interval. Create a new empty live interval for it. - // FIXME: Can we simply erase some of the instructions? e.g. Stores? - unsigned NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - vrm.setIsImplicitlyDefined(NewVReg); - NewLIs.push_back(&getOrCreateInterval(NewVReg)); - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == li.reg) { - MO.setReg(NewVReg); - MO.setIsUndef(); - } - } - } - } -} - float LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { // Limit the loop depth ridiculousness. 
@@ -1730,443 +974,6 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { return (isDef + isUse) * lc; } -static void normalizeSpillWeights(std::vector &NewLIs) { - for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) - NewLIs[i]->weight = - normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize()); -} - -std::vector LiveIntervals:: -addIntervalsForSpills(const LiveInterval &li, - const SmallVectorImpl *SpillIs, - const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { - assert(li.isSpillable() && "attempt to spill already spilled interval!"); - - DEBUG({ - dbgs() << "\t\t\t\tadding intervals for spills for interval: "; - li.print(dbgs(), tri_); - dbgs() << '\n'; - }); - - // Each bit specify whether a spill is required in the MBB. - BitVector SpillMBBs(mf_->getNumBlockIDs()); - DenseMap > SpillIdxes; - BitVector RestoreMBBs(mf_->getNumBlockIDs()); - DenseMap > RestoreIdxes; - DenseMap MBBVRegsMap; - std::vector NewLIs; - const TargetRegisterClass* rc = mri_->getRegClass(li.reg); - - unsigned NumValNums = li.getNumValNums(); - SmallVector ReMatDefs; - ReMatDefs.resize(NumValNums, NULL); - SmallVector ReMatOrigDefs; - ReMatOrigDefs.resize(NumValNums, NULL); - SmallVector ReMatIds; - ReMatIds.resize(NumValNums, VirtRegMap::MAX_STACK_SLOT); - BitVector ReMatDelete(NumValNums); - unsigned Slot = VirtRegMap::MAX_STACK_SLOT; - - // Spilling a split live interval. It cannot be split any further. Also, - // it's also guaranteed to be a single val# / range interval. - if (vrm.getPreSplitReg(li.reg)) { - vrm.setIsSplitFromReg(li.reg, 0); - // Unset the split kill marker on the last use. 
- SlotIndex KillIdx = vrm.getKillPoint(li.reg); - if (KillIdx != SlotIndex()) { - MachineInstr *KillMI = getInstructionFromIndex(KillIdx); - assert(KillMI && "Last use disappeared?"); - int KillOp = KillMI->findRegisterUseOperandIdx(li.reg, true); - assert(KillOp != -1 && "Last use disappeared?"); - KillMI->getOperand(KillOp).setIsKill(false); - } - vrm.removeKillPoint(li.reg); - bool DefIsReMat = vrm.isReMaterialized(li.reg); - Slot = vrm.getStackSlot(li.reg); - assert(Slot != VirtRegMap::MAX_STACK_SLOT); - MachineInstr *ReMatDefMI = DefIsReMat ? - vrm.getReMaterializedMI(li.reg) : NULL; - int LdSlot = 0; - bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - bool isLoad = isLoadSS || - (DefIsReMat && (ReMatDefMI->getDesc().canFoldAsLoad())); - bool IsFirstRange = true; - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - // If this is a split live interval with multiple ranges, it means there - // are two-address instructions that re-defined the value. Only the - // first def can be rematerialized! - if (IsFirstRange) { - // Note ReMatOrigDefMI has already been deleted. 
- rewriteInstructionsForSpills(li, false, I, NULL, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - false, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); - } else { - rewriteInstructionsForSpills(li, false, I, NULL, 0, - Slot, 0, false, false, false, - false, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); - } - IsFirstRange = false; - } - - handleSpilledImpDefs(li, vrm, rc, NewLIs); - normalizeSpillWeights(NewLIs); - return NewLIs; - } - - bool TrySplit = !intervalIsInOneMBB(li); - if (TrySplit) - ++numSplits; - bool NeedStackSlot = false; - for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end(); - i != e; ++i) { - const VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (VNI->isUnused()) - continue; // Dead val#. - // Is the def for the val# rematerializable? - MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def); - bool dummy; - if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) { - // Remember how to remat the def of this val#. - ReMatOrigDefs[VN] = ReMatDefMI; - // Original def may be modified so we have to make a copy here. - MachineInstr *Clone = mf_->CloneMachineInstr(ReMatDefMI); - CloneMIs.push_back(Clone); - ReMatDefs[VN] = Clone; - - bool CanDelete = true; - if (VNI->hasPHIKill()) { - // A kill is a phi node, not all of its uses can be rematerialized. - // It must not be deleted. - CanDelete = false; - // Need a stack slot if there is any live range where uses cannot be - // rematerialized. - NeedStackSlot = true; - } - if (CanDelete) - ReMatDelete.set(VN); - } else { - // Need a stack slot if there is any live range where uses cannot be - // rematerialized. - NeedStackSlot = true; - } - } - - // One stack slot per live interval. 
- if (NeedStackSlot && vrm.getPreSplitReg(li.reg) == 0) { - if (vrm.getStackSlot(li.reg) == VirtRegMap::NO_STACK_SLOT) - Slot = vrm.assignVirt2StackSlot(li.reg); - - // This case only occurs when the prealloc splitter has already assigned - // a stack slot to this vreg. - else - Slot = vrm.getStackSlot(li.reg); - } - - // Create new intervals and rewrite defs and uses. - for (LiveInterval::Ranges::const_iterator - I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { - MachineInstr *ReMatDefMI = ReMatDefs[I->valno->id]; - MachineInstr *ReMatOrigDefMI = ReMatOrigDefs[I->valno->id]; - bool DefIsReMat = ReMatDefMI != NULL; - bool CanDelete = ReMatDelete[I->valno->id]; - int LdSlot = 0; - bool isLoadSS = DefIsReMat && tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - bool isLoad = isLoadSS || - (DefIsReMat && ReMatDefMI->getDesc().canFoldAsLoad()); - rewriteInstructionsForSpills(li, TrySplit, I, ReMatOrigDefMI, ReMatDefMI, - Slot, LdSlot, isLoad, isLoadSS, DefIsReMat, - CanDelete, vrm, rc, ReMatIds, loopInfo, - SpillMBBs, SpillIdxes, RestoreMBBs, RestoreIdxes, - MBBVRegsMap, NewLIs); - } - - // Insert spills / restores if we are splitting. 
- if (!TrySplit) { - handleSpilledImpDefs(li, vrm, rc, NewLIs); - normalizeSpillWeights(NewLIs); - return NewLIs; - } - - SmallPtrSet AddedKill; - SmallVector Ops; - if (NeedStackSlot) { - int Id = SpillMBBs.find_first(); - while (Id != -1) { - std::vector &spills = SpillIdxes[Id]; - for (unsigned i = 0, e = spills.size(); i != e; ++i) { - SlotIndex index = spills[i].index; - unsigned VReg = spills[i].vreg; - LiveInterval &nI = getOrCreateInterval(VReg); - bool isReMat = vrm.isReMaterialized(VReg); - MachineInstr *MI = getInstructionFromIndex(index); - bool CanFold = false; - bool FoundUse = false; - Ops.clear(); - if (spills[i].canFold) { - CanFold = true; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (!MO.isReg() || MO.getReg() != VReg) - continue; - - Ops.push_back(j); - if (MO.isDef()) - continue; - if (isReMat || - (!FoundUse && !alsoFoldARestore(Id, index, VReg, - RestoreMBBs, RestoreIdxes))) { - // MI has two-address uses of the same register. If the use - // isn't the first and only use in the BB, then we can't fold - // it. FIXME: Move this to rewriteInstructionsForSpills. - CanFold = false; - break; - } - FoundUse = true; - } - } - // Fold the store into the def if possible. - bool Folded = false; - if (CanFold && !Ops.empty()) { - if (tryFoldMemoryOperand(MI, vrm, NULL, index, Ops, true, Slot,VReg)){ - Folded = true; - if (FoundUse) { - // Also folded uses, do not issue a load. - eraseRestoreInfo(Id, index, VReg, RestoreMBBs, RestoreIdxes); - nI.removeRange(index.getLoadIndex(), index.getDefIndex()); - } - nI.removeRange(index.getDefIndex(), index.getStoreIndex()); - } - } - - // Otherwise tell the spiller to issue a spill. - if (!Folded) { - LiveRange *LR = &nI.ranges[nI.ranges.size()-1]; - bool isKill = LR->end == index.getStoreIndex(); - if (!MI->registerDefIsDead(nI.reg)) - // No need to spill a dead def. 
- vrm.addSpillPoint(VReg, isKill, MI); - if (isKill) - AddedKill.insert(&nI); - } - } - Id = SpillMBBs.find_next(Id); - } - } - - int Id = RestoreMBBs.find_first(); - while (Id != -1) { - std::vector &restores = RestoreIdxes[Id]; - for (unsigned i = 0, e = restores.size(); i != e; ++i) { - SlotIndex index = restores[i].index; - if (index == SlotIndex()) - continue; - unsigned VReg = restores[i].vreg; - LiveInterval &nI = getOrCreateInterval(VReg); - bool isReMat = vrm.isReMaterialized(VReg); - MachineInstr *MI = getInstructionFromIndex(index); - bool CanFold = false; - Ops.clear(); - if (restores[i].canFold) { - CanFold = true; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (!MO.isReg() || MO.getReg() != VReg) - continue; - - if (MO.isDef()) { - // If this restore were to be folded, it would have been folded - // already. - CanFold = false; - break; - } - Ops.push_back(j); - } - } - - // Fold the load into the use if possible. - bool Folded = false; - if (CanFold && !Ops.empty()) { - if (!isReMat) - Folded = tryFoldMemoryOperand(MI, vrm, NULL,index,Ops,true,Slot,VReg); - else { - MachineInstr *ReMatDefMI = vrm.getReMaterializedMI(VReg); - int LdSlot = 0; - bool isLoadSS = tii_->isLoadFromStackSlot(ReMatDefMI, LdSlot); - // If the rematerializable def is a load, also try to fold it. - if (isLoadSS || ReMatDefMI->getDesc().canFoldAsLoad()) - Folded = tryFoldMemoryOperand(MI, vrm, ReMatDefMI, index, - Ops, isLoadSS, LdSlot, VReg); - if (!Folded) { - unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI); - if (ImpUse) { - // Re-matting an instruction with virtual register use. Add the - // register as an implicit use on the use MI and mark the register - // interval as unspillable. 
- LiveInterval &ImpLi = getInterval(ImpUse); - ImpLi.markNotSpillable(); - MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true)); - } - } - } - } - // If folding is not possible / failed, then tell the spiller to issue a - // load / rematerialization for us. - if (Folded) - nI.removeRange(index.getLoadIndex(), index.getDefIndex()); - else - vrm.addRestorePoint(VReg, MI); - } - Id = RestoreMBBs.find_next(Id); - } - - // Finalize intervals: add kills, finalize spill weights, and filter out - // dead intervals. - std::vector RetNewLIs; - for (unsigned i = 0, e = NewLIs.size(); i != e; ++i) { - LiveInterval *LI = NewLIs[i]; - if (!LI->empty()) { - if (!AddedKill.count(LI)) { - LiveRange *LR = &LI->ranges[LI->ranges.size()-1]; - SlotIndex LastUseIdx = LR->end.getBaseIndex(); - MachineInstr *LastUse = getInstructionFromIndex(LastUseIdx); - int UseIdx = LastUse->findRegisterUseOperandIdx(LI->reg, false); - assert(UseIdx != -1); - if (!LastUse->isRegTiedToDefOperand(UseIdx)) { - LastUse->getOperand(UseIdx).setIsKill(); - vrm.addKillPoint(LI->reg, LastUseIdx); - } - } - RetNewLIs.push_back(LI); - } - } - - handleSpilledImpDefs(li, vrm, rc, RetNewLIs); - normalizeSpillWeights(RetNewLIs); - return RetNewLIs; -} - -/// hasAllocatableSuperReg - Return true if the specified physical register has -/// any super register that's allocatable. -bool LiveIntervals::hasAllocatableSuperReg(unsigned Reg) const { - for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) - if (allocatableRegs_[*AS] && hasInterval(*AS)) - return true; - return false; -} - -/// getRepresentativeReg - Find the largest super register of the specified -/// physical register. -unsigned LiveIntervals::getRepresentativeReg(unsigned Reg) const { - // Find the largest super-register that is allocatable. 
- unsigned BestReg = Reg; - for (const unsigned* AS = tri_->getSuperRegisters(Reg); *AS; ++AS) { - unsigned SuperReg = *AS; - if (!hasAllocatableSuperReg(SuperReg) && hasInterval(SuperReg)) { - BestReg = SuperReg; - break; - } - } - return BestReg; -} - -/// getNumConflictsWithPhysReg - Return the number of uses and defs of the -/// specified interval that conflicts with the specified physical register. -unsigned LiveIntervals::getNumConflictsWithPhysReg(const LiveInterval &li, - unsigned PhysReg) const { - unsigned NumConflicts = 0; - const LiveInterval &pli = getInterval(getRepresentativeReg(PhysReg)); - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - MachineInstr *MI = O.getParent(); - if (MI->isDebugValue()) - continue; - SlotIndex Index = getInstructionIndex(MI); - if (pli.liveAt(Index)) - ++NumConflicts; - } - return NumConflicts; -} - -/// spillPhysRegAroundRegDefsUses - Spill the specified physical register -/// around all defs and uses of the specified interval. Return true if it -/// was able to cut its interval. -bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li, - unsigned PhysReg, VirtRegMap &vrm) { - unsigned SpillReg = getRepresentativeReg(PhysReg); - - DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg) - << " represented by " << tri_->getName(SpillReg) << '\n'); - - for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS) - // If there are registers which alias PhysReg, but which are not a - // sub-register of the chosen representative super register. Assert - // since we can't handle it yet. 
- assert(*AS == SpillReg || !allocatableRegs_[*AS] || !hasInterval(*AS) || - tri_->isSuperRegister(*AS, SpillReg)); - - bool Cut = false; - SmallVector PRegs; - if (hasInterval(SpillReg)) - PRegs.push_back(SpillReg); - for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR) - if (hasInterval(*SR)) - PRegs.push_back(*SR); - - DEBUG({ - dbgs() << "Trying to spill:"; - for (unsigned i = 0, e = PRegs.size(); i != e; ++i) - dbgs() << ' ' << tri_->getName(PRegs[i]); - dbgs() << '\n'; - }); - - SmallPtrSet SeenMIs; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - MachineInstr *MI = O.getParent(); - if (MI->isDebugValue() || SeenMIs.count(MI)) - continue; - SeenMIs.insert(MI); - SlotIndex Index = getInstructionIndex(MI); - bool LiveReg = false; - for (unsigned i = 0, e = PRegs.size(); i != e; ++i) { - unsigned PReg = PRegs[i]; - LiveInterval &pli = getInterval(PReg); - if (!pli.liveAt(Index)) - continue; - LiveReg = true; - SlotIndex StartIdx = Index.getLoadIndex(); - SlotIndex EndIdx = Index.getNextIndex().getBaseIndex(); - if (!pli.isInOneLiveRange(StartIdx, EndIdx)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, tm_); - } - report_fatal_error(Msg.str()); - } - pli.removeRange(StartIdx, EndIdx); - LiveReg = true; - } - if (!LiveReg) - continue; - DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI); - vrm.addEmergencySpill(SpillReg, MI); - Cut = true; - } - return Cut; -} - LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, MachineInstr* startInst) { LiveInterval& Interval = getOrCreateInterval(reg); -- cgit v1.1 From c3f2722615c600ac2cca9ac7aad6b7e05b840c97 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sun, 13 Nov 2011 
00:02:24 +0000 Subject: Switch PBQP to VRM's trivial rewriter. The very complicated VirtRegRewriter is going away. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144479 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/RegAllocPBQP.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index d4f69dc..845ee12 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -36,7 +36,6 @@ #include "Spiller.h" #include "Splitter.h" #include "VirtRegMap.h" -#include "VirtRegRewriter.h" #include "RegisterCoalescer.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -669,9 +668,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n"); // Run rewriter - std::auto_ptr rewriter(createVirtRegRewriter()); - - rewriter->runOnMachineFunction(*mf, *vrm, lis); + vrm->rewrite(lis->getSlotIndexes()); return true; } -- cgit v1.1 From 6e49be7101a69b0af3a435060aa3b2b02c5d4c17 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sun, 13 Nov 2011 00:16:01 +0000 Subject: Delete VirtRegRewriter. And there was much rejoicing. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144480 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CMakeLists.txt | 1 - lib/CodeGen/VirtRegRewriter.cpp | 2633 --------------------------------------- lib/CodeGen/VirtRegRewriter.h | 32 - 3 files changed, 2666 deletions(-) delete mode 100644 lib/CodeGen/VirtRegRewriter.cpp delete mode 100644 lib/CodeGen/VirtRegRewriter.h (limited to 'lib') diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 1c39cd2..c8d4dcf 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -97,7 +97,6 @@ add_llvm_library(LLVMCodeGen TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp VirtRegMap.cpp - VirtRegRewriter.cpp ) add_llvm_library_dependencies(LLVMCodeGen diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp deleted file mode 100644 index a5ec797..0000000 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ /dev/null @@ -1,2633 +0,0 @@ -//===-- llvm/CodeGen/Rewriter.cpp - Rewriter -----------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "virtregrewriter" -#include "VirtRegRewriter.h" -#include "VirtRegMap.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -STATISTIC(NumDSE , "Number of dead stores elided"); -STATISTIC(NumDSS , "Number of dead spill slots removed"); -STATISTIC(NumCommutes, "Number of instructions commuted"); -STATISTIC(NumDRM , "Number of re-materializable defs elided"); -STATISTIC(NumStores , "Number of stores added"); -STATISTIC(NumPSpills , "Number of physical register spills"); -STATISTIC(NumOmitted , "Number of reloads omitted"); -STATISTIC(NumAvoided , "Number of reloads deemed unnecessary"); -STATISTIC(NumCopified, "Number of available reloads turned into copies"); -STATISTIC(NumReMats , "Number of re-materialization"); -STATISTIC(NumLoads , "Number of loads added"); -STATISTIC(NumReused , "Number of values reused"); -STATISTIC(NumDCE , "Number of copies elided"); -STATISTIC(NumSUnfold , "Number of stores unfolded"); -STATISTIC(NumModRefUnfold, "Number of modref unfolded"); - -namespace { - enum RewriterName { local, trivial }; -} - -static cl::opt -RewriterOpt("rewriter", - cl::desc("Rewriter to use (default=local)"), - cl::Prefix, - cl::values(clEnumVal(local, "local rewriter"), - clEnumVal(trivial, "trivial rewriter"), - clEnumValEnd), - cl::init(local)); - -static cl::opt -ScheduleSpills("schedule-spills", - cl::desc("Schedule spill code"), - 
cl::init(false)); - -VirtRegRewriter::~VirtRegRewriter() {} - -/// substitutePhysReg - Replace virtual register in MachineOperand with a -/// physical register. Do the right thing with the sub-register index. -/// Note that operands may be added, so the MO reference is no longer valid. -static void substitutePhysReg(MachineOperand &MO, unsigned Reg, - const TargetRegisterInfo &TRI) { - if (MO.getSubReg()) { - MO.substPhysReg(Reg, TRI); - - // Any kill flags apply to the full virtual register, so they also apply to - // the full physical register. - // We assume that partial defs have already been decorated with a super-reg - // operand by LiveIntervals. - MachineInstr &MI = *MO.getParent(); - if (MO.isUse() && !MO.isUndef() && - (MO.isKill() || MI.isRegTiedToDefOperand(&MO-&MI.getOperand(0)))) - MI.addRegisterKilled(Reg, &TRI, /*AddIfNotFound=*/ true); - } else { - MO.setReg(Reg); - } -} - -namespace { - -/// This class is intended for use with the new spilling framework only. It -/// rewrites vreg def/uses to use the assigned preg, but does not insert any -/// spill code. -struct TrivialRewriter : public VirtRegRewriter { - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs) { - DEBUG(dbgs() << "********** REWRITE MACHINE CODE **********\n"); - DEBUG(dbgs() << "********** Function: " - << MF.getFunction()->getName() << '\n'); - DEBUG(dbgs() << "**** Machine Instrs" - << "(NOTE! Does not include spills and reloads!) 
****\n"); - DEBUG(MF.dump()); - - MachineRegisterInfo *mri = &MF.getRegInfo(); - const TargetRegisterInfo *tri = MF.getTarget().getRegisterInfo(); - - bool changed = false; - - for (LiveIntervals::iterator liItr = LIs->begin(), liEnd = LIs->end(); - liItr != liEnd; ++liItr) { - - const LiveInterval *li = liItr->second; - unsigned reg = li->reg; - - if (TargetRegisterInfo::isPhysicalRegister(reg)) { - if (!li->empty()) - mri->setPhysRegUsed(reg); - } - else { - if (!VRM.hasPhys(reg)) - continue; - unsigned pReg = VRM.getPhys(reg); - mri->setPhysRegUsed(pReg); - // Copy the register use-list before traversing it. - SmallVector, 32> reglist; - for (MachineRegisterInfo::reg_iterator I = mri->reg_begin(reg), - E = mri->reg_end(); I != E; ++I) - reglist.push_back(std::make_pair(&*I, I.getOperandNo())); - for (unsigned N=0; N != reglist.size(); ++N) - substitutePhysReg(reglist[N].first->getOperand(reglist[N].second), - pReg, *tri); - changed |= !reglist.empty(); - } - } - - DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.dump()); - - return changed; - } - -}; - -} - -// ************************************************************************ // - -namespace { - -/// AvailableSpills - As the local rewriter is scanning and rewriting an MBB -/// from top down, keep track of which spill slots or remat are available in -/// each register. -/// -/// Note that not all physregs are created equal here. In particular, some -/// physregs are reloads that we are allowed to clobber or ignore at any time. -/// Other physregs are values that the register allocated program is using -/// that we cannot CHANGE, but we can read if we like. We keep track of this -/// on a per-stack-slot / remat id basis as the low bit in the value of the -/// SpillSlotsAvailable entries. The predicate 'canClobberPhysReg()' checks -/// this bit and addAvailable sets it if. 
-class AvailableSpills { - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - - // SpillSlotsOrReMatsAvailable - This map keeps track of all of the spilled - // or remat'ed virtual register values that are still available, due to - // being loaded or stored to, but not invalidated yet. - std::map SpillSlotsOrReMatsAvailable; - - // PhysRegsAvailable - This is the inverse of SpillSlotsOrReMatsAvailable, - // indicating which stack slot values are currently held by a physreg. This - // is used to invalidate entries in SpillSlotsOrReMatsAvailable when a - // physreg is modified. - std::multimap PhysRegsAvailable; - - void disallowClobberPhysRegOnly(unsigned PhysReg); - - void ClobberPhysRegOnly(unsigned PhysReg); -public: - AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii) - : TRI(tri), TII(tii) { - } - - /// clear - Reset the state. - void clear() { - SpillSlotsOrReMatsAvailable.clear(); - PhysRegsAvailable.clear(); - } - - const TargetRegisterInfo *getRegInfo() const { return TRI; } - - /// getSpillSlotOrReMatPhysReg - If the specified stack slot or remat is - /// available in a physical register, return that PhysReg, otherwise - /// return 0. - unsigned getSpillSlotOrReMatPhysReg(int Slot) const { - std::map::const_iterator I = - SpillSlotsOrReMatsAvailable.find(Slot); - if (I != SpillSlotsOrReMatsAvailable.end()) { - return I->second >> 1; // Remove the CanClobber bit. - } - return 0; - } - - /// addAvailable - Mark that the specified stack slot / remat is available - /// in the specified physreg. If CanClobber is true, the physreg can be - /// modified at any time without changing the semantics of the program. - void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) { - // If this stack slot is thought to be available in some other physreg, - // remove its record. 
- ModifyStackSlotOrReMat(SlotOrReMat); - - PhysRegsAvailable.insert(std::make_pair(Reg, SlotOrReMat)); - SpillSlotsOrReMatsAvailable[SlotOrReMat]= (Reg << 1) | - (unsigned)CanClobber; - - if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Remembering RM#" - << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat); - DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) - << (CanClobber ? " canclobber" : "") << "\n"); - } - - /// canClobberPhysRegForSS - Return true if the spiller is allowed to change - /// the value of the specified stackslot register if it desires. The - /// specified stack slot must be available in a physreg for this query to - /// make sense. - bool canClobberPhysRegForSS(int SlotOrReMat) const { - assert(SpillSlotsOrReMatsAvailable.count(SlotOrReMat) && - "Value not available!"); - return SpillSlotsOrReMatsAvailable.find(SlotOrReMat)->second & 1; - } - - /// canClobberPhysReg - Return true if the spiller is allowed to clobber the - /// physical register where values for some stack slot(s) might be - /// available. - bool canClobberPhysReg(unsigned PhysReg) const { - std::multimap::const_iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - I++; - if (!canClobberPhysRegForSS(SlotOrReMat)) - return false; - } - return true; - } - - /// disallowClobberPhysReg - Unset the CanClobber bit of the specified - /// stackslot register. The register is still available but is no longer - /// allowed to be modifed. - void disallowClobberPhysReg(unsigned PhysReg); - - /// ClobberPhysReg - This is called when the specified physreg changes - /// value. We use this to invalidate any info about stuff that lives in - /// it and any of its aliases. - void ClobberPhysReg(unsigned PhysReg); - - /// ModifyStackSlotOrReMat - This method is called when the value in a stack - /// slot changes. 
This removes information about which register the - /// previous value for this slot lives in (as the previous value is dead - /// now). - void ModifyStackSlotOrReMat(int SlotOrReMat); - - /// ClobberSharingStackSlots - When a register mapped to a stack slot changes, - /// other stack slots sharing the same register are no longer valid. - void ClobberSharingStackSlots(int StackSlot); - - /// AddAvailableRegsToLiveIn - Availability information is being kept coming - /// into the specified MBB. Add available physical registers as potential - /// live-in's. If they are reused in the MBB, they will be added to the - /// live-in set to make register scavenger and post-allocation scheduler. - void AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, BitVector &RegKills, - std::vector &KillOps); -}; - -} - -// ************************************************************************ // - -// Given a location where a reload of a spilled register or a remat of -// a constant is to be inserted, attempt to find a safe location to -// insert the load at an earlier point in the basic-block, to hide -// latency of the load and to avoid address-generation interlock -// issues. -static MachineBasicBlock::iterator -ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc, - MachineBasicBlock::iterator const Begin, - unsigned PhysReg, - const TargetRegisterInfo *TRI, - bool DoReMat, - int SSorRMId, - const TargetInstrInfo *TII, - const MachineFunction &MF) -{ - if (!ScheduleSpills) - return InsertLoc; - - // Spill backscheduling is of primary interest to addresses, so - // don't do anything if the register isn't in the register class - // used for pointers. - - const TargetLowering *TL = MF.getTarget().getTargetLowering(); - - if (!TL->isTypeLegal(TL->getPointerTy())) - // Believe it or not, this is true on 16-bit targets like PIC16. 
- return InsertLoc; - - const TargetRegisterClass *ptrRegClass = - TL->getRegClassFor(TL->getPointerTy()); - if (!ptrRegClass->contains(PhysReg)) - return InsertLoc; - - // Scan upwards through the preceding instructions. If an instruction doesn't - // reference the stack slot or the register we're loading, we can - // backschedule the reload up past it. - MachineBasicBlock::iterator NewInsertLoc = InsertLoc; - while (NewInsertLoc != Begin) { - MachineBasicBlock::iterator Prev = prior(NewInsertLoc); - for (unsigned i = 0; i < Prev->getNumOperands(); ++i) { - MachineOperand &Op = Prev->getOperand(i); - if (!DoReMat && Op.isFI() && Op.getIndex() == SSorRMId) - goto stop; - } - if (Prev->findRegisterUseOperandIdx(PhysReg) != -1 || - Prev->findRegisterDefOperand(PhysReg)) - goto stop; - for (const unsigned *Alias = TRI->getAliasSet(PhysReg); *Alias; ++Alias) - if (Prev->findRegisterUseOperandIdx(*Alias) != -1 || - Prev->findRegisterDefOperand(*Alias)) - goto stop; - NewInsertLoc = Prev; - } -stop:; - - // If we made it to the beginning of the block, turn around and move back - // down just past any existing reloads. They're likely to be reloads/remats - // for instructions earlier than what our current reload/remat is for, so - // they should be scheduled earlier. - if (NewInsertLoc == Begin) { - int FrameIdx; - while (InsertLoc != NewInsertLoc && - (TII->isLoadFromStackSlot(NewInsertLoc, FrameIdx) || - TII->isTriviallyReMaterializable(NewInsertLoc))) - ++NewInsertLoc; - } - - return NewInsertLoc; -} - -namespace { - -// ReusedOp - For each reused operand, we keep track of a bit of information, -// in case we need to rollback upon processing a new operand. See comments -// below. -struct ReusedOp { - // The MachineInstr operand that reused an available value. - unsigned Operand; - - // StackSlotOrReMat - The spill slot or remat id of the value being reused. - unsigned StackSlotOrReMat; - - // PhysRegReused - The physical register the value was available in. 
- unsigned PhysRegReused; - - // AssignedPhysReg - The physreg that was assigned for use by the reload. - unsigned AssignedPhysReg; - - // VirtReg - The virtual register itself. - unsigned VirtReg; - - ReusedOp(unsigned o, unsigned ss, unsigned prr, unsigned apr, - unsigned vreg) - : Operand(o), StackSlotOrReMat(ss), PhysRegReused(prr), - AssignedPhysReg(apr), VirtReg(vreg) {} -}; - -/// ReuseInfo - This maintains a collection of ReuseOp's for each operand that -/// is reused instead of reloaded. -class ReuseInfo { - MachineInstr &MI; - std::vector Reuses; - BitVector PhysRegsClobbered; -public: - ReuseInfo(MachineInstr &mi, const TargetRegisterInfo *tri) : MI(mi) { - PhysRegsClobbered.resize(tri->getNumRegs()); - } - - bool hasReuses() const { - return !Reuses.empty(); - } - - /// addReuse - If we choose to reuse a virtual register that is already - /// available instead of reloading it, remember that we did so. - void addReuse(unsigned OpNo, unsigned StackSlotOrReMat, - unsigned PhysRegReused, unsigned AssignedPhysReg, - unsigned VirtReg) { - // If the reload is to the assigned register anyway, no undo will be - // required. - if (PhysRegReused == AssignedPhysReg) return; - - // Otherwise, remember this. - Reuses.push_back(ReusedOp(OpNo, StackSlotOrReMat, PhysRegReused, - AssignedPhysReg, VirtReg)); - } - - void markClobbered(unsigned PhysReg) { - PhysRegsClobbered.set(PhysReg); - } - - bool isClobbered(unsigned PhysReg) const { - return PhysRegsClobbered.test(PhysReg); - } - - /// GetRegForReload - We are about to emit a reload into PhysReg. If there - /// is some other operand that is using the specified register, either pick - /// a new register to use, or evict the previous reload and use this reg. 
- unsigned GetRegForReload(const TargetRegisterClass *RC, unsigned PhysReg, - MachineFunction &MF, MachineInstr *MI, - AvailableSpills &Spills, - std::vector &MaybeDeadStores, - SmallSet &Rejected, - BitVector &RegKills, - std::vector &KillOps, - VirtRegMap &VRM); - - /// GetRegForReload - Helper for the above GetRegForReload(). Add a - /// 'Rejected' set to remember which registers have been considered and - /// rejected for the reload. This avoids infinite looping in case like - /// this: - /// t1 := op t2, t3 - /// t2 <- assigned r0 for use by the reload but ended up reuse r1 - /// t3 <- assigned r1 for use by the reload but ended up reuse r0 - /// t1 <- desires r1 - /// sees r1 is taken by t2, tries t2's reload register r0 - /// sees r0 is taken by t3, tries t3's reload register r1 - /// sees r1 is taken by t2, tries t2's reload register r0 ... - unsigned GetRegForReload(unsigned VirtReg, unsigned PhysReg, MachineInstr *MI, - AvailableSpills &Spills, - std::vector &MaybeDeadStores, - BitVector &RegKills, - std::vector &KillOps, - VirtRegMap &VRM) { - SmallSet Rejected; - MachineFunction &MF = *MI->getParent()->getParent(); - const TargetRegisterClass* RC = MF.getRegInfo().getRegClass(VirtReg); - return GetRegForReload(RC, PhysReg, MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - } -}; - -} - -// ****************** // -// Utility Functions // -// ****************** // - -/// findSinglePredSuccessor - Return via reference a vector of machine basic -/// blocks each of which is a successor of the specified BB and has no other -/// predecessor. -static void findSinglePredSuccessor(MachineBasicBlock *MBB, - SmallVectorImpl &Succs){ - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB->pred_size() == 1) - Succs.push_back(SuccMBB); - } -} - -/// ResurrectConfirmedKill - Helper for ResurrectKill. 
This register is killed -/// but not re-defined and it's being reused. Remove the kill flag for the -/// register and unset the kill's marker and last kill operand. -static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector &KillOps) { - DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n"); - - MachineOperand *KillOp = KillOps[Reg]; - KillOp->setIsKill(false); - // KillOps[Reg] might be a def of a super-register. - unsigned KReg = KillOp->getReg(); - if (!RegKills[KReg]) - return; - - assert(KillOps[KReg]->getParent() == KillOp->getParent() && - "invalid superreg kill flags"); - KillOps[KReg] = NULL; - RegKills.reset(KReg); - - // If it's a def of a super-register. Its other sub-regsters are no - // longer killed as well. - for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) { - DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n"); - - assert(KillOps[*SR]->getParent() == KillOp->getParent() && - "invalid subreg kill flags"); - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } -} - -/// ResurrectKill - Invalidate kill info associated with a previous MI. An -/// optimization may have decided that it's safe to reuse a previously killed -/// register. If we fail to erase the invalid kill flags, then the register -/// scavenger may later clobber the register used by this MI. Note that this -/// must be done even if this MI is being deleted! Consider: -/// -/// USE $r1 (vreg1) -/// ... -/// $r1(vreg3) = COPY $r1 (vreg2) -/// -/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially -/// vreg1's only use is a kill. The rewriter doesn't know it should be live -/// until it rewrites vreg2. At that points it sees that the copy is dead and -/// deletes it. However, deleting the copy implicitly forwards liveness of $r1 -/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at -/// vreg1 before deleting the copy. 
-static void ResurrectKill(MachineInstr &MI, unsigned Reg, - const TargetRegisterInfo* TRI, BitVector &RegKills, - std::vector &KillOps) { - if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) { - ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); - return; - } - // No previous kill for this reg. Check for subreg kills as well. - // d4 = - // store d4, fi#0 - // ... - // = s8 - // ... - // = d4 - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - unsigned SReg = *SR; - if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) - ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps); - } -} - -/// InvalidateKills - MI is going to be deleted. If any of its operands are -/// marked kill, then invalidate the information. -static void InvalidateKills(MachineInstr &MI, - const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector &KillOps, - SmallVector *KillRegs = NULL) { - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef()) - continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) - continue; - if (KillRegs) - KillRegs->push_back(Reg); - assert(Reg < KillOps.size()); - if (KillOps[Reg] == &MO) { - // This operand was the kill, now no longer. - KillOps[Reg] = NULL; - RegKills.reset(Reg); - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - if (RegKills[*SR]) { - assert(KillOps[*SR] == &MO && "bad subreg kill flags"); - KillOps[*SR] = NULL; - RegKills.reset(*SR); - } - } - } - else { - // This operand may have reused a previously killed reg. Keep it live in - // case it continues to be used after erasing this instruction. - ResurrectKill(MI, Reg, TRI, RegKills, KillOps); - } - } -} - -/// InvalidateRegDef - If the def operand of the specified def MI is now dead -/// (since its spill instruction is removed), mark it isDead. 
Also checks if -/// the def MI has other definition operands that are not dead. Returns it by -/// reference. -static bool InvalidateRegDef(MachineBasicBlock::iterator I, - MachineInstr &NewDef, unsigned Reg, - bool &HasLiveDef, - const TargetRegisterInfo *TRI) { - // Due to remat, it's possible this reg isn't being reused. That is, - // the def of this reg (by prev MI) is now dead. - MachineInstr *DefMI = I; - MachineOperand *DefOp = NULL; - for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = DefMI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef()) - continue; - if (MO.getReg() == Reg) - DefOp = &MO; - else if (!MO.isDead()) - HasLiveDef = true; - } - if (!DefOp) - return false; - - bool FoundUse = false, Done = false; - MachineBasicBlock::iterator E = &NewDef; - ++I; ++E; - for (; !Done && I != E; ++I) { - MachineInstr *NMI = I; - for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = NMI->getOperand(j); - if (!MO.isReg() || MO.getReg() == 0 || - (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg()))) - continue; - if (MO.isUse()) - FoundUse = true; - Done = true; // Stop after scanning all the operands of this MI. - } - } - if (!FoundUse) { - // Def is dead! - DefOp->setIsDead(); - return true; - } - return false; -} - -/// UpdateKills - Track and update kill info. If a MI reads a register that is -/// marked kill, then it must be due to register reuse. Transfer the kill info -/// over. -static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI, - BitVector &RegKills, - std::vector &KillOps) { - // These do not affect kill info at all. 
- if (MI.isDebugValue()) - return; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - - // This operand may have reused a previously killed reg. Keep it live. - ResurrectKill(MI, Reg, TRI, RegKills, KillOps); - - if (MO.isKill()) { - RegKills.set(Reg); - KillOps[Reg] = &MO; - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - RegKills.set(*SR); - KillOps[*SR] = &MO; - } - } - } - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) - continue; - unsigned Reg = MO.getReg(); - RegKills.reset(Reg); - KillOps[Reg] = NULL; - // It also defines (or partially define) aliases. - for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) { - RegKills.reset(*SR); - KillOps[*SR] = NULL; - } - for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) { - RegKills.reset(*SR); - KillOps[*SR] = NULL; - } - } -} - -/// ReMaterialize - Re-materialize definition for Reg targeting DestReg. 
-/// -static void ReMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MII, - unsigned DestReg, unsigned Reg, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - VirtRegMap &VRM) { - MachineInstr *ReMatDefMI = VRM.getReMaterializedMI(Reg); -#ifndef NDEBUG - const MCInstrDesc &MCID = ReMatDefMI->getDesc(); - assert(MCID.getNumDefs() == 1 && - "Don't know how to remat instructions that define > 1 values!"); -#endif - TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI); - MachineInstr *NewMI = prior(MII); - for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = NewMI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) - continue; - assert(MO.isUse()); - unsigned Phys = VRM.getPhys(VirtReg); - assert(Phys && "Virtual register is not assigned a register?"); - substitutePhysReg(MO, Phys, *TRI); - } - ++NumReMats; -} - -/// findSuperReg - Find the SubReg's super-register of given register class -/// where its SubIdx sub-register is SubReg. -static unsigned findSuperReg(const TargetRegisterClass *RC, unsigned SubReg, - unsigned SubIdx, const TargetRegisterInfo *TRI) { - for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); - I != E; ++I) { - unsigned Reg = *I; - if (TRI->getSubReg(Reg, SubIdx) == SubReg) - return Reg; - } - return 0; -} - -// ******************************** // -// Available Spills Implementation // -// ******************************** // - -/// disallowClobberPhysRegOnly - Unset the CanClobber bit of the specified -/// stackslot register. The register is still available but is no longer -/// allowed to be modifed. 
-void AvailableSpills::disallowClobberPhysRegOnly(unsigned PhysReg) { - std::multimap::iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - I++; - assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && - "Bidirectional map mismatch!"); - SpillSlotsOrReMatsAvailable[SlotOrReMat] &= ~1; - DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) - << " copied, it is available for use but can no longer be modified\n"); - } -} - -/// disallowClobberPhysReg - Unset the CanClobber bit of the specified -/// stackslot register and its aliases. The register and its aliases may -/// still available but is no longer allowed to be modifed. -void AvailableSpills::disallowClobberPhysReg(unsigned PhysReg) { - for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS) - disallowClobberPhysRegOnly(*AS); - disallowClobberPhysRegOnly(PhysReg); -} - -/// ClobberPhysRegOnly - This is called when the specified physreg changes -/// value. We use this to invalidate any info about stuff we thing lives in it. -void AvailableSpills::ClobberPhysRegOnly(unsigned PhysReg) { - std::multimap::iterator I = - PhysRegsAvailable.lower_bound(PhysReg); - while (I != PhysRegsAvailable.end() && I->first == PhysReg) { - int SlotOrReMat = I->second; - PhysRegsAvailable.erase(I++); - assert((SpillSlotsOrReMatsAvailable[SlotOrReMat] >> 1) == PhysReg && - "Bidirectional map mismatch!"); - SpillSlotsOrReMatsAvailable.erase(SlotOrReMat); - DEBUG(dbgs() << "PhysReg " << TRI->getName(PhysReg) - << " clobbered, invalidating "); - if (SlotOrReMat > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "RM#" << SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1 <<"\n"); - else - DEBUG(dbgs() << "SS#" << SlotOrReMat << "\n"); - } -} - -/// ClobberPhysReg - This is called when the specified physreg changes -/// value. We use this to invalidate any info about stuff we thing lives in -/// it and any of its aliases. 
-void AvailableSpills::ClobberPhysReg(unsigned PhysReg) { - for (const unsigned *AS = TRI->getAliasSet(PhysReg); *AS; ++AS) - ClobberPhysRegOnly(*AS); - ClobberPhysRegOnly(PhysReg); -} - -/// AddAvailableRegsToLiveIn - Availability information is being kept coming -/// into the specified MBB. Add available physical registers as potential -/// live-in's. If they are reused in the MBB, they will be added to the -/// live-in set to make register scavenger and post-allocation scheduler. -void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, - BitVector &RegKills, - std::vector &KillOps) { - std::set NotAvailable; - for (std::multimap::iterator - I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end(); - I != E; ++I) { - unsigned Reg = I->first; - const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg); - // FIXME: A temporary workaround. We can't reuse available value if it's - // not safe to move the def of the virtual register's class. e.g. - // X86::RFP* register classes. Do not add it as a live-in. - if (!TII->isSafeToMoveRegClassDefs(RC)) - // This is no longer available. - NotAvailable.insert(Reg); - else { - MBB.addLiveIn(Reg); - if (RegKills[Reg]) - ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps); - } - - // Skip over the same register. - std::multimap::iterator NI = llvm::next(I); - while (NI != E && NI->first == Reg) { - ++I; - ++NI; - } - } - - for (std::set::iterator I = NotAvailable.begin(), - E = NotAvailable.end(); I != E; ++I) { - ClobberPhysReg(*I); - for (const unsigned *SubRegs = TRI->getSubRegisters(*I); - *SubRegs; ++SubRegs) - ClobberPhysReg(*SubRegs); - } -} - -/// ModifyStackSlotOrReMat - This method is called when the value in a stack -/// slot changes. This removes information about which register the previous -/// value for this slot lives in (as the previous value is dead now). 
-void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { - std::map::iterator It = - SpillSlotsOrReMatsAvailable.find(SlotOrReMat); - if (It == SpillSlotsOrReMatsAvailable.end()) return; - unsigned Reg = It->second >> 1; - SpillSlotsOrReMatsAvailable.erase(It); - - // This register may hold the value of multiple stack slots, only remove this - // stack slot from the set of values the register contains. - std::multimap::iterator I = PhysRegsAvailable.lower_bound(Reg); - for (; ; ++I) { - assert(I != PhysRegsAvailable.end() && I->first == Reg && - "Map inverse broken!"); - if (I->second == SlotOrReMat) break; - } - PhysRegsAvailable.erase(I); -} - -void AvailableSpills::ClobberSharingStackSlots(int StackSlot) { - std::map::iterator It = - SpillSlotsOrReMatsAvailable.find(StackSlot); - if (It == SpillSlotsOrReMatsAvailable.end()) return; - unsigned Reg = It->second >> 1; - - // Erase entries in PhysRegsAvailable for other stack slots. - std::multimap::iterator I = PhysRegsAvailable.lower_bound(Reg); - while (I != PhysRegsAvailable.end() && I->first == Reg) { - std::multimap::iterator NextI = llvm::next(I); - if (I->second != StackSlot) { - DEBUG(dbgs() << "Clobbered sharing SS#" << I->second << " in " - << PrintReg(Reg, TRI) << '\n'); - SpillSlotsOrReMatsAvailable.erase(I->second); - PhysRegsAvailable.erase(I); - } - I = NextI; - } -} - -// ************************** // -// Reuse Info Implementation // -// ************************** // - -/// GetRegForReload - We are about to emit a reload into PhysReg. If there -/// is some other operand that is using the specified register, either pick -/// a new register to use, or evict the previous reload and use this reg. 
-unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC, - unsigned PhysReg, - MachineFunction &MF, - MachineInstr *MI, AvailableSpills &Spills, - std::vector &MaybeDeadStores, - SmallSet &Rejected, - BitVector &RegKills, - std::vector &KillOps, - VirtRegMap &VRM) { - const TargetInstrInfo* TII = MF.getTarget().getInstrInfo(); - const TargetRegisterInfo *TRI = Spills.getRegInfo(); - - if (Reuses.empty()) return PhysReg; // This is most often empty. - - for (unsigned ro = 0, e = Reuses.size(); ro != e; ++ro) { - ReusedOp &Op = Reuses[ro]; - // If we find some other reuse that was supposed to use this register - // exactly for its reload, we can change this reload to use ITS reload - // register. That is, unless its reload register has already been - // considered and subsequently rejected because it has also been reused - // by another operand. - if (Op.PhysRegReused == PhysReg && - Rejected.count(Op.AssignedPhysReg) == 0 && - RC->contains(Op.AssignedPhysReg)) { - // Yup, use the reload register that we didn't use before. - unsigned NewReg = Op.AssignedPhysReg; - Rejected.insert(PhysReg); - return GetRegForReload(RC, NewReg, MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - } else { - // Otherwise, we might also have a problem if a previously reused - // value aliases the new register. If so, codegen the previous reload - // and use this one. - unsigned PRRU = Op.PhysRegReused; - if (TRI->regsOverlap(PRRU, PhysReg)) { - // Okay, we found out that an alias of a reused register - // was used. This isn't good because it means we have - // to undo a previous reuse. - MachineBasicBlock *MBB = MI->getParent(); - const TargetRegisterClass *AliasRC = - MBB->getParent()->getRegInfo().getRegClass(Op.VirtReg); - - // Copy Op out of the vector and remove it, we're going to insert an - // explicit load for it. - ReusedOp NewOp = Op; - Reuses.erase(Reuses.begin()+ro); - - // MI may be using only a sub-register of PhysRegUsed. 
- unsigned RealPhysRegUsed = MI->getOperand(NewOp.Operand).getReg(); - unsigned SubIdx = 0; - assert(TargetRegisterInfo::isPhysicalRegister(RealPhysRegUsed) && - "A reuse cannot be a virtual register"); - if (PRRU != RealPhysRegUsed) { - // What was the sub-register index? - SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed); - assert(SubIdx && - "Operand physreg is not a sub-register of PhysRegUsed"); - } - - // Ok, we're going to try to reload the assigned physreg into the - // slot that we were supposed to in the first place. However, that - // register could hold a reuse. Check to see if it conflicts or - // would prefer us to use a different register. - unsigned NewPhysReg = GetRegForReload(RC, NewOp.AssignedPhysReg, - MF, MI, Spills, MaybeDeadStores, - Rejected, RegKills, KillOps, VRM); - - bool DoReMat = NewOp.StackSlotOrReMat > VirtRegMap::MAX_STACK_SLOT; - int SSorRMId = DoReMat - ? VRM.getReMatId(NewOp.VirtReg) : (int) NewOp.StackSlotOrReMat; - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, - DoReMat, SSorRMId, TII, MF); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, NewPhysReg, NewOp.VirtReg, TII, - TRI, VRM); - } else { - TII->loadRegFromStackSlot(*MBB, InsertLoc, NewPhysReg, - NewOp.StackSlotOrReMat, AliasRC, TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM.addSpillSlotUse(NewOp.StackSlotOrReMat, LoadMI); - // Any stores to this stack slot are not dead anymore. - MaybeDeadStores[NewOp.StackSlotOrReMat] = NULL; - ++NumLoads; - } - Spills.ClobberPhysReg(NewPhysReg); - Spills.ClobberPhysReg(NewOp.PhysRegReused); - - unsigned RReg = SubIdx ? 
TRI->getSubReg(NewPhysReg, SubIdx) :NewPhysReg; - MI->getOperand(NewOp.Operand).setReg(RReg); - MI->getOperand(NewOp.Operand).setSubReg(0); - - Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - - DEBUG(dbgs() << "Reuse undone!\n"); - --NumReused; - - // Finally, PhysReg is now available, go ahead and use it. - return PhysReg; - } - } - } - return PhysReg; -} - -// ************************************************************************ // - -/// FoldsStackSlotModRef - Return true if the specified MI folds the specified -/// stack slot mod/ref. It also checks if it's possible to unfold the -/// instruction by having it define a specified physical register instead. -static bool FoldsStackSlotModRef(MachineInstr &MI, int SS, unsigned PhysReg, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI, - VirtRegMap &VRM) { - if (VRM.hasEmergencySpills(&MI) || VRM.isSpillPt(&MI)) - return false; - - bool Found = false; - VirtRegMap::MI2VirtMapTy::const_iterator I, End; - for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) { - unsigned VirtReg = I->second.first; - VirtRegMap::ModRef MR = I->second.second; - if (MR & VirtRegMap::isModRef) - if (VRM.getStackSlot(VirtReg) == SS) { - Found= TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), true, true) != 0; - break; - } - } - if (!Found) - return false; - - // Does the instruction uses a register that overlaps the scratch register? 
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned Reg = MO.getReg(); - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - if (!VRM.hasPhys(Reg)) - continue; - Reg = VRM.getPhys(Reg); - } - if (TRI->regsOverlap(PhysReg, Reg)) - return false; - } - return true; -} - -/// FindFreeRegister - Find a free register of a given register class by looking -/// at (at most) the last two machine instructions. -static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, - MachineBasicBlock &MBB, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI, - BitVector &AllocatableRegs) { - BitVector Defs(TRI->getNumRegs()); - BitVector Uses(TRI->getNumRegs()); - SmallVector LocalUses; - SmallVector Kills; - - // Take a look at 2 instructions at most. - unsigned Count = 0; - while (Count < 2) { - if (MII == MBB.begin()) - break; - MachineInstr *PrevMI = prior(MII); - MII = PrevMI; - - if (PrevMI->isDebugValue()) - continue; // Skip over dbg_value instructions. 
- ++Count; - - for (unsigned i = 0, e = PrevMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = PrevMI->getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; - unsigned Reg = MO.getReg(); - if (MO.isDef()) { - Defs.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Defs.set(*AS); - } else { - LocalUses.push_back(Reg); - if (MO.isKill() && AllocatableRegs[Reg]) - Kills.push_back(Reg); - } - } - - for (unsigned i = 0, e = Kills.size(); i != e; ++i) { - unsigned Kill = Kills[i]; - if (!Defs[Kill] && !Uses[Kill] && - RC->contains(Kill)) - return Kill; - } - for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { - unsigned Reg = LocalUses[i]; - Uses.set(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) - Uses.set(*AS); - } - } - - return 0; -} - -static -void AssignPhysToVirtReg(MachineInstr *MI, unsigned VirtReg, unsigned PhysReg, - const TargetRegisterInfo &TRI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == VirtReg) - substitutePhysReg(MO, PhysReg, TRI); - } -} - -namespace { - -struct RefSorter { - bool operator()(const std::pair &A, - const std::pair &B) { - return A.second < B.second; - } -}; - -// ***************************** // -// Local Spiller Implementation // -// ***************************** // - -class LocalRewriter : public VirtRegRewriter { - MachineRegisterInfo *MRI; - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - VirtRegMap *VRM; - LiveIntervals *LIs; - BitVector AllocatableRegs; - DenseMap DistanceMap; - DenseMap > Slot2DbgValues; - - MachineBasicBlock *MBB; // Basic block currently being processed. 
- -public: - - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs); - -private: - void EraseInstr(MachineInstr *MI) { - VRM->RemoveMachineInstrFromMaps(MI); - LIs->RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - } - - bool OptimizeByUnfold2(unsigned VirtReg, int SS, - MachineBasicBlock::iterator &MII, - std::vector &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps); - - bool OptimizeByUnfold(MachineBasicBlock::iterator &MII, - std::vector &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps); - - bool CommuteToFoldReload(MachineBasicBlock::iterator &MII, - unsigned VirtReg, unsigned SrcReg, int SS, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps, - const TargetRegisterInfo *TRI); - - void SpillRegToStackSlot(MachineBasicBlock::iterator &MII, - int Idx, unsigned PhysReg, int StackSlot, - const TargetRegisterClass *RC, - bool isAvailable, MachineInstr *&LastStore, - AvailableSpills &Spills, - SmallSet &ReMatDefs, - BitVector &RegKills, - std::vector &KillOps); - - void TransferDeadness(unsigned Reg, BitVector &RegKills, - std::vector &KillOps); - - bool InsertEmergencySpills(MachineInstr *MI); - - bool InsertRestores(MachineInstr *MI, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps); - - bool InsertSpills(MachineInstr *MI); - - void ProcessUses(MachineInstr &MI, AvailableSpills &Spills, - std::vector &MaybeDeadStores, - BitVector &RegKills, - ReuseInfo &ReusedOperands, - std::vector &KillOps); - - void RewriteMBB(LiveIntervals *LIs, - AvailableSpills &Spills, BitVector &RegKills, - std::vector &KillOps); -}; -} - -bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm, - LiveIntervals* lis) { - MRI = &MF.getRegInfo(); - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); - VRM = &vrm; - LIs = lis; - AllocatableRegs = TRI->getAllocatableSet(MF); - 
DEBUG(dbgs() << "\n**** Local spiller rewriting function '" - << MF.getFunction()->getName() << "':\n"); - DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and" - " reloads!) ****\n"); - DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); - - // Spills - Keep track of which spilled values are available in physregs - // so that we can choose to reuse the physregs instead of emitting - // reloads. This is usually refreshed per basic block. - AvailableSpills Spills(TRI, TII); - - // Keep track of kill information. - BitVector RegKills(TRI->getNumRegs()); - std::vector KillOps; - KillOps.resize(TRI->getNumRegs(), NULL); - - // SingleEntrySuccs - Successor blocks which have a single predecessor. - SmallVector SinglePredSuccs; - SmallPtrSet EarlyVisited; - - // Traverse the basic blocks depth first. - MachineBasicBlock *Entry = MF.begin(); - SmallPtrSet Visited; - for (df_ext_iterator > - DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); - DFI != E; ++DFI) { - MBB = *DFI; - if (!EarlyVisited.count(MBB)) - RewriteMBB(LIs, Spills, RegKills, KillOps); - - // If this MBB is the only predecessor of a successor. Keep the - // availability information and visit it next. - do { - // Keep visiting single predecessor successor as long as possible. - SinglePredSuccs.clear(); - findSinglePredSuccessor(MBB, SinglePredSuccs); - if (SinglePredSuccs.empty()) - MBB = 0; - else { - // FIXME: More than one successors, each of which has MBB has - // the only predecessor. - MBB = SinglePredSuccs[0]; - if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) { - Spills.AddAvailableRegsToLiveIn(*MBB, RegKills, KillOps); - RewriteMBB(LIs, Spills, RegKills, KillOps); - } - } - } while (MBB); - - // Clear the availability info. - Spills.clear(); - } - - DEBUG(dbgs() << "**** Post Machine Instrs ****\n"); - DEBUG(MF.print(dbgs(), LIs->getSlotIndexes())); - - // Mark unused spill slots. 
- MachineFrameInfo *MFI = MF.getFrameInfo(); - int SS = VRM->getLowSpillSlot(); - if (SS != VirtRegMap::NO_STACK_SLOT) { - for (int e = VRM->getHighSpillSlot(); SS <= e; ++SS) { - SmallVector &DbgValues = Slot2DbgValues[SS]; - if (!VRM->isSpillSlotUsed(SS)) { - MFI->RemoveStackObject(SS); - for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) { - MachineInstr *DVMI = DbgValues[j]; - DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n'); - EraseInstr(DVMI); - } - ++NumDSS; - } - DbgValues.clear(); - } - } - Slot2DbgValues.clear(); - - return true; -} - -/// OptimizeByUnfold2 - Unfold a series of load / store folding instructions if -/// a scratch register is available. -/// xorq %r12, %r13 -/// addq %rax, -184(%rbp) -/// addq %r13, -184(%rbp) -/// ==> -/// xorq %r12, %r13 -/// movq -184(%rbp), %r12 -/// addq %rax, %r12 -/// addq %r13, %r12 -/// movq %r12, -184(%rbp) -bool LocalRewriter:: -OptimizeByUnfold2(unsigned VirtReg, int SS, - MachineBasicBlock::iterator &MII, - std::vector &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps) { - - MachineBasicBlock::iterator NextMII = llvm::next(MII); - // Skip over dbg_value instructions. - while (NextMII != MBB->end() && NextMII->isDebugValue()) - NextMII = llvm::next(NextMII); - if (NextMII == MBB->end()) - return false; - - if (TII->getOpcodeAfterMemoryUnfold(MII->getOpcode(), true, true) == 0) - return false; - - // Now let's see if the last couple of instructions happens to have freed up - // a register. 
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - unsigned PhysReg = FindFreeRegister(MII, *MBB, RC, TRI, AllocatableRegs); - if (!PhysReg) - return false; - - MachineFunction &MF = *MBB->getParent(); - TRI = MF.getTarget().getRegisterInfo(); - MachineInstr &MI = *MII; - if (!FoldsStackSlotModRef(MI, SS, PhysReg, TII, TRI, *VRM)) - return false; - - // If the next instruction also folds the same SS modref and can be unfoled, - // then it's worthwhile to issue a load from SS into the free register and - // then unfold these instructions. - if (!FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)) - return false; - - // Back-schedule reloads and remats. - ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, false, SS, TII, MF); - - // Load from SS to the spare physical register. - TII->loadRegFromStackSlot(*MBB, MII, PhysReg, SS, RC, TRI); - // This invalidates Phys. - Spills.ClobberPhysReg(PhysReg); - // Remember it's available. - Spills.addAvailable(SS, PhysReg); - MaybeDeadStores[SS] = NULL; - - // Unfold current MI. - SmallVector NewMIs; - if (!TII->unfoldMemoryOperand(MF, &MI, VirtReg, false, false, NewMIs)) - llvm_unreachable("Unable unfold the load / store folding instruction!"); - assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); - VRM->transferRestorePts(&MI, NewMIs[0]); - MII = MBB->insert(MII, NewMIs[0]); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - ++NumModRefUnfold; - - // Unfold next instructions that fold the same SS. 
- do { - MachineInstr &NextMI = *NextMII; - NextMII = llvm::next(NextMII); - NewMIs.clear(); - if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs)) - llvm_unreachable("Unable unfold the load / store folding instruction!"); - assert(NewMIs.size() == 1); - AssignPhysToVirtReg(NewMIs[0], VirtReg, PhysReg, *TRI); - VRM->transferRestorePts(&NextMI, NewMIs[0]); - MBB->insert(NextMII, NewMIs[0]); - InvalidateKills(NextMI, TRI, RegKills, KillOps); - EraseInstr(&NextMI); - ++NumModRefUnfold; - // Skip over dbg_value instructions. - while (NextMII != MBB->end() && NextMII->isDebugValue()) - NextMII = llvm::next(NextMII); - if (NextMII == MBB->end()) - break; - } while (FoldsStackSlotModRef(*NextMII, SS, PhysReg, TII, TRI, *VRM)); - - // Store the value back into SS. - TII->storeRegToStackSlot(*MBB, NextMII, PhysReg, true, SS, RC, TRI); - MachineInstr *StoreMI = prior(NextMII); - VRM->addSpillSlotUse(SS, StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - - return true; -} - -/// OptimizeByUnfold - Turn a store folding instruction into a load folding -/// instruction. e.g. -/// xorl %edi, %eax -/// movl %eax, -32(%ebp) -/// movl -36(%ebp), %eax -/// orl %eax, -32(%ebp) -/// ==> -/// xorl %edi, %eax -/// orl -36(%ebp), %eax -/// mov %eax, -32(%ebp) -/// This enables unfolding optimization for a subsequent instruction which will -/// also eliminate the newly introduced store instruction. -bool LocalRewriter:: -OptimizeByUnfold(MachineBasicBlock::iterator &MII, - std::vector &MaybeDeadStores, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps) { - MachineFunction &MF = *MBB->getParent(); - MachineInstr &MI = *MII; - unsigned UnfoldedOpc = 0; - unsigned UnfoldPR = 0; - unsigned UnfoldVR = 0; - int FoldedSS = VirtRegMap::NO_STACK_SLOT; - VirtRegMap::MI2VirtMapTy::const_iterator I, End; - for (tie(I, End) = VRM->getFoldedVirts(&MI); I != End; ) { - // Only transform a MI that folds a single register. 
- if (UnfoldedOpc) - return false; - UnfoldVR = I->second.first; - VirtRegMap::ModRef MR = I->second.second; - // MI2VirtMap be can updated which invalidate the iterator. - // Increment the iterator first. - ++I; - if (VRM->isAssignedReg(UnfoldVR)) - continue; - // If this reference is not a use, any previous store is now dead. - // Otherwise, the store to this stack slot is not dead anymore. - FoldedSS = VRM->getStackSlot(UnfoldVR); - MachineInstr* DeadStore = MaybeDeadStores[FoldedSS]; - if (DeadStore && (MR & VirtRegMap::isModRef)) { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS); - if (!PhysReg || !DeadStore->readsRegister(PhysReg)) - continue; - UnfoldPR = PhysReg; - UnfoldedOpc = TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), - false, true); - } - } - - if (!UnfoldedOpc) { - if (!UnfoldVR) - return false; - - // Look for other unfolding opportunities. - return OptimizeByUnfold2(UnfoldVR, FoldedSS, MII, MaybeDeadStores, Spills, - RegKills, KillOps); - } - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0 || !MO.isUse()) - continue; - unsigned VirtReg = MO.getReg(); - if (TargetRegisterInfo::isPhysicalRegister(VirtReg) || MO.getSubReg()) - continue; - if (VRM->isAssignedReg(VirtReg)) { - unsigned PhysReg = VRM->getPhys(VirtReg); - if (PhysReg && TRI->regsOverlap(PhysReg, UnfoldPR)) - return false; - } else if (VRM->isReMaterialized(VirtReg)) - continue; - int SS = VRM->getStackSlot(VirtReg); - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - if (PhysReg) { - if (TRI->regsOverlap(PhysReg, UnfoldPR)) - return false; - continue; - } - if (VRM->hasPhys(VirtReg)) { - PhysReg = VRM->getPhys(VirtReg); - if (!TRI->regsOverlap(PhysReg, UnfoldPR)) - continue; - } - - // Ok, we'll need to reload the value into a register which makes - // it impossible to perform the store unfolding optimization later. 
- // Let's see if it is possible to fold the load if the store is - // unfolded. This allows us to perform the store unfolding - // optimization. - SmallVector NewMIs; - if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { - assert(NewMIs.size() == 1); - MachineInstr *NewMI = NewMIs.back(); - MBB->insert(MII, NewMI); - NewMIs.clear(); - int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false); - assert(Idx != -1); - SmallVector Ops; - Ops.push_back(Idx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS); - NewMI->eraseFromParent(); - if (FoldedMI) { - VRM->addSpillSlotUse(SS, FoldedMI); - if (!VRM->hasPhys(UnfoldVR)) - VRM->assignVirt2Phys(UnfoldVR, UnfoldPR); - VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - MII = FoldedMI; - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - return true; - } - } - } - - return false; -} - -/// CommuteChangesDestination - We are looking for r0 = op r1, r2 and -/// where SrcReg is r1 and it is tied to r0. Return true if after -/// commuting this instruction it will be r0 = op r2, r1. 
-static bool CommuteChangesDestination(MachineInstr *DefMI, - const MCInstrDesc &MCID, - unsigned SrcReg, - const TargetInstrInfo *TII, - unsigned &DstIdx) { - if (MCID.getNumDefs() != 1 && MCID.getNumOperands() != 3) - return false; - if (!DefMI->getOperand(1).isReg() || - DefMI->getOperand(1).getReg() != SrcReg) - return false; - unsigned DefIdx; - if (!DefMI->isRegTiedToDefOperand(1, &DefIdx) || DefIdx != 0) - return false; - unsigned SrcIdx1, SrcIdx2; - if (!TII->findCommutedOpIndices(DefMI, SrcIdx1, SrcIdx2)) - return false; - if (SrcIdx1 == 1 && SrcIdx2 == 2) { - DstIdx = 2; - return true; - } - return false; -} - -/// CommuteToFoldReload - -/// Look for -/// r1 = load fi#1 -/// r1 = op r1, r2 -/// store r1, fi#1 -/// -/// If op is commutable and r2 is killed, then we can xform these to -/// r2 = op r2, fi#1 -/// store r2, fi#1 -bool LocalRewriter:: -CommuteToFoldReload(MachineBasicBlock::iterator &MII, - unsigned VirtReg, unsigned SrcReg, int SS, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps, - const TargetRegisterInfo *TRI) { - if (MII == MBB->begin() || !MII->killsRegister(SrcReg)) - return false; - - MachineInstr &MI = *MII; - MachineBasicBlock::iterator DefMII = prior(MII); - MachineInstr *DefMI = DefMII; - const MCInstrDesc &MCID = DefMI->getDesc(); - unsigned NewDstIdx; - if (DefMII != MBB->begin() && - MCID.isCommutable() && - CommuteChangesDestination(DefMI, MCID, SrcReg, TII, NewDstIdx)) { - MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); - unsigned NewReg = NewDstMO.getReg(); - if (!NewDstMO.isKill() || TRI->regsOverlap(NewReg, SrcReg)) - return false; - MachineInstr *ReloadMI = prior(DefMII); - int FrameIdx; - unsigned DestReg = TII->isLoadFromStackSlot(ReloadMI, FrameIdx); - if (DestReg != SrcReg || FrameIdx != SS) - return false; - int UseIdx = DefMI->findRegisterUseOperandIdx(DestReg, false); - if (UseIdx == -1) - return false; - unsigned DefIdx; - if (!MI.isRegTiedToDefOperand(UseIdx, &DefIdx)) - return 
false; - assert(DefMI->getOperand(DefIdx).isReg() && - DefMI->getOperand(DefIdx).getReg() == SrcReg); - - // Now commute def instruction. - MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true); - if (!CommutedMI) - return false; - MBB->insert(MII, CommutedMI); - SmallVector Ops; - Ops.push_back(NewDstIdx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS); - // Not needed since foldMemoryOperand returns new MI. - CommutedMI->eraseFromParent(); - if (!FoldedMI) - return false; - - VRM->addSpillSlotUse(SS, FoldedMI); - VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - // Insert new def MI and spill MI. - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->storeRegToStackSlot(*MBB, &MI, NewReg, true, SS, RC, TRI); - MII = prior(MII); - MachineInstr *StoreMI = MII; - VRM->addSpillSlotUse(SS, StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - MII = FoldedMI; // Update MII to backtrack. - - // Delete all 3 old instructions. - InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); - EraseInstr(ReloadMI); - InvalidateKills(*DefMI, TRI, RegKills, KillOps); - EraseInstr(DefMI); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - - // If NewReg was previously holding value of some SS, it's now clobbered. - // This has to be done now because it's a physical register. When this - // instruction is re-visited, it's ignored. - Spills.ClobberPhysReg(NewReg); - - ++NumCommutes; - return true; - } - - return false; -} - -/// SpillRegToStackSlot - Spill a register to a specified stack slot. Check if -/// the last store to the same slot is now dead. If so, remove the last store. 
-void LocalRewriter:: -SpillRegToStackSlot(MachineBasicBlock::iterator &MII, - int Idx, unsigned PhysReg, int StackSlot, - const TargetRegisterClass *RC, - bool isAvailable, MachineInstr *&LastStore, - AvailableSpills &Spills, - SmallSet &ReMatDefs, - BitVector &RegKills, - std::vector &KillOps) { - - MachineBasicBlock::iterator oldNextMII = llvm::next(MII); - TII->storeRegToStackSlot(*MBB, llvm::next(MII), PhysReg, true, StackSlot, RC, - TRI); - MachineInstr *StoreMI = prior(oldNextMII); - VRM->addSpillSlotUse(StackSlot, StoreMI); - DEBUG(dbgs() << "Store:\t" << *StoreMI); - - // If there is a dead store to this stack slot, nuke it now. - if (LastStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *LastStore); - ++NumDSE; - SmallVector KillRegs; - InvalidateKills(*LastStore, TRI, RegKills, KillOps, &KillRegs); - MachineBasicBlock::iterator PrevMII = LastStore; - bool CheckDef = PrevMII != MBB->begin(); - if (CheckDef) - --PrevMII; - EraseInstr(LastStore); - if (CheckDef) { - // Look at defs of killed registers on the store. Mark the defs - // as dead since the store has been deleted and they aren't - // being reused. - for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) { - bool HasOtherDef = false; - if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) { - MachineInstr *DeadDef = PrevMII; - if (ReMatDefs.count(DeadDef) && !HasOtherDef) { - // FIXME: This assumes a remat def does not have side effects. - EraseInstr(DeadDef); - ++NumDRM; - } - } - } - } - } - - // Allow for multi-instruction spill sequences, as on PPC Altivec. Presume - // the last of multiple instructions is the actual store. - LastStore = prior(oldNextMII); - - // If the stack slot value was previously available in some other - // register, change it now. Otherwise, make the register available, - // in PhysReg. 
- Spills.ModifyStackSlotOrReMat(StackSlot); - Spills.ClobberPhysReg(PhysReg); - Spills.addAvailable(StackSlot, PhysReg, isAvailable); - ++NumStores; -} - -/// isSafeToDelete - Return true if this instruction doesn't produce any side -/// effect and all of its defs are dead. -static bool isSafeToDelete(MachineInstr &MI) { - const MCInstrDesc &MCID = MI.getDesc(); - if (MCID.mayLoad() || MCID.mayStore() || MCID.isTerminator() || - MCID.isCall() || MCID.isBarrier() || MCID.isReturn() || - MI.isLabel() || MI.isDebugValue() || - MI.hasUnmodeledSideEffects()) - return false; - - // Technically speaking inline asm without side effects and no defs can still - // be deleted. But there is so much bad inline asm code out there, we should - // let them be. - if (MI.isInlineAsm()) - return false; - - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || !MO.getReg()) - continue; - if (MO.isDef() && !MO.isDead()) - return false; - if (MO.isUse() && MO.isKill()) - // FIXME: We can't remove kill markers or else the scavenger will assert. - // An alternative is to add a ADD pseudo instruction to replace kill - // markers. - return false; - } - return true; -} - -/// TransferDeadness - A identity copy definition is dead and it's being -/// removed. Find the last def or use and mark it as dead / kill. 
-void LocalRewriter:: -TransferDeadness(unsigned Reg, BitVector &RegKills, - std::vector &KillOps) { - SmallPtrSet Seens; - SmallVector,8> Refs; - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(Reg), - RE = MRI->reg_end(); RI != RE; ++RI) { - MachineInstr *UDMI = &*RI; - if (UDMI->isDebugValue() || UDMI->getParent() != MBB) - continue; - DenseMap::iterator DI = DistanceMap.find(UDMI); - if (DI == DistanceMap.end()) - continue; - if (Seens.insert(UDMI)) - Refs.push_back(std::make_pair(UDMI, DI->second)); - } - - if (Refs.empty()) - return; - std::sort(Refs.begin(), Refs.end(), RefSorter()); - - while (!Refs.empty()) { - MachineInstr *LastUDMI = Refs.back().first; - Refs.pop_back(); - - MachineOperand *LastUD = NULL; - for (unsigned i = 0, e = LastUDMI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = LastUDMI->getOperand(i); - if (!MO.isReg() || MO.getReg() != Reg) - continue; - if (!LastUD || (LastUD->isUse() && MO.isDef())) - LastUD = &MO; - if (LastUDMI->isRegTiedToDefOperand(i)) - break; - } - if (LastUD->isDef()) { - // If the instruction has no side effect, delete it and propagate - // backward further. Otherwise, mark is dead and we are done. - if (!isSafeToDelete(*LastUDMI)) { - LastUD->setIsDead(); - break; - } - EraseInstr(LastUDMI); - } else { - LastUD->setIsKill(); - RegKills.set(Reg); - KillOps[Reg] = LastUD; - break; - } - } -} - -/// InsertEmergencySpills - Insert emergency spills before MI if requested by -/// VRM. Return true if spills were inserted. 
-bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) { - if (!VRM->hasEmergencySpills(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - SmallSet UsedSS; - std::vector &EmSpills = VRM->getEmergencySpills(MI); - for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) { - unsigned PhysReg = EmSpills[i]; - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg); - assert(RC && "Unable to determine register class!"); - int SS = VRM->getEmergencySpillSlot(RC); - if (UsedSS.count(SS)) - llvm_unreachable("Need to spill more than one physical registers!"); - UsedSS.insert(SS); - TII->storeRegToStackSlot(*MBB, MII, PhysReg, true, SS, RC, TRI); - MachineInstr *StoreMI = prior(MII); - VRM->addSpillSlotUse(SS, StoreMI); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(llvm::next(MII), MBB->begin(), PhysReg, TRI, false, SS, - TII, *MBB->getParent()); - - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SS, RC, TRI); - - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SS, LoadMI); - ++NumPSpills; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - return true; -} - -/// InsertRestores - Restore registers before MI is requested by VRM. Return -/// true is any instructions were inserted. -bool LocalRewriter::InsertRestores(MachineInstr *MI, - AvailableSpills &Spills, - BitVector &RegKills, - std::vector &KillOps) { - if (!VRM->isRestorePt(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - std::vector &RestoreRegs = VRM->getRestorePtRestores(MI); - for (unsigned i = 0, e = RestoreRegs.size(); i != e; ++i) { - unsigned VirtReg = RestoreRegs[e-i-1]; // Reverse order. - if (!VRM->getPreSplitReg(VirtReg)) - continue; // Split interval spilled again. - unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - - // Check if the value being restored if available. If so, it must be - // from a predecessor BB that fallthrough into this BB. 
We do not - // expect: - // BB1: - // r1 = load fi#1 - // ... - // = r1 - // ... # r1 not clobbered - // ... - // = load fi#1 - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - if (InReg == Phys) { - // If the value is already available in the expected register, save - // a reload / remat. - if (SSorRMId) - DEBUG(dbgs() << "Reusing RM#" - << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << SSorRMId); - DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for " << PrintReg(VirtReg) - <<" instead of reloading into physreg " - << TRI->getName(Phys) << '\n'); - - // Reusing a physreg may resurrect it. But we expect ProcessUses to update - // the kill flags for the current instruction after processing it. - - ++NumOmitted; - continue; - } else if (InReg && InReg != Phys) { - if (SSorRMId) - DEBUG(dbgs() << "Reusing RM#" - << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << SSorRMId); - DEBUG(dbgs() << " from physreg " - << TRI->getName(InReg) << " for " << PrintReg(VirtReg) - <<" by copying it into physreg " - << TRI->getName(Phys) << '\n'); - - // If the reloaded / remat value is available in another register, - // copy it to the desired register. - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, - *MBB->getParent()); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(), - TII->get(TargetOpcode::COPY), Phys) - .addReg(InReg, RegState::Kill); - - // This invalidates Phys. - Spills.ClobberPhysReg(Phys); - // Remember it's available. 
- Spills.addAvailable(SSorRMId, Phys); - - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - DEBUG(dbgs() << '\t' << *CopyMI); - ++NumCopified; - continue; - } - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, - *MBB->getParent()); - - if (VRM->isReMaterialized(VirtReg)) { - ReMaterialize(*MBB, InsertLoc, Phys, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, Phys, SSorRMId, RC, TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - - // This invalidates Phys. - Spills.ClobberPhysReg(Phys); - // Remember it's available. - Spills.addAvailable(SSorRMId, Phys); - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(MII)); - } - return true; -} - -/// InsertSpills - Insert spills after MI if requested by VRM. Return -/// true if spills were inserted. -bool LocalRewriter::InsertSpills(MachineInstr *MI) { - if (!VRM->isSpillPt(MI)) - return false; - MachineBasicBlock::iterator MII = MI; - std::vector > &SpillRegs = - VRM->getSpillPtSpills(MI); - for (unsigned i = 0, e = SpillRegs.size(); i != e; ++i) { - unsigned VirtReg = SpillRegs[i].first; - bool isKill = SpillRegs[i].second; - if (!VRM->getPreSplitReg(VirtReg)) - continue; // Split interval spilled again. 
- const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - unsigned Phys = VRM->getPhys(VirtReg); - int StackSlot = VRM->getStackSlot(VirtReg); - MachineBasicBlock::iterator oldNextMII = llvm::next(MII); - TII->storeRegToStackSlot(*MBB, llvm::next(MII), Phys, isKill, StackSlot, - RC, TRI); - MachineInstr *StoreMI = prior(oldNextMII); - VRM->addSpillSlotUse(StackSlot, StoreMI); - DEBUG(dbgs() << "Store:\t" << *StoreMI); - VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - } - return true; -} - - -/// ProcessUses - Process all of MI's spilled operands and all available -/// operands. -void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills, - std::vector &MaybeDeadStores, - BitVector &RegKills, - ReuseInfo &ReusedOperands, - std::vector &KillOps) { - // Clear kill info. - SmallSet KilledMIRegs; - SmallVector VirtUseOps; - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg() || MO.getReg() == 0) - continue; // Ignore non-register operands. - - unsigned VirtReg = MO.getReg(); - - if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) { - // Ignore physregs for spilling, but remember that it is used by this - // function. - MRI->setPhysRegUsed(VirtReg); - continue; - } - - // We want to process implicit virtual register uses first. - if (MO.isImplicit()) - // If the virtual register is implicitly defined, emit a implicit_def - // before so scavenger knows it's "defined". - // FIXME: This is a horrible hack done the by register allocator to - // remat a definition with virtual register operand. - VirtUseOps.insert(VirtUseOps.begin(), i); - else - VirtUseOps.push_back(i); - - // A partial def causes problems because the same operand both reads and - // writes the register. This rewriter is designed to rewrite uses and defs - // separately, so a partial def would already have been rewritten to a - // physreg by the time we get to processing defs. 
- // Add an implicit use operand to model the partial def. - if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) && - MI.findRegisterUseOperandIdx(VirtReg) == -1) { - VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands()); - MI.addOperand(MachineOperand::CreateReg(VirtReg, - false, // isDef - true)); // isImplicit - DEBUG(dbgs() << "Partial redef: " << MI); - } - } - - // Process all of the spilled uses and all non spilled reg references. - SmallVector PotentialDeadStoreSlots; - KilledMIRegs.clear(); - for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) { - unsigned i = VirtUseOps[j]; - unsigned VirtReg = MI.getOperand(i).getReg(); - assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && - "Not a virtual register?"); - - unsigned SubIdx = MI.getOperand(i).getSubReg(); - if (VRM->isAssignedReg(VirtReg)) { - // This virtual register was assigned a physreg! - unsigned Phys = VRM->getPhys(VirtReg); - MRI->setPhysRegUsed(Phys); - if (MI.getOperand(i).isDef()) - ReusedOperands.markClobbered(Phys); - substitutePhysReg(MI.getOperand(i), Phys, *TRI); - if (VRM->isImplicitlyDefined(VirtReg)) - // FIXME: Is this needed? - BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), Phys); - continue; - } - - // This virtual register is now known to be a spilled value. - if (!MI.getOperand(i).isUse()) - continue; // Handle defs in the loop below (handle use&def here though) - - bool AvoidReload = MI.getOperand(i).isUndef(); - // Check if it is defined by an implicit def. It should not be spilled. - // Note, this is for correctness reason. e.g. - // 8 %reg1024 = IMPLICIT_DEF - // 12 %reg1024 = INSERT_SUBREG %reg1024, %reg1025, 2 - // The live range [12, 14) are not part of the r1024 live interval since - // it's defined by an implicit def. It will not conflicts with live - // interval of r1025. 
Now suppose both registers are spilled, you can - // easily see a situation where both registers are reloaded before - // the INSERT_SUBREG and both target registers that would overlap. - bool DoReMat = VRM->isReMaterialized(VirtReg); - int SSorRMId = DoReMat - ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - int ReuseSlot = SSorRMId; - - // Check to see if this stack slot is available. - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); - - // If this is a sub-register use, make sure the reuse register is in the - // right register class. For example, for x86 not all of the 32-bit - // registers have accessible sub-registers. - // Similarly so for EXTRACT_SUBREG. Consider this: - // EDI = op - // MOV32_mr fi#1, EDI - // ... - // = EXTRACT_SUBREG fi#1 - // fi#1 is available in EDI, but it cannot be reused because it's not in - // the right register file. - if (PhysReg && !AvoidReload && SubIdx) { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - if (!RC->contains(PhysReg)) - PhysReg = 0; - } - - if (PhysReg && !AvoidReload) { - // This spilled operand might be part of a two-address operand. If this - // is the case, then changing it will necessarily require changing the - // def part of the instruction as well. However, in some cases, we - // aren't allowed to modify the reused register. If none of these cases - // apply, reuse it. - bool CanReuse = true; - bool isTied = MI.isRegTiedToDefOperand(i); - if (isTied) { - // Okay, we have a two address operand. We can reuse this physreg as - // long as we are allowed to clobber the value and there isn't an - // earlier def that has already clobbered the physreg. - CanReuse = !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg); - } - // If this is an asm, and a PhysReg alias is used elsewhere as an - // earlyclobber operand, we can't also use it as an input. 
- if (MI.isInlineAsm()) { - for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) { - MachineOperand &MOk = MI.getOperand(k); - if (MOk.isReg() && MOk.isEarlyClobber() && - TRI->regsOverlap(MOk.getReg(), PhysReg)) { - CanReuse = false; - DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg) - << " for " << PrintReg(VirtReg) << ": " << MOk - << '\n'); - break; - } - } - } - - if (CanReuse) { - // If this stack slot value is already available, reuse it! - if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " - << TRI->getName(PhysReg) << " for " << PrintReg(VirtReg) - << " instead of reloading into " - << PrintReg(VRM->getPhys(VirtReg), TRI) << '\n'); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - // Reusing a physreg may resurrect it. But we expect ProcessUses to - // update the kill flags for the current instr after processing it. - - // The only technical detail we have is that we don't know that - // PhysReg won't be clobbered by a reloaded stack slot that occurs - // later in the instruction. In particular, consider 'op V1, V2'. - // If V1 is available in physreg R0, we would choose to reuse it - // here, instead of reloading it into the register the allocator - // indicated (say R1). However, V2 might have to be reloaded - // later, and it might indicate that it needs to live in R0. When - // this occurs, we need to have information available that - // indicates it is safe to use R1 for the reload instead of R0. - // - // To further complicate matters, we might conflict with an alias, - // or R0 and R1 might not be compatible with each other. In this - // case, we actually insert a reload for V1 in R1, ensuring that - // we can get at R0 or its alias. 
- ReusedOperands.addReuse(i, ReuseSlot, PhysReg, - VRM->getPhys(VirtReg), VirtReg); - if (isTied) - // Only mark it clobbered if this is a use&def operand. - ReusedOperands.markClobbered(PhysReg); - ++NumReused; - - if (MI.getOperand(i).isKill() && - ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) { - - // The store of this spilled value is potentially dead, but we - // won't know for certain until we've confirmed that the re-use - // above is valid, which means waiting until the other operands - // are processed. For now we just track the spill slot, we'll - // remove it after the other operands are processed if valid. - - PotentialDeadStoreSlots.push_back(ReuseSlot); - } - - // Mark is isKill if it's there no other uses of the same virtual - // register and it's not a two-address operand. IsKill will be - // unset if reg is reused. - if (!isTied && KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - continue; - } // CanReuse - - // Otherwise we have a situation where we have a two-address instruction - // whose mod/ref operand needs to be reloaded. This reload is already - // available in some register "PhysReg", but if we used PhysReg as the - // operand to our 2-addr instruction, the instruction would modify - // PhysReg. This isn't cool if something later uses PhysReg and expects - // to get its initial value. - // - // To avoid this problem, and to avoid doing a load right after a store, - // we emit a copy from PhysReg into the designated register for this - // operand. - // - // This case also applies to an earlyclobber'd PhysReg. - unsigned DesignatedReg = VRM->getPhys(VirtReg); - assert(DesignatedReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - DesignatedReg = ReusedOperands. 
- GetRegForReload(VirtReg, DesignatedReg, &MI, Spills, - MaybeDeadStores, RegKills, KillOps, *VRM); - - // If the mapped designated register is actually the physreg we have - // incoming, we don't need to inserted a dead copy. - if (DesignatedReg == PhysReg) { - // If this stack slot value is already available, reuse it! - if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT) - DEBUG(dbgs() << "Reusing RM#" - << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1); - else - DEBUG(dbgs() << "Reusing SS#" << ReuseSlot); - DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg) - << " for " << PrintReg(VirtReg) - << " instead of reloading into same physreg.\n"); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - ReusedOperands.markClobbered(RReg); - ++NumReused; - continue; - } - - MRI->setPhysRegUsed(DesignatedReg); - ReusedOperands.markClobbered(DesignatedReg); - - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, *MBB->getParent()); - MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY), - DesignatedReg).addReg(PhysReg); - CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - UpdateKills(*CopyMI, TRI, RegKills, KillOps); - - // This invalidates DesignatedReg. - Spills.ClobberPhysReg(DesignatedReg); - - Spills.addAvailable(ReuseSlot, DesignatedReg); - unsigned RReg = - SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - ++NumReused; - continue; - } // if (PhysReg) - - // Otherwise, reload it and remember that we have it. 
- PhysReg = VRM->getPhys(VirtReg); - assert(PhysReg && "Must map virtreg to physreg!"); - - // Note that, if we reused a register for a previous operand, the - // register we want to reload into might not actually be - // available. If this occurs, use the register indicated by the - // reuser. - if (ReusedOperands.hasReuses()) - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - - MRI->setPhysRegUsed(PhysReg); - ReusedOperands.markClobbered(PhysReg); - if (AvoidReload) - ++NumAvoided; - else { - // Back-schedule reloads and remats. - MachineBasicBlock::iterator InsertLoc = - ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat, - SSorRMId, TII, *MBB->getParent()); - - if (DoReMat) { - ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM); - } else { - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); - TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI); - MachineInstr *LoadMI = prior(InsertLoc); - VRM->addSpillSlotUse(SSorRMId, LoadMI); - ++NumLoads; - DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size())); - } - // This invalidates PhysReg. - Spills.ClobberPhysReg(PhysReg); - - // Any stores to this stack slot are not dead anymore. - if (!DoReMat) - MaybeDeadStores[SSorRMId] = NULL; - Spills.addAvailable(SSorRMId, PhysReg); - // Assumes this is the last use. IsKill will be unset if reg is reused - // unless it's a two-address operand. - if (!MI.isRegTiedToDefOperand(i) && - KilledMIRegs.count(VirtReg) == 0) { - MI.getOperand(i).setIsKill(); - KilledMIRegs.insert(VirtReg); - } - - UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps); - DEBUG(dbgs() << '\t' << *prior(InsertLoc)); - } - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - } - - // Ok - now we can remove stores that have been confirmed dead. 
- for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) { - // This was the last use and the spilled value is still available - // for reuse. That means the spill was unnecessary! - int PDSSlot = PotentialDeadStoreSlots[j]; - MachineInstr* DeadStore = MaybeDeadStores[PDSSlot]; - if (DeadStore) { - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - EraseInstr(DeadStore); - MaybeDeadStores[PDSSlot] = NULL; - ++NumDSE; - } - } -} - -/// rewriteMBB - Keep track of which spills are available even after the -/// register allocator is done with them. If possible, avoid reloading vregs. -void -LocalRewriter::RewriteMBB(LiveIntervals *LIs, - AvailableSpills &Spills, BitVector &RegKills, - std::vector &KillOps) { - - DEBUG(dbgs() << "\n**** Local spiller rewriting MBB '" - << MBB->getName() << "':\n"); - - MachineFunction &MF = *MBB->getParent(); - - // MaybeDeadStores - When we need to write a value back into a stack slot, - // keep track of the inserted store. If the stack slot value is never read - // (because the value was used from some available register, for example), and - // subsequently stored to, the original store is dead. This map keeps track - // of inserted stores that are not used. If we see a subsequent store to the - // same stack slot, the original store is deleted. - std::vector MaybeDeadStores; - MaybeDeadStores.resize(MF.getFrameInfo()->getObjectIndexEnd(), NULL); - - // ReMatDefs - These are rematerializable def MIs which are not deleted. - SmallSet ReMatDefs; - - // Keep track of the registers we have already spilled in case there are - // multiple defs of the same register in MI. 
- SmallSet SpilledMIRegs; - - RegKills.reset(); - KillOps.clear(); - KillOps.resize(TRI->getNumRegs(), NULL); - - DistanceMap.clear(); - for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); - MII != E; ) { - MachineBasicBlock::iterator NextMII = llvm::next(MII); - - if (OptimizeByUnfold(MII, MaybeDeadStores, Spills, RegKills, KillOps)) - NextMII = llvm::next(MII); - - if (InsertEmergencySpills(MII)) - NextMII = llvm::next(MII); - - InsertRestores(MII, Spills, RegKills, KillOps); - - if (InsertSpills(MII)) - NextMII = llvm::next(MII); - - bool Erased = false; - bool BackTracked = false; - MachineInstr &MI = *MII; - - // Remember DbgValue's which reference stack slots. - if (MI.isDebugValue() && MI.getOperand(0).isFI()) - Slot2DbgValues[MI.getOperand(0).getIndex()].push_back(&MI); - - /// ReusedOperands - Keep track of operand reuse in case we need to undo - /// reuse. - ReuseInfo ReusedOperands(MI, TRI); - - ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps); - - DEBUG(dbgs() << '\t' << MI); - - - // If we have folded references to memory operands, make sure we clear all - // physical registers that may contain the value of the spilled virtual - // register - - // Copy the folded virts to a small vector, we may change MI2VirtMap. - SmallVector, 4> FoldedVirts; - // C++0x FTW! 
- for (std::pair FVRange = - VRM->getFoldedVirts(&MI); - FVRange.first != FVRange.second; ++FVRange.first) - FoldedVirts.push_back(FVRange.first->second); - - SmallSet FoldedSS; - for (unsigned FVI = 0, FVE = FoldedVirts.size(); FVI != FVE; ++FVI) { - unsigned VirtReg = FoldedVirts[FVI].first; - VirtRegMap::ModRef MR = FoldedVirts[FVI].second; - DEBUG(dbgs() << "Folded " << PrintReg(VirtReg) << " MR: " << MR); - - int SS = VRM->getStackSlot(VirtReg); - if (SS == VirtRegMap::NO_STACK_SLOT) - continue; - FoldedSS.insert(SS); - DEBUG(dbgs() << " - StackSlot: " << SS << "\n"); - - // If this folded instruction is just a use, check to see if it's a - // straight load from the virt reg slot. - if ((MR & VirtRegMap::isRef) && !(MR & VirtRegMap::isMod)) { - int FrameIdx; - unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx); - if (DestReg && FrameIdx == SS) { - // If this spill slot is available, turn it into a copy (or nothing) - // instead of leaving it as a load! - if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { - DEBUG(dbgs() << "Promoted Load To Copy: " << MI); - if (DestReg != InReg) { - MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg); - MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(), - TII->get(TargetOpcode::COPY)) - .addReg(DestReg, RegState::Define, DefMO->getSubReg()) - .addReg(InReg, RegState::Kill); - // Revisit the copy so we make sure to notice the effects of the - // operation on the destreg (either needing to RA it if it's - // virtual or needing to clobber any values if it's physical). - NextMII = CopyMI; - NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse); - BackTracked = true; - } else { - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - // InvalidateKills resurrects any prior kill of the copy's source - // allowing the source reg to be reused in place of the copy. 
- Spills.disallowClobberPhysReg(InReg); - } - - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - goto ProcessNextInst; - } - } else { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - SmallVector NewMIs; - if (PhysReg && - TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){ - MBB->insert(MII, NewMIs[0]); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - --NextMII; // backtrack to the unfolded instruction. - BackTracked = true; - goto ProcessNextInst; - } - } - } - - // If this reference is not a use, any previous store is now dead. - // Otherwise, the store to this stack slot is not dead anymore. - MachineInstr* DeadStore = MaybeDeadStores[SS]; - if (DeadStore) { - bool isDead = !(MR & VirtRegMap::isRef); - MachineInstr *NewStore = NULL; - if (MR & VirtRegMap::isModRef) { - unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); - SmallVector NewMIs; - // We can reuse this physreg as long as we are allowed to clobber - // the value and there isn't an earlier def that has already clobbered - // the physreg. - if (PhysReg && - !ReusedOperands.isClobbered(PhysReg) && - Spills.canClobberPhysReg(PhysReg) && - !TII->isStoreToStackSlot(&MI, SS)) { // Not profitable! - MachineOperand *KillOpnd = - DeadStore->findRegisterUseOperand(PhysReg, true); - // Note, if the store is storing a sub-register, it's possible the - // super-register is needed below. - if (KillOpnd && !KillOpnd->getSubReg() && - TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, true,NewMIs)){ - MBB->insert(MII, NewMIs[0]); - NewStore = NewMIs[1]; - MBB->insert(MII, NewStore); - VRM->addSpillSlotUse(SS, NewStore); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - --NextMII; - --NextMII; // backtrack to the unfolded instruction. - BackTracked = true; - isDead = true; - ++NumSUnfold; - } - } - } - - if (isDead) { // Previous store is dead. 
- // If we get here, the store is dead, nuke it now. - DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore); - InvalidateKills(*DeadStore, TRI, RegKills, KillOps); - EraseInstr(DeadStore); - if (!NewStore) - ++NumDSE; - } - - MaybeDeadStores[SS] = NULL; - if (NewStore) { - // Treat this store as a spill merged into a copy. That makes the - // stack slot value available. - VRM->virtFolded(VirtReg, NewStore, VirtRegMap::isMod); - goto ProcessNextInst; - } - } - - // If the spill slot value is available, and this is a new definition of - // the value, the value is not available anymore. - if (MR & VirtRegMap::isMod) { - // Notice that the value in this stack slot has been modified. - Spills.ModifyStackSlotOrReMat(SS); - - // If this is *just* a mod of the value, check to see if this is just a - // store to the spill slot (i.e. the spill got merged into the copy). If - // so, realize that the vreg is available now, and add the store to the - // MaybeDeadStore info. - int StackSlot; - if (!(MR & VirtRegMap::isRef)) { - if (unsigned SrcReg = TII->isStoreToStackSlot(&MI, StackSlot)) { - assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && - "Src hasn't been allocated yet?"); - - if (CommuteToFoldReload(MII, VirtReg, SrcReg, StackSlot, - Spills, RegKills, KillOps, TRI)) { - NextMII = llvm::next(MII); - BackTracked = true; - goto ProcessNextInst; - } - - // Okay, this is certainly a store of SrcReg to [StackSlot]. Mark - // this as a potentially dead store in case there is a subsequent - // store into the stack slot without a read from it. - MaybeDeadStores[StackSlot] = &MI; - - // If the stack slot value was previously available in some other - // register, change it now. Otherwise, make the register - // available in PhysReg. - Spills.addAvailable(StackSlot, SrcReg, MI.killsRegister(SrcReg)); - } - } - } - } - - // Process all of the spilled defs. 
- SpilledMIRegs.clear(); - for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI.getOperand(i); - if (!(MO.isReg() && MO.getReg() && MO.isDef())) - continue; - - unsigned VirtReg = MO.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(VirtReg)) { - // Check to see if this is a noop copy. If so, eliminate the - // instruction before considering the dest reg to be changed. - // Also check if it's copying from an "undef", if so, we can't - // eliminate this or else the undef marker is lost and it will - // confuses the scavenger. This is extremely rare. - if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() && - MI.getNumOperands() == 2) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - SmallVector KillRegs; - InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); - if (MO.isDead() && !KillRegs.empty()) { - // Source register or an implicit super/sub-register use is killed. - assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg())); - // Last def is now dead. - TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps); - } - EraseInstr(&MI); - Erased = true; - Spills.disallowClobberPhysReg(VirtReg); - goto ProcessNextInst; - } - - // If it's not a no-op copy, it clobbers the value in the destreg. - Spills.ClobberPhysReg(VirtReg); - ReusedOperands.markClobbered(VirtReg); - - // Check to see if this instruction is a load from a stack slot into - // a register. If so, this provides the stack slot value in the reg. - int FrameIdx; - if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) { - assert(DestReg == VirtReg && "Unknown load situation!"); - - // If it is a folded reference, then it's not safe to clobber. - bool Folded = FoldedSS.count(FrameIdx); - // Otherwise, if it wasn't available, remember that it is now! 
- Spills.addAvailable(FrameIdx, DestReg, !Folded); - goto ProcessNextInst; - } - - continue; - } - - unsigned SubIdx = MO.getSubReg(); - bool DoReMat = VRM->isReMaterialized(VirtReg); - if (DoReMat) - ReMatDefs.insert(&MI); - - // The only vregs left are stack slot definitions. - int StackSlot = VRM->getStackSlot(VirtReg); - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - - // If this def is part of a two-address operand, make sure to execute - // the store from the correct physical register. - unsigned PhysReg; - unsigned TiedOp; - if (MI.isRegTiedToUseOperand(i, &TiedOp)) { - PhysReg = MI.getOperand(TiedOp).getReg(); - if (SubIdx) { - unsigned SuperReg = findSuperReg(RC, PhysReg, SubIdx, TRI); - assert(SuperReg && TRI->getSubReg(SuperReg, SubIdx) == PhysReg && - "Can't find corresponding super-register!"); - PhysReg = SuperReg; - } - } else { - PhysReg = VRM->getPhys(VirtReg); - if (ReusedOperands.isClobbered(PhysReg)) { - // Another def has taken the assigned physreg. It must have been a - // use&def which got it due to reuse. Undo the reuse! - PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, *VRM); - } - } - - // If StackSlot is available in a register that also holds other stack - // slots, clobber those stack slots now. - Spills.ClobberSharingStackSlots(StackSlot); - - assert(PhysReg && "VR not assigned a physical register?"); - MRI->setPhysRegUsed(PhysReg); - unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg; - ReusedOperands.markClobbered(RReg); - MI.getOperand(i).setReg(RReg); - MI.getOperand(i).setSubReg(0); - - if (!MO.isDead() && SpilledMIRegs.insert(VirtReg)) { - MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; - SpillRegToStackSlot(MII, -1, PhysReg, StackSlot, RC, true, - LastStore, Spills, ReMatDefs, RegKills, KillOps); - NextMII = llvm::next(MII); - - // Check to see if this is a noop copy. 
If so, eliminate the - // instruction before considering the dest reg to be changed. - if (MI.isIdentityCopy()) { - ++NumDCE; - DEBUG(dbgs() << "Removing now-noop copy: " << MI); - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - UpdateKills(*LastStore, TRI, RegKills, KillOps); - goto ProcessNextInst; - } - } - } - ProcessNextInst: - // Delete dead instructions without side effects. - if (!Erased && !BackTracked && isSafeToDelete(MI)) { - InvalidateKills(MI, TRI, RegKills, KillOps); - EraseInstr(&MI); - Erased = true; - } - if (!Erased) - DistanceMap.insert(std::make_pair(&MI, DistanceMap.size())); - if (!Erased && !BackTracked) { - for (MachineBasicBlock::iterator II = &MI; II != NextMII; ++II) - UpdateKills(*II, TRI, RegKills, KillOps); - } - MII = NextMII; - } - -} - -llvm::VirtRegRewriter* llvm::createVirtRegRewriter() { - switch (RewriterOpt) { - default: llvm_unreachable("Unreachable!"); - case local: - return new LocalRewriter(); - case trivial: - return new TrivialRewriter(); - } -} diff --git a/lib/CodeGen/VirtRegRewriter.h b/lib/CodeGen/VirtRegRewriter.h deleted file mode 100644 index 93474e0..0000000 --- a/lib/CodeGen/VirtRegRewriter.h +++ /dev/null @@ -1,32 +0,0 @@ -//===-- llvm/CodeGen/VirtRegRewriter.h - VirtRegRewriter -*- C++ -*--------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_VIRTREGREWRITER_H -#define LLVM_CODEGEN_VIRTREGREWRITER_H - -namespace llvm { - class LiveIntervals; - class MachineFunction; - class VirtRegMap; - - /// VirtRegRewriter interface: Implementations of this interface assign - /// spilled virtual registers to stack slots, rewriting the code. 
- struct VirtRegRewriter { - virtual ~VirtRegRewriter(); - virtual bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM, - LiveIntervals* LIs) = 0; - }; - - /// createVirtRegRewriter - Create an return a rewriter object, as specified - /// on the command line. - VirtRegRewriter* createVirtRegRewriter(); - -} - -#endif -- cgit v1.1 From 334575e79b93500547e73519f5a68c7d99d1efaf Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sun, 13 Nov 2011 00:31:23 +0000 Subject: Remove the -color-ss-with-regs option. It was off by default. The new register allocators don't have the problems that made it necessary to reallocate registers during stack slot coloring. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144481 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/StackSlotColoring.cpp | 308 +------------------------------------- 1 file changed, 2 insertions(+), 306 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 57cbe1b..fbca337 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -40,18 +40,9 @@ DisableSharing("no-stack-slot-sharing", cl::init(false), cl::Hidden, cl::desc("Suppress slot sharing during stack coloring")); -static cl::opt -ColorWithRegsOpt("color-ss-with-regs", - cl::init(false), cl::Hidden, - cl::desc("Color stack slots with free registers")); - - static cl::opt DCELimit("ssc-dce-limit", cl::init(-1), cl::Hidden); STATISTIC(NumEliminated, "Number of stack slots eliminated due to coloring"); -STATISTIC(NumRegRepl, "Number of stack slot refs replaced with reg refs"); -STATISTIC(NumLoadElim, "Number of loads eliminated"); -STATISTIC(NumStoreElim, "Number of stores eliminated"); STATISTIC(NumDead, "Number of trivially dead stack accesses eliminated"); namespace { @@ -127,22 +118,8 @@ namespace { bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); bool ColorSlots(MachineFunction 
&MF); - bool ColorSlotsWithFreeRegs(SmallVector &SlotMapping, - SmallVector, 16> &RevMap, - BitVector &SlotIsReg); void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, MachineFunction &MF); - bool PropagateBackward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg); - bool PropagateForward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg); - void UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, - unsigned Reg, const TargetRegisterClass *RC, - SmallSet &Defs, - MachineFunction &MF); - bool AllMemRefsCanBeUnfolded(int SS); bool RemoveDeadStores(MachineBasicBlock* MBB); }; } // end anonymous namespace @@ -248,79 +225,6 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { return false; } -/// ColorSlotsWithFreeRegs - If there are any free registers available, try -/// replacing spill slots references with registers instead. -bool -StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector &SlotMapping, - SmallVector, 16> &RevMap, - BitVector &SlotIsReg) { - if (!(ColorWithRegs || ColorWithRegsOpt) || !VRM->HasUnusedRegisters()) - return false; - - bool Changed = false; - DEBUG(dbgs() << "Assigning unused registers to spill slots:\n"); - for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) { - LiveInterval *li = SSIntervals[i]; - int SS = TargetRegisterInfo::stackSlot2Index(li->reg); - if (!UsedColors[SS] || li->weight < 20) - // If the weight is < 20, i.e. two references in a loop with depth 1, - // don't bother with it. - continue; - - // These slots allow to share the same registers. - bool AllColored = true; - SmallVector ColoredRegs; - for (unsigned j = 0, ee = RevMap[SS].size(); j != ee; ++j) { - int RSS = RevMap[SS][j]; - const TargetRegisterClass *RC = LS->getIntervalRegClass(RSS); - // If it's not colored to another stack slot, try coloring it - // to a "free" register. 
- if (!RC) { - AllColored = false; - continue; - } - unsigned Reg = VRM->getFirstUnusedRegister(RC); - if (!Reg) { - AllColored = false; - continue; - } - if (!AllMemRefsCanBeUnfolded(RSS)) { - AllColored = false; - continue; - } else { - DEBUG(dbgs() << "Assigning fi#" << RSS << " to " - << TRI->getName(Reg) << '\n'); - ColoredRegs.push_back(Reg); - SlotMapping[RSS] = Reg; - SlotIsReg.set(RSS); - Changed = true; - } - } - - // Register and its sub-registers are no longer free. - while (!ColoredRegs.empty()) { - unsigned Reg = ColoredRegs.back(); - ColoredRegs.pop_back(); - VRM->setRegisterUsed(Reg); - // If reg is a callee-saved register, it will have to be spilled in - // the prologue. - MRI->setPhysRegUsed(Reg); - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - VRM->setRegisterUsed(*AS); - MRI->setPhysRegUsed(*AS); - } - } - // This spill slot is dead after the rewrites - if (AllColored) { - MFI->RemoveStackObject(SS); - ++NumEliminated; - } - } - DEBUG(dbgs() << '\n'); - - return Changed; -} - /// ColorSlot - Assign a "color" (stack slot) to the specified stack slot. /// int StackSlotColoring::ColorSlot(LiveInterval *li) { @@ -372,7 +276,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { SmallVector SlotMapping(NumObjs, -1); SmallVector SlotWeights(NumObjs, 0.0); SmallVector, 16> RevMap(NumObjs); - BitVector SlotIsReg(NumObjs); BitVector UsedColors(NumObjs); DEBUG(dbgs() << "Color spill slot intervals:\n"); @@ -404,31 +307,19 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { DEBUG(dbgs() << '\n'); #endif - // Can we "color" a stack slot with a unused register? - Changed |= ColorSlotsWithFreeRegs(SlotMapping, RevMap, SlotIsReg); - if (!Changed) return false; // Rewrite all MO_FrameIndex operands. 
SmallVector, 4> NewDefs(MF.getNumBlockIDs()); for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) { - bool isReg = SlotIsReg[SS]; int NewFI = SlotMapping[SS]; - if (NewFI == -1 || (NewFI == (int)SS && !isReg)) + if (NewFI == -1 || (NewFI == (int)SS)) continue; - const TargetRegisterClass *RC = LS->getIntervalRegClass(SS); SmallVector &RefMIs = SSRefs[SS]; for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) - if (!isReg) - RewriteInstruction(RefMIs[i], SS, NewFI, MF); - else { - // Rewrite to use a register instead. - unsigned MBBId = RefMIs[i]->getParent()->getNumber(); - SmallSet &Defs = NewDefs[MBBId]; - UnfoldAndRewriteInstruction(RefMIs[i], SS, NewFI, RC, Defs, MF); - } + RewriteInstruction(RefMIs[i], SS, NewFI, MF); } // Delete unused stack slots. @@ -441,28 +332,6 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { return true; } -/// AllMemRefsCanBeUnfolded - Return true if all references of the specified -/// spill slot index can be unfolded. -bool StackSlotColoring::AllMemRefsCanBeUnfolded(int SS) { - SmallVector &RefMIs = SSRefs[SS]; - for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) { - MachineInstr *MI = RefMIs[i]; - if (TII->isLoadFromStackSlot(MI, SS) || - TII->isStoreToStackSlot(MI, SS)) - // Restore and spill will become copies. - return true; - if (!TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(), false, false)) - return false; - for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) { - MachineOperand &MO = MI->getOperand(j); - if (MO.isFI() && MO.getIndex() != SS) - // If it uses another frameindex, we can, currently* unfold it. - return false; - } - } - return true; -} - /// RewriteInstruction - Rewrite specified instruction by replacing references /// to old frame index with new one. 
void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, @@ -489,179 +358,6 @@ void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, (*I)->setValue(NewSV); } -/// PropagateBackward - Traverse backward and look for the definition of -/// OldReg. If it can successfully update all of the references with NewReg, -/// do so and return true. -bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg) { - if (MII == MBB->begin()) - return false; - - SmallVector Uses; - SmallVector Refs; - while (--MII != MBB->begin()) { - bool FoundDef = false; // Not counting 2address def. - - Uses.clear(); - const MCInstrDesc &MCID = MII->getDesc(); - for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MII->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (Reg == OldReg) { - if (MO.isImplicit()) - return false; - - // Abort the use is actually a sub-register def. We don't have enough - // information to figure out if it is really legal. - if (MO.getSubReg() || MII->isSubregToReg()) - return false; - - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); - if (RC && !RC->contains(NewReg)) - return false; - - if (MO.isUse()) { - Uses.push_back(&MO); - } else { - Refs.push_back(&MO); - if (!MII->isRegTiedToUseOperand(i)) - FoundDef = true; - } - } else if (TRI->regsOverlap(Reg, NewReg)) { - return false; - } else if (TRI->regsOverlap(Reg, OldReg)) { - if (!MO.isUse() || !MO.isKill()) - return false; - } - } - - if (FoundDef) { - // Found non-two-address def. Stop here. - for (unsigned i = 0, e = Refs.size(); i != e; ++i) - Refs[i]->setReg(NewReg); - return true; - } - - // Two-address uses must be updated as well. 
- for (unsigned i = 0, e = Uses.size(); i != e; ++i) - Refs.push_back(Uses[i]); - } - return false; -} - -/// PropagateForward - Traverse forward and look for the kill of OldReg. If -/// it can successfully update all of the uses with NewReg, do so and -/// return true. -bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, - MachineBasicBlock *MBB, - unsigned OldReg, unsigned NewReg) { - if (MII == MBB->end()) - return false; - - SmallVector Uses; - while (++MII != MBB->end()) { - bool FoundKill = false; - const MCInstrDesc &MCID = MII->getDesc(); - for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MII->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - if (Reg == OldReg) { - if (MO.isDef() || MO.isImplicit()) - return false; - - // Abort the use is actually a sub-register use. We don't have enough - // information to figure out if it is really legal. - if (MO.getSubReg()) - return false; - - const TargetRegisterClass *RC = TII->getRegClass(MCID, i, TRI); - if (RC && !RC->contains(NewReg)) - return false; - if (MO.isKill()) - FoundKill = true; - - Uses.push_back(&MO); - } else if (TRI->regsOverlap(Reg, NewReg) || - TRI->regsOverlap(Reg, OldReg)) - return false; - } - if (FoundKill) { - for (unsigned i = 0, e = Uses.size(); i != e; ++i) - Uses[i]->setReg(NewReg); - return true; - } - } - return false; -} - -/// UnfoldAndRewriteInstruction - Rewrite specified instruction by unfolding -/// folded memory references and replacing those references with register -/// references instead. 
-void -StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, - unsigned Reg, - const TargetRegisterClass *RC, - SmallSet &Defs, - MachineFunction &MF) { - MachineBasicBlock *MBB = MI->getParent(); - if (unsigned DstReg = TII->isLoadFromStackSlot(MI, OldFI)) { - if (PropagateForward(MI, MBB, DstReg, Reg)) { - DEBUG(dbgs() << "Eliminated load: "); - DEBUG(MI->dump()); - ++NumLoadElim; - } else { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), - DstReg).addReg(Reg); - ++NumRegRepl; - } - - if (!Defs.count(Reg)) { - // If this is the first use of Reg in this MBB and it wasn't previously - // defined in MBB, add it to livein. - MBB->addLiveIn(Reg); - Defs.insert(Reg); - } - } else if (unsigned SrcReg = TII->isStoreToStackSlot(MI, OldFI)) { - if (MI->killsRegister(SrcReg) && PropagateBackward(MI, MBB, SrcReg, Reg)) { - DEBUG(dbgs() << "Eliminated store: "); - DEBUG(MI->dump()); - ++NumStoreElim; - } else { - BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg) - .addReg(SrcReg); - ++NumRegRepl; - } - - // Remember reg has been defined in MBB. - Defs.insert(Reg); - } else { - SmallVector NewMIs; - bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs); - (void)Success; // Silence compiler warning. - assert(Success && "Failed to unfold!"); - MachineInstr *NewMI = NewMIs[0]; - MBB->insert(MI, NewMI); - ++NumRegRepl; - - if (NewMI->readsRegister(Reg)) { - if (!Defs.count(Reg)) - // If this is the first use of Reg in this MBB and it wasn't previously - // defined in MBB, add it to livein. - MBB->addLiveIn(Reg); - Defs.insert(Reg); - } - } - MBB->erase(MI); -} /// RemoveDeadStores - Scan through a basic block and look for loads followed /// by stores. 
If they're both using the same stack slot, then the store is -- cgit v1.1 From 929e4da68ba95f0616172d326b8547f888f5eb37 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sun, 13 Nov 2011 00:39:45 +0000 Subject: Stop tracking unused registers in VirtRegMap. The information was only used by the register allocator in StackSlotColoring. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144482 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/StackSlotColoring.cpp | 14 +++----------- lib/CodeGen/VirtRegMap.cpp | 35 ----------------------------------- lib/CodeGen/VirtRegMap.h | 36 ------------------------------------ 3 files changed, 3 insertions(+), 82 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index fbca337..f8177a2 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -49,11 +49,8 @@ namespace { class StackSlotColoring : public MachineFunctionPass { bool ColorWithRegs; LiveStacks* LS; - VirtRegMap* VRM; MachineFrameInfo *MFI; - MachineRegisterInfo *MRI; const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; const MachineLoopInfo *loopInfo; // SSIntervals - Spill slot intervals. @@ -414,21 +411,16 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { }); MFI = MF.getFrameInfo(); - MRI = &MF.getRegInfo(); TII = MF.getTarget().getInstrInfo(); - TRI = MF.getTarget().getRegisterInfo(); LS = &getAnalysis(); - VRM = &getAnalysis(); loopInfo = &getAnalysis(); bool Changed = false; unsigned NumSlots = LS->getNumIntervals(); - if (NumSlots < 2) { - if (NumSlots == 0 || !VRM->HasUnusedRegisters()) - // Nothing to do! - return false; - } + if (NumSlots == 0) + // Nothing to do! + return false; // If there are calls to setjmp or sigsetjmp, don't perform stack slot // coloring. 
The stack could be modified before the longjmp is executed, diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 8a1cdc0..d72d924 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -221,41 +221,6 @@ void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { EmergencySpillMap.erase(MI); } -/// FindUnusedRegisters - Gather a list of allocatable registers that -/// have not been allocated to any virtual register. -bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) { - unsigned NumRegs = TRI->getNumRegs(); - UnusedRegs.reset(); - UnusedRegs.resize(NumRegs); - - BitVector Used(NumRegs); - for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { - unsigned Reg = TargetRegisterInfo::index2VirtReg(i); - if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) - Used.set(Virt2PhysMap[Reg]); - } - - BitVector Allocatable = TRI->getAllocatableSet(*MF); - bool AnyUnused = false; - for (unsigned Reg = 1; Reg < NumRegs; ++Reg) { - if (Allocatable[Reg] && !Used[Reg] && !LIs->hasInterval(Reg)) { - bool ReallyUnused = true; - for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { - if (Used[*AS] || LIs->hasInterval(*AS)) { - ReallyUnused = false; - break; - } - } - if (ReallyUnused) { - AnyUnused = true; - UnusedRegs.set(Reg); - } - } - } - - return AnyUnused; -} - void VirtRegMap::rewrite(SlotIndexes *Indexes) { DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 03abff3..3ce4a34 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -132,9 +132,6 @@ namespace llvm { /// the register is implicitly defined. BitVector ImplicitDefed; - /// UnusedRegs - A list of physical registers that have not been used. - BitVector UnusedRegs; - /// createSpillSlot - Allocate a spill slot for RC from MFI. 
unsigned createSpillSlot(const TargetRegisterClass *RC); @@ -475,39 +472,6 @@ namespace llvm { /// the folded instruction map and spill point map. void RemoveMachineInstrFromMaps(MachineInstr *MI); - /// FindUnusedRegisters - Gather a list of allocatable registers that - /// have not been allocated to any virtual register. - bool FindUnusedRegisters(LiveIntervals* LIs); - - /// HasUnusedRegisters - Return true if there are any allocatable registers - /// that have not been allocated to any virtual register. - bool HasUnusedRegisters() const { - return !UnusedRegs.none(); - } - - /// setRegisterUsed - Remember the physical register is now used. - void setRegisterUsed(unsigned Reg) { - UnusedRegs.reset(Reg); - } - - /// isRegisterUnused - Return true if the physical register has not been - /// used. - bool isRegisterUnused(unsigned Reg) const { - return UnusedRegs[Reg]; - } - - /// getFirstUnusedRegister - Return the first physical register that has not - /// been used. - unsigned getFirstUnusedRegister(const TargetRegisterClass *RC) { - int Reg = UnusedRegs.find_first(); - while (Reg != -1) { - if (allocatableRCRegs[RC][Reg]) - return (unsigned)Reg; - Reg = UnusedRegs.find_next(Reg); - } - return 0; - } - /// rewrite - Rewrite all instructions in MF to use only physical registers /// by mapping all virtual register operands to their assigned physical /// registers. -- cgit v1.1 From 3cb0b0edd9ec537a9415eaff97edd042a07fd16e Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sun, 13 Nov 2011 01:02:04 +0000 Subject: Remove dead code and data from VirtRegMap. Most of this stuff was supporting the old deferred spill code insertion mechanism. Modern spillers just edit machine code in place. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144484 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/VirtRegMap.cpp | 61 ----------- lib/CodeGen/VirtRegMap.h | 265 +-------------------------------------------- 2 files changed, 2 insertions(+), 324 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index d72d924..19064f0 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -58,25 +58,14 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { TRI = mf.getTarget().getRegisterInfo(); MF = &mf; - ReMatId = MAX_STACK_SLOT+1; LowSpillSlot = HighSpillSlot = NO_STACK_SLOT; Virt2PhysMap.clear(); Virt2StackSlotMap.clear(); - Virt2ReMatIdMap.clear(); Virt2SplitMap.clear(); - Virt2SplitKillMap.clear(); - ReMatMap.clear(); - ImplicitDefed.clear(); SpillSlotToUsesMap.clear(); - MI2VirtMap.clear(); - SpillPt2VirtMap.clear(); - RestorePt2VirtMap.clear(); - EmergencySpillMap.clear(); - EmergencySpillSlots.clear(); SpillSlotToUsesMap.resize(8); - ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs()); allocatableRCRegs.clear(); for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), @@ -93,11 +82,7 @@ void VirtRegMap::grow() { unsigned NumRegs = MF->getRegInfo().getNumVirtRegs(); Virt2PhysMap.resize(NumRegs); Virt2StackSlotMap.resize(NumRegs); - Virt2ReMatIdMap.resize(NumRegs); Virt2SplitMap.resize(NumRegs); - Virt2SplitKillMap.resize(NumRegs); - ReMatMap.resize(NumRegs); - ImplicitDefed.resize(NumRegs); } unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { @@ -144,29 +129,6 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { Virt2StackSlotMap[virtReg] = SS; } -int VirtRegMap::assignVirtReMatId(unsigned virtReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && - "attempt to assign re-mat id to already spilled register"); - Virt2ReMatIdMap[virtReg] = ReMatId; - return 
ReMatId++; -} - -void VirtRegMap::assignVirtReMatId(unsigned virtReg, int id) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2ReMatIdMap[virtReg] == NO_STACK_SLOT && - "attempt to assign re-mat id to already spilled register"); - Virt2ReMatIdMap[virtReg] = id; -} - -int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) { - std::map::iterator I = - EmergencySpillSlots.find(RC); - if (I != EmergencySpillSlots.end()) - return I->second; - return EmergencySpillSlots[RC] = createSpillSlot(RC); -} - void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) { // If FI < LowSpillSlot, this stack reference was produced by @@ -180,25 +142,6 @@ void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { } } -void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *OldMI, - MachineInstr *NewMI, ModRef MRInfo) { - // Move previous memory references folded to new instruction. - MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(NewMI); - for (MI2VirtMapTy::iterator I = MI2VirtMap.lower_bound(OldMI), - E = MI2VirtMap.end(); I != E && I->first == OldMI; ) { - MI2VirtMap.insert(IP, std::make_pair(NewMI, I->second)); - MI2VirtMap.erase(I++); - } - - // add new memory reference - MI2VirtMap.insert(IP, std::make_pair(NewMI, std::make_pair(VirtReg, MRInfo))); -} - -void VirtRegMap::virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo) { - MI2VirtMapTy::iterator IP = MI2VirtMap.lower_bound(MI); - MI2VirtMap.insert(IP, std::make_pair(MI, std::make_pair(VirtReg, MRInfo))); -} - void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -215,10 +158,6 @@ void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { && "Invalid spill slot"); SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI); } - MI2VirtMap.erase(MI); - SpillPt2VirtMap.erase(MI); - RestorePt2VirtMap.erase(MI); - 
EmergencySpillMap.erase(MI); } void VirtRegMap::rewrite(SlotIndexes *Indexes) { diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 3ce4a34..7213088 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -70,68 +70,16 @@ namespace llvm { /// at. IndexedMap Virt2StackSlotMap; - /// Virt2ReMatIdMap - This is virtual register to rematerialization id - /// mapping. Each spilled virtual register that should be remat'd has an - /// entry in it which corresponds to the remat id. - IndexedMap Virt2ReMatIdMap; - /// Virt2SplitMap - This is virtual register to splitted virtual register /// mapping. IndexedMap Virt2SplitMap; - /// Virt2SplitKillMap - This is splitted virtual register to its last use - /// (kill) index mapping. - IndexedMap Virt2SplitKillMap; - - /// ReMatMap - This is virtual register to re-materialized instruction - /// mapping. Each virtual register whose definition is going to be - /// re-materialized has an entry in it. - IndexedMap ReMatMap; - - /// MI2VirtMap - This is MachineInstr to virtual register - /// mapping. In the case of memory spill code being folded into - /// instructions, we need to know which virtual register was - /// read/written by this instruction. - MI2VirtMapTy MI2VirtMap; - - /// SpillPt2VirtMap - This records the virtual registers which should - /// be spilled right after the MachineInstr due to live interval - /// splitting. - std::map > > - SpillPt2VirtMap; - - /// RestorePt2VirtMap - This records the virtual registers which should - /// be restored right before the MachineInstr due to live interval - /// splitting. - std::map > RestorePt2VirtMap; - - /// EmergencySpillMap - This records the physical registers that should - /// be spilled / restored around the MachineInstr since the register - /// allocator has run out of registers. 
- std::map > EmergencySpillMap; - - /// EmergencySpillSlots - This records emergency spill slots used to - /// spill physical registers when the register allocator runs out of - /// registers. Ideally only one stack slot is used per function per - /// register class. - std::map EmergencySpillSlots; - - /// ReMatId - Instead of assigning a stack slot to a to be rematerialized - /// virtual register, an unique id is being assigned. This keeps track of - /// the highest id used so far. Note, this starts at (1<<18) to avoid - /// conflicts with stack slot numbers. - int ReMatId; - /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes. int LowSpillSlot, HighSpillSlot; /// SpillSlotToUsesMap - Records uses for each register spill slot. SmallVector, 8> SpillSlotToUsesMap; - /// ImplicitDefed - One bit for each virtual register. If set it indicates - /// the register is implicitly defined. - BitVector ImplicitDefed; - /// createSpillSlot - Allocate a spill slot for RC from MFI. unsigned createSpillSlot(const TargetRegisterClass *RC); @@ -141,10 +89,7 @@ namespace llvm { public: static char ID; VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), - Virt2StackSlotMap(NO_STACK_SLOT), - Virt2ReMatIdMap(NO_STACK_SLOT), Virt2SplitMap(0), - Virt2SplitKillMap(SlotIndex()), ReMatMap(NULL), - ReMatId(MAX_STACK_SLOT+1), + Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0), LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { } virtual bool runOnMachineFunction(MachineFunction &MF); @@ -232,8 +177,7 @@ namespace llvm { /// @brief returns true if the specified virtual register is not /// mapped to a stack slot or rematerialized. bool isAssignedReg(unsigned virtReg) const { - if (getStackSlot(virtReg) == NO_STACK_SLOT && - getReMatId(virtReg) == NO_STACK_SLOT) + if (getStackSlot(virtReg) == NO_STACK_SLOT) return true; // Split register can be assigned a physical register as well as a // stack slot or remat id. 
@@ -247,13 +191,6 @@ namespace llvm { return Virt2StackSlotMap[virtReg]; } - /// @brief returns the rematerialization id mapped to the specified virtual - /// register - int getReMatId(unsigned virtReg) const { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - return Virt2ReMatIdMap[virtReg]; - } - /// @brief create a mapping for the specifed virtual register to /// the next available stack slot int assignVirt2StackSlot(unsigned virtReg); @@ -261,178 +198,6 @@ namespace llvm { /// the specified stack slot void assignVirt2StackSlot(unsigned virtReg, int frameIndex); - /// @brief assign an unique re-materialization id to the specified - /// virtual register. - int assignVirtReMatId(unsigned virtReg); - /// @brief assign an unique re-materialization id to the specified - /// virtual register. - void assignVirtReMatId(unsigned virtReg, int id); - - /// @brief returns true if the specified virtual register is being - /// re-materialized. - bool isReMaterialized(unsigned virtReg) const { - return ReMatMap[virtReg] != NULL; - } - - /// @brief returns the original machine instruction being re-issued - /// to re-materialize the specified virtual register. - MachineInstr *getReMaterializedMI(unsigned virtReg) const { - return ReMatMap[virtReg]; - } - - /// @brief records the specified virtual register will be - /// re-materialized and the original instruction which will be re-issed - /// for this purpose. If parameter all is true, then all uses of the - /// registers are rematerialized and it's safe to delete the definition. - void setVirtIsReMaterialized(unsigned virtReg, MachineInstr *def) { - ReMatMap[virtReg] = def; - } - - /// @brief record the last use (kill) of a split virtual register. - void addKillPoint(unsigned virtReg, SlotIndex index) { - Virt2SplitKillMap[virtReg] = index; - } - - SlotIndex getKillPoint(unsigned virtReg) const { - return Virt2SplitKillMap[virtReg]; - } - - /// @brief remove the last use (kill) of a split virtual register. 
- void removeKillPoint(unsigned virtReg) { - Virt2SplitKillMap[virtReg] = SlotIndex(); - } - - /// @brief returns true if the specified MachineInstr is a spill point. - bool isSpillPt(MachineInstr *Pt) const { - return SpillPt2VirtMap.find(Pt) != SpillPt2VirtMap.end(); - } - - /// @brief returns the virtual registers that should be spilled due to - /// splitting right after the specified MachineInstr. - std::vector > &getSpillPtSpills(MachineInstr *Pt) { - return SpillPt2VirtMap[Pt]; - } - - /// @brief records the specified MachineInstr as a spill point for virtReg. - void addSpillPoint(unsigned virtReg, bool isKill, MachineInstr *Pt) { - std::map > >::iterator - I = SpillPt2VirtMap.find(Pt); - if (I != SpillPt2VirtMap.end()) - I->second.push_back(std::make_pair(virtReg, isKill)); - else { - std::vector > Virts; - Virts.push_back(std::make_pair(virtReg, isKill)); - SpillPt2VirtMap.insert(std::make_pair(Pt, Virts)); - } - } - - /// @brief - transfer spill point information from one instruction to - /// another. - void transferSpillPts(MachineInstr *Old, MachineInstr *New) { - std::map > >::iterator - I = SpillPt2VirtMap.find(Old); - if (I == SpillPt2VirtMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back().first; - bool isKill = I->second.back().second; - I->second.pop_back(); - addSpillPoint(virtReg, isKill, New); - } - SpillPt2VirtMap.erase(I); - } - - /// @brief returns true if the specified MachineInstr is a restore point. - bool isRestorePt(MachineInstr *Pt) const { - return RestorePt2VirtMap.find(Pt) != RestorePt2VirtMap.end(); - } - - /// @brief returns the virtual registers that should be restoreed due to - /// splitting right after the specified MachineInstr. - std::vector &getRestorePtRestores(MachineInstr *Pt) { - return RestorePt2VirtMap[Pt]; - } - - /// @brief records the specified MachineInstr as a restore point for virtReg. 
- void addRestorePoint(unsigned virtReg, MachineInstr *Pt) { - std::map >::iterator I = - RestorePt2VirtMap.find(Pt); - if (I != RestorePt2VirtMap.end()) - I->second.push_back(virtReg); - else { - std::vector Virts; - Virts.push_back(virtReg); - RestorePt2VirtMap.insert(std::make_pair(Pt, Virts)); - } - } - - /// @brief - transfer restore point information from one instruction to - /// another. - void transferRestorePts(MachineInstr *Old, MachineInstr *New) { - std::map >::iterator I = - RestorePt2VirtMap.find(Old); - if (I == RestorePt2VirtMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back(); - I->second.pop_back(); - addRestorePoint(virtReg, New); - } - RestorePt2VirtMap.erase(I); - } - - /// @brief records that the specified physical register must be spilled - /// around the specified machine instr. - void addEmergencySpill(unsigned PhysReg, MachineInstr *MI) { - if (EmergencySpillMap.find(MI) != EmergencySpillMap.end()) - EmergencySpillMap[MI].push_back(PhysReg); - else { - std::vector PhysRegs; - PhysRegs.push_back(PhysReg); - EmergencySpillMap.insert(std::make_pair(MI, PhysRegs)); - } - } - - /// @brief returns true if one or more physical registers must be spilled - /// around the specified instruction. - bool hasEmergencySpills(MachineInstr *MI) const { - return EmergencySpillMap.find(MI) != EmergencySpillMap.end(); - } - - /// @brief returns the physical registers to be spilled and restored around - /// the instruction. - std::vector &getEmergencySpills(MachineInstr *MI) { - return EmergencySpillMap[MI]; - } - - /// @brief - transfer emergency spill information from one instruction to - /// another. 
- void transferEmergencySpills(MachineInstr *Old, MachineInstr *New) { - std::map >::iterator I = - EmergencySpillMap.find(Old); - if (I == EmergencySpillMap.end()) - return; - while (!I->second.empty()) { - unsigned virtReg = I->second.back(); - I->second.pop_back(); - addEmergencySpill(virtReg, New); - } - EmergencySpillMap.erase(I); - } - - /// @brief return or get a emergency spill slot for the register class. - int getEmergencySpillSlot(const TargetRegisterClass *RC); - - /// @brief Return lowest spill slot index. - int getLowSpillSlot() const { - return LowSpillSlot; - } - - /// @brief Return highest spill slot index. - int getHighSpillSlot() const { - return HighSpillSlot; - } - /// @brief Records a spill slot use. void addSpillSlotUse(int FrameIndex, MachineInstr *MI); @@ -442,32 +207,6 @@ namespace llvm { return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty(); } - /// @brief Mark the specified register as being implicitly defined. - void setIsImplicitlyDefined(unsigned VirtReg) { - ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg)); - } - - /// @brief Returns true if the virtual register is implicitly defined. - bool isImplicitlyDefined(unsigned VirtReg) const { - return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)]; - } - - /// @brief Updates information about the specified virtual register's value - /// folded into newMI machine instruction. - void virtFolded(unsigned VirtReg, MachineInstr *OldMI, MachineInstr *NewMI, - ModRef MRInfo); - - /// @brief Updates information about the specified virtual register's value - /// folded into the specified machine instruction. 
- void virtFolded(unsigned VirtReg, MachineInstr *MI, ModRef MRInfo); - - /// @brief returns the virtual registers' values folded in memory - /// operands of this instruction - std::pair - getFoldedVirts(MachineInstr* MI) const { - return MI2VirtMap.equal_range(MI); - } - /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the /// the folded instruction map and spill point map. void RemoveMachineInstrFromMaps(MachineInstr *MI); -- cgit v1.1 From cb39064e7aee2273da1d00e6b800db84ddc34b6b Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sun, 13 Nov 2011 01:23:30 +0000 Subject: Stop tracking spill slot uses in VirtRegMap. Nobody cared, StackSlotColoring scans the instructions to find used stack slots. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144485 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/InlineSpiller.cpp | 6 ----- lib/CodeGen/LiveDebugVariables.cpp | 3 +-- lib/CodeGen/RegAllocGreedy.cpp | 6 ----- lib/CodeGen/Spiller.cpp | 2 -- lib/CodeGen/VirtRegMap.cpp | 45 -------------------------------------- lib/CodeGen/VirtRegMap.h | 22 +------------------ 6 files changed, 2 insertions(+), 82 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index d1e3f1a..0066b7a 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -726,7 +726,6 @@ bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { MRI.getRegClass(SVI.SpillReg), &TRI); --MII; // Point to store instruction. 
LIS.InsertMachineInstrInMaps(MII); - VRM.addSpillSlotUse(StackSlot, MII); DEBUG(dbgs() << "\thoisted: " << SVI.SpillVNI->def << '\t' << *MII); ++NumSpills; @@ -1046,8 +1045,6 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, if (!FoldMI) return false; LIS.ReplaceMachineInstrInMaps(MI, FoldMI); - if (!LoadMI) - VRM.addSpillSlotUse(StackSlot, FoldMI); MI->eraseFromParent(); // TII.foldMemoryOperand may have left some implicit operands on the @@ -1081,7 +1078,6 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); - VRM.addSpillSlotUse(StackSlot, MI); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, LIS.getVNInfoAllocator()); @@ -1097,7 +1093,6 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to store instruction. SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); - VRM.addSpillSlotUse(StackSlot, MI); DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); @@ -1254,7 +1249,6 @@ void InlineSpiller::spillAll() { MachineInstr *MI = RI.skipInstruction();) { assert(SnippetCopies.count(MI) && "Remaining use wasn't a snippet copy"); // FIXME: Do this with a LiveRangeEdit callback. - VRM.RemoveMachineInstrFromMaps(MI); LIS.RemoveMachineInstrFromMaps(MI); MI->eraseFromParent(); } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 3dfe4c0..0414692 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -889,8 +889,7 @@ UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) { // index is no longer available. 
That means the user value is in a // non-existent sub-register, and %noreg is exactly what we want. Loc.substPhysReg(VRM.getPhys(VirtReg), TRI); - } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT && - VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) { + } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT) { // FIXME: Translate SubIdx to a stackslot offset. Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg)); } else { diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 71b7f4f..366c94e 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -248,7 +248,6 @@ public: static char ID; private: - void LRE_WillEraseInstruction(MachineInstr*); bool LRE_CanEraseVirtReg(unsigned); void LRE_WillShrinkVirtReg(unsigned); void LRE_DidCloneVirtReg(unsigned, unsigned); @@ -350,11 +349,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { // LiveRangeEdit delegate methods //===----------------------------------------------------------------------===// -void RAGreedy::LRE_WillEraseInstruction(MachineInstr *MI) { - // LRE itself will remove from SlotIndexes and parent basic block. 
- VRM->RemoveMachineInstrFromMaps(MI); -} - bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { if (unsigned PhysReg = VRM->getPhys(VirtReg)) { unassign(LIS->getInterval(VirtReg), PhysReg); diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 6efdd5b..4a170bc 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -140,7 +140,6 @@ protected: MachineInstr *loadInstr(prior(miItr)); SlotIndex loadIndex = lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); - vrm->addSpillSlotUse(ss, loadInstr); SlotIndex endIndex = loadIndex.getNextIndex(); VNInfo *loadVNI = newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator()); @@ -154,7 +153,6 @@ protected: MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); - vrm->addSpillSlotUse(ss, storeInstr); SlotIndex beginIndex = storeIndex.getPrevIndex(); VNInfo *storeVNI = newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator()); diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 19064f0..faa44ad 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -58,14 +58,9 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { TRI = mf.getTarget().getRegisterInfo(); MF = &mf; - LowSpillSlot = HighSpillSlot = NO_STACK_SLOT; - Virt2PhysMap.clear(); Virt2StackSlotMap.clear(); Virt2SplitMap.clear(); - SpillSlotToUsesMap.clear(); - - SpillSlotToUsesMap.resize(8); allocatableRCRegs.clear(); for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), @@ -88,14 +83,6 @@ void VirtRegMap::grow() { unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); - if (LowSpillSlot == NO_STACK_SLOT) - LowSpillSlot = SS; - if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot) - HighSpillSlot = SS; - assert(SS >= LowSpillSlot && "Unexpected low spill slot"); - unsigned Idx = 
SS-LowSpillSlot; - while (Idx >= SpillSlotToUsesMap.size()) - SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2); ++NumSpillSlots; return SS; } @@ -129,37 +116,6 @@ void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) { Virt2StackSlotMap[virtReg] = SS; } -void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) { - if (!MF->getFrameInfo()->isFixedObjectIndex(FI)) { - // If FI < LowSpillSlot, this stack reference was produced by - // instruction selection and is not a spill - if (FI >= LowSpillSlot) { - assert(FI >= 0 && "Spill slot index should not be negative!"); - assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() - && "Invalid spill slot"); - SpillSlotToUsesMap[FI-LowSpillSlot].insert(MI); - } - } -} - -void VirtRegMap::RemoveMachineInstrFromMaps(MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isFI()) - continue; - int FI = MO.getIndex(); - if (MF->getFrameInfo()->isFixedObjectIndex(FI)) - continue; - // This stack reference was produced by instruction selection and - // is not a spill - if (FI < LowSpillSlot) - continue; - assert((unsigned)FI-LowSpillSlot < SpillSlotToUsesMap.size() - && "Invalid spill slot"); - SpillSlotToUsesMap[FI-LowSpillSlot].erase(MI); - } -} - void VirtRegMap::rewrite(SlotIndexes *Indexes) { DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n" << "********** Function: " @@ -236,7 +192,6 @@ void VirtRegMap::rewrite(SlotIndexes *Indexes) { ++NumIdCopies; if (MI->getNumOperands() == 2) { DEBUG(dbgs() << "Deleting identity copy.\n"); - RemoveMachineInstrFromMaps(MI); if (Indexes) Indexes->removeMachineInstrFromMaps(MI); // It's safe to erase MI because MII has already been incremented. diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 7213088..68d817b 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -74,12 +74,6 @@ namespace llvm { /// mapping. 
IndexedMap Virt2SplitMap; - /// LowSpillSlot, HighSpillSlot - Lowest and highest spill slot indexes. - int LowSpillSlot, HighSpillSlot; - - /// SpillSlotToUsesMap - Records uses for each register spill slot. - SmallVector, 8> SpillSlotToUsesMap; - /// createSpillSlot - Allocate a spill slot for RC from MFI. unsigned createSpillSlot(const TargetRegisterClass *RC); @@ -89,8 +83,7 @@ namespace llvm { public: static char ID; VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), - Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0), - LowSpillSlot(NO_STACK_SLOT), HighSpillSlot(NO_STACK_SLOT) { } + Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { } virtual bool runOnMachineFunction(MachineFunction &MF); virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -198,19 +191,6 @@ namespace llvm { /// the specified stack slot void assignVirt2StackSlot(unsigned virtReg, int frameIndex); - /// @brief Records a spill slot use. - void addSpillSlotUse(int FrameIndex, MachineInstr *MI); - - /// @brief Returns true if spill slot has been used. - bool isSpillSlotUsed(int FrameIndex) const { - assert(FrameIndex >= 0 && "Spill slot index should not be negative!"); - return !SpillSlotToUsesMap[FrameIndex-LowSpillSlot].empty(); - } - - /// RemoveMachineInstrFromMaps - MI is being erased, remove it from the - /// the folded instruction map and spill point map. - void RemoveMachineInstrFromMaps(MachineInstr *MI); - /// rewrite - Rewrite all instructions in MF to use only physical registers /// by mapping all virtual register operands to their assigned physical /// registers. -- cgit v1.1 From 0cb80d9f0f71ff3faaf890bb9f023e53f4e8e3dc Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sun, 13 Nov 2011 01:23:34 +0000 Subject: More dead code elimination in VirtRegMap. This thing is looking a lot like a virtual register map now. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144486 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/VirtRegMap.cpp | 12 ------------ lib/CodeGen/VirtRegMap.h | 14 -------------- 2 files changed, 26 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index faa44ad..1a78db7 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -19,7 +19,6 @@ #define DEBUG_TYPE "virtregmap" #include "VirtRegMap.h" #include "llvm/Function.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -32,12 +31,8 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallSet.h" #include using namespace llvm; @@ -62,14 +57,7 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) { Virt2StackSlotMap.clear(); Virt2SplitMap.clear(); - allocatableRCRegs.clear(); - for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(), - E = TRI->regclass_end(); I != E; ++I) - allocatableRCRegs.insert(std::make_pair(*I, - TRI->getAllocatableSet(mf, *I))); - grow(); - return false; } diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h index 68d817b..8cac311 100644 --- a/lib/CodeGen/VirtRegMap.h +++ b/lib/CodeGen/VirtRegMap.h @@ -18,22 +18,14 @@ #define LLVM_CODEGEN_VIRTREGMAP_H #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/LiveInterval.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include namespace llvm { - class LiveIntervals; class 
MachineInstr; class MachineFunction; class MachineRegisterInfo; class TargetInstrInfo; - class TargetRegisterInfo; class raw_ostream; class SlotIndexes; @@ -45,18 +37,12 @@ namespace llvm { MAX_STACK_SLOT = (1L << 18)-1 }; - enum ModRef { isRef = 1, isMod = 2, isModRef = 3 }; - typedef std::multimap > MI2VirtMapTy; - private: MachineRegisterInfo *MRI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; MachineFunction *MF; - DenseMap allocatableRCRegs; - /// Virt2PhysMap - This is a virtual to physical register /// mapping. Each virtual register is required to have an entry in /// it; even spilled virtual registers (the register mapped to a -- cgit v1.1 From 569561c7eedbd96b8f78c30505d2bdc265a1efc5 Mon Sep 17 00:00:00 2001 From: NAKAMURA Takumi Date: Sun, 13 Nov 2011 01:33:10 +0000 Subject: Prune more RALinScan. RALinScan was also here! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144487 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/CodeGen.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index 899baad..3112c22 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -42,7 +42,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializePeepholeOptimizerPass(Registry); initializeProcessImplicitDefsPass(Registry); initializePEIPass(Registry); - initializeRALinScanPass(Registry); initializeRegisterCoalescerPass(Registry); initializeRenderMachineFunctionPass(Registry); initializeSlotIndexesPass(Registry); -- cgit v1.1 From b29b950bf227b65e193abf924f77ef3fa4eceaae Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Sun, 13 Nov 2011 02:23:59 +0000 Subject: Add support for emitting both signed- and zero-extend loads. Fix SimplifyAddress to handle either a 12-bit unsigned offset or the ARM +/-imm8 offsets (addressing mode 3). This enables a load followed by an integer extend to be folded into a single load. 
For example: ldrb r1, [r0] ldrb r1, [r0] uxtb r2, r1 => mov r3, r2 mov r3, r1 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144488 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 123 ++++++++++++++++++++++++++++++----------- 1 file changed, 91 insertions(+), 32 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 4bf55fb..2551425 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -148,6 +148,8 @@ class ARMFastISel : public FastISel { virtual bool TargetSelectInstruction(const Instruction *I); virtual unsigned TargetMaterializeConstant(const Constant *C); virtual unsigned TargetMaterializeAlloca(const AllocaInst *AI); + virtual bool TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI); #include "ARMGenFastISel.inc" @@ -177,10 +179,12 @@ class ARMFastISel : public FastISel { bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, bool isZExt); - bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr); + bool ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, bool isZExt, + bool allocReg); + bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); bool ARMComputeAddress(const Value *Obj, Address &Addr); - void ARMSimplifyAddress(Address &Addr, EVT VT); + void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt); unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); unsigned ARMMaterializeInt(const Constant *C, EVT VT); @@ -213,7 +217,7 @@ class ARMFastISel : public FastISel { const MachineInstrBuilder &AddOptionalDefs(const MachineInstrBuilder &MIB); void AddLoadStoreOperands(EVT VT, Address &Addr, const MachineInstrBuilder &MIB, - unsigned Flags); + unsigned Flags, bool useAM3); }; } // end anonymous namespace @@ -724,7 +728,7 @@ bool ARMFastISel::isLoadTypeLegal(Type *Ty, MVT 
&VT) { // If this is a type than can be sign or zero-extended to a basic operation // go ahead and accept it now. - if (VT == MVT::i8 || VT == MVT::i16) + if (VT == MVT::i1 || VT == MVT::i8 || VT == MVT::i16) return true; return false; @@ -853,7 +857,7 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { return Addr.Base.Reg != 0; } -void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { +void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { assert(VT.isSimple() && "Non-simple types are invalid here!"); @@ -861,21 +865,18 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { switch (VT.getSimpleVT().SimpleTy) { default: assert(false && "Unhandled load/store type!"); + break; + case MVT::i1: + case MVT::i8: case MVT::i16: - if (isThumb2) + case MVT::i32: + if (!useAM3) // Integer loads/stores handle 12-bit offsets. needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); else - // ARM i16 integer loads/stores handle +/-imm8 offsets. + // ARM halfword and signed byte load/stores use +/-imm8 offsets. // FIXME: Negative offsets require special handling. - if (Addr.Offset > 255 || Addr.Offset < 0) - needsLowering = true; - break; - case MVT::i1: - case MVT::i8: - case MVT::i32: - // Integer loads/stores handle 12-bit offsets. - needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); + needsLowering = (Addr.Offset > 255 || Addr.Offset < 0); break; case MVT::f32: case MVT::f64: @@ -911,7 +912,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT) { void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, const MachineInstrBuilder &MIB, - unsigned Flags) { + unsigned Flags, bool useAM3) { // addrmode5 output depends on the selection dag addressing dividing the // offset by 4 that it then later multiplies. Do this here as well. if (VT.getSimpleVT().SimpleTy == MVT::f32 || @@ -931,8 +932,8 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. 
MIB.addFrameIndex(FI); - // ARM halfword load/stores need an additional operand. - if (!isThumb2 && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); + // ARM halfword and signed byte load/stores need an additional operand. + if (useAM3) MIB.addReg(0); MIB.addImm(Addr.Offset); MIB.addMemOperand(MMO); @@ -940,29 +941,39 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. MIB.addReg(Addr.Base.Reg); - // ARM halfword load/stores need an additional operand. - if (!isThumb2 && VT.getSimpleVT().SimpleTy == MVT::i16) MIB.addReg(0); + // ARM halfword and signed byte load/stores need an additional operand. + if (useAM3) MIB.addReg(0); MIB.addImm(Addr.Offset); } AddOptionalDefs(MIB); } -bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) { - +bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, + bool isZExt = true, bool allocReg = true) { assert(VT.isSimple() && "Non-simple types are invalid here!"); unsigned Opc; - TargetRegisterClass *RC; + bool useAM3 = false; + TargetRegisterClass *RC; switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; case MVT::i1: case MVT::i8: - Opc = isThumb2 ? ARM::t2LDRBi12 : ARM::LDRBi12; + if (isZExt) { + Opc = isThumb2 ? ARM::t2LDRBi12 : ARM::LDRBi12; + } else { + Opc = isThumb2 ? ARM::t2LDRSBi12 : ARM::LDRSB; + if (!isThumb2) useAM3 = true; + } RC = ARM::GPRRegisterClass; break; case MVT::i16: - Opc = isThumb2 ? ARM::t2LDRHi12 : ARM::LDRH; + if (isZExt) + Opc = isThumb2 ? ARM::t2LDRHi12 : ARM::LDRH; + else + Opc = isThumb2 ? ARM::t2LDRSHi12 : ARM::LDRSH; + if (!isThumb2) useAM3 = true; RC = ARM::GPRRegisterClass; break; case MVT::i32: @@ -979,13 +990,15 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr) { break; } // Simplify this down to something we can handle. 
- ARMSimplifyAddress(Addr, VT); + ARMSimplifyAddress(Addr, VT, useAM3); // Create the base instruction, then add the operands. - ResultReg = createResultReg(RC); + if (allocReg) + ResultReg = createResultReg(RC); + assert (ResultReg > 255 && "Expected an allocated virtual register."); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg); - AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad); + AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOLoad, useAM3); return true; } @@ -1011,6 +1024,7 @@ bool ARMFastISel::SelectLoad(const Instruction *I) { bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { unsigned StrOpc; + bool useAM3 = false; switch (VT.getSimpleVT().SimpleTy) { // This is mostly going to be Neon/vector support. default: return false; @@ -1028,6 +1042,7 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { break; case MVT::i16: StrOpc = isThumb2 ? ARM::t2STRHi12 : ARM::STRH; + if (!isThumb2) useAM3 = true; break; case MVT::i32: StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12; @@ -1042,13 +1057,13 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { break; } // Simplify this down to something we can handle. - ARMSimplifyAddress(Addr, VT); + ARMSimplifyAddress(Addr, VT, useAM3); // Create the base instruction, then add the operands. MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(StrOpc)) .addReg(SrcReg, getKillRegState(true)); - AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore); + AddLoadStoreOperands(VT, Addr, MIB, MachineMemOperand::MOStore, useAM3); return true; } @@ -2231,8 +2246,6 @@ unsigned ARMFastISel::ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool ARMFastISel::SelectIntExt(const Instruction *I) { // On ARM, in general, integer casts don't involve legal types; this code // handles promotable integers. 
- // FIXME: We could save an instruction in many cases by special-casing - // load instructions. Type *DestTy = I->getType(); Value *Src = I->getOperand(0); Type *SrcTy = Src->getType(); @@ -2300,6 +2313,52 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { return false; } +/// TryToFoldLoad - The specified machine instr operand is a vreg, and that +/// vreg is being provided by the specified load instruction. If possible, +/// try to fold the load as an operand to the instruction, returning true if +/// successful. +bool ARMFastISel::TryToFoldLoad(MachineInstr *MI, unsigned OpNo, + const LoadInst *LI) { + // Verify we have a legal type before going any further. + MVT VT; + if (!isLoadTypeLegal(LI->getType(), VT)) + return false; + + // Combine load followed by zero- or sign-extend. + // ldrb r1, [r0] ldrb r1, [r0] + // uxtb r2, r1 => + // mov r3, r2 mov r3, r1 + bool isZExt = true; + switch(MI->getOpcode()) { + default: return false; + case ARM::SXTH: + case ARM::t2SXTH: + isZExt = false; + case ARM::UXTH: + case ARM::t2UXTH: + if (VT != MVT::i16) + return false; + break; + case ARM::SXTB: + case ARM::t2SXTB: + isZExt = false; + case ARM::UXTB: + case ARM::t2UXTB: + if (VT != MVT::i8) + return false; + break; + } + // See if we can handle this address. + Address Addr; + if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; + + unsigned ResultReg = MI->getOperand(0).getReg(); + if (!ARMEmitLoad(VT, ResultReg, Addr, isZExt, false)) + return false; + MI->eraseFromParent(); + return true; +} + namespace llvm { llvm::FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo) { // Completely untested on non-darwin. -- cgit v1.1 From 5be833de769608254fdfc56e8173000e874a8154 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Sun, 13 Nov 2011 04:25:02 +0000 Subject: Fix comments. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144490 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 2551425..88ebd19 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -874,7 +874,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { // Integer loads/stores handle 12-bit offsets. needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); else - // ARM halfword and signed byte load/stores use +/-imm8 offsets. + // ARM halfword load/stores and signed byte loads use +/-imm8 offsets. // FIXME: Negative offsets require special handling. needsLowering = (Addr.Offset > 255 || Addr.Offset < 0); break; @@ -932,7 +932,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. MIB.addFrameIndex(FI); - // ARM halfword and signed byte load/stores need an additional operand. + // ARM halfword load/stores and signed byte loads need an additional operand. if (useAM3) MIB.addReg(0); MIB.addImm(Addr.Offset); @@ -941,7 +941,7 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, // Now add the rest of the operands. MIB.addReg(Addr.Base.Reg); - // ARM halfword and signed byte load/stores need an additional operand. + // ARM halfword load/stores and signed byte loads need an additional operand. if (useAM3) MIB.addReg(0); MIB.addImm(Addr.Offset); -- cgit v1.1 From a517ab155b371e2911450d0048fb158931b8e68c Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Sun, 13 Nov 2011 05:14:43 +0000 Subject: Temporarily disable SelectIntrinsicCall when in ARM mode. This is causing failures. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144492 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 88ebd19..f3e6fc7 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2136,6 +2136,7 @@ bool ARMFastISel::SelectCall(const Instruction *I, } bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { + if (!isThumb2) return false; // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; -- cgit v1.1 From 9eb674880b98cbeca0cd5b3f0265b77282d48b4a Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Sun, 13 Nov 2011 09:44:21 +0000 Subject: The order in which the predicate is added differs between Thumb and ARM mode. Fix predicate when in ARM mode and restore SelectIntrinsicCall. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144494 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index f3e6fc7..d330367 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -2108,18 +2108,25 @@ bool ARMFastISel::SelectCall(const Instruction *I, MachineInstrBuilder MIB; unsigned CallOpc = ARMSelectCallOp(GV); // Explicitly adding the predicate here. - if(isThumb2) - // Explicitly adding the predicate here. - MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CallOpc))); - else + if(isThumb2) { // Explicitly adding the predicate here. 
MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc))); - if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); - else - MIB.addExternalSymbol(IntrMemName, 0); + if (!IntrMemName) + MIB.addGlobalAddress(GV, 0, 0); + else + MIB.addExternalSymbol(IntrMemName, 0); + } else { + if (!IntrMemName) + // Explicitly adding the predicate here. + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc)) + .addGlobalAddress(GV, 0, 0)); + else + MIB = AddDefaultPred(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(CallOpc)) + .addExternalSymbol(IntrMemName, 0)); + } // Add implicit physical register uses to the call. for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) @@ -2136,7 +2143,6 @@ bool ARMFastISel::SelectCall(const Instruction *I, } bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { - if (!isThumb2) return false; // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; -- cgit v1.1 From df234353fb396e84e7a3a1cdd94f73681e65bd88 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 13 Nov 2011 11:20:44 +0000 Subject: Rewrite #3 of machine block placement. This is based somewhat on the second algorithm, but only loosely. It is more heavily based on the last discussion I had with Andy. It continues to walk from the inner-most loop outward, but there is a key difference. With this algorithm we ensure that as we visit each loop, the entire loop is merged into a single chain. At the end, the entire function is treated as a "loop", and merged into a single chain. This chain forms the desired sequence of blocks within the function. Switching to a single algorithm removes my biggest problem with the previous approaches -- they had different behavior depending on which system triggered the layout. Now there is exactly one algorithm and one basis for the decision making. The other key difference is how the chain is formed. 
This is based heavily on the idea Andy mentioned of keeping a worklist of blocks that are viable layout successors based on the CFG. Having this set allows us to consistently select the best layout successor for each block. It is expensive though. The code here remains very rough. There is a lot that needs to be done to clean up the code, and to make the runtime cost of this pass much lower. Very much WIP, but this was a giant chunk of code and I'd rather folks see it sooner than later. Everything remains behind a flag of course. I've added a couple of tests to exercise the issues that this iteration was motivated by: loop structure preservation. I've also fixed one test that was exhibiting the broken behavior of the previous version. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144495 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 395 ++++++++++++++++++++++------------ 1 file changed, 256 insertions(+), 139 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 53a8779..6aa4268 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -115,7 +115,7 @@ public: /// function. It also registers itself as the chain that block participates /// in with the BlockToChain mapping. BlockChain(BlockToChainMapType &BlockToChain, MachineBasicBlock *BB) - : Blocks(1, BB), BlockToChain(BlockToChain) { + : Blocks(1, BB), BlockToChain(BlockToChain), LoopPredecessors(0) { assert(BB && "Cannot create a chain with a null basic block"); BlockToChain[BB] = this; } @@ -138,7 +138,6 @@ public: void merge(MachineBasicBlock *BB, BlockChain *Chain) { assert(BB); assert(!Blocks.empty()); - assert(Blocks.back()->isSuccessor(BB)); // Fast path in case we don't have a chain already. if (!Chain) { @@ -160,6 +159,12 @@ public: BlockToChain[*BI] = this; } } + + /// \brief Count of predecessors within the loop currently being processed. 
+ /// + /// This count is updated at each loop we process to represent the number of + /// in-loop predecessors of this chain. + unsigned LoopPredecessors; }; } @@ -199,12 +204,15 @@ class MachineBlockPlacement : public MachineFunctionPass { /// between basic blocks. DenseMap BlockToChain; - BlockChain *CreateChain(MachineBasicBlock *BB); - void mergeSuccessor(MachineBasicBlock *BB, BlockChain *Chain, - BlockFilterSet *Filter = 0); + void markChainSuccessors(BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl &Blocks, + const BlockFilterSet *BlockFilter = 0); + void buildChain(MachineBasicBlock *BB, BlockChain &Chain, + SmallVectorImpl &Blocks, + const BlockFilterSet *BlockFilter = 0); void buildLoopChains(MachineFunction &F, MachineLoop &L); void buildCFGChains(MachineFunction &F); - void placeChainsTopologically(MachineFunction &F); void AlignLoops(MachineFunction &F); public: @@ -264,96 +272,130 @@ static std::string getBlockNum(MachineBasicBlock *BB) { } #endif -/// \brief Helper to create a new chain for a single BB. -/// -/// Takes care of growing the Chains, setting up the BlockChain object, and any -/// debug checking logic. -/// \returns A pointer to the new BlockChain. -BlockChain *MachineBlockPlacement::CreateChain(MachineBasicBlock *BB) { - BlockChain *Chain = - new (ChainAllocator.Allocate()) BlockChain(BlockToChain, BB); - return Chain; +void MachineBlockPlacement::markChainSuccessors( + BlockChain &Chain, + MachineBasicBlock *LoopHeaderBB, + SmallVectorImpl &BlockWorkList, + const BlockFilterSet *BlockFilter) { + // Walk all the blocks in this chain, marking their successors as having + // a predecessor placed. + for (BlockChain::iterator CBI = Chain.begin(), CBE = Chain.end(); + CBI != CBE; ++CBI) { + // Add any successors for which this is the only un-placed in-loop + // predecessor to the worklist as a viable candidate for CFG-neutral + // placement. 
No subsequent placement of this block will violate the CFG + // shape, so we get to use heuristics to choose a favorable placement. + for (MachineBasicBlock::succ_iterator SI = (*CBI)->succ_begin(), + SE = (*CBI)->succ_end(); + SI != SE; ++SI) { + if (BlockFilter && !BlockFilter->count(*SI)) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + // Disregard edges within a fixed chain, or edges to the loop header. + if (&Chain == &SuccChain || *SI == LoopHeaderBB) + continue; + + // This is a cross-chain edge that is within the loop, so decrement the + // loop predecessor count of the destination chain. + if (SuccChain.LoopPredecessors > 0 && --SuccChain.LoopPredecessors == 0) + BlockWorkList.push_back(*SI); + } + } } -/// \brief Merge a chain with any viable successor. -/// -/// This routine walks the predecessors of the current block, looking for -/// viable merge candidates. It has strict rules it uses to determine when -/// a predecessor can be merged with the current block, which center around -/// preserving the CFG structure. It performs the merge if any viable candidate -/// is found. -void MachineBlockPlacement::mergeSuccessor(MachineBasicBlock *BB, - BlockChain *Chain, - BlockFilterSet *Filter) { +void MachineBlockPlacement::buildChain( + MachineBasicBlock *BB, + BlockChain &Chain, + SmallVectorImpl &BlockWorkList, + const BlockFilterSet *BlockFilter) { + const BranchProbability HotProb(4, 5); // 80% assert(BB); - assert(Chain); + assert(BlockToChain[BB] == &Chain); + assert(*Chain.begin() == BB); + MachineBasicBlock *LoopHeaderBB = BB; + markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); + BB = *llvm::prior(Chain.end()); + for (;;) { + assert(BB); + assert(BlockToChain[BB] == &Chain); + assert(*llvm::prior(Chain.end()) == BB); + + // Look for the best viable successor if there is one to place immediately + // after this block. 
+ MachineBasicBlock *BestSucc = 0; + BranchProbability BestProb = BranchProbability::getZero(); + DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); + SI != SE; ++SI) { + if (BlockFilter && !BlockFilter->count(*SI)) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); + continue; + } - // If this block is not at the end of its chain, it cannot merge with any - // other chain. - if (Chain && *llvm::prior(Chain->end()) != BB) - return; + BranchProbability SuccProb = MBPI->getEdgeProbability(BB, *SI); - // Walk through the successors looking for the highest probability edge. - MachineBasicBlock *Successor = 0; - BranchProbability BestProb = BranchProbability::getZero(); - DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); - SI != SE; ++SI) { - if (BB == *SI || (Filter && !Filter->count(*SI))) - continue; + // Only consider successors which are either "hot", or wouldn't violate + // any CFG constraints. + if (SuccChain.LoopPredecessors != 0 && SuccProb < HotProb) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + continue; + } - BranchProbability SuccProb = MBPI->getEdgeProbability(BB, *SI); - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb << "\n"); - if (!Successor || SuccProb > BestProb || (!(SuccProb < BestProb) && - BB->isLayoutSuccessor(*SI))) { - Successor = *SI; + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob)" + << (SuccChain.LoopPredecessors != 0 ? 
" (CFG break)" : "") + << "\n"); + if (BestSucc && BestProb >= SuccProb) + continue; + BestSucc = *SI; BestProb = SuccProb; } - } - if (!Successor) - return; - // Grab a chain if it exists already for this successor and make sure the - // successor is at the start of the chain as we can't merge mid-chain. Also, - // if the successor chain is the same as our chain, we're already merged. - BlockChain *SuccChain = BlockToChain[Successor]; - if (SuccChain && (SuccChain == Chain || Successor != *SuccChain->begin())) - return; - - // We only merge chains across a CFG merge when the desired merge path is - // significantly hotter than the incoming edge. We define a hot edge more - // strictly than the BranchProbabilityInfo does, as the two predecessor - // blocks may have dramatically different incoming probabilities we need to - // account for. Therefor we use the "global" edge weight which is the - // branch's probability times the block frequency of the predecessor. - BlockFrequency MergeWeight = MBFI->getBlockFreq(BB); - MergeWeight *= MBPI->getEdgeProbability(BB, Successor); - // We only want to consider breaking the CFG when the merge weight is much - // higher (80% vs. 20%), so multiply it by 1/4. This will require the merged - // edge to be 4x more likely before we disrupt the CFG. This number matches - // the definition of "hot" in BranchProbabilityAnalysis (80% vs. 20%). - MergeWeight *= BranchProbability(1, 4); - for (MachineBasicBlock::pred_iterator PI = Successor->pred_begin(), - PE = Successor->pred_end(); - PI != PE; ++PI) { - if (BB == *PI || Successor == *PI) continue; - BlockFrequency PredWeight = MBFI->getBlockFreq(*PI); - PredWeight *= MBPI->getEdgeProbability(*PI, Successor); - - // Return on the first predecessor we find which outstrips our merge weight. - if (MergeWeight < PredWeight) + // If an immediate successor isn't available, look for the best viable + // block among those we've identified as not violating the loop's CFG at + // this point. 
This won't be a fallthrough, but it will increase locality. + if (!BestSucc) { + BlockFrequency BestFreq; + for (SmallVectorImpl::iterator WBI = BlockWorkList.begin(), + WBE = BlockWorkList.end(); + WBI != WBE; ++WBI) { + if (BlockFilter && !BlockFilter->count(*WBI)) + continue; + BlockChain &SuccChain = *BlockToChain[*WBI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*WBI) + << " -> Already merged!\n"); + continue; + } + assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); + + BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq + << " (freq)\n"); + if (BestSucc && BestFreq >= CandidateFreq) + continue; + BestSucc = *WBI; + BestFreq = CandidateFreq; + } + } + if (!BestSucc) { + DEBUG(dbgs() << "Finished forming chain for header block " + << getBlockNum(*Chain.begin()) << "\n"); return; - DEBUG(dbgs() << "Breaking CFG edge!\n" - << " Edge from " << getBlockNum(BB) << " to " - << getBlockNum(Successor) << ": " << MergeWeight << "\n" - << " vs. " << getBlockNum(BB) << " to " - << getBlockNum(*PI) << ": " << PredWeight << "\n"); - } + } - DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to " - << getBlockNum(Successor) << "\n"); - Chain->merge(Successor, SuccChain); + // Place this block, updating the datastructures to reflect its placement. + BlockChain &SuccChain = *BlockToChain[BestSucc]; + DEBUG(dbgs() << "Merging from " << getBlockNum(BB) + << " to " << getBlockNum(BestSucc) << "\n"); + markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); + Chain.merge(BestSucc, &SuccChain); + BB = *llvm::prior(Chain.end()); + } } /// \brief Forms basic block chains from the natural loop structures. @@ -362,86 +404,162 @@ void MachineBlockPlacement::mergeSuccessor(MachineBasicBlock *BB, /// as much as possible. 
We can then stitch the chains together in a way which /// both preserves the topological structure and minimizes taken conditional /// branches. -void MachineBlockPlacement::buildLoopChains(MachineFunction &F, MachineLoop &L) { +void MachineBlockPlacement::buildLoopChains(MachineFunction &F, + MachineLoop &L) { // First recurse through any nested loops, building chains for those inner // loops. for (MachineLoop::iterator LI = L.begin(), LE = L.end(); LI != LE; ++LI) buildLoopChains(F, **LI); - SmallPtrSet LoopBlockSet(L.block_begin(), - L.block_end()); + SmallVector BlockWorkList; + BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); - // Begin building up a set of chains of blocks within this loop which should - // remain contiguous. Some of the blocks already belong to a chain which - // represents an inner loop. - for (MachineLoop::block_iterator BI = L.block_begin(), BE = L.block_end(); + // FIXME: This is a really lame way of walking the chains in the loop: we + // walk the blocks, and use a set to prevent visiting a particular chain + // twice. 
+ SmallPtrSet UpdatedPreds; + for (MachineLoop::block_iterator BI = L.block_begin(), + BE = L.block_end(); BI != BE; ++BI) { - MachineBasicBlock *BB = *BI; - BlockChain *Chain = BlockToChain[BB]; - if (!Chain) Chain = CreateChain(BB); - mergeSuccessor(BB, Chain, &LoopBlockSet); + BlockChain &Chain = *BlockToChain[*BI]; + if (!UpdatedPreds.insert(&Chain) || BI == L.block_begin()) + continue; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain || !LoopBlockSet.count(*PI)) + continue; + ++Chain.LoopPredecessors; + } + } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(*BI); } + + BlockChain &LoopChain = *BlockToChain[L.getHeader()]; + buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet); + + DEBUG({ + if (LoopChain.LoopPredecessors) + dbgs() << "Loop chain contains a block without its preds placed!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; + for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); + BCI != BCE; ++BCI) + if (!LoopBlockSet.erase(*BCI)) + dbgs() << "Loop chain contains a block not contained by the loop!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + + if (!LoopBlockSet.empty()) + for (SmallPtrSet::iterator LBI = LoopBlockSet.begin(), LBE = LoopBlockSet.end(); + LBI != LBE; ++LBI) + dbgs() << "Loop contains blocks never placed into a chain!\n" + << " Loop header: " << getBlockName(*L.block_begin()) << "\n" + << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" + << " Bad block: " << getBlockName(*LBI) << 
"\n"; + }); } void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { - // First build any loop-based chains. + // Ensure that every BB in the function has an associated chain to simplify + // the assumptions of the remaining algorithm. + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + BlockToChain[&*FI] = + new (ChainAllocator.Allocate()) BlockChain(BlockToChain, &*FI); + + // Build any loop-based chains. for (MachineLoopInfo::iterator LI = MLI->begin(), LE = MLI->end(); LI != LE; ++LI) buildLoopChains(F, **LI); - // Now walk the blocks of the function forming chains where they don't - // violate any CFG structure. - for (MachineFunction::iterator BI = F.begin(), BE = F.end(); - BI != BE; ++BI) { - MachineBasicBlock *BB = BI; - BlockChain *Chain = BlockToChain[BB]; - if (!Chain) Chain = CreateChain(BB); - mergeSuccessor(BB, Chain); - } -} + SmallVector BlockWorkList; -void MachineBlockPlacement::placeChainsTopologically(MachineFunction &F) { - MachineBasicBlock *EntryB = &F.front(); - assert(BlockToChain[EntryB] && "Missing chain for entry block"); - assert(*BlockToChain[EntryB]->begin() == EntryB && - "Entry block is not the head of the entry block chain"); - - // Walk the blocks in RPO, and insert each block for a chain in order the - // first time we see that chain. 
- MachineFunction::iterator InsertPos = F.begin(); - SmallPtrSet VisitedChains; - ReversePostOrderTraversal RPOT(EntryB); - typedef ReversePostOrderTraversal::rpo_iterator - rpo_iterator; - for (rpo_iterator I = RPOT.begin(), E = RPOT.end(); I != E; ++I) { - BlockChain *Chain = BlockToChain[*I]; - assert(Chain); - if(!VisitedChains.insert(Chain)) + SmallPtrSet UpdatedPreds; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + MachineBasicBlock *BB = &*FI; + BlockChain &Chain = *BlockToChain[BB]; + if (!UpdatedPreds.insert(&Chain)) continue; - for (BlockChain::iterator BI = Chain->begin(), BE = Chain->end(); BI != BE; - ++BI) { - DEBUG(dbgs() << (BI == Chain->begin() ? "Placing chain " - : " ... ") - << getBlockName(*BI) << "\n"); - if (InsertPos != MachineFunction::iterator(*BI)) - F.splice(InsertPos, *BI); - else - ++InsertPos; + + assert(Chain.LoopPredecessors == 0); + for (BlockChain::iterator BCI = Chain.begin(), BCE = Chain.end(); + BCI != BCE; ++BCI) { + assert(BlockToChain[*BCI] == &Chain); + for (MachineBasicBlock::pred_iterator PI = (*BCI)->pred_begin(), + PE = (*BCI)->pred_end(); + PI != PE; ++PI) { + if (BlockToChain[*PI] == &Chain) + continue; + ++Chain.LoopPredecessors; + } } + + if (Chain.LoopPredecessors == 0) + BlockWorkList.push_back(BB); } - // Now that every block is in its final position, update all of the - // terminators. 
+ BlockChain &FunctionChain = *BlockToChain[&F.front()]; + buildChain(&F.front(), FunctionChain, BlockWorkList); + + typedef SmallPtrSet FunctionBlockSetType; + DEBUG({ + FunctionBlockSetType FunctionBlockSet; + for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) + FunctionBlockSet.insert(FI); + + for (BlockChain::iterator BCI = FunctionChain.begin(), BCE = FunctionChain.end(); + BCI != BCE; ++BCI) + if (!FunctionBlockSet.erase(*BCI)) + dbgs() << "Function chain contains a block not in the function!\n" + << " Bad block: " << getBlockName(*BCI) << "\n"; + + if (!FunctionBlockSet.empty()) + for (SmallPtrSet::iterator FBI = FunctionBlockSet.begin(), + FBE = FunctionBlockSet.end(); FBI != FBE; ++FBI) + dbgs() << "Function contains blocks never placed into a chain!\n" + << " Bad block: " << getBlockName(*FBI) << "\n"; + }); + + // Splice the blocks into place. + MachineFunction::iterator InsertPos = F.begin(); SmallVector Cond; // For AnalyzeBranch. - for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + for (BlockChain::iterator BI = FunctionChain.begin(), BE = FunctionChain.end(); + BI != BE; ++BI) { + DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain " + : " ... ") + << getBlockName(*BI) << "\n"); + if (InsertPos != MachineFunction::iterator(*BI)) + F.splice(InsertPos, *BI); + else + ++InsertPos; + + // Update the terminator of the previous block. + if (BI == FunctionChain.begin()) + continue; + MachineBasicBlock *PrevBB = llvm::prior(MachineFunction::iterator(*BI)); + // FIXME: It would be awesome of updateTerminator would just return rather // than assert when the branch cannot be analyzed in order to remove this // boiler plate. Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. - if (!TII->AnalyzeBranch(*FI, TBB, FBB, Cond)) - FI->updateTerminator(); + if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) + PrevBB->updateTerminator(); } + + // Fixup the last block. 
+ Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (!TII->AnalyzeBranch(F.back(), TBB, FBB, Cond)) + F.back().updateTerminator(); } /// \brief Recursive helper to align a loop and any nested loops. @@ -479,7 +597,6 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { assert(BlockToChain.empty()); buildCFGChains(F); - placeChainsTopologically(F); AlignLoops(F); BlockToChain.clear(); -- cgit v1.1 From 9fd4e056e433b286f0e6576046ef2242365bfc38 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 13 Nov 2011 11:34:53 +0000 Subject: Hoist a nested loop into its own method. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144496 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 86 +++++++++++++++++++++-------------- 1 file changed, 53 insertions(+), 33 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 6aa4268..3eb2998 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -208,6 +208,9 @@ class MachineBlockPlacement : public MachineFunctionPass { MachineBasicBlock *LoopHeaderBB, SmallVectorImpl &Blocks, const BlockFilterSet *BlockFilter = 0); + MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, + BlockChain &Chain, + const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl &Blocks, const BlockFilterSet *BlockFilter = 0); @@ -303,12 +306,60 @@ void MachineBlockPlacement::markChainSuccessors( } } +/// \brief Select the best successor for a block. +/// +/// This looks across all successors of a particular block and attempts to +/// select the "best" one to be the layout successor. It only considers direct +/// successors which also pass the block filter. It will attempt to avoid +/// breaking CFG structure, but cave and break such structures in the case of +/// very hot successor edges. 
+/// +/// \returns The best successor block found, or null if none are viable. +MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( + MachineBasicBlock *BB, BlockChain &Chain, + const BlockFilterSet *BlockFilter) { + const BranchProbability HotProb(4, 5); // 80% + + MachineBasicBlock *BestSucc = 0; + BranchProbability BestProb = BranchProbability::getZero(); + DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); + SI != SE; ++SI) { + if (BlockFilter && !BlockFilter->count(*SI)) + continue; + BlockChain &SuccChain = *BlockToChain[*SI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); + continue; + } + + BranchProbability SuccProb = MBPI->getEdgeProbability(BB, *SI); + + // Only consider successors which are either "hot", or wouldn't violate + // any CFG constraints. + if (SuccChain.LoopPredecessors != 0 && SuccProb < HotProb) { + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); + continue; + } + + DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb + << " (prob)" + << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") + << "\n"); + if (BestSucc && BestProb >= SuccProb) + continue; + BestSucc = *SI; + BestProb = SuccProb; + } + return BestSucc; +} + void MachineBlockPlacement::buildChain( MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl &BlockWorkList, const BlockFilterSet *BlockFilter) { - const BranchProbability HotProb(4, 5); // 80% assert(BB); assert(BlockToChain[BB] == &Chain); assert(*Chain.begin() == BB); @@ -322,38 +373,7 @@ void MachineBlockPlacement::buildChain( // Look for the best viable successor if there is one to place immediately // after this block. 
- MachineBasicBlock *BestSucc = 0; - BranchProbability BestProb = BranchProbability::getZero(); - DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); - SI != SE; ++SI) { - if (BlockFilter && !BlockFilter->count(*SI)) - continue; - BlockChain &SuccChain = *BlockToChain[*SI]; - if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> Already merged!\n"); - continue; - } - - BranchProbability SuccProb = MBPI->getEdgeProbability(BB, *SI); - - // Only consider successors which are either "hot", or wouldn't violate - // any CFG constraints. - if (SuccChain.LoopPredecessors != 0 && SuccProb < HotProb) { - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> CFG conflict\n"); - continue; - } - - DEBUG(dbgs() << " " << getBlockName(*SI) << " -> " << SuccProb - << " (prob)" - << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") - << "\n"); - if (BestSucc && BestProb >= SuccProb) - continue; - BestSucc = *SI; - BestProb = SuccProb; - } + MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at -- cgit v1.1 From 729bec89bd8c4368a741359fb882967ce01a6909 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 13 Nov 2011 11:34:55 +0000 Subject: Add a missing doxygen comment for a helper method. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144497 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 3eb2998..ec0877f 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -275,6 +275,12 @@ static std::string getBlockNum(MachineBasicBlock *BB) { } #endif +/// \brief Mark a chain's successors as having one fewer preds. +/// +/// When a chain is being merged into the "placed" chain, this routine will +/// quickly walk the successors of each block in the chain and mark them as +/// having one fewer active predecessor. It also adds any successors of this +/// chain which reach the zero-predecessor state to the worklist passed in. void MachineBlockPlacement::markChainSuccessors( BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, -- cgit v1.1 From f3fc0050abc1698504cbaede7766c4180c076928 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 13 Nov 2011 11:42:26 +0000 Subject: Hoist another gross nested loop into a helper method. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144498 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 67 +++++++++++++++++++++++------------ 1 file changed, 44 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index ec0877f..f934776 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -211,6 +211,9 @@ class MachineBlockPlacement : public MachineFunctionPass { MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, const BlockFilterSet *BlockFilter); + MachineBasicBlock *selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl &WorkList, + const BlockFilterSet *BlockFilter); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, SmallVectorImpl &Blocks, const BlockFilterSet *BlockFilter = 0); @@ -361,6 +364,45 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( return BestSucc; } +/// \brief Select the best block from a worklist. +/// +/// This looks through the provided worklist as a list of candidate basic +/// blocks and select the most profitable one to place. The definition of +/// profitable only really makes sense in the context of a loop. This returns +/// the most frequently visited block in the worklist, which in the case of +/// a loop, is the one most desirable to be physically close to the rest of the +/// loop body in order to improve icache behavior. +/// +/// \returns The best block found, or null if none are viable. 
+MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( + BlockChain &Chain, SmallVectorImpl &WorkList, + const BlockFilterSet *BlockFilter) { + MachineBasicBlock *BestBlock = 0; + BlockFrequency BestFreq; + for (SmallVectorImpl::iterator WBI = WorkList.begin(), + WBE = WorkList.end(); + WBI != WBE; ++WBI) { + if (BlockFilter && !BlockFilter->count(*WBI)) + continue; + BlockChain &SuccChain = *BlockToChain[*WBI]; + if (&SuccChain == &Chain) { + DEBUG(dbgs() << " " << getBlockName(*WBI) + << " -> Already merged!\n"); + continue; + } + assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); + + BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); + DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq + << " (freq)\n"); + if (BestBlock && BestFreq >= CandidateFreq) + continue; + BestBlock = *WBI; + BestFreq = CandidateFreq; + } + return BestBlock; +} + void MachineBlockPlacement::buildChain( MachineBasicBlock *BB, BlockChain &Chain, @@ -384,30 +426,9 @@ void MachineBlockPlacement::buildChain( // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at // this point. This won't be a fallthrough, but it will increase locality. 
- if (!BestSucc) { - BlockFrequency BestFreq; - for (SmallVectorImpl::iterator WBI = BlockWorkList.begin(), - WBE = BlockWorkList.end(); - WBI != WBE; ++WBI) { - if (BlockFilter && !BlockFilter->count(*WBI)) - continue; - BlockChain &SuccChain = *BlockToChain[*WBI]; - if (&SuccChain == &Chain) { - DEBUG(dbgs() << " " << getBlockName(*WBI) - << " -> Already merged!\n"); - continue; - } - assert(SuccChain.LoopPredecessors == 0 && "Found CFG-violating block"); + if (!BestSucc) + BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); - BlockFrequency CandidateFreq = MBFI->getBlockFreq(*WBI); - DEBUG(dbgs() << " " << getBlockName(*WBI) << " -> " << CandidateFreq - << " (freq)\n"); - if (BestSucc && BestFreq >= CandidateFreq) - continue; - BestSucc = *WBI; - BestFreq = CandidateFreq; - } - } if (!BestSucc) { DEBUG(dbgs() << "Finished forming chain for header block " << getBlockNum(*Chain.begin()) << "\n"); -- cgit v1.1 From 6527ecc9189058b762c699521462956995f59dd8 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 13 Nov 2011 12:17:28 +0000 Subject: Teach MBP to force-merge layout successors for blocks with unanalyzable branches that also may involve fallthrough. In the case of blocks with no fallthrough, we can still re-order the blocks profitably. For example instruction decoding will in some cases continue past an indirect jump, making laying out its most likely successor there profitable. Note, no test case. I don't know how to write a test case that exercises this logic, but it matches the described desired semantics in discussions with Jakob and others. If anyone has a nice example of IR that will trigger this, that would be lovely. Also note, there are still assertion failures in real world code with this. I'm digging into those next, now that I know this isn't the cause. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144499 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index f934776..d0b6926 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -413,15 +413,32 @@ void MachineBlockPlacement::buildChain( assert(*Chain.begin() == BB); MachineBasicBlock *LoopHeaderBB = BB; markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); + SmallVector Cond; // For AnalyzeBranch. BB = *llvm::prior(Chain.end()); for (;;) { assert(BB); assert(BlockToChain[BB] == &Chain); assert(*llvm::prior(Chain.end()) == BB); + MachineBasicBlock *BestSucc = 0; + + // Check for unreasonable branches, and forcibly merge the existing layout + // successor for them. We can handle cases that AnalyzeBranch can't: jump + // tables etc are fine. The case we want to handle specially is when there + // is potential fallthrough, but the branch cannot be analyzed. This + // includes blocks without terminators as well as other cases. + Cond.clear(); + MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. + if (TII->AnalyzeBranch(*BB, TBB, FBB, Cond) && BB->canFallThrough()) { + MachineFunction::iterator I(BB); + assert(llvm::next(I) != BB->getParent()->end() && + "The final block in the function can fallthrough!"); + BestSucc = llvm::next(I); + } - // Look for the best viable successor if there is one to place immediately - // after this block. - MachineBasicBlock *BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + // Otherwise, look for the best viable successor if there is one to place + // immediately after this block. 
+ if (!BestSucc) + BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at -- cgit v1.1 From e6a6277d3683ef7aa9d799b32b5194a79eefde62 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 13 Nov 2011 17:31:07 +0000 Subject: Add BLSI, BLSMSK, and BLSR to getTargetNodeName. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144502 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f1c80a2..cf03c30 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -11036,12 +11036,13 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::PSIGNB: return "X86ISD::PSIGNB"; case X86ISD::PSIGNW: return "X86ISD::PSIGNW"; case X86ISD::PSIGND: return "X86ISD::PSIGND"; + case X86ISD::BLENDV: return "X86ISD::BLENDV"; + case X86ISD::FHADD: return "X86ISD::FHADD"; + case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMIN: return "X86ISD::FMIN"; case X86ISD::FRSQRT: return "X86ISD::FRSQRT"; case X86ISD::FRCP: return "X86ISD::FRCP"; - case X86ISD::FHADD: return "X86ISD::FHADD"; - case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::TLSADDR: return "X86ISD::TLSADDR"; case X86ISD::TLSCALL: return "X86ISD::TLSCALL"; case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN"; @@ -11081,6 +11082,9 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { case X86ISD::XOR: return "X86ISD::XOR"; case X86ISD::AND: return "X86ISD::AND"; case X86ISD::ANDN: return "X86ISD::ANDN"; + case X86ISD::BLSI: return "X86ISD::BLSI"; + case X86ISD::BLSMSK: return "X86ISD::BLSMSK"; + case X86ISD::BLSR: return "X86ISD::BLSR"; case 
X86ISD::MUL_IMM: return "X86ISD::MUL_IMM"; case X86ISD::PTEST: return "X86ISD::PTEST"; case X86ISD::TESTP: return "X86ISD::TESTP"; -- cgit v1.1 From 2debd48ca790ac01be6e12e094fdf4fdcadc8364 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sun, 13 Nov 2011 20:45:27 +0000 Subject: Rename SlotIndexes to match how they are used. The old naming scheme (load/use/def/store) can be traced back to an old linear scan article, but the names don't match how slots are actually used. The load and store slots are not needed after the deferred spill code insertion framework was deleted. The use and def slots don't make any sense because we are using half-open intervals as is customary in C code, but the names suggest closed intervals. In reality, these slots were used to distinguish early-clobber defs from normal defs. The new naming scheme also has 4 slots, but the names match how the slots are really used. This is a purely mechanical renaming, but some of the code makes a lot more sense now. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144503 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/InlineSpiller.cpp | 24 ++++++------- lib/CodeGen/LiveDebugVariables.cpp | 8 ++--- lib/CodeGen/LiveInterval.cpp | 2 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 64 +++++++++++++++++------------------ lib/CodeGen/LiveRangeEdit.cpp | 8 ++--- lib/CodeGen/MachineVerifier.cpp | 19 ++++++----- lib/CodeGen/RegisterCoalescer.cpp | 22 ++++++------ lib/CodeGen/RenderMachineFunction.cpp | 17 +++++----- lib/CodeGen/SlotIndexes.cpp | 10 +++--- lib/CodeGen/Spiller.cpp | 4 +-- lib/CodeGen/SplitKit.cpp | 10 +++--- lib/CodeGen/Splitter.cpp | 8 ++--- lib/CodeGen/StrongPHIElimination.cpp | 14 ++++---- 13 files changed, 107 insertions(+), 103 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 0066b7a..021c381 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -578,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getUseIndex()); + LiveRange *SrcLR = SrcLI.getLiveRangeContaining(VNI->def.getRegSlot(true)); assert(SrcLR && "Copy from non-existing value"); // Check if this COPY kills its source. SVI->second.KillsSource = (SrcLR->end == VNI->def); @@ -665,8 +665,8 @@ void InlineSpiller::analyzeSiblingValues() { /// a spill at a better location. 
bool InlineSpiller::hoistSpill(LiveInterval &SpillLI, MachineInstr *CopyMI) { SlotIndex Idx = LIS.getInstructionIndex(CopyMI); - VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getDefIndex()); - assert(VNI && VNI->def == Idx.getDefIndex() && "Not defined by copy"); + VNInfo *VNI = SpillLI.getVNInfoAt(Idx.getRegSlot()); + assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy"); SibValueMap::iterator I = SibValues.find(VNI); if (I == SibValues.end()) return false; @@ -769,9 +769,9 @@ void InlineSpiller::eliminateRedundantSpills(LiveInterval &SLI, VNInfo *VNI) { if (unsigned DstReg = isFullCopyOf(MI, Reg)) { if (isSibling(DstReg)) { LiveInterval &DstLI = LIS.getInterval(DstReg); - VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getDefIndex()); + VNInfo *DstVNI = DstLI.getVNInfoAt(Idx.getRegSlot()); assert(DstVNI && "Missing defined value"); - assert(DstVNI->def == Idx.getDefIndex() && "Wrong copy def slot"); + assert(DstVNI->def == Idx.getRegSlot() && "Wrong copy def slot"); WorkList.push_back(std::make_pair(&DstLI, DstVNI)); } continue; @@ -823,7 +823,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { continue; LiveInterval &SnipLI = LIS.getInterval(MI->getOperand(1).getReg()); assert(isRegToSpill(SnipLI.reg) && "Unexpected register in copy"); - VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getUseIndex()); + VNInfo *SnipVNI = SnipLI.getVNInfoAt(VNI->def.getRegSlot(true)); assert(SnipVNI && "Snippet undefined before copy"); WorkList.push_back(std::make_pair(&SnipLI, SnipVNI)); } while (!WorkList.empty()); @@ -832,7 +832,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { /// reMaterializeFor - Attempt to rematerialize before MI instead of reloading. 
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineBasicBlock::iterator MI) { - SlotIndex UseIdx = LIS.getInstructionIndex(MI).getUseIndex(); + SlotIndex UseIdx = LIS.getInstructionIndex(MI).getRegSlot(true); VNInfo *ParentVNI = VirtReg.getVNInfoAt(UseIdx.getBaseIndex()); if (!ParentVNI) { @@ -906,7 +906,7 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); + NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI)); DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); ++NumRemats; return true; @@ -1077,7 +1077,7 @@ void InlineSpiller::insertReload(LiveInterval &NewLI, TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to load instruction. - SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); + SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, LIS.getVNInfoAllocator()); @@ -1092,7 +1092,7 @@ void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, MRI.getRegClass(NewLI.reg), &TRI); --MI; // Point to store instruction. - SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getDefIndex(); + SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); @@ -1141,8 +1141,8 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Find the slot index where this instruction reads and writes OldLI. // This is usually the def slot, except for tied early clobbers. 
- SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); - if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getUseIndex())) + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = OldLI.getVNInfoAt(Idx.getRegSlot(true))) if (SlotIndex::isSameInstr(Idx, VNI->def)) Idx = VNI->def; diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index 0414692..eb54baa7 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -468,7 +468,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { // DBG_VALUE has no slot index, use the previous instruction instead. SlotIndex Idx = MBBI == MBB->begin() ? LIS->getMBBStartIdx(MBB) : - LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex(); + LIS->getInstructionIndex(llvm::prior(MBBI)).getRegSlot(); // Handle consecutive DBG_VALUE instructions with the same slot index. do { if (handleDebugValue(MBBI, Idx)) { @@ -575,15 +575,15 @@ UserValue::addDefsFromCopies(LiveInterval *LI, unsigned LocNo, // Is LocNo extended to reach this copy? If not, another def may be blocking // it, or we are looking at a wrong value of LI. 
SlotIndex Idx = LIS.getInstructionIndex(MI); - LocMap::iterator I = locInts.find(Idx.getUseIndex()); + LocMap::iterator I = locInts.find(Idx.getRegSlot(true)); if (!I.valid() || I.value() != LocNo) continue; if (!LIS.hasInterval(DstReg)) continue; LiveInterval *DstLI = &LIS.getInterval(DstReg); - const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getDefIndex()); - assert(DstVNI && DstVNI->def == Idx.getDefIndex() && "Bad copy value"); + const VNInfo *DstVNI = DstLI->getVNInfoAt(Idx.getRegSlot()); + assert(DstVNI && DstVNI->def == Idx.getRegSlot() && "Bad copy value"); CopyValues.push_back(std::make_pair(DstLI, DstVNI)); } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index b69945a..42e122e 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -716,7 +716,7 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], continue; // DBG_VALUE instructions should have been eliminated earlier. SlotIndex Idx = LIS.getInstructionIndex(MI); - Idx = MO.isUse() ? 
Idx.getUseIndex() : Idx.getDefIndex(); + Idx = Idx.getRegSlot(MO.isUse()); const VNInfo *VNI = LI.getVNInfoAt(Idx); assert(VNI && "Interval not live at use."); MO.setReg(LIV[getEqClass(VNI)]->reg); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index c902b88..954d9bb 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -172,9 +172,9 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, if (!MO.getSubReg() || MO.isEarlyClobber()) return false; - SlotIndex RedefIndex = MIIdx.getDefIndex(); + SlotIndex RedefIndex = MIIdx.getRegSlot(); const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); if (DefMI != 0) { return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; @@ -197,11 +197,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); if (interval.empty()) { // Get the Idx of the defining instructions. - SlotIndex defIndex = MIIdx.getDefIndex(); + SlotIndex defIndex = MIIdx.getRegSlot(); // Earlyclobbers move back one, so that they overlap the live range // of inputs. if (MO.isEarlyClobber()) - defIndex = MIIdx.getUseIndex(); + defIndex = MIIdx.getRegSlot(true); // Make sure the first definition is not a partial redefinition. Add an // of the full register. @@ -235,9 +235,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // FIXME: what about dead vars? SlotIndex killIdx; if (vi.Kills[0] != mi) - killIdx = getInstructionIndex(vi.Kills[0]).getDefIndex(); + killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot(); else - killIdx = defIndex.getStoreIndex(); + killIdx = defIndex.getDeadSlot(); // If the kill happens after the definition, we have an intra-block // live range. 
@@ -285,7 +285,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { MachineInstr *Kill = vi.Kills[i]; SlotIndex Start = getMBBStartIdx(Kill->getParent()); - SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex(); + SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot(); // Create interval with one of a NEW value number. Note that this value // number isn't actually defined by an instruction, weird huh? :) @@ -323,14 +323,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // are actually two values in the live interval. Because of this we // need to take the LiveRegion that defines this register and split it // into two values. - SlotIndex RedefIndex = MIIdx.getDefIndex(); + SlotIndex RedefIndex = MIIdx.getRegSlot(); if (MO.isEarlyClobber()) - RedefIndex = MIIdx.getUseIndex(); + RedefIndex = MIIdx.getRegSlot(true); const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getUseIndex()); + interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); VNInfo *OldValNo = OldLR->valno; - SlotIndex DefIndex = OldValNo->def.getDefIndex(); + SlotIndex DefIndex = OldValNo->def.getRegSlot(); // Delete the previous value, which should be short and continuous, // because the 2-addr copy must be in the same MBB as the redef. @@ -356,7 +356,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // If this redefinition is dead, we need to add a dummy unit live // range covering the def slot. if (MO.isDead()) - interval.addRange(LiveRange(RedefIndex, RedefIndex.getStoreIndex(), + interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(), OldValNo)); DEBUG({ @@ -368,9 +368,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // live until the end of the block. We've already taken care of the // rest of the live range. 
- SlotIndex defIndex = MIIdx.getDefIndex(); + SlotIndex defIndex = MIIdx.getRegSlot(); if (MO.isEarlyClobber()) - defIndex = MIIdx.getUseIndex(); + defIndex = MIIdx.getRegSlot(true); VNInfo *ValNo; MachineInstr *CopyMI = NULL; @@ -402,10 +402,10 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex.getDefIndex(); + SlotIndex start = baseIndex.getRegSlot(); // Earlyclobbers move back one. if (MO.isEarlyClobber()) - start = MIIdx.getUseIndex(); + start = MIIdx.getRegSlot(true); SlotIndex end = start; // If it is not used after definition, it is considered dead at @@ -415,7 +415,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // advance below compensates. if (MO.isDead()) { DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); goto exit; } @@ -432,21 +432,21 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); goto exit; } else { int DefIdx = mi->findRegisterDefOperandIdx(interval.reg,false,false,tri_); if (DefIdx != -1) { if (mi->isRegTiedToUseOperand(DefIdx)) { // Two-address instruction. - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); } else { // Another instruction redefines the register before it is ever read. // Then the register is essentially dead at the instruction that // defines it. Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); } goto exit; } @@ -459,7 +459,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, // instruction where we know it's dead is if it is live-in to the function // and never used. 
Another possible case is the implicit use of the // physical register has been deleted by two-address pass. - end = start.getStoreIndex(); + end = start.getDeadSlot(); exit: assert(start < end && "did not find end of interval?"); @@ -522,7 +522,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, while (mi != E) { if (mi->killsRegister(interval.reg, tri_)) { DEBUG(dbgs() << " killed"); - end = baseIndex.getDefIndex(); + end = baseIndex.getRegSlot(); SeenDefUse = true; break; } else if (mi->definesRegister(interval.reg, tri_)) { @@ -531,7 +531,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, // it. Hence its interval is: // [defSlot(def), defSlot(def)+1) DEBUG(dbgs() << " dead"); - end = start.getStoreIndex(); + end = start.getDeadSlot(); SeenDefUse = true; break; } @@ -547,7 +547,7 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, if (!SeenDefUse) { if (isAlias) { DEBUG(dbgs() << " dead"); - end = MIIdx.getStoreIndex(); + end = MIIdx.getDeadSlot(); } else { DEBUG(dbgs() << " live through"); end = getMBBEndIdx(MBB); @@ -667,7 +667,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, MachineInstr *UseMI = I.skipInstruction();) { if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; - SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex(); + SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(true); VNInfo *VNI = li->getVNInfoAt(Idx); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is @@ -700,9 +700,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // A use tied to an early-clobber def ends at the load slot and isn't caught // above. Catch it here instead. This probably only ever happens for inline // assembly. 
- if (VNI->def.isUse()) - if (VNInfo *UVNI = li->getVNInfoAt(VNI->def.getLoadIndex())) - WorkList.push_back(std::make_pair(VNI->def.getLoadIndex(), UVNI)); + if (VNI->def.isEarlyClobber()) + if (VNInfo *UVNI = li->getVNInfoBefore(VNI->def)) + WorkList.push_back(std::make_pair(VNI->def.getPrevSlot(), UVNI)); } // Keep track of the PHIs that are in use. @@ -825,8 +825,8 @@ void LiveIntervals::addKillFlags() { // Every instruction that kills Reg corresponds to a live range end point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; ++RI) { - // A LOAD index indicates an MBB edge. - if (RI->end.isLoad()) + // A block index indicates an MBB edge. + if (RI->end.isBlock()) continue; MachineInstr *MI = getInstructionFromIndex(RI->end); if (!MI) @@ -978,11 +978,11 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, MachineInstr* startInst) { LiveInterval& Interval = getOrCreateInterval(reg); VNInfo* VN = Interval.getNextValue( - SlotIndex(getInstructionIndex(startInst).getDefIndex()), + SlotIndex(getInstructionIndex(startInst).getRegSlot()), startInst, getVNInfoAllocator()); VN->setHasPHIKill(true); LiveRange LR( - SlotIndex(getInstructionIndex(startInst).getDefIndex()), + SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); Interval.addRange(LR); diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index b23f851..2f283b2 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -83,8 +83,8 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, SlotIndex UseIdx, LiveIntervals &lis) { - OrigIdx = OrigIdx.getUseIndex(); - UseIdx = UseIdx.getUseIndex(); + OrigIdx = OrigIdx.getRegSlot(true); + UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = OrigMI->getOperand(i); if (!MO.isReg() || !MO.getReg() || MO.isDef()) @@ -151,7 +151,7 @@ SlotIndex 
LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB, tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri); rematted_.insert(RM.ParentVNI); return lis.getSlotIndexes()->insertMachineInstrInMaps(--MI, Late) - .getDefIndex(); + .getRegSlot(); } void LiveRangeEdit::eraseVirtReg(unsigned Reg, LiveIntervals &LIS) { @@ -221,7 +221,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, while (!Dead.empty()) { MachineInstr *MI = Dead.pop_back_val(); assert(MI->allDefsAreDead() && "Def isn't really dead"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getDefIndex(); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); // Never delete inline asm. if (MI->isInlineAsm()) { diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 26847d3..b36aab3 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -659,7 +659,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check LiveInts liveness and kill. if (TargetRegisterInfo::isVirtualRegister(Reg) && LiveInts && !LiveInts->isNotInMIMap(MI)) { - SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex(); + SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getRegSlot(true); if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); if (!LI.liveAt(UseIdx)) { @@ -668,7 +668,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { } // Check for extra kill flags. // Note that we allow missing kill flags for now. - if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) { + if (MO->isKill() && !LI.killedAt(UseIdx.getRegSlot())) { report("Live range continues after kill flag", MO, MONum); *OS << "Live range: " << LI << '\n'; } @@ -710,7 +710,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { // Check LiveInts for a live range, but only for virtual registers. 
if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && !LiveInts->isNotInMIMap(MI)) { - SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex(); + SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getRegSlot(); if (LiveInts->hasInterval(Reg)) { const LiveInterval &LI = LiveInts->getInterval(Reg); if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) { @@ -800,11 +800,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { LiveInts && !LiveInts->isNotInMIMap(MI)) { LiveInterval &LI = LiveStks->getInterval(MO->getIndex()); SlotIndex Idx = LiveInts->getInstructionIndex(MI); - if (MCID.mayLoad() && !LI.liveAt(Idx.getUseIndex())) { + if (MCID.mayLoad() && !LI.liveAt(Idx.getRegSlot(true))) { report("Instruction loads from dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } - if (MCID.mayStore() && !LI.liveAt(Idx.getDefIndex())) { + if (MCID.mayStore() && !LI.liveAt(Idx.getRegSlot())) { report("Instruction stores to dead spill slot", MO, MONum); *OS << "Live stack: " << LI << '\n'; } @@ -1085,13 +1085,14 @@ void MachineVerifier::verifyLiveIntervals() { // Early clobber defs begin at USE slots, but other defs must begin at // DEF slots. 
if (isEarlyClobber) { - if (!VNI->def.isUse()) { - report("Early clobber def must be at a USE slot", MF); + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; } - } else if (!VNI->def.isDef()) { - report("Non-PHI, non-early clobber def must be at a DEF slot", MF); + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MF); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " in " << LI << '\n'; } diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 9b414d6..22d6a3b 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -423,7 +423,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); LiveInterval &IntB = LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. @@ -438,7 +438,7 @@ bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); // AValNo is the value number in A that defines the copy, A3 in the example. - SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); + SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); // The live range might not exist after fun with physreg coalescing. 
if (ALR == IntA.end()) return false; @@ -625,7 +625,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, if (!LIS->hasInterval(CP.getDstReg())) return false; - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); LiveInterval &IntA = LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); @@ -641,7 +641,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); // AValNo is the value number in A that defines the copy, A3 in the example. - VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex()); + VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); // If other defs can reach uses of this def, then it's not safe to perform @@ -747,7 +747,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, UseMO.setReg(NewReg); continue; } - SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getUseIndex(); + SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; @@ -765,7 +765,7 @@ bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, // This copy will become a noop. If it's defining a new val#, merge it into // BValNo. 
- SlotIndex DefIdx = UseIdx.getDefIndex(); + SlotIndex DefIdx = UseIdx.getRegSlot(); VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); if (!DVNI) continue; @@ -799,7 +799,7 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, bool preserveSrcInt, unsigned DstReg, MachineInstr *CopyMI) { - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getUseIndex(); + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); VNInfo *ValNo = SrcLR->valno; @@ -887,7 +887,7 @@ bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, DstInt = SrcInt; SrcInt = 0; - VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex()); + VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot()); assert(DeadVNI && "No value defined in DstInt"); DstInt->removeValNo(DeadVNI); @@ -1013,7 +1013,7 @@ static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, /// the val# it defines. If the live interval becomes empty, remove it as well. bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot(); LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); if (DefIdx != MLR->valno->def) return false; @@ -1023,7 +1023,7 @@ bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, void RegisterCoalescer::RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI) { - SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); if (LIS->hasInterval(DstReg)) { LiveInterval &LI = LIS->getInterval(DstReg); if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) @@ -1936,7 +1936,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // Check for now unnecessary kill flags. 
if (LIS->isNotInMIMap(MI)) continue; - SlotIndex DefIdx = LIS->getInstructionIndex(MI).getDefIndex(); + SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isKill()) continue; diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp index 8b02ec4..448f2fb 100644 --- a/lib/CodeGen/RenderMachineFunction.cpp +++ b/lib/CodeGen/RenderMachineFunction.cpp @@ -560,12 +560,13 @@ namespace llvm { // For uses/defs recorded use/def indexes override current liveness and // instruction operands (Only for the interval which records the indexes). - if (i.isUse() || i.isDef()) { + // FIXME: This is all wrong, uses and defs share the same slots. + if (i.isEarlyClobber() || i.isRegister()) { UseDefs::const_iterator udItr = useDefs.find(li); if (udItr != useDefs.end()) { const SlotSet &slotSet = udItr->second; if (slotSet.count(i)) { - if (i.isUse()) { + if (i.isEarlyClobber()) { return Used; } // else @@ -586,9 +587,9 @@ namespace llvm { return AliveStack; } } else { - if (i.isDef() && mi->definesRegister(li->reg, tri)) { + if (i.isRegister() && mi->definesRegister(li->reg, tri)) { return Defined; - } else if (i.isUse() && mi->readsRegister(li->reg)) { + } else if (i.isEarlyClobber() && mi->readsRegister(li->reg)) { return Used; } else { if (vrm == 0 || @@ -804,7 +805,7 @@ namespace llvm { os << indent + s(2) << "\n"; // Render the code column. - if (i.isLoad()) { + if (i.isBlock()) { MachineBasicBlock *mbb = sis->getMBBFromIndex(i); mi = sis->getInstructionFromIndex(i); @@ -823,7 +824,7 @@ namespace llvm { } os << indent + s(4) << "\n"; } else { - i = i.getStoreIndex(); // <- Will be incremented to the next index. + i = i.getDeadSlot(); // <- Will be incremented to the next index. 
continue; } } @@ -952,10 +953,10 @@ namespace llvm { rItr != rEnd; ++rItr) { const MachineInstr *mi = &*rItr; if (mi->readsRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getUseIndex()); + useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot(true)); } if (mi->definesRegister(li->reg)) { - useDefs[li].insert(lis->getInstructionIndex(mi).getDefIndex()); + useDefs[li].insert(lis->getInstructionIndex(mi).getRegSlot()); } } } diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index ca79caf..6a7666e 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -76,7 +76,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { MachineBasicBlock *mbb = &*mbbItr; // Insert an index for the MBB start. - SlotIndex blockStartIndex(back(), SlotIndex::LOAD); + SlotIndex blockStartIndex(back(), SlotIndex::Slot_Block); for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end(); miItr != miEnd; ++miItr) { @@ -88,7 +88,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(mi, index += SlotIndex::InstrDist)); // Save this base index in the maps. - mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), SlotIndex::LOAD))); + mi2iMap.insert(std::make_pair(mi, SlotIndex(back(), + SlotIndex::Slot_Block))); ++functionSize; } @@ -97,7 +98,8 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) { push_back(createEntry(0, index += SlotIndex::InstrDist)); MBBRanges[mbb->getNumber()].first = blockStartIndex; - MBBRanges[mbb->getNumber()].second = SlotIndex(back(), SlotIndex::LOAD); + MBBRanges[mbb->getNumber()].second = SlotIndex(back(), + SlotIndex::Slot_Block); idx2MBBMap.push_back(IdxMBBPair(blockStartIndex, mbb)); } @@ -166,7 +168,7 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. 
void SlotIndex::print(raw_ostream &os) const { if (isValid()) - os << entry().getIndex() << "LudS"[getSlot()]; + os << entry().getIndex() << "Berd"[getSlot()]; else os << "invalid"; } diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 4a170bc..9083804 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -139,7 +139,7 @@ protected: tri); MachineInstr *loadInstr(prior(miItr)); SlotIndex loadIndex = - lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); + lis->InsertMachineInstrInMaps(loadInstr).getRegSlot(); SlotIndex endIndex = loadIndex.getNextIndex(); VNInfo *loadVNI = newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator()); @@ -152,7 +152,7 @@ protected: true, ss, trc, tri); MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = - lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); + lis->InsertMachineInstrInMaps(storeInstr).getRegSlot(); SlotIndex beginIndex = storeIndex.getPrevIndex(); VNInfo *storeVNI = newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator()); diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 6362780..61454ed 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -112,7 +112,7 @@ void SplitAnalysis::analyzeUses() { I = MRI.use_nodbg_begin(CurLI->reg), E = MRI.use_nodbg_end(); I != E; ++I) if (!I.getOperand().isUndef()) - UseSlots.push_back(LIS.getInstructionIndex(&*I).getDefIndex()); + UseSlots.push_back(LIS.getInstructionIndex(&*I).getRegSlot()); array_pod_sort(UseSlots.begin(), UseSlots.end()); @@ -421,7 +421,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg) .addReg(Edit->getReg()); Def = LIS.getSlotIndexes()->insertMachineInstrInMaps(CopyMI, Late) - .getDefIndex(); + .getRegSlot(); ++NumCopies; } @@ -640,7 +640,7 @@ void SplitEditor::removeBackCopies(SmallVectorImpl &Copies) { DEBUG(dbgs() << " cannot find simple kill of RegIdx " << RegIdx << '\n'); 
forceRecompute(RegIdx, Edit->getParent().getVNInfoAt(Def)); } else { - SlotIndex Kill = LIS.getInstructionIndex(MBBI).getDefIndex(); + SlotIndex Kill = LIS.getInstructionIndex(MBBI).getRegSlot(); DEBUG(dbgs() << " move kill to " << Kill << '\t' << *MBBI); AssignI.setStop(Kill); } @@ -958,7 +958,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // use the same register as the def, so just do that always. SlotIndex Idx = LIS.getInstructionIndex(MI); if (MO.isDef() || MO.isUndef()) - Idx = MO.isEarlyClobber() ? Idx.getUseIndex() : Idx.getDefIndex(); + Idx = Idx.getRegSlot(MO.isEarlyClobber()); // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); @@ -981,7 +981,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { if (!Edit->getParent().liveAt(Idx)) continue; } else - Idx = Idx.getUseIndex(); + Idx = Idx.getRegSlot(true); getLRCalc(RegIdx).extend(LI, Idx.getNextSlot(), LIS.getSlotIndexes(), &MDT, &LIS.getVNInfoAllocator()); diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp index 77973b7..16cf9b8 100644 --- a/lib/CodeGen/Splitter.cpp +++ b/lib/CodeGen/Splitter.cpp @@ -141,7 +141,7 @@ namespace llvm { ls.lis->InsertMachineInstrInMaps(copy); - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); + SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getRegSlot(); VNInfo *newVal = getNewVNI(preHeaderRange->valno); newVal->def = copyDefIdx; @@ -175,7 +175,7 @@ namespace llvm { ls.lis->InsertMachineInstrInMaps(copy); - SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getDefIndex(); + SlotIndex copyDefIdx = ls.lis->getInstructionIndex(copy).getRegSlot(); // Blow away output range definition. 
outRange->valno->def = ls.lis->getInvalidIndex(); @@ -216,13 +216,13 @@ namespace llvm { SlotIndex instrIdx = ls.lis->getInstructionIndex(&instr); if (instr.modifiesRegister(li.reg, 0)) { LiveRange *defRange = - li.getLiveRangeContaining(instrIdx.getDefIndex()); + li.getLiveRangeContaining(instrIdx.getRegSlot()); if (defRange != 0) // May have caught this already. copyRange(*defRange); } if (instr.readsRegister(li.reg, 0)) { LiveRange *useRange = - li.getLiveRangeContaining(instrIdx.getUseIndex()); + li.getLiveRangeContaining(instrIdx.getRegSlot(true)); if (useRange != 0) { // May have caught this already. copyRange(*useRange); } diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 260cc0e..8c6e44b 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -390,7 +390,7 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { MachineOperand *LastUse = findLastUse(MBB, SrcReg); assert(LastUse); SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); - SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB)); + SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB)); LastUse->setIsKill(true); } @@ -745,7 +745,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, // Set the phi-def flag for the VN at this PHI. 
SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex()); + VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); assert(DestVNI); DestVNI->setIsPHIDef(true); @@ -756,7 +756,7 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); DestVNI->def = MBBStartIndex; DestLI.addRange(LiveRange(MBBStartIndex, - PHIIndex.getDefIndex(), + PHIIndex.getRegSlot(), DestVNI)); return; } @@ -783,18 +783,18 @@ void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, LI->getVNInfoAllocator()); CopyVNI->setIsPHIDef(true); CopyLI.addRange(LiveRange(MBBStartIndex, - DestCopyIndex.getDefIndex(), + DestCopyIndex.getRegSlot(), CopyVNI)); // Adjust DestReg's live interval to adjust for its new definition at // CopyInstr. LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex()); + DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot()); - VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex()); + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); assert(DestVNI); - DestVNI->def = DestCopyIndex.getDefIndex(); + DestVNI->def = DestCopyIndex.getRegSlot(); InsertedDestCopies[CopyReg] = CopyInstr; } -- cgit v1.1 From 10252db69bdddb445e53892b388fbe5921114b86 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 13 Nov 2011 21:39:51 +0000 Subject: Enhance the assertion mechanisms in place to make it easier to catch when we fail to place all the blocks of a loop. Currently this is happening for unnatural loops, and this logic helps more immediately point to the problem. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144504 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 33 ++++++++++++++++++++++++++++----- 1 file changed, 28 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index d0b6926..bf2a71b 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -440,6 +440,13 @@ void MachineBlockPlacement::buildChain( if (!BestSucc) BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); + if (BestSucc) { + // Zero out LoopPredecessors for the successor we're about to merge. We + // do this here instead of during the merge to catch cases where we + // didn't *intend* to merge despite non-zero loop predecessors. + BlockToChain[BestSucc]->LoopPredecessors = 0; + } + // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at // this point. This won't be a fallthrough, but it will increase locality. @@ -510,25 +517,34 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet); DEBUG({ - if (LoopChain.LoopPredecessors) + // Crash at the end so we get all of the debugging output first. 
+ bool BadLoop = false; + if (LoopChain.LoopPredecessors) { + BadLoop = true; dbgs() << "Loop chain contains a block without its preds placed!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n"; + } for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); BCI != BCE; ++BCI) - if (!LoopBlockSet.erase(*BCI)) + if (!LoopBlockSet.erase(*BCI)) { + BadLoop = true; dbgs() << "Loop chain contains a block not contained by the loop!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" << " Bad block: " << getBlockName(*BCI) << "\n"; + } - if (!LoopBlockSet.empty()) + if (!LoopBlockSet.empty()) { + BadLoop = true; for (SmallPtrSet::iterator LBI = LoopBlockSet.begin(), LBE = LoopBlockSet.end(); LBI != LBE; ++LBI) dbgs() << "Loop contains blocks never placed into a chain!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" << " Bad block: " << getBlockName(*LBI) << "\n"; + } + assert(!BadLoop && "Detected problems with the placement of this loop."); }); } @@ -575,21 +591,28 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { typedef SmallPtrSet FunctionBlockSetType; DEBUG({ + // Crash at the end so we get all of the debugging output first. 
+ bool BadFunc = false; FunctionBlockSetType FunctionBlockSet; for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) FunctionBlockSet.insert(FI); for (BlockChain::iterator BCI = FunctionChain.begin(), BCE = FunctionChain.end(); BCI != BCE; ++BCI) - if (!FunctionBlockSet.erase(*BCI)) + if (!FunctionBlockSet.erase(*BCI)) { + BadFunc = true; dbgs() << "Function chain contains a block not in the function!\n" << " Bad block: " << getBlockName(*BCI) << "\n"; + } - if (!FunctionBlockSet.empty()) + if (!FunctionBlockSet.empty()) { + BadFunc = true; for (SmallPtrSet::iterator FBI = FunctionBlockSet.begin(), FBE = FunctionBlockSet.end(); FBI != FBE; ++FBI) dbgs() << "Function contains blocks never placed into a chain!\n" << " Bad block: " << getBlockName(*FBI) << "\n"; + } + assert(!BadFunc && "Detected problems with the block placement."); }); // Splice the blocks into place. -- cgit v1.1 From d14614e6777771f8fec3062bcaf2986c189ac84d Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sun, 13 Nov 2011 22:05:42 +0000 Subject: Simplify early clobber slots a bit. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144507 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 954d9bb..1346487 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -197,11 +197,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, LiveVariables::VarInfo& vi = lv_->getVarInfo(interval.reg); if (interval.empty()) { // Get the Idx of the defining instructions. - SlotIndex defIndex = MIIdx.getRegSlot(); - // Earlyclobbers move back one, so that they overlap the live range - // of inputs. 
- if (MO.isEarlyClobber()) - defIndex = MIIdx.getRegSlot(true); + SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); // Make sure the first definition is not a partial redefinition. Add an // <imp-def> of the full register. @@ -323,9 +319,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // are actually two values in the live interval. Because of this we // need to take the LiveRegion that defines this register and split it // into two values. - SlotIndex RedefIndex = MIIdx.getRegSlot(); - if (MO.isEarlyClobber()) - RedefIndex = MIIdx.getRegSlot(true); + SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); @@ -402,10 +396,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB, DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_)); SlotIndex baseIndex = MIIdx; - SlotIndex start = baseIndex.getRegSlot(); - // Earlyclobbers move back one. - if (MO.isEarlyClobber()) - start = MIIdx.getRegSlot(true); + SlotIndex start = baseIndex.getRegSlot(MO.isEarlyClobber()); SlotIndex end = start; // If it is not used after definition, it is considered dead at -- cgit v1.1 From 1f81e316b042c02c841801a71e7439e166ffa2a0 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sun, 13 Nov 2011 22:42:13 +0000 Subject: Terminate all dead defs at the dead slot instead of the 'next' slot. This makes no difference for normal defs, but early clobber dead defs now look like: [Slot_EarlyClobber; Slot_Dead) instead of: [Slot_EarlyClobber; Slot_Register). Live ranges for normal dead defs look like: [Slot_Register; Slot_Dead) as before.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144512 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/InlineSpiller.cpp | 2 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 4 ++-- lib/CodeGen/SplitKit.cpp | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 021c381..3341ae1 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -1206,7 +1206,7 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // This instruction defines a dead value. We don't need to spill it, // but do create a live range for the dead value. VNInfo *VNI = NewLI.getNextValue(Idx, 0, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, Idx.getNextSlot(), VNI)); + NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI)); } } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 1346487..7baa5fb 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -686,7 +686,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, VNInfo *VNI = *I; if (VNI->isUnused()) continue; - NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI)); + NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI)); // A use tied to an early-clobber def ends at the load slot and isn't caught // above. Catch it here instead. This probably only ever happens for inline @@ -751,7 +751,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, continue; LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); assert(LII != NewLI.end() && "Missing live range for PHI"); - if (LII->end != VNI->def.getNextSlot()) + if (LII->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. 
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 61454ed..6c46d53 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -366,14 +366,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, // If the previous value was a simple mapping, add liveness for it now. if (VNInfo *OldVNI = InsP.first->second.getPointer()) { SlotIndex Def = OldVNI->def; - LI->addRange(LiveRange(Def, Def.getNextSlot(), OldVNI)); + LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI)); // No longer a simple mapping. Switch to a complex, non-forced mapping. InsP.first->second = ValueForcePair(); } // This is a complex mapping, add liveness for VNI SlotIndex Def = VNI->def; - LI->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); return VNI; } @@ -393,7 +393,7 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. SlotIndex Def = VNI->def; - Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI)); + Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); // Mark as complex mapped, forced. VFP = ValueForcePair(0, true); } @@ -994,8 +994,8 @@ void SplitEditor::deleteRematVictims() { LiveInterval *LI = *I; for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); LII != LIE; ++LII) { - // Dead defs end at the store slot. - if (LII->end != LII->valno->def.getNextSlot()) + // Dead defs end at the dead slot. + if (LII->end != LII->valno->def.getDeadSlot()) continue; MachineInstr *MI = LIS.getInstructionFromIndex(LII->valno->def); assert(MI && "Missing instruction for dead def"); -- cgit v1.1 From c0f05b3c3fe191b09e04a5f3d16be9f4f8cc036e Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Sun, 13 Nov 2011 22:50:09 +0000 Subject: Cleanup some 80-columns violations and poor formatting. 
These snuck by when I was reading through the code for style. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144513 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index bf2a71b..de813a3 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -537,7 +537,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, if (!LoopBlockSet.empty()) { BadLoop = true; - for (SmallPtrSet::iterator LBI = LoopBlockSet.begin(), LBE = LoopBlockSet.end(); + for (BlockFilterSet::iterator LBI = LoopBlockSet.begin(), + LBE = LoopBlockSet.end(); LBI != LBE; ++LBI) dbgs() << "Loop contains blocks never placed into a chain!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" @@ -597,7 +598,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) FunctionBlockSet.insert(FI); - for (BlockChain::iterator BCI = FunctionChain.begin(), BCE = FunctionChain.end(); + for (BlockChain::iterator BCI = FunctionChain.begin(), + BCE = FunctionChain.end(); BCI != BCE; ++BCI) if (!FunctionBlockSet.erase(*BCI)) { BadFunc = true; @@ -607,8 +609,9 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { if (!FunctionBlockSet.empty()) { BadFunc = true; - for (SmallPtrSet::iterator FBI = FunctionBlockSet.begin(), - FBE = FunctionBlockSet.end(); FBI != FBE; ++FBI) + for (FunctionBlockSetType::iterator FBI = FunctionBlockSet.begin(), + FBE = FunctionBlockSet.end(); + FBI != FBE; ++FBI) dbgs() << "Function contains blocks never placed into a chain!\n" << " Bad block: " << getBlockName(*FBI) << "\n"; } @@ -618,7 +621,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // Splice the blocks into place. 
MachineFunction::iterator InsertPos = F.begin(); SmallVector Cond; // For AnalyzeBranch. - for (BlockChain::iterator BI = FunctionChain.begin(), BE = FunctionChain.end(); + for (BlockChain::iterator BI = FunctionChain.begin(), + BE = FunctionChain.end(); BI != BE; ++BI) { DEBUG(dbgs() << (BI == FunctionChain.begin() ? "Placing chain " : " ... ") -- cgit v1.1 From 6c9cc21d85cdef79b971f710ace287f3a2f847a3 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Sun, 13 Nov 2011 23:53:25 +0000 Subject: Use kill slots instead of the previous slot in shrinkToUses. It's more natural to use the actual end points. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144515 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 7baa5fb..2ec2cbc 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -658,8 +658,8 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, MachineInstr *UseMI = I.skipInstruction();) { if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; - SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(true); - VNInfo *VNI = li->getVNInfoAt(Idx); + SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); + VNInfo *VNI = li->getVNInfoAt(Idx.getBaseIndex()); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is // no live value. It is likely caused by a target getting flags @@ -669,11 +669,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, << *li << '\n'); continue; } - if (VNI->def == Idx) { + if (VNI->def == Idx.getRegSlot(true)) { // Special case: An early-clobber tied operand reads and writes the // register one slot early. 
- Idx = Idx.getPrevSlot(); - VNI = li->getVNInfoAt(Idx); + Idx = Idx.getRegSlot(true); + VNI = li->getVNInfoBefore(Idx); assert(VNI && "Early-clobber tied value not available"); } WorkList.push_back(std::make_pair(Idx, VNI)); @@ -693,7 +693,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // assembly. if (VNI->def.isEarlyClobber()) if (VNInfo *UVNI = li->getVNInfoBefore(VNI->def)) - WorkList.push_back(std::make_pair(VNI->def.getPrevSlot(), UVNI)); + WorkList.push_back(std::make_pair(VNI->def, UVNI)); } // Keep track of the PHIs that are in use. @@ -704,11 +704,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SlotIndex Idx = WorkList.back().first; VNInfo *VNI = WorkList.back().second; WorkList.pop_back(); - const MachineBasicBlock *MBB = getMBBFromIndex(Idx); + const MachineBasicBlock *MBB = getMBBFromIndex(Idx.getPrevSlot()); SlotIndex BlockStart = getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. - if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx.getNextSlot())) { + if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? @@ -719,9 +719,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); + SlotIndex Stop = getMBBEndIdx(*PI); // A predecessor is not required to have a live-out value for a PHI. - if (VNInfo *PVNI = li->getVNInfoAt(Stop)) + if (VNInfo *PVNI = li->getVNInfoBefore(Stop)) WorkList.push_back(std::make_pair(Stop, PVNI)); } continue; @@ -729,15 +729,16 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // VNI is live-in to MBB. DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); - NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI)); + NewLI.addRange(LiveRange(BlockStart, Idx, VNI)); // Make sure VNI is live-out from the predecessors. 
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { if (!LiveOut.insert(*PI)) continue; - SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot(); - assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor"); + SlotIndex Stop = getMBBEndIdx(*PI); + assert(li->getVNInfoBefore(Stop) == VNI && + "Wrong value out of predecessor"); WorkList.push_back(std::make_pair(Stop, VNI)); } } -- cgit v1.1 From b5856c83ff4fd796c3eabccca2ed3b06173aeb51 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 14 Nov 2011 00:00:35 +0000 Subject: Teach machine block placement to cope with unnatural loops. These don't get loop info structures associated with them, and so we need some way to make forward progress selecting and placing basic blocks. The technique used here is pretty brutal -- it just scans the list of blocks looking for the first unplaced candidate. It keeps placing blocks like this until the CFG becomes tractable. The cost is somewhat unfortunate, it requires allocating a vector of all basic block pointers eagerly. I have some ideas about how to simplify and optimize this, but I'm trying to get the logic correct first. Thanks to Benjamin Kramer for the reduced test case out of GCC. Sadly there are other bugs that GCC is tickling that I'm reducing and working on now. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144516 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 81 ++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 21 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index de813a3..ca17ad0 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -206,7 +206,7 @@ class MachineBlockPlacement : public MachineFunctionPass { void markChainSuccessors(BlockChain &Chain, MachineBasicBlock *LoopHeaderBB, - SmallVectorImpl &Blocks, + SmallVectorImpl &BlockWorkList, const BlockFilterSet *BlockFilter = 0); MachineBasicBlock *selectBestSuccessor(MachineBasicBlock *BB, BlockChain &Chain, @@ -214,8 +214,12 @@ class MachineBlockPlacement : public MachineFunctionPass { MachineBasicBlock *selectBestCandidateBlock( BlockChain &Chain, SmallVectorImpl &WorkList, const BlockFilterSet *BlockFilter); + MachineBasicBlock *getFirstUnplacedBlock(const BlockChain &PlacedChain, + ArrayRef Blocks, + unsigned &PrevUnplacedBlockIdx); void buildChain(MachineBasicBlock *BB, BlockChain &Chain, - SmallVectorImpl &Blocks, + ArrayRef Blocks, + SmallVectorImpl &BlockWorkList, const BlockFilterSet *BlockFilter = 0); void buildLoopChains(MachineFunction &F, MachineLoop &L); void buildCFGChains(MachineFunction &F); @@ -403,17 +407,41 @@ MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( return BestBlock; } +/// \brief Retrieve the first unplaced basic block. +/// +/// This routine is called when we are unable to use the CFG to walk through +/// all of the basic blocks and form a chain due to unnatural loops in the CFG. +/// We walk through the sequence of blocks, starting from the +/// LastUnplacedBlockIdx. We update this index to avoid re-scanning the entire +/// sequence on repeated calls to this routine. 
+MachineBasicBlock *MachineBlockPlacement::getFirstUnplacedBlock( + const BlockChain &PlacedChain, + ArrayRef Blocks, + unsigned &PrevUnplacedBlockIdx) { + for (unsigned i = PrevUnplacedBlockIdx, e = Blocks.size(); i != e; ++i) { + MachineBasicBlock *BB = Blocks[i]; + if (BlockToChain[BB] != &PlacedChain) { + PrevUnplacedBlockIdx = i; + return BB; + } + } + return 0; +} + void MachineBlockPlacement::buildChain( MachineBasicBlock *BB, BlockChain &Chain, + ArrayRef Blocks, SmallVectorImpl &BlockWorkList, const BlockFilterSet *BlockFilter) { assert(BB); assert(BlockToChain[BB] == &Chain); assert(*Chain.begin() == BB); + SmallVector Cond; // For AnalyzeBranch. + unsigned PrevUnplacedBlockIdx = 0; + MachineBasicBlock *LoopHeaderBB = BB; markChainSuccessors(Chain, LoopHeaderBB, BlockWorkList, BlockFilter); - SmallVector Cond; // For AnalyzeBranch. BB = *llvm::prior(Chain.end()); for (;;) { assert(BB); @@ -429,10 +457,12 @@ void MachineBlockPlacement::buildChain( Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. if (TII->AnalyzeBranch(*BB, TBB, FBB, Cond) && BB->canFallThrough()) { - MachineFunction::iterator I(BB); - assert(llvm::next(I) != BB->getParent()->end() && - "The final block in the function can fallthrough!"); - BestSucc = llvm::next(I); + MachineFunction::iterator I(BB), NextI(llvm::next(I)); + // Ensure that the layout successor is a viable block, as we know that + // fallthrough is a possibility. + assert(NextI != BB->getParent()->end()); + assert(!BlockFilter || BlockFilter->count(NextI)); + BestSucc = NextI; } // Otherwise, look for the best viable successor if there is one to place @@ -440,13 +470,6 @@ void MachineBlockPlacement::buildChain( if (!BestSucc) BestSucc = selectBestSuccessor(BB, Chain, BlockFilter); - if (BestSucc) { - // Zero out LoopPredecessors for the successor we're about to merge. 
We - // do this here instead of during the merge to catch cases where we - // didn't *intend* to merge despite non-zero loop predecessors. - BlockToChain[BestSucc]->LoopPredecessors = 0; - } - // If an immediate successor isn't available, look for the best viable // block among those we've identified as not violating the loop's CFG at // this point. This won't be a fallthrough, but it will increase locality. @@ -454,19 +477,28 @@ void MachineBlockPlacement::buildChain( BestSucc = selectBestCandidateBlock(Chain, BlockWorkList, BlockFilter); if (!BestSucc) { - DEBUG(dbgs() << "Finished forming chain for header block " - << getBlockNum(*Chain.begin()) << "\n"); - return; + BestSucc = getFirstUnplacedBlock(Chain, Blocks, PrevUnplacedBlockIdx); + if (!BestSucc) + break; + + DEBUG(dbgs() << "Unnatural loop CFG detected, forcibly merging the " + "layout successor until the CFG reduces\n"); } // Place this block, updating the datastructures to reflect its placement. BlockChain &SuccChain = *BlockToChain[BestSucc]; + // Zero out LoopPredecessors for the successor we're about to merge in case + // we selected a successor that didn't fit naturally into the CFG. + SuccChain.LoopPredecessors = 0; DEBUG(dbgs() << "Merging from " << getBlockNum(BB) << " to " << getBlockNum(BestSucc) << "\n"); markChainSuccessors(SuccChain, LoopHeaderBB, BlockWorkList, BlockFilter); Chain.merge(BestSucc, &SuccChain); BB = *llvm::prior(Chain.end()); - } + }; + + DEBUG(dbgs() << "Finished forming chain for header block " + << getBlockNum(*Chain.begin()) << "\n"); } /// \brief Forms basic block chains from the natural loop structures. 
@@ -484,6 +516,7 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, SmallVector BlockWorkList; BlockFilterSet LoopBlockSet(L.block_begin(), L.block_end()); + BlockChain &LoopChain = *BlockToChain[L.getHeader()]; // FIXME: This is a really lame way of walking the chains in the loop: we // walk the blocks, and use a set to prevent visiting a particular chain @@ -513,8 +546,8 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, BlockWorkList.push_back(*BI); } - BlockChain &LoopChain = *BlockToChain[L.getHeader()]; - buildChain(*L.block_begin(), LoopChain, BlockWorkList, &LoopBlockSet); + buildChain(*L.block_begin(), LoopChain, L.getBlocks(), BlockWorkList, + &LoopBlockSet); DEBUG({ // Crash at the end so we get all of the debugging output first. @@ -561,11 +594,17 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { ++LI) buildLoopChains(F, **LI); + // We need a vector of blocks so that buildChain can handle unnatural CFG + // constructs by searching for unplaced blocks and just concatenating them. + SmallVector Blocks; + Blocks.reserve(F.size()); + SmallVector BlockWorkList; SmallPtrSet UpdatedPreds; for (MachineFunction::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { MachineBasicBlock *BB = &*FI; + Blocks.push_back(BB); BlockChain &Chain = *BlockToChain[BB]; if (!UpdatedPreds.insert(&Chain)) continue; @@ -588,7 +627,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } BlockChain &FunctionChain = *BlockToChain[&F.front()]; - buildChain(&F.front(), FunctionChain, BlockWorkList); + buildChain(&F.front(), FunctionChain, Blocks, BlockWorkList); typedef SmallPtrSet FunctionBlockSetType; DEBUG({ -- cgit v1.1 From 194eb71a11a77c7fb576780783a77e64924dfb10 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 14 Nov 2011 01:39:36 +0000 Subject: Use getVNInfoBefore() when it makes sense. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144517 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/InlineSpiller.cpp | 2 +- lib/CodeGen/LiveInterval.cpp | 5 ++--- lib/CodeGen/MachineVerifier.cpp | 6 +++--- lib/CodeGen/SplitKit.cpp | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 3341ae1..59907d9 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -810,7 +810,7 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) { MachineBasicBlock *MBB = LIS.getMBBFromIndex(VNI->def); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) { - VNInfo *PVNI = LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot()); + VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)); if (PVNI) WorkList.push_back(std::make_pair(LI, PVNI)); } diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 42e122e..a85639f 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -680,15 +680,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Connect to values live out of predecessors. for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PE = MBB->pred_end(); PI != PE; ++PI) - if (const VNInfo *PVNI = - LI->getVNInfoAt(LIS.getMBBEndIdx(*PI).getPrevSlot())) + if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI))) EqClass.join(VNI->id, PVNI->id); } else { // Normal value defined by an instruction. Check for two-addr redef. // FIXME: This could be coincidental. Should we really check for a tied // operand constraint? // Note that VNI->def may be a use slot for an early clobber def. 
- if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot())) + if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def)) EqClass.join(VNI->id, UVNI->id); } } diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index b36aab3..b3c28b0 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -1193,8 +1193,8 @@ void MachineVerifier::verifyLiveIntervals() { // Check that VNI is live-out of all predecessors. for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), PE = MFI->pred_end(); PI != PE; ++PI) { - SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot(); - const VNInfo *PVNI = LI.getVNInfoAt(PEnd); + SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); + const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) continue; @@ -1202,7 +1202,7 @@ void MachineVerifier::verifyLiveIntervals() { if (!PVNI) { report("Register not marked live out of predecessor", *PI); *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() - << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at " + << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << " in " << LI << '\n'; continue; } diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index 6c46d53..751d604 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -586,7 +586,7 @@ SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) { void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { assert(OpenIdx && "openIntv not called before overlapIntv"); const VNInfo *ParentVNI = Edit->getParent().getVNInfoAt(Start); - assert(ParentVNI == Edit->getParent().getVNInfoAt(End.getPrevSlot()) && + assert(ParentVNI == Edit->getParent().getVNInfoBefore(End) && "Parent changes value in extended range"); assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) && "Range cannot span basic blocks"); -- cgit v1.1 From 
dc9205d9c29171f1ddcf2de7eb172a583cadbe63 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 14 Nov 2011 04:09:28 +0000 Subject: Add support for ARM halfword load/stores and signed byte loads with negative offsets. rdar://10412592 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144518 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index d330367..432abb5 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -875,8 +875,7 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); else // ARM halfword load/stores and signed byte loads use +/-imm8 offsets. - // FIXME: Negative offsets require special handling. - needsLowering = (Addr.Offset > 255 || Addr.Offset < 0); + needsLowering = (Addr.Offset > 255 || Addr.Offset < -255); break; case MVT::f32: case MVT::f64: @@ -933,18 +932,26 @@ void ARMFastISel::AddLoadStoreOperands(EVT VT, Address &Addr, MIB.addFrameIndex(FI); // ARM halfword load/stores and signed byte loads need an additional operand. - if (useAM3) MIB.addReg(0); - - MIB.addImm(Addr.Offset); + if (useAM3) { + signed Imm = (Addr.Offset < 0) ? (0x100 | -Addr.Offset) : Addr.Offset; + MIB.addReg(0); + MIB.addImm(Imm); + } else { + MIB.addImm(Addr.Offset); + } MIB.addMemOperand(MMO); } else { // Now add the rest of the operands. MIB.addReg(Addr.Base.Reg); // ARM halfword load/stores and signed byte loads need an additional operand. - if (useAM3) MIB.addReg(0); - - MIB.addImm(Addr.Offset); + if (useAM3) { + signed Imm = (Addr.Offset < 0) ? 
(0x100 | -Addr.Offset) : Addr.Offset; + MIB.addReg(0); + MIB.addImm(Imm); + } else { + MIB.addImm(Addr.Offset); + } } AddOptionalDefs(MIB); } -- cgit v1.1 From 3426a3efef546a20adf9e2064f589417ab89442b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Nov 2011 06:46:21 +0000 Subject: Add neverHasSideEffects, mayLoad, and mayStore to many patternless SSE/AVX instructions. Remove MMX check from LowerVECTOR_SHUFFLE since MMX vector types won't go through it anyway. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144522 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 7 ++---- lib/Target/X86/X86InstrSSE.td | 49 +++++++++++++++++++++++++++----------- 2 files changed, 37 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index cf03c30..5d16f47 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6623,7 +6623,6 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); DebugLoc dl = Op.getDebugLoc(); unsigned NumElems = VT.getVectorNumElements(); - bool isMMX = VT.getSizeInBits() == 64; bool V1IsUndef = V1.getOpcode() == ISD::UNDEF; bool V2IsUndef = V2.getOpcode() == ISD::UNDEF; bool V1IsSplat = false; @@ -6632,9 +6631,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); bool OptForSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize); - // Shuffle operations on MMX not supported. - if (isMMX) - return Op; + assert(VT.getSizeInBits() != 64 && "Can't lower MMX shuffles"); // Vector shuffle lowering takes 3 steps: // @@ -6646,7 +6643,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const { // so the shuffle can be broken into other shuffles and the legalizer can // try the lowering again. 
// - // The general ideia is that no vector_shuffle operation should be left to + // The general idea is that no vector_shuffle operation should be left to // be matched during isel, all of them must be converted to a target specific // node here. diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 735a30f..caaf544 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -80,8 +80,9 @@ multiclass sse12_fp_packed opc, string OpcodeStr, SDNode OpNode, multiclass sse12_fp_packed_logical_rm opc, RegisterClass RC, Domain d, string OpcodeStr, X86MemOperand x86memop, list pat_rr, list pat_rm, - bit Is2Addr = 1> { - let isCommutable = 1 in + bit Is2Addr = 1, + bit rr_hasSideEffects = 0> { + let isCommutable = 1, neverHasSideEffects = rr_hasSideEffects in def rr : PI opc, string OpcodeStr, defm V#NAME#PS : sse12_fp_packed_logical_rm, TB, VEX_4V; + (memopv2i64 addr:$src2)))], 0, 1>, TB, VEX_4V; defm V#NAME#PD : sse12_fp_packed_logical_rm opc, string OpcodeStr, /// sse2_fp_unop_s_avx - AVX SSE2 unops in scalar form. multiclass sse2_fp_unop_s_avx opc, string OpcodeStr> { + let neverHasSideEffects = 1 in { def SDr : SDI; + let mayLoad = 1 in def SDm : SDI; + } def SDm_Int : SDI; // PSRADQri doesn't exist in SSE[1-3]. 
- } - def PANDNrr : PDI<0xDF, MRMSrcReg, - (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", []>; + def PANDNrr : PDI<0xDF, MRMSrcReg, + (outs VR128:$dst), (ins VR128:$src1, VR128:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", []>; - def PANDNrm : PDI<0xDF, MRMSrcMem, - (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), - "pandn\t{$src2, $dst|$dst, $src2}", []>; + let mayLoad = 1 in + def PANDNrm : PDI<0xDF, MRMSrcMem, + (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2), + "pandn\t{$src2, $dst|$dst, $src2}", []>; + } } } // Constraints = "$src1 = $dst" @@ -5348,6 +5353,7 @@ let Predicates = [HasAVX] in { //===---------------------------------------------------------------------===// multiclass ssse3_palign { + let neverHasSideEffects = 1 in { def R128rr : SS3AI<0x0F, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), !if(Is2Addr, @@ -5355,6 +5361,7 @@ multiclass ssse3_palign { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), []>, OpSize; + let mayLoad = 1 in def R128rm : SS3AI<0x0F, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), !if(Is2Addr, @@ -5362,19 +5369,23 @@ multiclass ssse3_palign { !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}")), []>, OpSize; + } } multiclass ssse3_palign_y { + let neverHasSideEffects = 1 in { def R256rr : SS3AI<0x0F, MRMSrcReg, (outs VR256:$dst), (ins VR256:$src1, VR256:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; + let mayLoad = 1 in def R256rm : SS3AI<0x0F, MRMSrcMem, (outs VR256:$dst), (ins VR256:$src1, i256mem:$src2, i8imm:$src3), !strconcat(asm, "\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}"), []>, OpSize; + } } let Predicates = [HasAVX] in @@ -5721,6 +5732,7 @@ multiclass SS41I_extract8 opc, string OpcodeStr> { "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), [(set GR32:$dst, (X86pextrb (v16i8 VR128:$src1), 
imm:$src2))]>, OpSize; + let neverHasSideEffects = 1, mayStore = 1 in def mr : SS4AIi8; /// SS41I_extract16 - SSE 4.1 extract 16 bits to memory destination multiclass SS41I_extract16 opc, string OpcodeStr> { + let neverHasSideEffects = 1, mayStore = 1 in def mr : SS4AIi8, Requires<[HasAVX]>; } -let Defs = [XMM0, EFLAGS], Predicates = [HasAVX] in { +let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1, Predicates = [HasAVX] in { def VPCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; + let mayLoad = 1 in def VPCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "vpcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize, VEX; } -let Defs = [XMM0, EFLAGS] in { +let Defs = [XMM0, EFLAGS], neverHasSideEffects = 1 in { def PCMPISTRM128rr : SS42AI<0x62, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; + let mayLoad = 1 in def PCMPISTRM128rm : SS42AI<0x62, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "pcmpistrm\t{$src3, $src2, $src1|$src1, $src2, $src3}", []>, OpSize; @@ -6756,19 +6771,21 @@ let Defs = [EFLAGS], Uses = [EAX, EDX], usesCustomInserter = 1 in { } let Predicates = [HasAVX], - Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { + Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { def VPCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; + let mayLoad = 1 in def VPCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "vpcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize, VEX; } -let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX] in { +let Defs = [XMM0, EFLAGS], Uses = [EAX, EDX], neverHasSideEffects = 1 in { def 
PCMPESTRM128rr : SS42AI<0x60, MRMSrcReg, (outs), (ins VR128:$src1, VR128:$src3, i8imm:$src5), "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; + let mayLoad = 1 in def PCMPESTRM128rm : SS42AI<0x60, MRMSrcMem, (outs), (ins VR128:$src1, i128mem:$src3, i8imm:$src5), "pcmpestrm\t{$src5, $src3, $src1|$src1, $src3, $src5}", []>, OpSize; @@ -7071,12 +7088,14 @@ def AESKEYGENASSIST128rm : AESAI<0xDF, MRMSrcMem, (outs VR128:$dst), //===----------------------------------------------------------------------===// // Carry-less Multiplication instructions +let neverHasSideEffects = 1 in { let Constraints = "$src1 = $dst" in { def PCLMULQDQrr : CLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src1, VR128:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", []>; +let mayLoad = 1 in def PCLMULQDQrm : CLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "pclmulqdq\t{$src3, $src2, $dst|$dst, $src2, $src3}", @@ -7089,10 +7108,12 @@ def VPCLMULQDQrr : AVXCLMULIi8<0x44, MRMSrcReg, (outs VR128:$dst), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; +let mayLoad = 1 in def VPCLMULQDQrm : AVXCLMULIi8<0x44, MRMSrcMem, (outs VR128:$dst), (ins VR128:$src1, i128mem:$src2, i8imm:$src3), "vpclmulqdq\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}", []>; +} multiclass pclmul_alias { -- cgit v1.1 From dcce244dd85ec410c2e0b8ac2b23320df3e3ece9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 14 Nov 2011 08:07:55 +0000 Subject: Add AVX2 version of instructions to load folding tables. Also add a bunch of missing SSE/AVX instructions. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144525 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrInfo.cpp | 141 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 139 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 102911f..d9ffd81 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -456,6 +456,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MOVZX64rr16, X86::MOVZX64rm16, 0 }, { X86::MOVZX64rr32, X86::MOVZX64rm32, 0 }, { X86::MOVZX64rr8, X86::MOVZX64rm8, 0 }, + { X86::PABSBrr128, X86::PABSBrm128, TB_ALIGN_16 }, + { X86::PABSDrr128, X86::PABSDrm128, TB_ALIGN_16 }, + { X86::PABSWrr128, X86::PABSWrm128, TB_ALIGN_16 }, { X86::PSHUFDri, X86::PSHUFDmi, TB_ALIGN_16 }, { X86::PSHUFHWri, X86::PSHUFHWmi, TB_ALIGN_16 }, { X86::PSHUFLWri, X86::PSHUFLWmi, TB_ALIGN_16 }, @@ -508,6 +511,9 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVZDI2PDIrr, X86::VMOVZDI2PDIrm, 0 }, { X86::VMOVZQI2PQIrr, X86::VMOVZQI2PQIrm, 0 }, { X86::VMOVZPQILo2PQIrr,X86::VMOVZPQILo2PQIrm, TB_ALIGN_16 }, + { X86::VPABSBrr128, X86::VPABSBrm128, TB_ALIGN_16 }, + { X86::VPABSDrr128, X86::VPABSDrm128, TB_ALIGN_16 }, + { X86::VPABSWrr128, X86::VPABSWrm128, TB_ALIGN_16 }, { X86::VPSHUFDri, X86::VPSHUFDmi, TB_ALIGN_16 }, { X86::VPSHUFHWri, X86::VPSHUFHWmi, TB_ALIGN_16 }, { X86::VPSHUFLWri, X86::VPSHUFLWmi, TB_ALIGN_16 }, @@ -526,7 +532,14 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMOVAPSYrr, X86::VMOVAPSYrm, TB_ALIGN_32 }, { X86::VMOVDQAYrr, X86::VMOVDQAYrm, TB_ALIGN_16 }, { X86::VMOVUPDYrr, X86::VMOVUPDYrm, 0 }, - { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 } + { X86::VMOVUPSYrr, X86::VMOVUPSYrm, 0 }, + // AVX2 foldable instructions + { X86::VPABSBrr256, X86::VPABSBrm256, TB_ALIGN_16 }, + { X86::VPABSDrr256, X86::VPABSDrm256, TB_ALIGN_16 }, + { X86::VPABSWrr256, X86::VPABSWrm256, TB_ALIGN_16 }, + { X86::VPSHUFDYri, 
X86::VPSHUFDYmi, TB_ALIGN_16 }, + { X86::VPSHUFHWYri, X86::VPSHUFHWYmi, TB_ALIGN_16 }, + { X86::VPSHUFLWYri, X86::VPSHUFLWYmi, TB_ALIGN_16 } }; for (unsigned i = 0, e = array_lengthof(OpTbl1); i != e; ++i) { @@ -652,6 +665,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::MINSDrr_Int, X86::MINSDrm_Int, 0 }, { X86::MINSSrr, X86::MINSSrm, 0 }, { X86::MINSSrr_Int, X86::MINSSrm_Int, 0 }, + { X86::MPSADBWrri, X86::MPSADBWrmi, TB_ALIGN_16 }, { X86::MULPDrr, X86::MULPDrm, TB_ALIGN_16 }, { X86::MULPSrr, X86::MULPSrm, TB_ALIGN_16 }, { X86::MULSDrr, X86::MULSDrm, 0 }, @@ -664,30 +678,44 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::ORPSrr, X86::ORPSrm, TB_ALIGN_16 }, { X86::PACKSSDWrr, X86::PACKSSDWrm, TB_ALIGN_16 }, { X86::PACKSSWBrr, X86::PACKSSWBrm, TB_ALIGN_16 }, + { X86::PACKUSDWrr, X86::PACKUSDWrm, TB_ALIGN_16 }, { X86::PACKUSWBrr, X86::PACKUSWBrm, TB_ALIGN_16 }, { X86::PADDBrr, X86::PADDBrm, TB_ALIGN_16 }, { X86::PADDDrr, X86::PADDDrm, TB_ALIGN_16 }, { X86::PADDQrr, X86::PADDQrm, TB_ALIGN_16 }, { X86::PADDSBrr, X86::PADDSBrm, TB_ALIGN_16 }, { X86::PADDSWrr, X86::PADDSWrm, TB_ALIGN_16 }, + { X86::PADDUSBrr, X86::PADDUSBrm, TB_ALIGN_16 }, + { X86::PADDUSWrr, X86::PADDUSWrm, TB_ALIGN_16 }, { X86::PADDWrr, X86::PADDWrm, TB_ALIGN_16 }, + { X86::PALIGNR128rr, X86::PALIGNR128rm, TB_ALIGN_16 }, { X86::PANDNrr, X86::PANDNrm, TB_ALIGN_16 }, { X86::PANDrr, X86::PANDrm, TB_ALIGN_16 }, { X86::PAVGBrr, X86::PAVGBrm, TB_ALIGN_16 }, { X86::PAVGWrr, X86::PAVGWrm, TB_ALIGN_16 }, { X86::PCMPEQBrr, X86::PCMPEQBrm, TB_ALIGN_16 }, { X86::PCMPEQDrr, X86::PCMPEQDrm, TB_ALIGN_16 }, + { X86::PCMPEQQrr, X86::PCMPEQQrm, TB_ALIGN_16 }, { X86::PCMPEQWrr, X86::PCMPEQWrm, TB_ALIGN_16 }, { X86::PCMPGTBrr, X86::PCMPGTBrm, TB_ALIGN_16 }, { X86::PCMPGTDrr, X86::PCMPGTDrm, TB_ALIGN_16 }, + { X86::PCMPGTQrr, X86::PCMPGTQrm, TB_ALIGN_16 }, { X86::PCMPGTWrr, X86::PCMPGTWrm, TB_ALIGN_16 }, + { X86::PHADDDrr128, X86::PHADDDrm128, TB_ALIGN_16 }, + { X86::PHADDWrr128, X86::PHADDWrm128, 
TB_ALIGN_16 }, + { X86::PHADDSWrr128, X86::PHADDSWrm128, TB_ALIGN_16 }, + { X86::PHSUBDrr128, X86::PHSUBDrm128, TB_ALIGN_16 }, + { X86::PHSUBSWrr128, X86::PHSUBSWrm128, TB_ALIGN_16 }, + { X86::PHSUBWrr128, X86::PHSUBWrm128, TB_ALIGN_16 }, { X86::PINSRWrri, X86::PINSRWrmi, TB_ALIGN_16 }, + { X86::PMADDUBSWrr128, X86::PMADDUBSWrm128, TB_ALIGN_16 }, { X86::PMADDWDrr, X86::PMADDWDrm, TB_ALIGN_16 }, { X86::PMAXSWrr, X86::PMAXSWrm, TB_ALIGN_16 }, { X86::PMAXUBrr, X86::PMAXUBrm, TB_ALIGN_16 }, { X86::PMINSWrr, X86::PMINSWrm, TB_ALIGN_16 }, { X86::PMINUBrr, X86::PMINUBrm, TB_ALIGN_16 }, { X86::PMULDQrr, X86::PMULDQrm, TB_ALIGN_16 }, + { X86::PMULHRSWrr128, X86::PMULHRSWrm128, TB_ALIGN_16 }, { X86::PMULHUWrr, X86::PMULHUWrm, TB_ALIGN_16 }, { X86::PMULHWrr, X86::PMULHWrm, TB_ALIGN_16 }, { X86::PMULLDrr, X86::PMULLDrm, TB_ALIGN_16 }, @@ -695,6 +723,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::PMULUDQrr, X86::PMULUDQrm, TB_ALIGN_16 }, { X86::PORrr, X86::PORrm, TB_ALIGN_16 }, { X86::PSADBWrr, X86::PSADBWrm, TB_ALIGN_16 }, + { X86::PSHUFBrr128, X86::PSHUFBrm128, TB_ALIGN_16 }, + { X86::PSIGNBrr128, X86::PSIGNBrm128, TB_ALIGN_16 }, + { X86::PSIGNWrr128, X86::PSIGNWrm128, TB_ALIGN_16 }, + { X86::PSIGNDrr128, X86::PSIGNDrm128, TB_ALIGN_16 }, { X86::PSLLDrr, X86::PSLLDrm, TB_ALIGN_16 }, { X86::PSLLQrr, X86::PSLLQrm, TB_ALIGN_16 }, { X86::PSLLWrr, X86::PSLLWrm, TB_ALIGN_16 }, @@ -816,6 +848,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VMINSDrr_Int, X86::VMINSDrm_Int, 0 }, { X86::VMINSSrr, X86::VMINSSrm, 0 }, { X86::VMINSSrr_Int, X86::VMINSSrm_Int, 0 }, + { X86::VMPSADBWrri, X86::VMPSADBWrmi, TB_ALIGN_16 }, { X86::VMULPDrr, X86::VMULPDrm, TB_ALIGN_16 }, { X86::VMULPSrr, X86::VMULPSrm, TB_ALIGN_16 }, { X86::VMULSDrr, X86::VMULSDrm, 0 }, @@ -824,28 +857,44 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VORPSrr, X86::VORPSrm, TB_ALIGN_16 }, { X86::VPACKSSDWrr, X86::VPACKSSDWrm, TB_ALIGN_16 }, { X86::VPACKSSWBrr, X86::VPACKSSWBrm, TB_ALIGN_16 
}, + { X86::VPACKUSDWrr, X86::VPACKUSDWrm, TB_ALIGN_16 }, { X86::VPACKUSWBrr, X86::VPACKUSWBrm, TB_ALIGN_16 }, { X86::VPADDBrr, X86::VPADDBrm, TB_ALIGN_16 }, { X86::VPADDDrr, X86::VPADDDrm, TB_ALIGN_16 }, { X86::VPADDQrr, X86::VPADDQrm, TB_ALIGN_16 }, { X86::VPADDSBrr, X86::VPADDSBrm, TB_ALIGN_16 }, { X86::VPADDSWrr, X86::VPADDSWrm, TB_ALIGN_16 }, + { X86::VPADDUSBrr, X86::VPADDUSBrm, TB_ALIGN_16 }, + { X86::VPADDUSWrr, X86::VPADDUSWrm, TB_ALIGN_16 }, { X86::VPADDWrr, X86::VPADDWrm, TB_ALIGN_16 }, + { X86::VPALIGNR128rr, X86::VPALIGNR128rm, TB_ALIGN_16 }, { X86::VPANDNrr, X86::VPANDNrm, TB_ALIGN_16 }, { X86::VPANDrr, X86::VPANDrm, TB_ALIGN_16 }, + { X86::VPAVGBrr, X86::VPAVGBrm, TB_ALIGN_16 }, + { X86::VPAVGWrr, X86::VPAVGWrm, TB_ALIGN_16 }, { X86::VPCMPEQBrr, X86::VPCMPEQBrm, TB_ALIGN_16 }, { X86::VPCMPEQDrr, X86::VPCMPEQDrm, TB_ALIGN_16 }, + { X86::VPCMPEQQrr, X86::VPCMPEQQrm, TB_ALIGN_16 }, { X86::VPCMPEQWrr, X86::VPCMPEQWrm, TB_ALIGN_16 }, { X86::VPCMPGTBrr, X86::VPCMPGTBrm, TB_ALIGN_16 }, { X86::VPCMPGTDrr, X86::VPCMPGTDrm, TB_ALIGN_16 }, + { X86::VPCMPGTQrr, X86::VPCMPGTQrm, TB_ALIGN_16 }, { X86::VPCMPGTWrr, X86::VPCMPGTWrm, TB_ALIGN_16 }, + { X86::VPHADDDrr128, X86::VPHADDDrm128, TB_ALIGN_16 }, + { X86::VPHADDSWrr128, X86::VPHADDSWrm128, TB_ALIGN_16 }, + { X86::VPHADDWrr128, X86::VPHADDWrm128, TB_ALIGN_16 }, + { X86::VPHSUBDrr128, X86::VPHSUBDrm128, TB_ALIGN_16 }, + { X86::VPHSUBSWrr128, X86::VPHSUBSWrm128, TB_ALIGN_16 }, + { X86::VPHSUBWrr128, X86::VPHSUBWrm128, TB_ALIGN_16 }, { X86::VPINSRWrri, X86::VPINSRWrmi, TB_ALIGN_16 }, + { X86::VPMADDUBSWrr128, X86::VPMADDUBSWrm128, TB_ALIGN_16 }, { X86::VPMADDWDrr, X86::VPMADDWDrm, TB_ALIGN_16 }, { X86::VPMAXSWrr, X86::VPMAXSWrm, TB_ALIGN_16 }, { X86::VPMAXUBrr, X86::VPMAXUBrm, TB_ALIGN_16 }, { X86::VPMINSWrr, X86::VPMINSWrm, TB_ALIGN_16 }, { X86::VPMINUBrr, X86::VPMINUBrm, TB_ALIGN_16 }, { X86::VPMULDQrr, X86::VPMULDQrm, TB_ALIGN_16 }, + { X86::VPMULHRSWrr128, X86::VPMULHRSWrm128, TB_ALIGN_16 }, { X86::VPMULHUWrr, 
X86::VPMULHUWrm, TB_ALIGN_16 }, { X86::VPMULHWrr, X86::VPMULHWrm, TB_ALIGN_16 }, { X86::VPMULLDrr, X86::VPMULLDrm, TB_ALIGN_16 }, @@ -853,6 +902,10 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VPMULUDQrr, X86::VPMULUDQrm, TB_ALIGN_16 }, { X86::VPORrr, X86::VPORrm, TB_ALIGN_16 }, { X86::VPSADBWrr, X86::VPSADBWrm, TB_ALIGN_16 }, + { X86::VPSHUFBrr128, X86::VPSHUFBrm128, TB_ALIGN_16 }, + { X86::VPSIGNBrr128, X86::VPSIGNBrm128, TB_ALIGN_16 }, + { X86::VPSIGNWrr128, X86::VPSIGNWrm128, TB_ALIGN_16 }, + { X86::VPSIGNDrr128, X86::VPSIGNDrm128, TB_ALIGN_16 }, { X86::VPSLLDrr, X86::VPSLLDrm, TB_ALIGN_16 }, { X86::VPSLLQrr, X86::VPSLLQrm, TB_ALIGN_16 }, { X86::VPSLLWrr, X86::VPSLLWrm, TB_ALIGN_16 }, @@ -886,7 +939,91 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm) { X86::VUNPCKLPDrr, X86::VUNPCKLPDrm, TB_ALIGN_16 }, { X86::VUNPCKLPSrr, X86::VUNPCKLPSrm, TB_ALIGN_16 }, { X86::VXORPDrr, X86::VXORPDrm, TB_ALIGN_16 }, - { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 } + { X86::VXORPSrr, X86::VXORPSrm, TB_ALIGN_16 }, + // AVX2 foldable instructions + { X86::VPACKSSDWYrr, X86::VPACKSSDWYrm, TB_ALIGN_16 }, + { X86::VPACKSSWBYrr, X86::VPACKSSWBYrm, TB_ALIGN_16 }, + { X86::VPACKUSDWYrr, X86::VPACKUSDWYrm, TB_ALIGN_16 }, + { X86::VPACKUSWBYrr, X86::VPACKUSWBYrm, TB_ALIGN_16 }, + { X86::VPADDBYrr, X86::VPADDBYrm, TB_ALIGN_16 }, + { X86::VPADDDYrr, X86::VPADDDYrm, TB_ALIGN_16 }, + { X86::VPADDQYrr, X86::VPADDQYrm, TB_ALIGN_16 }, + { X86::VPADDSBYrr, X86::VPADDSBYrm, TB_ALIGN_16 }, + { X86::VPADDSWYrr, X86::VPADDSWYrm, TB_ALIGN_16 }, + { X86::VPADDUSBYrr, X86::VPADDUSBYrm, TB_ALIGN_16 }, + { X86::VPADDUSWYrr, X86::VPADDUSWYrm, TB_ALIGN_16 }, + { X86::VPADDWYrr, X86::VPADDWYrm, TB_ALIGN_16 }, + { X86::VPALIGNR256rr, X86::VPALIGNR256rm, TB_ALIGN_16 }, + { X86::VPANDNYrr, X86::VPANDNYrm, TB_ALIGN_16 }, + { X86::VPANDYrr, X86::VPANDYrm, TB_ALIGN_16 }, + { X86::VPAVGBYrr, X86::VPAVGBYrm, TB_ALIGN_16 }, + { X86::VPAVGWYrr, X86::VPAVGWYrm, TB_ALIGN_16 }, + { X86::VPCMPEQBYrr, 
X86::VPCMPEQBYrm, TB_ALIGN_16 }, + { X86::VPCMPEQDYrr, X86::VPCMPEQDYrm, TB_ALIGN_16 }, + { X86::VPCMPEQQYrr, X86::VPCMPEQQYrm, TB_ALIGN_16 }, + { X86::VPCMPEQWYrr, X86::VPCMPEQWYrm, TB_ALIGN_16 }, + { X86::VPCMPGTBYrr, X86::VPCMPGTBYrm, TB_ALIGN_16 }, + { X86::VPCMPGTDYrr, X86::VPCMPGTDYrm, TB_ALIGN_16 }, + { X86::VPCMPGTQYrr, X86::VPCMPGTQYrm, TB_ALIGN_16 }, + { X86::VPCMPGTWYrr, X86::VPCMPGTWYrm, TB_ALIGN_16 }, + { X86::VPHADDDrr256, X86::VPHADDDrm256, TB_ALIGN_16 }, + { X86::VPHADDSWrr256, X86::VPHADDSWrm256, TB_ALIGN_16 }, + { X86::VPHADDWrr256, X86::VPHADDWrm256, TB_ALIGN_16 }, + { X86::VPHSUBDrr256, X86::VPHSUBDrm256, TB_ALIGN_16 }, + { X86::VPHSUBSWrr256, X86::VPHSUBSWrm256, TB_ALIGN_16 }, + { X86::VPHSUBWrr256, X86::VPHSUBWrm256, TB_ALIGN_16 }, + { X86::VPMADDUBSWrr256, X86::VPMADDUBSWrm256, TB_ALIGN_16 }, + { X86::VPMADDWDYrr, X86::VPMADDWDYrm, TB_ALIGN_16 }, + { X86::VPMAXSWYrr, X86::VPMAXSWYrm, TB_ALIGN_16 }, + { X86::VPMAXUBYrr, X86::VPMAXUBYrm, TB_ALIGN_16 }, + { X86::VPMINSWYrr, X86::VPMINSWYrm, TB_ALIGN_16 }, + { X86::VPMINUBYrr, X86::VPMINUBYrm, TB_ALIGN_16 }, + { X86::VMPSADBWYrri, X86::VMPSADBWYrmi, TB_ALIGN_16 }, + { X86::VPMULDQYrr, X86::VPMULDQYrm, TB_ALIGN_16 }, + { X86::VPMULHRSWrr256, X86::VPMULHRSWrm256, TB_ALIGN_16 }, + { X86::VPMULHUWYrr, X86::VPMULHUWYrm, TB_ALIGN_16 }, + { X86::VPMULHWYrr, X86::VPMULHWYrm, TB_ALIGN_16 }, + { X86::VPMULLDYrr, X86::VPMULLDYrm, TB_ALIGN_16 }, + { X86::VPMULLWYrr, X86::VPMULLWYrm, TB_ALIGN_16 }, + { X86::VPMULUDQYrr, X86::VPMULUDQYrm, TB_ALIGN_16 }, + { X86::VPORYrr, X86::VPORYrm, TB_ALIGN_16 }, + { X86::VPSADBWYrr, X86::VPSADBWYrm, TB_ALIGN_16 }, + { X86::VPSHUFBrr256, X86::VPSHUFBrm256, TB_ALIGN_16 }, + { X86::VPSIGNBrr256, X86::VPSIGNBrm256, TB_ALIGN_16 }, + { X86::VPSIGNWrr256, X86::VPSIGNWrm256, TB_ALIGN_16 }, + { X86::VPSIGNDrr256, X86::VPSIGNDrm256, TB_ALIGN_16 }, + { X86::VPSLLDYrr, X86::VPSLLDYrm, TB_ALIGN_16 }, + { X86::VPSLLQYrr, X86::VPSLLQYrm, TB_ALIGN_16 }, + { X86::VPSLLWYrr, X86::VPSLLWYrm, 
TB_ALIGN_16 }, + { X86::VPSLLVDrr, X86::VPSLLVDrm, TB_ALIGN_16 }, + { X86::VPSLLVDYrr, X86::VPSLLVDYrm, TB_ALIGN_16 }, + { X86::VPSLLVQrr, X86::VPSLLVQrm, TB_ALIGN_16 }, + { X86::VPSLLVQYrr, X86::VPSLLVQYrm, TB_ALIGN_16 }, + { X86::VPSRADYrr, X86::VPSRADYrm, TB_ALIGN_16 }, + { X86::VPSRAWYrr, X86::VPSRAWYrm, TB_ALIGN_16 }, + { X86::VPSRAVDrr, X86::VPSRAVDrm, TB_ALIGN_16 }, + { X86::VPSRAVDYrr, X86::VPSRAVDYrm, TB_ALIGN_16 }, + { X86::VPSRLDYrr, X86::VPSRLDYrm, TB_ALIGN_16 }, + { X86::VPSRLQYrr, X86::VPSRLQYrm, TB_ALIGN_16 }, + { X86::VPSRLWYrr, X86::VPSRLWYrm, TB_ALIGN_16 }, + { X86::VPSRLVDrr, X86::VPSRLVDrm, TB_ALIGN_16 }, + { X86::VPSRLVDYrr, X86::VPSRLVDYrm, TB_ALIGN_16 }, + { X86::VPSRLVQrr, X86::VPSRLVQrm, TB_ALIGN_16 }, + { X86::VPSRLVQYrr, X86::VPSRLVQYrm, TB_ALIGN_16 }, + { X86::VPSUBBYrr, X86::VPSUBBYrm, TB_ALIGN_16 }, + { X86::VPSUBDYrr, X86::VPSUBDYrm, TB_ALIGN_16 }, + { X86::VPSUBSBYrr, X86::VPSUBSBYrm, TB_ALIGN_16 }, + { X86::VPSUBSWYrr, X86::VPSUBSWYrm, TB_ALIGN_16 }, + { X86::VPSUBWYrr, X86::VPSUBWYrm, TB_ALIGN_16 }, + { X86::VPUNPCKHBWYrr, X86::VPUNPCKHBWYrm, TB_ALIGN_16 }, + { X86::VPUNPCKHDQYrr, X86::VPUNPCKHDQYrm, TB_ALIGN_16 }, + { X86::VPUNPCKHQDQYrr, X86::VPUNPCKHQDQYrm, TB_ALIGN_16 }, + { X86::VPUNPCKHWDYrr, X86::VPUNPCKHWDYrm, TB_ALIGN_16 }, + { X86::VPUNPCKLBWYrr, X86::VPUNPCKLBWYrm, TB_ALIGN_16 }, + { X86::VPUNPCKLDQYrr, X86::VPUNPCKLDQYrm, TB_ALIGN_16 }, + { X86::VPUNPCKLQDQYrr, X86::VPUNPCKLQDQYrm, TB_ALIGN_16 }, + { X86::VPUNPCKLWDYrr, X86::VPUNPCKLWDYrm, TB_ALIGN_16 }, + { X86::VPXORYrr, X86::VPXORYrm, TB_ALIGN_16 }, // FIXME: add AVX 256-bit foldable instructions }; -- cgit v1.1 From 2770c141856eefda6a192622bf5fd5b06bef4963 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 14 Nov 2011 08:50:16 +0000 Subject: Fix an overflow bug in MachineBranchProbabilityInfo. This pass relied on the sum of the edge weights not overflowing uint32, and crashed when they did. 
This is generally safe as BranchProbabilityInfo tries to provide this guarantee. However, the CFG can get modified during codegen in a way that grows the *sum* of the edge weights. This doesn't seem unreasonable (imagine just adding more blocks all with the default weight of 16), but it is hard to come up with a case that actually triggers 32-bit overflow. Fortunately, the single-source GCC build is good at this. The solution isn't very pretty, but it's no worse than the previous code. We're already summing all of the edge weights on each query, so we can sum them, check for an overflow, compute a scale, and sum them again. I've included a *greatly* reduced test case out of the GCC source that triggers it. It's a pretty lame test, as it clearly is just barely triggering the overflow. I'd like to have something that is much more definitive, but I don't understand the fundamental pattern that triggers an explosion in the edge weight sums. The buggy code is duplicated within this file. I'll collapse the copies into a single implementation in a subsequent commit.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144526 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBranchProbabilityInfo.cpp | 36 ++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 9874037..0037d52 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -27,19 +27,34 @@ INITIALIZE_PASS_END(MachineBranchProbabilityInfo, "machine-branch-prob", char MachineBranchProbabilityInfo::ID = 0; uint32_t MachineBranchProbabilityInfo:: -getSumForBlock(MachineBasicBlock *MBB) const { - uint32_t Sum = 0; - +getSumForBlock(MachineBasicBlock *MBB, uint32_t &Scale) const { + // First we compute the sum with 64-bits of precision, ensuring that cannot + // overflow by bounding the number of weights considered. Hopefully no one + // actually needs 2^32 successors. + assert(MBB->succ_size() < UINT32_MAX); + uint64_t Sum = 0; + Scale = 1; for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - MachineBasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(MBB, Succ); - uint32_t PrevSum = Sum; - + uint32_t Weight = getEdgeWeight(MBB, *I); Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; } + // If the computed sum fits in 32-bits, we're done. + if (Sum <= UINT32_MAX) + return Sum; + + // Otherwise, compute the scale necessary to cause the weights to fit, and + // re-sum with that scale applied. 
+ assert((Sum / UINT32_MAX) < UINT32_MAX); + Scale = (Sum / UINT32_MAX) + 1; + Sum = 0; + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + uint32_t Weight = getEdgeWeight(MBB, *I); + Sum += Weight / Scale; + } + assert(Sum <= UINT32_MAX); return Sum; } @@ -89,8 +104,9 @@ MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { BranchProbability MachineBranchProbabilityInfo::getEdgeProbability(MachineBasicBlock *Src, MachineBasicBlock *Dst) const { - uint32_t N = getEdgeWeight(Src, Dst); - uint32_t D = getSumForBlock(Src); + uint32_t Scale = 1; + uint32_t D = getSumForBlock(Src, Scale); + uint32_t N = getEdgeWeight(Src, Dst) / Scale; return BranchProbability(N, D); } -- cgit v1.1 From c4e15628255b24cb17138404abe3d94bde811e25 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 14 Nov 2011 08:55:59 +0000 Subject: Reuse the logic in getEdgeProbability within getHotSucc in order to correctly handle blocks whose successor weights sum to more than UINT32_MAX. This is slightly less efficient, but the entire thing is already linear on the number of successors. Calling it within any hot routine is a mistake, and indeed no one is calling it. It also simplifies the code. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144527 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBranchProbabilityInfo.cpp | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 0037d52..e3cfa9e 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -76,26 +76,18 @@ bool MachineBranchProbabilityInfo::isEdgeHot(MachineBasicBlock *Src, MachineBasicBlock * MachineBranchProbabilityInfo::getHotSucc(MachineBasicBlock *MBB) const { - uint32_t Sum = 0; uint32_t MaxWeight = 0; MachineBasicBlock *MaxSucc = 0; - for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), E = MBB->succ_end(); I != E; ++I) { - MachineBasicBlock *Succ = *I; - uint32_t Weight = getEdgeWeight(MBB, Succ); - uint32_t PrevSum = Sum; - - Sum += Weight; - assert(Sum > PrevSum); (void) PrevSum; - + uint32_t Weight = getEdgeWeight(MBB, *I); if (Weight > MaxWeight) { MaxWeight = Weight; - MaxSucc = Succ; + MaxSucc = *I; } } - if (BranchProbability(MaxWeight, Sum) >= BranchProbability(4, 5)) + if (getEdgeProbability(MBB, MaxSucc) >= BranchProbability(4, 5)) return MaxSucc; return 0; -- cgit v1.1 From 340d596509129de8c3fa9dbe4184a2b148b78757 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 14 Nov 2011 09:12:57 +0000 Subject: Under the hood, MBPI is doing a linear scan of every successor every time it is queried to compute the probability of a single successor. This makes computing the probability of every successor of a block in sequence... really really slow. ;] This switches to a linear walk of the successors rather than a quadratic one. One of several quadratic behaviors slowing this pass down. I'm not really thrilled with moving the sum code into the public interface of MBPI, but I don't (at the moment) have ideas for a better interface. 
The direction I'm thinking in for a better interface is to have MBPI actually retain much more state and make *all* of these queries cheap. That's a lot of work, and would require invasive changes. Until then, this seems like the least bad (i.e., least quadratic) solution. Suggestions welcome. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144530 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index ca17ad0..bd50ac3 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -334,7 +334,15 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( const BranchProbability HotProb(4, 5); // 80% MachineBasicBlock *BestSucc = 0; - BranchProbability BestProb = BranchProbability::getZero(); + // FIXME: Due to the performance of the probability and weight routines in + // the MBPI analysis, we manually compute probabilities using the edge + // weights. This is suboptimal as it means that the somewhat subtle + // definition of edge weight semantics is encoded here as well. We should + // improve the MBPI interface to effeciently support query patterns such as + // this. + uint32_t BestWeight = 0; + uint32_t WeightScale = 0; + uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale); DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n"); for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); @@ -347,7 +355,8 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( continue; } - BranchProbability SuccProb = MBPI->getEdgeProbability(BB, *SI); + uint32_t SuccWeight = MBPI->getEdgeWeight(BB, *SI); + BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight); // Only consider successors which are either "hot", or wouldn't violate // any CFG constraints.
@@ -360,10 +369,10 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( << " (prob)" << (SuccChain.LoopPredecessors != 0 ? " (CFG break)" : "") << "\n"); - if (BestSucc && BestProb >= SuccProb) + if (BestSucc && BestWeight >= SuccWeight) continue; BestSucc = *SI; - BestProb = SuccProb; + BestWeight = SuccWeight; } return BestSucc; } -- cgit v1.1 From fa97658b1c71f747cfe0f3e1f1bcbd86d7fa9f75 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 14 Nov 2011 09:46:33 +0000 Subject: Begin chipping away at one of the biggest quadratic-ish behaviors in this pass. We're leaving already merged blocks on the worklist, and scanning them again and again only to determine each time through that indeed they aren't viable. We can instead remove them once we're going to have to scan the worklist. This is the easy way to implement removing them. If this remains on the profile (as I somewhat suspect it will), we can get a lot more clever here, as the worklist's order is essentially irrelevant. We can use swapping and fold the two loops to reduce overhead even when there are many blocks on the worklist but only a few of them are removed. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144531 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index bd50ac3..6c8390f 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -377,6 +377,23 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( return BestSucc; } +namespace { +/// \brief Predicate struct to detect blocks already placed. 
+class IsBlockPlaced { + const BlockChain &PlacedChain; + const BlockToChainMapType &BlockToChain; + +public: + IsBlockPlaced(const BlockChain &PlacedChain, + const BlockToChainMapType &BlockToChain) + : PlacedChain(PlacedChain), BlockToChain(BlockToChain) {} + + bool operator()(MachineBasicBlock *BB) const { + return BlockToChain.lookup(BB) == &PlacedChain; + } +}; +} + /// \brief Select the best block from a worklist. /// /// This looks through the provided worklist as a list of candidate basic @@ -390,13 +407,20 @@ MachineBasicBlock *MachineBlockPlacement::selectBestSuccessor( MachineBasicBlock *MachineBlockPlacement::selectBestCandidateBlock( BlockChain &Chain, SmallVectorImpl &WorkList, const BlockFilterSet *BlockFilter) { + // Once we need to walk the worklist looking for a candidate, cleanup the + // worklist of already placed entries. + // FIXME: If this shows up on profiles, it could be folded (at the cost of + // some code complexity) into the loop below. + WorkList.erase(std::remove_if(WorkList.begin(), WorkList.end(), + IsBlockPlaced(Chain, BlockToChain)), + WorkList.end()); + MachineBasicBlock *BestBlock = 0; BlockFrequency BestFreq; for (SmallVectorImpl::iterator WBI = WorkList.begin(), WBE = WorkList.end(); WBI != WBE; ++WBI) { - if (BlockFilter && !BlockFilter->count(*WBI)) - continue; + assert(!BlockFilter || BlockFilter->count(*WBI)); BlockChain &SuccChain = *BlockToChain[*WBI]; if (&SuccChain == &Chain) { DEBUG(dbgs() << " " << getBlockName(*WBI) -- cgit v1.1 From bc83fcd9bd95f8eff83cd5ad77b0aa5312d8a6a5 Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 14 Nov 2011 10:55:53 +0000 Subject: Remove an over-eager assert that was firing on one of the ARM regression tests when I forcibly enabled block placement. It is apparently possible for an unanalyzable block to fall through to a non-loop block.
I don't actually believe this is correct; I believe that 'canFallThrough' is returning true needlessly for the code construct, and I've left a bit of a FIXME on the verification code to try to track down why this is coming up. Anyways, removing the assert doesn't degrade the correctness of the algorithm. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144532 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 6c8390f..412d0b1 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -492,9 +492,9 @@ void MachineBlockPlacement::buildChain( if (TII->AnalyzeBranch(*BB, TBB, FBB, Cond) && BB->canFallThrough()) { MachineFunction::iterator I(BB), NextI(llvm::next(I)); // Ensure that the layout successor is a viable block, as we know that - // fallthrough is a possibility. + // fallthrough is a possibility. Note that this may not be a valid block + // in the loop, but we allow that to cope with degenerate situations. assert(NextI != BB->getParent()->end()); - assert(!BlockFilter || BlockFilter->count(NextI)); BestSucc = NextI; } @@ -594,7 +594,10 @@ void MachineBlockPlacement::buildLoopChains(MachineFunction &F, for (BlockChain::iterator BCI = LoopChain.begin(), BCE = LoopChain.end(); BCI != BCE; ++BCI) if (!LoopBlockSet.erase(*BCI)) { - BadLoop = true; + // We don't mark the loop as bad here because there are real situations + // where this can occur. For example, with an unanalyzable fallthrough + // from a loop block to a non-loop block. + // FIXME: Such constructs shouldn't exist. Track them down and fix them.
dbgs() << "Loop chain contains a block not contained by the loop!\n" << " Loop header: " << getBlockName(*L.block_begin()) << "\n" << " Chain header: " << getBlockName(*LoopChain.begin()) << "\n" -- cgit v1.1 From f5e47ac596c698f1659c86bdad3a60056e68439c Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 14 Nov 2011 10:57:23 +0000 Subject: It helps to deallocate memory as well as allocate it. =] This actually cleans up all the chains allocated during the processing of each function so that for very large inputs we don't just grow memory usage without bound. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144533 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/MachineBlockPlacement.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index 412d0b1..304f167 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -766,6 +766,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &F) { AlignLoops(F); BlockToChain.clear(); + ChainAllocator.DestroyAll(); // We always return true as we have no way to track whether the final order // differs from the original order. -- cgit v1.1 From aa5354c3ba93032dcc76e8c105575f31196084f1 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 14 Nov 2011 17:22:45 +0000 Subject: Make headers standalone, move a virtual method out of line. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144536 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Transforms/Utils/SSAUpdater.cpp | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'lib') diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp index fa8061c..e60a41b 100644 --- a/lib/Transforms/Utils/SSAUpdater.cpp +++ b/lib/Transforms/Utils/SSAUpdater.cpp @@ -518,3 +518,10 @@ run(const SmallVectorImpl &Insts) const { User->eraseFromParent(); } } + +bool +LoadAndStorePromoter::isInstInList(Instruction *I, + const SmallVectorImpl &Insts) + const { + return std::find(Insts.begin(), Insts.end(), I) != Insts.end(); +} -- cgit v1.1 From 430052b084de7ab4eb6162b9f1a6a16bfb2a80ad Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 14 Nov 2011 17:52:47 +0000 Subject: Tidy up. 80 column. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144538 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp index 6974b11..1d66d12 100644 --- a/lib/Target/ARM/AsmParser/ARMAsmParser.cpp +++ b/lib/Target/ARM/AsmParser/ARMAsmParser.cpp @@ -714,7 +714,7 @@ public: bool isBitfield() const { return Kind == k_BitfieldDescriptor; } bool isPostIdxRegShifted() const { return Kind == k_PostIndexRegister; } bool isPostIdxReg() const { - return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy == ARM_AM::no_shift; + return Kind == k_PostIndexRegister && PostIdxReg.ShiftTy ==ARM_AM::no_shift; } bool isMemNoOffset(bool alignOK = false) const { if (!isMemory()) @@ -1101,7 +1101,8 @@ public: void addRegShiftedRegOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); - assert(isRegShiftedReg() && "addRegShiftedRegOperands() on non RegShiftedReg!"); + assert(isRegShiftedReg() && + 
"addRegShiftedRegOperands() on non RegShiftedReg!"); Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.SrcReg)); Inst.addOperand(MCOperand::CreateReg(RegShiftedReg.ShiftReg)); Inst.addOperand(MCOperand::CreateImm( @@ -1110,7 +1111,8 @@ public: void addRegShiftedImmOperands(MCInst &Inst, unsigned N) const { assert(N == 2 && "Invalid number of operands!"); - assert(isRegShiftedImm() && "addRegShiftedImmOperands() on non RegShiftedImm!"); + assert(isRegShiftedImm() && + "addRegShiftedImmOperands() on non RegShiftedImm!"); Inst.addOperand(MCOperand::CreateReg(RegShiftedImm.SrcReg)); Inst.addOperand(MCOperand::CreateImm( ARM_AM::getSORegOpc(RegShiftedImm.ShiftTy, RegShiftedImm.ShiftImm))); @@ -1426,8 +1428,9 @@ public: void addMemRegOffsetOperands(MCInst &Inst, unsigned N) const { assert(N == 3 && "Invalid number of operands!"); - unsigned Val = ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, - Memory.ShiftImm, Memory.ShiftType); + unsigned Val = + ARM_AM::getAM2Opc(Memory.isNegative ? ARM_AM::sub : ARM_AM::add, + Memory.ShiftImm, Memory.ShiftType); Inst.addOperand(MCOperand::CreateReg(Memory.BaseRegNum)); Inst.addOperand(MCOperand::CreateReg(Memory.OffsetRegNum)); Inst.addOperand(MCOperand::CreateImm(Val)); -- cgit v1.1 From 96b685b4aadef3c273fead1ea8788e1452d7bbea Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Mon, 14 Nov 2011 18:21:07 +0000 Subject: Disable generation of compact unwind encodings. This still seems to be causing some failures. It needs more testing before it gets enabled again. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144543 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/MC/MCDwarf.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/MC/MCDwarf.cpp b/lib/MC/MCDwarf.cpp index c274b9d..d2bbd7d 100644 --- a/lib/MC/MCDwarf.cpp +++ b/lib/MC/MCDwarf.cpp @@ -1011,7 +1011,8 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, // Emit the compact unwind info if available. 
// FIXME: This emits both the compact unwind and the old CIE/FDE // information. Only one of those is needed. - if (IsEH && MOFI->getCompactUnwindSection()) + // FIXME: Disable. This seems to still be causing failures. + if (false && IsEH && MOFI->getCompactUnwindSection()) for (unsigned i = 0, n = Streamer.getNumFrameInfos(); i < n; ++i) { const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); if (Frame.CompactUnwindEncoding) -- cgit v1.1 From f054e198197122011fc80b673f35333bc3e58c98 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Mon, 14 Nov 2011 18:45:38 +0000 Subject: Fix early-clobber handling in shrinkToUses. I broke this in r144515, it affected most ARM testers. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144547 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/LiveIntervalAnalysis.cpp | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 2ec2cbc..edcfebe 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -659,7 +659,9 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); - VNInfo *VNI = li->getVNInfoAt(Idx.getBaseIndex()); + // Note: This intentionally picks up the wrong VNI in case of an EC redef. + // See below. + VNInfo *VNI = li->getVNInfoBefore(Idx); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is // no live value. It is likely caused by a target getting flags @@ -669,10 +671,11 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, << *li << '\n'); continue; } - if (VNI->def == Idx.getRegSlot(true)) { - // Special case: An early-clobber tied operand reads and writes the - // register one slot early. 
- Idx = Idx.getRegSlot(true); + // Special case: An early-clobber tied operand reads and writes the + // register one slot early. The getVNInfoBefore call above would have + // picked up the value defined by UseMI. Adjust the kill slot and value. + if (SlotIndex::isSameInstr(VNI->def, Idx)) { + Idx = VNI->def; VNI = li->getVNInfoBefore(Idx); assert(VNI && "Early-clobber tied value not available"); } @@ -687,13 +690,6 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, if (VNI->isUnused()) continue; NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI)); - - // A use tied to an early-clobber def ends at the load slot and isn't caught - // above. Catch it here instead. This probably only ever happens for inline - // assembly. - if (VNI->def.isEarlyClobber()) - if (VNInfo *UVNI = li->getVNInfoBefore(VNI->def)) - WorkList.push_back(std::make_pair(VNI->def, UVNI)); } // Keep track of the PHIs that are in use. -- cgit v1.1 From 47a4ab84fe2535e6d8d77456c08eb52de20caaee Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 14 Nov 2011 18:56:20 +0000 Subject: Remove variable that keeps the size of area used to save byval or variable argument registers on the callee's stack frame, along with functions that set and get it. It is not necessary to add the size of this area when computing stack size in emitPrologue, since it has already been accounted for in PEI::calculateFrameObjectOffsets. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144549 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsFrameLowering.cpp | 1 - lib/Target/Mips/MipsISelLowering.cpp | 1 - lib/Target/Mips/MipsMachineFunction.h | 11 +---------- 3 files changed, 1 insertion(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsFrameLowering.cpp b/lib/Target/Mips/MipsFrameLowering.cpp index 07de251..19bb1a5 100644 --- a/lib/Target/Mips/MipsFrameLowering.cpp +++ b/lib/Target/Mips/MipsFrameLowering.cpp @@ -163,7 +163,6 @@ void MipsFrameLowering::emitPrologue(MachineFunction &MF) const { (MFI->getObjectOffset(MipsFI->getGPFI()) + RegSize) : MipsFI->getMaxCallFrameSize(); unsigned StackSize = AlignOffset(LocalVarAreaOffset, StackAlign) + - AlignOffset(MipsFI->getRegSaveAreaSize(), StackAlign) + AlignOffset(MFI->getStackSize(), StackAlign); // Update stack size diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 1ba138f..9d2cd1d 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2459,7 +2459,6 @@ CopyMips64ByValRegs(MachineFunction &MF, SDValue Chain, DebugLoc dl, if (IsRegLoc) { Reg = std::find(Mips64IntRegs, Mips64IntRegs + 8, VA.getLocReg()); FOOffset = (Reg - Mips64IntRegs) * 8 - 8 * 8; - MipsFI->setRegSaveAreaSize(-FOOffset); } else FOOffset = VA.getLocMemOffset(); diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h index be27606..bc30b6b 100644 --- a/lib/Target/Mips/MipsMachineFunction.h +++ b/lib/Target/Mips/MipsMachineFunction.h @@ -51,16 +51,12 @@ private: mutable int DynAllocFI; // Frame index of dynamically allocated stack area. unsigned MaxCallFrameSize; - // Size of area on callee's stack frame which is used to save va_arg or - // byval arguments passed in registers. 
- unsigned RegSaveAreaSize; - public: MipsFunctionInfo(MachineFunction& MF) : MF(MF), SRetReturnReg(0), GlobalBaseReg(0), VarArgsFrameIndex(0), InArgFIRange(std::make_pair(-1, 0)), OutArgFIRange(std::make_pair(-1, 0)), GPFI(0), DynAllocFI(0), - MaxCallFrameSize(0), RegSaveAreaSize(0) + MaxCallFrameSize(0) {} bool isInArgFI(int FI) const { @@ -104,11 +100,6 @@ public: unsigned getMaxCallFrameSize() const { return MaxCallFrameSize; } void setMaxCallFrameSize(unsigned S) { MaxCallFrameSize = S; } - - unsigned getRegSaveAreaSize() const { return RegSaveAreaSize; } - void setRegSaveAreaSize(unsigned S) { - if (RegSaveAreaSize < S) RegSaveAreaSize = S; - } }; } // end of namespace llvm -- cgit v1.1 From a3f7e229b657c3ffb66f10d4cc465ce9038c0f5e Mon Sep 17 00:00:00 2001 From: Justin Holewinski Date: Mon, 14 Nov 2011 18:58:20 +0000 Subject: PTX: Let LLVM use loads/stores for all mem* intrinsics, instead of relying on custom implementations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144551 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PTX/PTXISelLowering.cpp | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Target/PTX/PTXISelLowering.cpp b/lib/Target/PTX/PTXISelLowering.cpp index 7f55871..17191fb 100644 --- a/lib/Target/PTX/PTXISelLowering.cpp +++ b/lib/Target/PTX/PTXISelLowering.cpp @@ -47,6 +47,11 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM) setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? 
setMinFunctionAlignment(2); + // Let LLVM use loads/stores for all mem* operations + maxStoresPerMemcpy = 4096; + maxStoresPerMemmove = 4096; + maxStoresPerMemset = 4096; + //////////////////////////////////// /////////// Expansion ////////////// //////////////////////////////////// -- cgit v1.1 From bad53f41c298d251de98e3c864b08f6b9125345c Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 14 Nov 2011 19:01:09 +0000 Subject: Modify LowerFormalArguments to correctly handle vaarg arguments for Mips64. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144552 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 44 ++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 14 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 9d2cd1d..33cb261 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -2621,24 +2621,40 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain, Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); } - if (isVarArg && IsO32) { + if (isVarArg) { + unsigned NumOfRegs = IsO32 ? 4 : 8; + const unsigned *ArgRegs = IsO32 ? O32IntRegs : Mips64IntRegs; + unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs, NumOfRegs); + int FirstRegSlotOffset = IsO32 ? 0 : -64 ; // offset of $a0's slot. + TargetRegisterClass *RC + = IsO32 ? Mips::CPURegsRegisterClass : Mips::CPU64RegsRegisterClass; + unsigned RegSize = RC->getSize(); + int RegSlotOffset = FirstRegSlotOffset + Idx * RegSize; + + // Offset of the first variable argument from stack pointer. + int FirstVaArgOffset; + + if (IsO32 || (Idx == NumOfRegs)) { + FirstVaArgOffset = + (CCInfo.getNextStackOffset() + RegSize - 1) / RegSize * RegSize; + } else + FirstVaArgOffset = RegSlotOffset; + // Record the frame index of the first variable argument // which is a value necessary to VASTART. 
- unsigned NextStackOffset = CCInfo.getNextStackOffset(); - assert(NextStackOffset % 4 == 0 && - "NextStackOffset must be aligned to 4-byte boundaries."); - LastFI = MFI->CreateFixedObject(4, NextStackOffset, true); + LastFI = MFI->CreateFixedObject(RegSize, FirstVaArgOffset, true); MipsFI->setVarArgsFrameIndex(LastFI); - // If NextStackOffset is smaller than o32's 16-byte reserved argument area, - // copy the integer registers that have not been used for argument passing - // to the caller's stack frame. - for (; NextStackOffset < 16; NextStackOffset += 4) { - TargetRegisterClass *RC = Mips::CPURegsRegisterClass; - unsigned Idx = NextStackOffset / 4; - unsigned Reg = AddLiveIn(DAG.getMachineFunction(), O32IntRegs[Idx], RC); - SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, MVT::i32); - LastFI = MFI->CreateFixedObject(4, NextStackOffset, true); + // Copy the integer registers that have not been used for argument passing + // to the argument register save area. For O32, the save area is allocated + // in the caller's stack frame, while for N32/64, it is allocated in the + // callee's stack frame. + for (int StackOffset = RegSlotOffset; + Idx < NumOfRegs; ++Idx, StackOffset += RegSize) { + unsigned Reg = AddLiveIn(DAG.getMachineFunction(), ArgRegs[Idx], RC); + SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, + MVT::getIntegerVT(RegSize * 8)); + LastFI = MFI->CreateFixedObject(RegSize, StackOffset, true); SDValue PtrOff = DAG.getFrameIndex(LastFI, getPointerTy()); OutChains.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, MachinePointerInfo(), -- cgit v1.1 From 4961709688be9e21985e413e296e84e391ae92ac Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 14 Nov 2011 19:02:54 +0000 Subject: AnalyzeCallOperands function for N32/64. N32/64 places all variable arguments in integer registers (or on stack), regardless of their types, but follows calling convention of non-vaarg function when it handles fixed arguments. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144553 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsCallingConv.td | 19 +++++++++++++++++++ lib/Target/Mips/MipsISelLowering.cpp | 26 ++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'lib') diff --git a/lib/Target/Mips/MipsCallingConv.td b/lib/Target/Mips/MipsCallingConv.td index b8a863b..3d973ce 100644 --- a/lib/Target/Mips/MipsCallingConv.td +++ b/lib/Target/Mips/MipsCallingConv.td @@ -64,6 +64,25 @@ def CC_MipsN : CallingConv<[ CCIfType<[f32], CCAssignToStack<4, 8>> ]>; +// N32/64 variable arguments. +// All arguments are passed in integer registers. +def CC_MipsN_VarArg : CallingConv<[ + // Handles byval parameters. + CCIfByVal>, + + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + + CCIfType<[i64, f64], CCAssignToReg<[A0_64, A1_64, A2_64, A3_64, + T0_64, T1_64, T2_64, T3_64]>>, + + CCIfType<[f32], CCAssignToReg<[A0, A1, A2, A3, T0, T1, T2, T3]>>, + + // All stack parameter slots become 64-bit doublewords and are 8-byte aligned. + CCIfType<[i64, f64], CCAssignToStack<8, 8>>, + CCIfType<[f32], CCAssignToStack<4, 8>> +]>; + def RetCC_MipsN : CallingConv<[ // FIXME: Handle complex and float double return values. 
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 33cb261..c01ddf0 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1918,6 +1918,30 @@ static bool CC_Mips64Byval(unsigned ValNo, MVT ValVT, MVT LocVT, #include "MipsGenCallingConv.inc" +static void +AnalyzeMips64CallOperands(CCState CCInfo, + const SmallVectorImpl &Outs) { + unsigned NumOps = Outs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + MVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + bool R; + + if (Outs[i].IsFixed) + R = CC_MipsN(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + else + R = CC_MipsN_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + +#ifndef NDEBUG + if (R) { + dbgs() << "Call operand #" << i << " has unhandled type " + << EVT(ArgVT).getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + //===----------------------------------------------------------------------===// // Call Calling Convention Implementation //===----------------------------------------------------------------------===// @@ -2138,6 +2162,8 @@ MipsTargetLowering::LowerCall(SDValue InChain, SDValue Callee, if (IsO32) CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32); + else if (HasMips64) + AnalyzeMips64CallOperands(CCInfo, Outs); else CCInfo.AnalyzeCallOperands(Outs, CC_Mips); -- cgit v1.1 From 788dc0f4e51628651850ca3c68cccd713b694052 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Mon, 14 Nov 2011 19:06:14 +0000 Subject: 32-to-64-bit extended load. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144554 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/Mips64InstrInfo.td | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 51b9bf9..3c97241 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -219,11 +219,16 @@ def : Pat<(i64 immZExt16:$in), def : Pat<(i64 imm:$imm), (ORi64 (LUi64 (HI16 imm:$imm)), (LO16 imm:$imm))>; -// zextloadi32_u -def : Pat<(zextloadi32_u addr:$a), (DSRL32 (DSLL32 (ULW64_P8 addr:$a), 0), 0)>, - Requires<[IsN64]>; -def : Pat<(zextloadi32_u addr:$a), (DSRL32 (DSLL32 (ULW64 addr:$a), 0), 0)>, - Requires<[NotN64]>; +// extended loads +let Predicates = [NotN64] in { + def : Pat<(extloadi32_a addr:$a), (DSRL32 (DSLL32 (LW64 addr:$a), 0), 0)>; + def : Pat<(zextloadi32_u addr:$a), (DSRL32 (DSLL32 (ULW64 addr:$a), 0), 0)>; +} +let Predicates = [IsN64] in { + def : Pat<(extloadi32_a addr:$a), (DSRL32 (DSLL32 (LW64_P8 addr:$a), 0), 0)>; + def : Pat<(zextloadi32_u addr:$a), + (DSRL32 (DSLL32 (ULW64_P8 addr:$a), 0), 0)>; +} // hi/lo relocs def : Pat<(i64 (MipsLo tglobaladdr:$in)), (DADDiu ZERO_64, tglobaladdr:$in)>; -- cgit v1.1 From b518cae015f9a91aa5035e4047e50215a47a7bb2 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 14 Nov 2011 19:12:20 +0000 Subject: Fold ConstantVector::isAllOnesValue into Constant::isAllOnesValue and simplify it. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144555 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/VMCore/Constants.cpp | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) (limited to 'lib') diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp index a84a046..cd94da1 100644 --- a/lib/VMCore/Constants.cpp +++ b/lib/VMCore/Constants.cpp @@ -71,12 +71,14 @@ bool Constant::isAllOnesValue() const { if (const ConstantFP *CFP = dyn_cast(this)) return CFP->getValueAPF().bitcastToAPInt().isAllOnesValue(); - // Check for constant vectors + // Check for constant vectors which are splats of -1 values. if (const ConstantVector *CV = dyn_cast(this)) - return CV->isAllOnesValue(); + if (Constant *Splat = CV->getSplatValue()) + return Splat->isAllOnesValue(); return false; } + // Constructor to create a '0' constant of arbitrary type... Constant *Constant::getNullValue(Type *Ty) { switch (Ty->getTypeID()) { @@ -1071,26 +1073,6 @@ void ConstantVector::destroyConstant() { destroyConstantImpl(); } -/// This function will return true iff every element in this vector constant -/// is set to all ones. -/// @returns true iff this constant's elements are all set to all ones. -/// @brief Determine if the value is all ones. -bool ConstantVector::isAllOnesValue() const { - // Check out first element. - const Constant *Elt = getOperand(0); - const ConstantInt *CI = dyn_cast(Elt); - const ConstantFP *CF = dyn_cast(Elt); - - // Then make sure all remaining elements point to the same value. - for (unsigned I = 1, E = getNumOperands(); I < E; ++I) - if (getOperand(I) != Elt) - return false; - - // First value is all-ones. - return (CI && CI->isAllOnesValue()) || - (CF && CF->isAllOnesValue()); -} - /// getSplatValue - If this is a splat constant, where all of the /// elements have the same value, return that value. Otherwise return null. 
Constant *ConstantVector::getSplatValue() const { -- cgit v1.1 From a77214a4c43d7a0c49c348439c6887f28bd6d53d Mon Sep 17 00:00:00 2001 From: Pete Cooper Date: Mon, 14 Nov 2011 19:38:42 +0000 Subject: Changed SSE4/AVX <2 x i64> extract and insert ops to be Custom lowered Constant idx case is still done in tablegen but other cases are then expanded Fixes git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144557 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelLowering.cpp | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5d16f47..4e11131 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -944,9 +944,11 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM) setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); + // FIXME: these should be Legal but thats only for the case where + // the index is constant. For now custom expand to deal with that if (Subtarget->is64Bit()) { - setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Legal); - setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Legal); + setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom); + setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i64, Custom); } } @@ -6963,8 +6965,8 @@ X86TargetLowering::LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, Op.getOperand(0)), Op.getOperand(1)); return DAG.getNode(ISD::BITCAST, dl, MVT::f32, Extract); - } else if (VT == MVT::i32) { - // ExtractPS works with constant index. + } else if (VT == MVT::i32 || VT == MVT::i64) { + // ExtractPS/pextrq works with constant index. if (isa(Op.getOperand(1))) return Op; } @@ -7103,7 +7105,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, // Create this as a scalar to vector.. 
N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, N2); - } else if (EltVT == MVT::i32 && isa(N2)) { + } else if ((EltVT == MVT::i32 || EltVT == MVT::i64) && + isa(N2)) { // PINSR* works with constant index. return Op; } -- cgit v1.1 From 2a4410df44cd710e20b3f12873c35405830d66fb Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 14 Nov 2011 19:48:55 +0000 Subject: Teach two-address pass to re-schedule two-address instructions (or the kill instructions of the two-address operands) in order to avoid inserting copies. This fixes the few regressions introduced when the two-address hack was disabled (without regressing the improvements). rdar://10422688 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144559 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 375 ++++++++++++++++++++++++++++-- 1 file changed, 356 insertions(+), 19 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index d879378..33ed4cc 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -36,6 +36,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -56,11 +57,14 @@ STATISTIC(NumConvertedTo3Addr, "Number of instructions promoted to 3-address"); STATISTIC(Num3AddrSunk, "Number of 3-address instructions sunk"); STATISTIC(NumReMats, "Number of instructions re-materialized"); STATISTIC(NumDeletes, "Number of dead instructions deleted"); +STATISTIC(NumReSchedUps, "Number of instructions re-scheduled up"); +STATISTIC(NumReSchedDowns, "Number of instructions re-scheduled down"); namespace { class TwoAddressInstructionPass : 
public MachineFunctionPass { const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; AliasAnalysis *AA; @@ -120,6 +124,18 @@ namespace { MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, unsigned Dist); + bool isDefTooClose(unsigned Reg, unsigned Dist, + MachineInstr *MI, MachineBasicBlock *MBB); + + bool RescheduleMIBelowKill(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool RescheduleKillAboveMI(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg); + bool TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, MachineFunction::iterator &mbbi, @@ -467,6 +483,32 @@ static bool isTwoAddrUse(MachineInstr &MI, unsigned Reg, unsigned &DstReg) { return false; } +/// findLocalKill - Look for an instruction below MI in the MBB that kills the +/// specified register. Returns null if there are any other Reg use between the +/// instructions. +static +MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB, + MachineInstr *MI, MachineRegisterInfo *MRI, + DenseMap &DistanceMap) { + MachineInstr *KillMI = 0; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(Reg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + if (UseMI == MI || UseMI->getParent() != MBB) + continue; + DenseMap::iterator DI = DistanceMap.find(UseMI); + if (DI != DistanceMap.end()) + continue; + if (!UI.getOperand().isKill()) + return 0; + assert(!KillMI && "More than one local kills?"); + KillMI = UseMI; + } + + return KillMI; +} + /// findOnlyInterestingUse - Given a register, if has a single in-basic block /// use, return the use instruction if it's a copy or a two-address use. 
static @@ -852,6 +894,285 @@ TwoAddressInstructionPass::DeleteUnusedInstr(MachineBasicBlock::iterator &mi, return true; } +/// RescheduleMIBelowKill - If there is one more local instruction that reads +/// 'Reg' and it kills 'Reg, consider moving the instruction below the kill +/// instruction in order to eliminate the need for the copy. +bool +TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + MachineInstr *MI = &*mi; + DenseMap::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. + return false; + + MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); + if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. + return false; + + const MCInstrDesc &MCID = KillMI->getDesc(); + if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() || + MCID.isTerminator()) + // Don't move pass calls, etc. + return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore; + if (!MI->isSafeToMove(TII, AA, SeenStore)) + return false; + + if (TII->getInstrLatency(InstrItins, MI) > 1) + // FIXME: Needs more sophisticated heuristics. + return false; + + SmallSet Uses; + SmallSet Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) + Defs.insert(MOReg); + else + Uses.insert(MOReg); + } + + // Move the copies connected to MI down as well. 
+ MachineBasicBlock::iterator From = MI; + MachineBasicBlock::iterator To = llvm::next(From); + while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) { + Defs.insert(To->getOperand(0).getReg()); + ++To; + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + ++KillPos; + for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + const MCInstrDesc &OMCID = OtherMI->getDesc(); + if (OMCID.hasUnmodeledSideEffects() || OMCID.isCall() || OMCID.isBranch() || + OMCID.isTerminator()) + // Don't move pass calls, etc. + return false; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isDef()) { + if (Uses.count(MOReg)) + // Physical register use would be clobbered. + return false; + if (!MO.isDead() && Defs.count(MOReg)) + // May clobber a physical register def. + // FIXME: This may be too conservative. It's ok if the instruction + // is sunken completely below the use. + return false; + } else { + if (Defs.count(MOReg)) + return false; + if (MOReg != Reg && MO.isKill() && Uses.count(MOReg)) + // Don't want to extend other live ranges and update kills. + return false; + } + } + } + + // Move debug info as well. + if (From != MBB->begin()) { + while (llvm::prior(From)->isDebugValue()) + --From; + } + + // Copies following MI may have been moved as well. 
+ nmi = To; + MBB->splice(KillPos, MBB, From, To); + DistanceMap.erase(DI); + + if (LV) { + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } else { + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + MO.setIsKill(false); + } + MI->addRegisterKilled(Reg, 0); + } + + return true; +} + +/// isDefTooClose - Return true if the re-scheduling will put the given +/// instruction too close to the defs of its register dependencies. +bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist, + MachineInstr *MI, + MachineBasicBlock *MBB) { + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(Reg), + DE = MRI->def_end(); DI != DE; ++DI) { + MachineInstr *DefMI = &*DI; + if (DefMI->getParent() != MBB || DefMI->isCopy() || DefMI->isCopyLike()) + continue; + if (DefMI == MI) + return true; // MI is defining something KillMI uses + DenseMap::iterator DDI = DistanceMap.find(DefMI); + if (DDI == DistanceMap.end()) + return true; // Below MI + unsigned DefDist = DDI->second; + assert(Dist > DefDist && "Visited def already?"); + if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist)) + return true; + } + return false; +} + +/// RescheduleKillAboveMI - If there is one more local instruction that reads +/// 'Reg' and it kills 'Reg, consider moving the kill instruction above the +/// current two-address instruction in order to eliminate the need for the +/// copy. +bool +TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, + MachineBasicBlock::iterator &mi, + MachineBasicBlock::iterator &nmi, + unsigned Reg) { + MachineInstr *MI = &*mi; + DenseMap::iterator DI = DistanceMap.find(MI); + if (DI == DistanceMap.end()) + // Must be created from unfolded load. Don't waste time trying this. 
+ return false; + + MachineInstr *KillMI = findLocalKill(Reg, MBB, mi, MRI, DistanceMap); + if (!KillMI || KillMI->isCopy() || KillMI->isCopyLike()) + // Don't mess with copies, they may be coalesced later. + return false; + + unsigned DstReg; + if (isTwoAddrUse(*KillMI, Reg, DstReg)) + return false; + + bool SeenStore; + if (!KillMI->isSafeToMove(TII, AA, SeenStore)) + return false; + + SmallSet Uses; + SmallSet Kills; + SmallSet Defs; + SmallSet LiveDefs; + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (MO.isUse()) { + if (!MOReg) + continue; + if (isDefTooClose(MOReg, DI->second, MI, MBB)) + return false; + Uses.insert(MOReg); + if (MO.isKill() && MOReg != Reg) + Kills.insert(MOReg); + } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { + Defs.insert(MOReg); + if (!MO.isDead()) + LiveDefs.insert(MOReg); + } + } + + // Check if the reschedule will not break depedencies. + unsigned NumVisited = 0; + MachineBasicBlock::iterator KillPos = KillMI; + for (MachineBasicBlock::iterator I = mi; I != KillPos; ++I) { + MachineInstr *OtherMI = I; + // DBG_VALUE cannot be counted against the limit. + if (OtherMI->isDebugValue()) + continue; + if (NumVisited > 10) // FIXME: Arbitrary limit to reduce compile time cost. + return false; + ++NumVisited; + const MCInstrDesc &MCID = OtherMI->getDesc(); + if (MCID.hasUnmodeledSideEffects() || MCID.isCall() || MCID.isBranch() || + MCID.isTerminator()) + // Don't move pass calls, etc. + return false; + for (unsigned i = 0, e = OtherMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OtherMI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned MOReg = MO.getReg(); + if (!MOReg) + continue; + if (MO.isUse()) { + if (Defs.count(MOReg)) + // Moving KillMI can clobber the physical register if the def has + // not been seen. 
+ return false; + if (Kills.count(MOReg)) + // Don't want to extend other live ranges and update kills. + return false; + } else { + if (Uses.count(MOReg)) + return false; + if (TargetRegisterInfo::isPhysicalRegister(MOReg) && + LiveDefs.count(MOReg)) + return false; + // Physical register def is seen. + Defs.erase(MOReg); + } + } + } + + // Move the old kill above MI, don't forget to move debug info as well. + MachineBasicBlock::iterator InsertPos = mi; + if (InsertPos != MBB->begin()) + while (llvm::prior(InsertPos)->isDebugValue()) + --InsertPos; + MachineBasicBlock::iterator From = KillMI; + MachineBasicBlock::iterator To = llvm::next(From); + while (llvm::prior(From)->isDebugValue()) + --From; + MBB->splice(InsertPos, MBB, From, To); + + nmi = llvm::prior(mi); // Backtrack so we process the moved instruction. + DistanceMap.erase(DI); + + if (LV) { + // Update live variables + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } else { + for (unsigned i = 0, e = KillMI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = KillMI->getOperand(i); + if (!MO.isReg() || !MO.isUse() || MO.getReg() != Reg) + continue; + MO.setIsKill(false); + } + MI->addRegisterKilled(Reg, 0); + } + return true; +} + /// TryInstructionTransform - For the case where an instruction has a single /// pair of tied register operands, attempt some transformations that may /// either eliminate the tied operands or improve the opportunities for @@ -863,17 +1184,18 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineFunction::iterator &mbbi, unsigned SrcIdx, unsigned DstIdx, unsigned Dist, SmallPtrSet &Processed) { - const MCInstrDesc &MCID = mi->getDesc(); - unsigned regA = mi->getOperand(DstIdx).getReg(); - unsigned regB = mi->getOperand(SrcIdx).getReg(); + MachineInstr &MI = *mi; + const MCInstrDesc &MCID = MI.getDesc(); + unsigned regA = MI.getOperand(DstIdx).getReg(); + unsigned regB = MI.getOperand(SrcIdx).getReg(); 
assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); // If regA is dead and the instruction can be deleted, just delete // it so it doesn't clobber regB. - bool regBKilled = isKilled(*mi, regB, MRI, TII); - if (!regBKilled && mi->getOperand(DstIdx).isDead() && + bool regBKilled = isKilled(MI, regB, MRI, TII); + if (!regBKilled && MI.getOperand(DstIdx).isDead() && DeleteUnusedInstr(mi, nmi, mbbi, Dist)) { ++NumDeletes; return true; // Done with this instruction. @@ -885,20 +1207,20 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned regCIdx = ~0U; bool TryCommute = false; bool AggressiveCommute = false; - if (MCID.isCommutable() && mi->getNumOperands() >= 3 && - TII->findCommutedOpIndices(mi, SrcOp1, SrcOp2)) { + if (MCID.isCommutable() && MI.getNumOperands() >= 3 && + TII->findCommutedOpIndices(&MI, SrcOp1, SrcOp2)) { if (SrcIdx == SrcOp1) regCIdx = SrcOp2; else if (SrcIdx == SrcOp2) regCIdx = SrcOp1; if (regCIdx != ~0U) { - regC = mi->getOperand(regCIdx).getReg(); - if (!regBKilled && isKilled(*mi, regC, MRI, TII)) + regC = MI.getOperand(regCIdx).getReg(); + if (!regBKilled && isKilled(MI, regC, MRI, TII)) // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; - else if (isProfitableToCommute(regB, regC, mi, mbbi, Dist)) { + else if (isProfitableToCommute(regB, regC, &MI, mbbi, Dist)) { TryCommute = true; AggressiveCommute = true; } @@ -913,6 +1235,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } + // If there is one more use of regB later in the same MBB, consider + // re-schedule this MI below it. 
+ if (RescheduleMIBelowKill(mbbi, mi, nmi, regB)) { + ++NumReSchedDowns; + return true; + } + if (TargetRegisterInfo::isVirtualRegister(regA)) ScanUses(regA, &*mbbi, Processed); @@ -928,6 +1257,13 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } } + // If there is one more use of regB later in the same MBB, consider + // re-schedule it before this MI if it's legal. + if (RescheduleKillAboveMI(mbbi, mi, nmi, regB)) { + ++NumReSchedUps; + return true; + } + // If this is an instruction with a load folded into it, try unfolding // the load, e.g. avoid this: // movq %rdx, %rcx @@ -940,7 +1276,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // Determine if a load can be unfolded. unsigned LoadRegIndex; unsigned NewOpc = - TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(), + TII->getOpcodeAfterMemoryUnfold(MI.getOpcode(), /*UnfoldLoad=*/true, /*UnfoldStore=*/false, &LoadRegIndex); @@ -950,12 +1286,12 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, MachineFunction &MF = *mbbi->getParent(); // Unfold the load. - DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); + DEBUG(dbgs() << "2addr: UNFOLDING: " << MI); const TargetRegisterClass *RC = TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI); unsigned Reg = MRI->createVirtualRegister(RC); SmallVector NewMIs; - if (!TII->unfoldMemoryOperand(MF, mi, Reg, + if (!TII->unfoldMemoryOperand(MF, &MI, Reg, /*UnfoldLoad=*/true,/*UnfoldStore=*/false, NewMIs)) { DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); @@ -986,21 +1322,21 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, // Success, or at least we made an improvement. Keep the unfolded // instructions and discard the original. 
if (LV) { - for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { - MachineOperand &MO = mi->getOperand(i); + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) { if (MO.isUse()) { if (MO.isKill()) { if (NewMIs[0]->killsRegister(MO.getReg())) - LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[0]); else { assert(NewMIs[1]->killsRegister(MO.getReg()) && "Kill missing after load unfold!"); - LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]); + LV->replaceKillInstruction(MO.getReg(), &MI, NewMIs[1]); } } - } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) { + } else if (LV->removeVirtualRegisterDead(MO.getReg(), &MI)) { if (NewMIs[1]->registerDefIsDead(MO.getReg())) LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); else { @@ -1013,7 +1349,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } LV->addVirtualRegisterKilled(Reg, NewMIs[1]); } - mi->eraseFromParent(); + MI.eraseFromParent(); mi = NewMIs[1]; if (TransformSuccess) return true; @@ -1040,6 +1376,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); + InstrItins = TM.getInstrItineraryData(); LV = getAnalysisIfAvailable(); AA = &getAnalysis(); -- cgit v1.1 From 6296ee3ee25a41f4d36d4fd61181598cd165235a Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Mon, 14 Nov 2011 19:51:48 +0000 Subject: Unbreak Release builds. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144560 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index c01ddf0..b595f03 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -1932,8 +1932,8 @@ AnalyzeMips64CallOperands(CCState CCInfo, else R = CC_MipsN_VarArg(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); -#ifndef NDEBUG if (R) { +#ifndef NDEBUG dbgs() << "Call operand #" << i << " has unhandled type " << EVT(ArgVT).getEVTString(); #endif -- cgit v1.1 From 57b299796685033c87a5414e179b95b5ae7dc8d4 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 14 Nov 2011 20:22:27 +0000 Subject: Add support for Thumb load/stores with negative offsets. rdar://10412592 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144565 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 76 +++++++++++++++++++++++++++++++++--------- 1 file changed, 60 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 432abb5..81a93b1 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -870,12 +870,17 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { case MVT::i8: case MVT::i16: case MVT::i32: - if (!useAM3) + if (!useAM3) { // Integer loads/stores handle 12-bit offsets. needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); - else + // Handle negative offsets. + if (isThumb2) + needsLowering = !(needsLowering && Subtarget->hasV6T2Ops() && + Addr.Offset < 0 && Addr.Offset > -256); + } else { // ARM halfword load/stores and signed byte loads use +/-imm8 offsets. 
needsLowering = (Addr.Offset > 255 || Addr.Offset < -255); + } break; case MVT::f32: case MVT::f64: @@ -967,24 +972,42 @@ bool ARMFastISel::ARMEmitLoad(EVT VT, unsigned &ResultReg, Address &Addr, default: return false; case MVT::i1: case MVT::i8: - if (isZExt) { - Opc = isThumb2 ? ARM::t2LDRBi12 : ARM::LDRBi12; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + Opc = isZExt ? ARM::t2LDRBi8 : ARM::t2LDRSBi8; + else + Opc = isZExt ? ARM::t2LDRBi12 : ARM::t2LDRSBi12; } else { - Opc = isThumb2 ? ARM::t2LDRSBi12 : ARM::LDRSB; - if (!isThumb2) useAM3 = true; + if (isZExt) { + Opc = ARM::LDRBi12; + } else { + Opc = ARM::LDRSB; + useAM3 = true; + } } RC = ARM::GPRRegisterClass; break; case MVT::i16: - if (isZExt) - Opc = isThumb2 ? ARM::t2LDRHi12 : ARM::LDRH; - else - Opc = isThumb2 ? ARM::t2LDRSHi12 : ARM::LDRSH; - if (!isThumb2) useAM3 = true; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + Opc = isZExt ? ARM::t2LDRHi8 : ARM::t2LDRSHi8; + else + Opc = isZExt ? ARM::t2LDRHi12 : ARM::t2LDRSHi12; + } else { + Opc = isZExt ? ARM::LDRH : ARM::LDRSH; + useAM3 = true; + } RC = ARM::GPRRegisterClass; break; case MVT::i32: - Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + Opc = ARM::t2LDRi8; + else + Opc = ARM::t2LDRi12; + } else { + Opc = ARM::LDRi12; + } RC = ARM::GPRRegisterClass; break; case MVT::f32: @@ -1045,14 +1068,35 @@ bool ARMFastISel::ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr) { SrcReg = Res; } // Fallthrough here. case MVT::i8: - StrOpc = isThumb2 ? ARM::t2STRBi12 : ARM::STRBi12; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + StrOpc = ARM::t2STRBi8; + else + StrOpc = ARM::t2STRBi12; + } else { + StrOpc = ARM::STRBi12; + } break; case MVT::i16: - StrOpc = isThumb2 ? 
ARM::t2STRHi12 : ARM::STRH; - if (!isThumb2) useAM3 = true; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + StrOpc = ARM::t2STRHi8; + else + StrOpc = ARM::t2STRHi12; + } else { + StrOpc = ARM::STRH; + useAM3 = true; + } break; case MVT::i32: - StrOpc = isThumb2 ? ARM::t2STRi12 : ARM::STRi12; + if (isThumb2) { + if (Addr.Offset < 0 && Addr.Offset > -256 && Subtarget->hasV6T2Ops()) + StrOpc = ARM::t2STRi8; + else + StrOpc = ARM::t2STRi12; + } else { + StrOpc = ARM::STRi12; + } break; case MVT::f32: if (!Subtarget->hasVFP2()) return false; -- cgit v1.1 From 76c8f08567c1e06e8555a910e919d4896f18f5e2 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 14 Nov 2011 20:35:52 +0000 Subject: Add a missing pattern for X86ISD::MOVLPD. rdar://10436044 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144566 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'lib') diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index caaf544..6deee4f 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -520,6 +520,8 @@ let Predicates = [HasSSE2] in { // is during lowering, where it's not possible to recognize the fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. + def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), + (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2),sub_sd))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), (MOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2),sub_sd))>; def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)), @@ -647,6 +649,9 @@ let Predicates = [HasAVX] in { // is during lowering, where it's not possible to recognize the fold cause // it has two uses through a bitcast. One use disappears at isel time and the // fold opportunity reappears. 
+ def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)), + (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v2f64 VR128:$src2), + sub_sd))>; def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)), (VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4f32 VR128:$src2), sub_sd))>; -- cgit v1.1 From 4d0a9ff36574da0c042e9bd3ae816301b392ac41 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 14 Nov 2011 20:50:16 +0000 Subject: Add support for tsan annotations (thread sanitizer, a valgrind-based tool). These annotations are disabled entirely when either ENABLE_THREADS is off, or building a release build. When enabled, they add calls to functions with no statements to ManagedStatic's getters. Use these annotations to inform tsan that the race used inside ManagedStatic initialization is actually benign. Thanks to Kostya Serebryany for helping write this patch! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144567 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/ManagedStatic.cpp | 8 +++++++- lib/Support/Valgrind.cpp | 11 +++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Support/ManagedStatic.cpp b/lib/Support/ManagedStatic.cpp index c767c15..098cccb 100644 --- a/lib/Support/ManagedStatic.cpp +++ b/lib/Support/ManagedStatic.cpp @@ -27,8 +27,15 @@ void ManagedStaticBase::RegisterManagedStatic(void *(*Creator)(), if (Ptr == 0) { void* tmp = Creator ? Creator() : 0; + TsanHappensBefore(this); sys::MemoryFence(); + + // This write is racy against the first read in the ManagedStatic + // accessors. The race is benign because it does a second read after a + // memory fence, at which point it isn't possible to get a partial value. + TsanIgnoreWritesBegin(); Ptr = tmp; + TsanIgnoreWritesEnd(); DeleterFn = Deleter; // Add to list of managed statics. 
@@ -72,4 +79,3 @@ void llvm::llvm_shutdown() { if (llvm_is_multithreaded()) llvm_stop_multithreaded(); } - diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp index 7034485..078d705 100644 --- a/lib/Support/Valgrind.cpp +++ b/lib/Support/Valgrind.cpp @@ -52,3 +52,14 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) { } #endif // !HAVE_VALGRIND_VALGRIND_H + +// These functions require no implementation, tsan just looks at the arguments +// they're called with. +extern "C" { +void AnnotateHappensBefore(const char *file, int line, + const volatile void *cv) {} +void AnnotateHappensAfter(const char *file, int line, + const volatile void *cv) {} +void AnnotateIgnoreWritesBegin(const char *file, int line) {} +void AnnotateIgnoreWritesEnd(const char *file, int line) {} +} -- cgit v1.1 From 41e00172c51a1666cbcda5df2c6f45d685068fbb Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 14 Nov 2011 21:02:09 +0000 Subject: At -O0, multiple uses of a virtual registers in the same BB are being marked "kill". This looks like a bug upstream. Since that's going to take some time to understand, loosen the assertion and disable the optimization when multiple kills are seen. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144568 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 33ed4cc..a702c6c 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -502,7 +502,8 @@ MachineInstr *findLocalKill(unsigned Reg, MachineBasicBlock *MBB, continue; if (!UI.getOperand().isKill()) return 0; - assert(!KillMI && "More than one local kills?"); + if (KillMI) + return 0; // -O0 kill markers cannot be trusted? 
KillMI = UseMI; } -- cgit v1.1 From 8aee7d8f9eab71a29f8f3a5a541144bef4fb5ac4 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 14 Nov 2011 21:11:15 +0000 Subject: Avoid dereferencing off the beginning of lists. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144569 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/TwoAddressInstructionPass.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index a702c6c..6796312 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -999,10 +999,8 @@ TwoAddressInstructionPass::RescheduleMIBelowKill(MachineBasicBlock *MBB, } // Move debug info as well. - if (From != MBB->begin()) { - while (llvm::prior(From)->isDebugValue()) - --From; - } + while (From != MBB->begin() && llvm::prior(From)->isDebugValue()) + --From; // Copies following MI may have been moved as well. nmi = To; @@ -1146,9 +1144,8 @@ TwoAddressInstructionPass::RescheduleKillAboveMI(MachineBasicBlock *MBB, // Move the old kill above MI, don't forget to move debug info as well. MachineBasicBlock::iterator InsertPos = mi; - if (InsertPos != MBB->begin()) - while (llvm::prior(InsertPos)->isDebugValue()) - --InsertPos; + while (InsertPos != MBB->begin() && llvm::prior(InsertPos)->isDebugValue()) + --InsertPos; MachineBasicBlock::iterator From = KillMI; MachineBasicBlock::iterator To = llvm::next(From); while (llvm::prior(From)->isDebugValue()) -- cgit v1.1 From 02e3d9268fe456ebe4fe6ae277507bb7933ec3df Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 14 Nov 2011 22:28:39 +0000 Subject: ARM assembly parsing type suffix options for VLDR/VSTR. 
rdar://10435076 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144575 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 19 +++++++++++++++++++ lib/Target/ARM/ARMInstrVFP.td | 9 +++++++++ 2 files changed, 28 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 12cb464..6d5b6a4 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1991,3 +1991,22 @@ class NVDupLane op19_16, bit op6, dag oops, dag iops, class NEONFPPat : Pat { list Predicates = [HasNEON,UseNEONForFP]; } + +// VFP/NEON Instruction aliases for type suffices. +class VFPDataTypeInstAlias : + InstAlias; +multiclass VFPDT32InstAlias { + def I32 : VFPDataTypeInstAlias; + def S32 : VFPDataTypeInstAlias; + def U32 : VFPDataTypeInstAlias; + def F32 : VFPDataTypeInstAlias; + def F : VFPDataTypeInstAlias; +} +multiclass VFPDT64InstAlias { + def I64 : VFPDataTypeInstAlias; + def S64 : VFPDataTypeInstAlias; + def U64 : VFPDataTypeInstAlias; + def F64 : VFPDataTypeInstAlias; + def D : VFPDataTypeInstAlias; +} + diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 22a464e..06cb79a 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -1172,3 +1172,12 @@ def : VFP2InstAlias<"vstr$p $Dd, $addr", (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; def : VFP2InstAlias<"vstr$p $Sd, $addr", (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +// The suffix can also by typed. 
+defm : VFPDT32InstAlias<"vldr$p", "$Sd, $addr", + (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +defm : VFPDT32InstAlias<"vstr$p", "$Sd, $addr", + (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; +defm : VFPDT64InstAlias<"vldr$p", "$Dd, $addr", + (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; +defm : VFPDT64InstAlias<"vstr$p", "$Dd, $addr", + (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -- cgit v1.1 From e489af8dce12249be26ac0c8e371557378886bc2 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 14 Nov 2011 22:34:48 +0000 Subject: Fix a performance regression from r144565. Positive offsets were being lowered into registers, rather then encoded directly in the load/store. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144576 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 81a93b1..0b728a9 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -874,9 +874,9 @@ void ARMFastISel::ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3) { // Integer loads/stores handle 12-bit offsets. needsLowering = ((Addr.Offset & 0xfff) != Addr.Offset); // Handle negative offsets. - if (isThumb2) - needsLowering = !(needsLowering && Subtarget->hasV6T2Ops() && - Addr.Offset < 0 && Addr.Offset > -256); + if (needsLowering && isThumb2) + needsLowering = !(Subtarget->hasV6T2Ops() && Addr.Offset < 0 && + Addr.Offset > -256); } else { // ARM halfword load/stores and signed byte loads use +/-imm8 offsets. needsLowering = (Addr.Offset > 255 || Addr.Offset < -255); -- cgit v1.1 From 909cb4f2f2d227ea01852cb318c80a79c46bc9bf Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 14 Nov 2011 22:46:17 +0000 Subject: Add support for inlining small memcpys. 
rdar://10412592 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144578 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 65 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 0b728a9..67ba14b 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -185,6 +185,9 @@ class ARMFastISel : public FastISel { bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); bool ARMComputeAddress(const Value *Obj, Address &Addr); void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); + bool ARMIsMemXferSmall(uint64_t Len); + bool ARMTryEmitSmallMemXfer(Address Dest, Address Src, uint64_t Len, + bool isMemCpy); unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt); unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); unsigned ARMMaterializeInt(const Constant *C, EVT VT); @@ -2193,18 +2196,76 @@ bool ARMFastISel::SelectCall(const Instruction *I, return true; } +bool ARMFastISel::ARMIsMemXferSmall(uint64_t Len) { + return Len <= 16; +} + +bool ARMFastISel::ARMTryEmitSmallMemXfer(Address Dest, Address Src, uint64_t Len, + bool isMemCpy) { + // FIXME: Memmove's require a little more care because their source and + // destination may overlap. + if (!isMemCpy) + return false; + + // Make sure we don't bloat code by inlining very large memcpy's. + if (!ARMIsMemXferSmall(Len)) + return false; + + // We don't care about alignment here since we just emit integer accesses. 
+ while (Len) { + MVT VT; + if (Len >= 4) + VT = MVT::i32; + else if (Len >= 2) + VT = MVT::i16; + else { + assert(Len == 1); + VT = MVT::i8; + } + + bool RV; + unsigned ResultReg; + RV = ARMEmitLoad(VT, ResultReg, Src); + assert (RV = true && "Should be able to handle this load."); + RV = ARMEmitStore(VT, ResultReg, Dest); + assert (RV = true && "Should be able to handle this store."); + + unsigned Size = VT.getSizeInBits()/8; + Len -= Size; + Dest.Offset += Size; + Src.Offset += Size; + } + + return true; +} + bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { // FIXME: Handle more intrinsics. switch (I.getIntrinsicID()) { default: return false; case Intrinsic::memcpy: case Intrinsic::memmove: { - // FIXME: Small memcpy/memmove's are common enough that we want to do them - // without a call if possible. const MemTransferInst &MTI = cast(I); // Don't handle volatile. if (MTI.isVolatile()) return false; + + // Disable inlining for memmove before calls to ComputeAddress. Otherwise, + // we would emit dead code because we don't currently handle memmoves. + bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy); + if (isa(MTI.getLength()) && isMemCpy) { + // Small memcpy/memmove's are common enough that we want to do them + // without a call if possible. + uint64_t Len = cast(MTI.getLength())->getZExtValue(); + if (ARMIsMemXferSmall(Len)) { + Address Dest, Src; + if (!ARMComputeAddress(MTI.getRawDest(), Dest) || + !ARMComputeAddress(MTI.getRawSource(), Src)) + return false; + if (ARMTryEmitSmallMemXfer(Dest, Src, Len, isMemCpy)) + return true; + } + } if (!MTI.getLength()->getType()->isIntegerTy(32)) return false; -- cgit v1.1 From 88990248d3bfb2f265fcf27f8a032ac0eb14d09f Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Mon, 14 Nov 2011 22:49:42 +0000 Subject: Refactor capture tracking (which already had a couple flags for whether returns and stores capture) to permit the caller to see each capture point and decide whether to continue looking. 
Use this inside memdep to do an analysis that basicaa won't do. This lets us solve another devirtualization case, fixing PR8908! git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144580 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Analysis/CaptureTracking.cpp | 147 +++++++----------------------- lib/Analysis/MemoryDependenceAnalysis.cpp | 80 +++++++++++++++- 2 files changed, 110 insertions(+), 117 deletions(-) (limited to 'lib') diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index b2c27d1..a84dafb 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -17,24 +17,30 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/Value.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/CallSite.h" using namespace llvm; -/// As its comment mentions, PointerMayBeCaptured can be expensive. -/// However, it's not easy for BasicAA to cache the result, because -/// it's an ImmutablePass. To work around this, bound queries at a -/// fixed number of uses. -/// -/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep -/// a cache. Then we can move the code from BasicAliasAnalysis into -/// that path, and remove this threshold. 
-static int const Threshold = 20; +namespace { + struct SimpleCaptureTracker { + explicit SimpleCaptureTracker(bool ReturnCaptures) + : ReturnCaptures(ReturnCaptures), Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { return true; } + + bool captured(Instruction *I) { + if (isa(I) && !ReturnCaptures) + return false; + + Captured = true; + return true; + } + + bool ReturnCaptures; + + bool Captured; + }; +} /// PointerMayBeCaptured - Return true if this pointer value may be captured /// by the enclosing function (which is required to exist). This routine can @@ -45,104 +51,13 @@ static int const Threshold = 20; /// counts as capturing it or not. bool llvm::PointerMayBeCaptured(const Value *V, bool ReturnCaptures, bool StoreCaptures) { - assert(V->getType()->isPointerTy() && "Capture is for pointers only!"); - SmallVector Worklist; - SmallSet Visited; - int Count = 0; - - for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); - UI != UE; ++UI) { - // If there are lots of uses, conservatively say that the value - // is captured to avoid taking too much compile time. - if (Count++ >= Threshold) - return true; - - Use *U = &UI.getUse(); - Visited.insert(U); - Worklist.push_back(U); - } - - while (!Worklist.empty()) { - Use *U = Worklist.pop_back_val(); - Instruction *I = cast(U->getUser()); - V = U->get(); - - switch (I->getOpcode()) { - case Instruction::Call: - case Instruction::Invoke: { - CallSite CS(I); - // Not captured if the callee is readonly, doesn't return a copy through - // its return value and doesn't unwind (a readonly function can leak bits - // by throwing an exception or not depending on the input value). - if (CS.onlyReadsMemory() && CS.doesNotThrow() && I->getType()->isVoidTy()) - break; - - // Not captured if only passed via 'nocapture' arguments. Note that - // calling a function pointer does not in itself cause the pointer to - // be captured. 
This is a subtle point considering that (for example) - // the callee might return its own address. It is analogous to saying - // that loading a value from a pointer does not cause the pointer to be - // captured, even though the loaded value might be the pointer itself - // (think of self-referential objects). - CallSite::arg_iterator B = CS.arg_begin(), E = CS.arg_end(); - for (CallSite::arg_iterator A = B; A != E; ++A) - if (A->get() == V && !CS.paramHasAttr(A - B + 1, Attribute::NoCapture)) - // The parameter is not marked 'nocapture' - captured. - return true; - // Only passed via 'nocapture' arguments, or is the called function - not - // captured. - break; - } - case Instruction::Load: - // Loading from a pointer does not cause it to be captured. - break; - case Instruction::VAArg: - // "va-arg" from a pointer does not cause it to be captured. - break; - case Instruction::Ret: - if (ReturnCaptures) - return true; - break; - case Instruction::Store: - if (V == I->getOperand(0)) - // Stored the pointer - conservatively assume it may be captured. - // TODO: If StoreCaptures is not true, we could do Fancy analysis - // to determine whether this store is not actually an escape point. - // In that case, BasicAliasAnalysis should be updated as well to - // take advantage of this. - return true; - // Storing to the pointee does not cause the pointer to be captured. - break; - case Instruction::BitCast: - case Instruction::GetElementPtr: - case Instruction::PHI: - case Instruction::Select: - // The original value is not captured via this if the new value isn't. - for (Instruction::use_iterator UI = I->use_begin(), UE = I->use_end(); - UI != UE; ++UI) { - Use *U = &UI.getUse(); - if (Visited.insert(U)) - Worklist.push_back(U); - } - break; - case Instruction::ICmp: - // Don't count comparisons of a no-alias return value against null as - // captures. This allows us to ignore comparisons of malloc results - // with null, for example. 
- if (isNoAliasCall(V->stripPointerCasts())) - if (ConstantPointerNull *CPN = - dyn_cast(I->getOperand(1))) - if (CPN->getType()->getAddressSpace() == 0) - break; - // Otherwise, be conservative. There are crazy ways to capture pointers - // using comparisons. - return true; - default: - // Something else - be conservative and say it is captured. - return true; - } - } - - // All uses examined - not captured. - return false; + // TODO: If StoreCaptures is not true, we could do Fancy analysis + // to determine whether this store is not actually an escape point. + // In that case, BasicAliasAnalysis should be updated as well to + // take advantage of this. + (void)StoreCaptures; + + SimpleCaptureTracker SCT(ReturnCaptures); + PointerMayBeCaptured(V, SCT); + return SCT.Captured; } diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 92967c0..323c84f 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -22,6 +22,7 @@ #include "llvm/Function.h" #include "llvm/LLVMContext.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -91,6 +92,7 @@ void MemoryDependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis(); TD = getAnalysisIfAvailable(); + DT = getAnalysisIfAvailable(); if (PredCache == 0) PredCache.reset(new PredIteratorCache()); return false; @@ -331,6 +333,82 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, return 0; } +namespace { + /// Only find pointer captures which happen before the given instruction. Uses + /// the dominator tree to determine whether one instruction is before another. 
+ struct CapturesBefore { + CapturesBefore(const Instruction *I, DominatorTree *DT) + : BeforeHere(I), DT(DT), Captured(false) {} + + void tooManyUses() { Captured = true; } + + bool shouldExplore(Use *U) { + Instruction *I = cast(U->getUser()); + if (BeforeHere != I && DT->dominates(BeforeHere, I)) + return false; + return true; + } + + bool captured(Instruction *I) { + if (BeforeHere != I && DT->dominates(BeforeHere, I)) + return false; + Captured = true; + return true; + } + + const Instruction *BeforeHere; + DominatorTree *DT; + + bool Captured; + }; +} + +AliasAnalysis::ModRefResult +MemoryDependenceAnalysis::getModRefInfo(const Instruction *Inst, + const AliasAnalysis::Location &MemLoc) { + AliasAnalysis::ModRefResult MR = AA->getModRefInfo(Inst, MemLoc); + if (MR != AliasAnalysis::ModRef) return MR; + + // FIXME: this is really just shoring-up a deficiency in alias analysis. + // BasicAA isn't willing to spend linear time determining whether an alloca + // was captured before or after this particular call, while we are. However, + // with a smarter AA in place, this test is just wasting compile time. + if (!DT) return AliasAnalysis::ModRef; + const Value *Object = GetUnderlyingObject(MemLoc.Ptr, TD); + if (!isIdentifiedObject(Object) || isa(Object)) + return AliasAnalysis::ModRef; + ImmutableCallSite CS(Inst); + if (!CS.getInstruction()) return AliasAnalysis::ModRef; + + CapturesBefore CB(Inst, DT); + llvm::PointerMayBeCaptured(Object, CB); + + if (isa(Object) || CS.getInstruction() == Object || CB.Captured) + return AliasAnalysis::ModRef; + + unsigned ArgNo = 0; + for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end(); + CI != CE; ++CI, ++ArgNo) { + // Only look at the no-capture or byval pointer arguments. If this + // pointer were passed to arguments that were neither of these, then it + // couldn't be no-capture. 
+ if (!(*CI)->getType()->isPointerTy() || + (!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture) && + !CS.paramHasAttr(ArgNo+1, Attribute::ByVal))) + continue; + + // If this is a no-capture pointer argument, see if we can tell that it + // is impossible to alias the pointer we're checking. If not, we have to + // assume that the call could touch the pointer, even though it doesn't + // escape. + if (!AA->isNoAlias(AliasAnalysis::Location(*CI), + AliasAnalysis::Location(Object))) { + return AliasAnalysis::ModRef; + } + } + return AliasAnalysis::NoModRef; +} + /// getPointerDependencyFrom - Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases with /// read-only operations. If isLoad is false, this routine ignores may-aliases @@ -478,7 +556,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, } // See if this instruction (e.g. a call or vaarg) mod/ref's the pointer. - switch (AA->getModRefInfo(Inst, MemLoc)) { + switch (getModRefInfo(Inst, MemLoc)) { case AliasAnalysis::NoModRef: // If the call has no effect on the queried pointer, just ignore it. continue; -- cgit v1.1 From ffc658b056b7cc0b3f6a2626694b6a4216ed728d Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 14 Nov 2011 23:03:21 +0000 Subject: ARM VLDR/VSTR instructions don't need a size suffix. Canonicalize on the non-suffixed form, but continue to accept assembly that has any correctly sized type suffix. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144583 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 2 ++ lib/Target/ARM/ARMInstrVFP.td | 27 +++++++++------------------ 2 files changed, 11 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 6d5b6a4..841ca74 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1996,6 +1996,7 @@ class NEONFPPat : Pat { class VFPDataTypeInstAlias : InstAlias; multiclass VFPDT32InstAlias { + def _32 : VFPDataTypeInstAlias; def I32 : VFPDataTypeInstAlias; def S32 : VFPDataTypeInstAlias; def U32 : VFPDataTypeInstAlias; @@ -2003,6 +2004,7 @@ multiclass VFPDT32InstAlias { def F : VFPDataTypeInstAlias; } multiclass VFPDT64InstAlias { + def _64 : VFPDataTypeInstAlias; def I64 : VFPDataTypeInstAlias; def S64 : VFPDataTypeInstAlias; def U64 : VFPDataTypeInstAlias; diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 06cb79a..488c508 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -69,11 +69,11 @@ def vfp_f64imm : Operand, let canFoldAsLoad = 1, isReMaterializable = 1 in { def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$Dd), (ins addrmode5:$addr), - IIC_fpLoad64, "vldr", ".64\t$Dd, $addr", + IIC_fpLoad64, "vldr", "\t$Dd, $addr", [(set DPR:$Dd, (f64 (load addrmode5:$addr)))]>; def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), - IIC_fpLoad32, "vldr", ".32\t$Sd, $addr", + IIC_fpLoad32, "vldr", "\t$Sd, $addr", [(set SPR:$Sd, (load addrmode5:$addr))]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. 
@@ -83,11 +83,11 @@ def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$Sd), (ins addrmode5:$addr), } // End of 'let canFoldAsLoad = 1, isReMaterializable = 1 in' def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$Dd, addrmode5:$addr), - IIC_fpStore64, "vstr", ".64\t$Dd, $addr", + IIC_fpStore64, "vstr", "\t$Dd, $addr", [(store (f64 DPR:$Dd), addrmode5:$addr)]>; def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$Sd, addrmode5:$addr), - IIC_fpStore32, "vstr", ".32\t$Sd, $addr", + IIC_fpStore32, "vstr", "\t$Sd, $addr", [(store SPR:$Sd, addrmode5:$addr)]> { // Some single precision VFP instructions may be executed on both NEON and VFP // pipelines. @@ -1163,21 +1163,12 @@ def FCONSTS : VFPAI<(outs SPR:$Sd), (ins vfp_f32imm:$imm), def : VFP2InstAlias<"fmstat${p}", (FMSTAT pred:$p)>; -// The size suffix is optional for VLDR/VSTR -def : VFP2InstAlias<"vldr$p $Dd, $addr", - (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -def : VFP2InstAlias<"vldr$p $Sd, $addr", - (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -def : VFP2InstAlias<"vstr$p $Dd, $addr", - (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -def : VFP2InstAlias<"vstr$p $Sd, $addr", - (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -// The suffix can also by typed. -defm : VFPDT32InstAlias<"vldr$p", "$Sd, $addr", +// VLDR/VSTR accept an optional type suffix. 
+defm : VFPDT32InstAlias<"vldr${p}", "$Sd, $addr", (VLDRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -defm : VFPDT32InstAlias<"vstr$p", "$Sd, $addr", +defm : VFPDT32InstAlias<"vstr${p}", "$Sd, $addr", (VSTRS SPR:$Sd, addrmode5:$addr, pred:$p)>; -defm : VFPDT64InstAlias<"vldr$p", "$Dd, $addr", +defm : VFPDT64InstAlias<"vldr${p}", "$Dd, $addr", (VLDRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -defm : VFPDT64InstAlias<"vstr$p", "$Dd, $addr", +defm : VFPDT64InstAlias<"vstr${p}", "$Dd, $addr", (VSTRD DPR:$Dd, addrmode5:$addr, pred:$p)>; -- cgit v1.1 From 2c42b8c912b62071c27454182cdef60e3b584083 Mon Sep 17 00:00:00 2001 From: Chad Rosier Date: Mon, 14 Nov 2011 23:04:09 +0000 Subject: Supporting inline memmove isn't going to be worthwhile. The only way to avoid violating a dependency is to emit all loads prior to stores. This would likely cause a great deal of spillage offsetting any potential gains. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144585 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMFastISel.cpp | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 67ba14b..4df084f 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -185,9 +185,8 @@ class ARMFastISel : public FastISel { bool ARMEmitStore(EVT VT, unsigned SrcReg, Address &Addr); bool ARMComputeAddress(const Value *Obj, Address &Addr); void ARMSimplifyAddress(Address &Addr, EVT VT, bool useAM3); - bool ARMIsMemXferSmall(uint64_t Len); - bool ARMTryEmitSmallMemXfer(Address Dest, Address Src, uint64_t Len, - bool isMemCpy); + bool ARMIsMemCpySmall(uint64_t Len); + bool ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len); unsigned ARMEmitIntExt(EVT SrcVT, unsigned SrcReg, EVT DestVT, bool isZExt); unsigned ARMMaterializeFP(const ConstantFP *CFP, EVT VT); unsigned ARMMaterializeInt(const Constant *C, EVT VT); @@ -2196,19 +2195,13 @@ 
bool ARMFastISel::SelectCall(const Instruction *I, return true; } -bool ARMFastISel::ARMIsMemXferSmall(uint64_t Len) { +bool ARMFastISel::ARMIsMemCpySmall(uint64_t Len) { return Len <= 16; } -bool ARMFastISel::ARMTryEmitSmallMemXfer(Address Dest, Address Src, uint64_t Len, - bool isMemCpy) { - // FIXME: Memmove's require a little more care because their source and - // destination may overlap. - if (!isMemCpy) - return false; - +bool ARMFastISel::ARMTryEmitSmallMemCpy(Address Dest, Address Src, uint64_t Len) { // Make sure we don't bloat code by inlining very large memcpy's. - if (!ARMIsMemXferSmall(Len)) + if (!ARMIsMemCpySmall(Len)) return false; // We don't care about alignment here since we just emit integer accesses. @@ -2254,15 +2247,15 @@ bool ARMFastISel::SelectIntrinsicCall(const IntrinsicInst &I) { // we would emit dead code because we don't currently handle memmoves. bool isMemCpy = (I.getIntrinsicID() == Intrinsic::memcpy); if (isa(MTI.getLength()) && isMemCpy) { - // Small memcpy/memmove's are common enough that we want to do them - // without a call if possible. + // Small memcpy's are common enough that we want to do them without a call + // if possible. uint64_t Len = cast(MTI.getLength())->getZExtValue(); - if (ARMIsMemXferSmall(Len)) { + if (ARMIsMemCpySmall(Len)) { Address Dest, Src; if (!ARMComputeAddress(MTI.getRawDest(), Dest) || !ARMComputeAddress(MTI.getRawSource(), Src)) return false; - if (ARMTryEmitSmallMemXfer(Dest, Src, Len, isMemCpy)) + if (ARMTryEmitSmallMemCpy(Dest, Src, Len)) return true; } } -- cgit v1.1 From ef448767a35148261d6c82a8e55e6e2f4be8e631 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 14 Nov 2011 23:11:19 +0000 Subject: ARM parsing optional datatype suffix for VAND/VEOR/VORR instructions. 
rdar://10435076 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144587 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 21 ++++++++++++++++++++- lib/Target/ARM/ARMInstrNEON.td | 18 ++++++++++++++++++ 2 files changed, 38 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 841ca74..90ee018 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1995,6 +1995,20 @@ class NEONFPPat : Pat { // VFP/NEON Instruction aliases for type suffices. class VFPDataTypeInstAlias : InstAlias; +multiclass VFPDT8InstAlias { + def _8 : VFPDataTypeInstAlias; + def I8 : VFPDataTypeInstAlias; + def S8 : VFPDataTypeInstAlias; + def U8 : VFPDataTypeInstAlias; + def F8 : VFPDataTypeInstAlias; +} +multiclass VFPDT16InstAlias { + def _16 : VFPDataTypeInstAlias; + def I16 : VFPDataTypeInstAlias; + def S16 : VFPDataTypeInstAlias; + def U16 : VFPDataTypeInstAlias; + def F16 : VFPDataTypeInstAlias; +} multiclass VFPDT32InstAlias { def _32 : VFPDataTypeInstAlias; def I32 : VFPDataTypeInstAlias; @@ -2011,4 +2025,9 @@ multiclass VFPDT64InstAlias { def F64 : VFPDataTypeInstAlias; def D : VFPDataTypeInstAlias; } - +multiclass VFPDTAnyInstAlias { + defm : VFPDT8InstAlias; + defm : VFPDT16InstAlias; + defm : VFPDT32InstAlias; + defm : VFPDT64InstAlias; +} diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 07403c1..d83f6b8 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5179,3 +5179,21 @@ def : Pat<(v2f64 (bitconvert (v4i32 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v8i16 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v16i8 QPR:$src))), (v2f64 QPR:$src)>; def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; + + +//===----------------------------------------------------------------------===// +// Assembler aliases +// + +defm : 
VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", + (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", + (VANDq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", + (VEORd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"veor${p}", "$Vd, $Vn, $Vm", + (VEORq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", + (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; +defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", + (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; -- cgit v1.1 From 0530d0d5d9dfaae2e3c78a52729abcbf9fcdd21b Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 14 Nov 2011 23:20:14 +0000 Subject: Split out the plain '.{8|16|32|64}' suffix handling. Make it easier to deal with aliases for instructions that do require a suffix but accept more specific variants of the same size. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144588 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrFormats.td | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td index 90ee018..06ee2c8 100644 --- a/lib/Target/ARM/ARMInstrFormats.td +++ b/lib/Target/ARM/ARMInstrFormats.td @@ -1995,36 +1995,52 @@ class NEONFPPat : Pat { // VFP/NEON Instruction aliases for type suffices. 
class VFPDataTypeInstAlias : InstAlias; -multiclass VFPDT8InstAlias { - def _8 : VFPDataTypeInstAlias; +multiclass VFPDT8ReqInstAlias { def I8 : VFPDataTypeInstAlias; def S8 : VFPDataTypeInstAlias; def U8 : VFPDataTypeInstAlias; def F8 : VFPDataTypeInstAlias; } -multiclass VFPDT16InstAlias { - def _16 : VFPDataTypeInstAlias; +// VFPDT8ReqInstAlias plus plain ".8" +multiclass VFPDT8InstAlias { + def _8 : VFPDataTypeInstAlias; + defm : VFPDT8ReqInstAlias; +} +multiclass VFPDT16ReqInstAlias { def I16 : VFPDataTypeInstAlias; def S16 : VFPDataTypeInstAlias; def U16 : VFPDataTypeInstAlias; def F16 : VFPDataTypeInstAlias; } -multiclass VFPDT32InstAlias { - def _32 : VFPDataTypeInstAlias; +// VFPDT16ReqInstAlias plus plain ".16" +multiclass VFPDT16InstAlias { + def _16 : VFPDataTypeInstAlias; + defm : VFPDT16ReqInstAlias; +} +multiclass VFPDT32ReqInstAlias { def I32 : VFPDataTypeInstAlias; def S32 : VFPDataTypeInstAlias; def U32 : VFPDataTypeInstAlias; def F32 : VFPDataTypeInstAlias; def F : VFPDataTypeInstAlias; } -multiclass VFPDT64InstAlias { - def _64 : VFPDataTypeInstAlias; +// VFPDT32ReqInstAlias plus plain ".32" +multiclass VFPDT32InstAlias { + def _32 : VFPDataTypeInstAlias; + defm : VFPDT32ReqInstAlias; +} +multiclass VFPDT64ReqInstAlias { def I64 : VFPDataTypeInstAlias; def S64 : VFPDataTypeInstAlias; def U64 : VFPDataTypeInstAlias; def F64 : VFPDataTypeInstAlias; def D : VFPDataTypeInstAlias; } +// VFPDT64ReqInstAlias plus plain ".64" +multiclass VFPDT64InstAlias { + def _64 : VFPDataTypeInstAlias; + defm : VFPDT64ReqInstAlias; +} multiclass VFPDTAnyInstAlias { defm : VFPDT8InstAlias; defm : VFPDT16InstAlias; -- cgit v1.1 From 04db7f7a7d5d9312d2e40032883b708e321d55b3 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 14 Nov 2011 23:21:09 +0000 Subject: Add explanatory comment. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144589 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index d83f6b8..ce93bea 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5185,6 +5185,7 @@ def : Pat<(v2f64 (bitconvert (v4f32 QPR:$src))), (v2f64 QPR:$src)>; // Assembler aliases // +// VAND/VEOR/VORR accept but do not require a type suffix. defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", (VANDd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"vand${p}", "$Vd, $Vn, $Vm", -- cgit v1.1 From e052b9afa1301419f8b52eed9ed370393fcad78d Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 14 Nov 2011 23:32:59 +0000 Subject: ARM parsing datatype suffix variants for non-writeback VLD1 instructions. rdar://10435076 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144592 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index ce93bea..8474737 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5198,3 +5198,44 @@ defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRd DPR:$Vd, DPR:$Vn, DPR:$Vm, pred:$p)>; defm : VFPDTAnyInstAlias<"vorr${p}", "$Vd, $Vn, $Vm", (VORRq QPR:$Vd, QPR:$Vn, QPR:$Vm, pred:$p)>; + +// VLD1 requires a size suffix, but also accepts type specific variants. +// Load one D register. 
+defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d8 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d16 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d32 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d64 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; + +// Load two D registers. +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1q8 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1q16 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1q32 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1q64 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; + +// Load three D registers. +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d8T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d16T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d32T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d64T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; + +// Load four D registers. 
+defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d8Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d16Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", + (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; -- cgit v1.1 From dd47e0b5d4850fede4b2581c41f1e0a5eff5f05a Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Mon, 14 Nov 2011 23:43:46 +0000 Subject: ARM parsing datatype suffix variants for non-writeback VST1 instructions. rdar://10435076 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144593 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 44 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 8474737..3ccf992 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5239,3 +5239,47 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; + +// VST1 requires a size suffix, but also accepts type specific variants. +// Store one D register. +defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1d8 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1d16 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1d32 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1d64 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; + +// Store two D registers. 
+defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1q8 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1q16 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1q32 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", + (VST1q64 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; + +// FIXME: The three and four register VST1 instructions haven't been moved +// to the VecList* encoding yet, so we can't do assembly parsing support +// for them. Uncomment these when that happens. +// Store three D registers. +//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d8T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; +//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d16T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; +//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d32T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; +//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d64T addrmode6:$Rn, VecListThreeD:$Vd, pred:$p)>; + +// Store four D registers. +//defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d8Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; +//defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d16Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; +//defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d32Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; +//defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", +// (VST1d64Q addrmode6:$Rn, VecListFourD:$Vd, pred:$p)>; -- cgit v1.1 From ec381a4183f70122305f810c6b11b8f0e3bf83c0 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Tue, 15 Nov 2011 00:14:04 +0000 Subject: Fix linking for some users who already have tsan enabled code and are trying to link it against llvm code, by making our definitions weak. "Some users." 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144596 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Valgrind.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp index 078d705..46d93a2 100644 --- a/lib/Support/Valgrind.cpp +++ b/lib/Support/Valgrind.cpp @@ -56,10 +56,10 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) { // These functions require no implementation, tsan just looks at the arguments // they're called with. extern "C" { -void AnnotateHappensBefore(const char *file, int line, - const volatile void *cv) {} -void AnnotateHappensAfter(const char *file, int line, - const volatile void *cv) {} -void AnnotateIgnoreWritesBegin(const char *file, int line) {} -void AnnotateIgnoreWritesEnd(const char *file, int line) {} +LLVM_ATTRIBUTE_WEAK void AnnotateHappensBefore(const char *file, int line, + const volatile void *cv) {} +LLVM_ATTRIBUTE_WEAK void AnnotateHappensAfter(const char *file, int line, + const volatile void *cv) {} +LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesBegin(const char *file, int line){} +LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesEnd(const char *file, int line) {} } -- cgit v1.1 From 2947f730a96fc602ea008bba1929ae4f0638850a Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 15 Nov 2011 01:15:25 +0000 Subject: Track register ages more accurately. Keep track of the last instruction to define each register individually instead of per DomainValue. This lets us track more accurately when a register was last written. Also track register ages across basic blocks. When entering a new basic block, use the least stale predecessor def as a worst case estimate for register age. The register age is used to arbitrate between conflicting domains. The most recently defined register wins. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144601 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 285 +++++++++++++++++++++++++-------------- 1 file changed, 184 insertions(+), 101 deletions(-) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index fc0b612..d094411 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -45,7 +45,7 @@ using namespace llvm; /// DomainValue for each register, but it may contain multiple execution /// domains. A register value is initially created in a single execution /// domain, but if we were forced to pay the penalty of a domain crossing, we -/// keep track of the fact the the register is now available in multiple +/// keep track of the fact that the register is now available in multiple /// domains. namespace { struct DomainValue { @@ -57,9 +57,6 @@ struct DomainValue { // domains where the register is available for free. unsigned AvailableDomains; - // Position of the last defining instruction. - unsigned Dist; - // Pointer to the next DomainValue in a chain. When two DomainValues are // merged, Victim.Next is set to point to Victor, so old DomainValue // references can be updated by folowing the chain. @@ -101,7 +98,7 @@ struct DomainValue { // Clear this DomainValue and point to next which has all its data. void clear() { - AvailableDomains = Dist = 0; + AvailableDomains = 0; Next = 0; Instrs.clear(); } @@ -109,6 +106,21 @@ struct DomainValue { } namespace { +/// LiveReg - Information about a live register. +struct LiveReg { + /// Value currently in this register, or NULL when no value is being tracked. + /// This counts as a DomainValue reference. + DomainValue *Value; + + /// Instruction that defined this register, relative to the beginning of the + /// current basic block. 
When a LiveReg is used to represent a live-out + /// register, this value is relative to the end of the basic block, so it + /// will be a negative number. + int Def; +}; +} // anonymous namespace + +namespace { class ExeDepsFix : public MachineFunctionPass { static char ID; SpecificBumpPtrAllocator Allocator; @@ -120,10 +132,17 @@ class ExeDepsFix : public MachineFunctionPass { const TargetRegisterInfo *TRI; std::vector AliasMap; const unsigned NumRegs; - DomainValue **LiveRegs; - typedef DenseMap LiveOutMap; + LiveReg *LiveRegs; + typedef DenseMap LiveOutMap; LiveOutMap LiveOuts; - unsigned Distance; + + /// Current instruction number. + /// The first instruction in each basic block is 0. + int CurInstr; + + /// True when the current block has a predecessor that hasn't been visited + /// yet. + bool SeenUnknownBackEdge; public: ExeDepsFix(const TargetRegisterClass *rc) @@ -160,10 +179,10 @@ private: void collapse(DomainValue *dv, unsigned domain); bool merge(DomainValue *A, DomainValue *B); - bool enterBasicBlock(MachineBasicBlock*); + void enterBasicBlock(MachineBasicBlock*); void leaveBasicBlock(MachineBasicBlock*); void visitInstr(MachineInstr*); - void visitGenericInstr(MachineInstr*); + void processDefs(MachineInstr*, bool Kill); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); }; @@ -182,7 +201,6 @@ DomainValue *ExeDepsFix::alloc(int domain) { DomainValue *dv = Avail.empty() ? new(Allocator.Allocate()) DomainValue : Avail.pop_back_val(); - dv->Dist = Distance; if (domain >= 0) dv->addDomain(domain); assert(dv->Refs == 0 && "Reference count wasn't cleared"); @@ -231,32 +249,31 @@ DomainValue *ExeDepsFix::resolve(DomainValue *&DVRef) { /// Set LiveRegs[rx] = dv, updating reference counts. 
void ExeDepsFix::setLiveReg(int rx, DomainValue *dv) { assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs) { - LiveRegs = new DomainValue*[NumRegs]; - std::fill(LiveRegs, LiveRegs+NumRegs, (DomainValue*)0); - } + assert(LiveRegs && "Must enter basic block first."); - if (LiveRegs[rx] == dv) + if (LiveRegs[rx].Value == dv) return; - if (LiveRegs[rx]) - release(LiveRegs[rx]); - LiveRegs[rx] = retain(dv); + if (LiveRegs[rx].Value) + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = retain(dv); } // Kill register rx, recycle or collapse any DomainValue. void ExeDepsFix::kill(int rx) { assert(unsigned(rx) < NumRegs && "Invalid index"); - if (!LiveRegs || !LiveRegs[rx]) return; + assert(LiveRegs && "Must enter basic block first."); + if (!LiveRegs[rx].Value) + return; - release(LiveRegs[rx]); - LiveRegs[rx] = 0; + release(LiveRegs[rx].Value); + LiveRegs[rx].Value = 0; } /// Force register rx into domain. void ExeDepsFix::force(int rx, unsigned domain) { assert(unsigned(rx) < NumRegs && "Invalid index"); - DomainValue *dv; - if (LiveRegs && (dv = LiveRegs[rx])) { + assert(LiveRegs && "Must enter basic block first."); + if (DomainValue *dv = LiveRegs[rx].Value) { if (dv->isCollapsed()) dv->addDomain(domain); else if (dv->hasDomain(domain)) @@ -265,8 +282,8 @@ void ExeDepsFix::force(int rx, unsigned domain) { // This is an incompatible open DomainValue. Collapse it to whatever and // force the new value into domain. This costs a domain crossing. collapse(dv, dv->getFirstDomain()); - assert(LiveRegs[rx] && "Not live after collapse?"); - LiveRegs[rx]->addDomain(domain); + assert(LiveRegs[rx].Value && "Not live after collapse?"); + LiveRegs[rx].Value->addDomain(domain); } } else { // Set up basic collapsed DomainValue. @@ -287,7 +304,7 @@ void ExeDepsFix::collapse(DomainValue *dv, unsigned domain) { // If there are multiple users, give them new, unique DomainValues. 
if (LiveRegs && dv->Refs > 1) for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == dv) + if (LiveRegs[rx].Value == dv) setLiveReg(rx, alloc(domain)); } @@ -303,7 +320,6 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { if (!common) return false; A->AvailableDomains = common; - A->Dist = std::max(A->Dist, B->Dist); A->Instrs.append(B->Instrs.begin(), B->Instrs.end()); // Clear the old DomainValue so we won't try to swizzle instructions twice. @@ -312,66 +328,103 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) { B->Next = retain(A); for (unsigned rx = 0; rx != NumRegs; ++rx) - if (LiveRegs[rx] == B) + if (LiveRegs[rx].Value == B) setLiveReg(rx, A); return true; } // enterBasicBlock - Set up LiveRegs by merging predecessor live-out values. -// Return true if some predecessor hasn't been processed yet (like on a loop -// back-edge). -bool ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { +void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Detect back-edges from predecessors we haven't processed yet. - bool seenBackEdge = false; + SeenUnknownBackEdge = false; - // Try to coalesce live-out registers from predecessors. - for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), + // Reset instruction counter in each basic block. + CurInstr = 0; + + // Set up LiveRegs to represent registers entering MBB. + if (!LiveRegs) + LiveRegs = new LiveReg[NumRegs]; + + // Default values are 'nothing happened a long time ago'. + for (unsigned rx = 0; rx != NumRegs; ++rx) { + LiveRegs[rx].Value = 0; + LiveRegs[rx].Def = -(1 << 20); + } + + // This is the entry block. 
+ if (MBB->pred_empty()) { + for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(), e = MBB->livein_end(); i != e; ++i) { - int rx = regIndex(*i); - if (rx < 0) continue; - for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), - pe = MBB->pred_end(); pi != pe; ++pi) { - LiveOutMap::const_iterator fi = LiveOuts.find(*pi); - if (fi == LiveOuts.end()) { - seenBackEdge = true; + int rx = regIndex(*i); + if (rx < 0) continue; - } - if (!fi->second) + // Treat function live-ins as if they were defined just before the first + // instruction. Usually, function arguments are set up immediately + // before the call. + LiveRegs[rx].Def = -1; + } + DEBUG(dbgs() << "BB#" << MBB->getNumber() << ": entry\n"); + return; + } + + // Try to coalesce live-out registers from predecessors. + for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(), + pe = MBB->pred_end(); pi != pe; ++pi) { + LiveOutMap::const_iterator fi = LiveOuts.find(*pi); + if (fi == LiveOuts.end()) { + SeenUnknownBackEdge = true; + continue; + } + assert(fi->second && "Can't have NULL entries"); + + for (unsigned rx = 0; rx != NumRegs; ++rx) { + // Use the most recent predecessor def for each register. + LiveRegs[rx].Def = std::max(LiveRegs[rx].Def, fi->second[rx].Def); + + DomainValue *pdv = resolve(fi->second[rx].Value); + if (!pdv) continue; - DomainValue *pdv = resolve(fi->second[rx]); - if (!pdv) continue; - if (!LiveRegs || !LiveRegs[rx]) { + if (!LiveRegs[rx].Value) { setLiveReg(rx, pdv); continue; } // We have a live DomainValue from more than one predecessor. - if (LiveRegs[rx]->isCollapsed()) { + if (LiveRegs[rx].Value->isCollapsed()) { // We are already collapsed, but predecessor is not. Force him. 
- unsigned domain = LiveRegs[rx]->getFirstDomain(); - if (!pdv->isCollapsed() && pdv->hasDomain(domain)) - collapse(pdv, domain); + unsigned Domain = LiveRegs[rx].Value->getFirstDomain(); + if (!pdv->isCollapsed() && pdv->hasDomain(Domain)) + collapse(pdv, Domain); continue; } // Currently open, merge in predecessor. if (!pdv->isCollapsed()) - merge(LiveRegs[rx], pdv); + merge(LiveRegs[rx].Value, pdv); else force(rx, pdv->getFirstDomain()); } } - return seenBackEdge; + DEBUG(dbgs() << "BB#" << MBB->getNumber() + << (SeenUnknownBackEdge ? ": incomplete\n" : ": all preds known\n")); } void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { + assert(LiveRegs && "Must enter basic block first."); // Save live registers at end of MBB - used by enterBasicBlock(). // Also use LiveOuts as a visited set to detect back-edges. - if (!LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second && LiveRegs) { + bool First = LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second; + + if (First) { + // LiveRegs was inserted in LiveOuts. Adjust all defs to be relative to + // the end of this block instead of the beginning. + for (unsigned i = 0, e = NumRegs; i != e; ++i) + LiveRegs[i].Def -= CurInstr; + } else { // Insertion failed, this must be the second pass. // Release all the DomainValues instead of keeping them. for (unsigned i = 0, e = NumRegs; i != e; ++i) - release(LiveRegs[i]); + release(LiveRegs[i].Value); delete[] LiveRegs; } LiveRegs = 0; @@ -380,15 +433,52 @@ void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) { void ExeDepsFix::visitInstr(MachineInstr *MI) { if (MI->isDebugValue()) return; - ++Distance; - std::pair domp = TII->getExecutionDomain(MI); - if (domp.first) - if (domp.second) - visitSoftInstr(MI, domp.second); + + // Update instructions with explicit execution domains. 
+ std::pair DomP = TII->getExecutionDomain(MI); + if (DomP.first) { + if (DomP.second) + visitSoftInstr(MI, DomP.second); else - visitHardInstr(MI, domp.first); - else if (LiveRegs) - visitGenericInstr(MI); + visitHardInstr(MI, DomP.first); + } + + // Process defs to track register ages, and kill values clobbered by generic + // instructions. + processDefs(MI, !DomP.first); +} + +// Update def-ages for registers defined by MI. +// If Kill is set, also kill off DomainValues clobbered by the defs. +void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { + assert(!MI->isDebugValue() && "Won't process debug values"); + const MCInstrDesc &MCID = MI->getDesc(); + for (unsigned i = 0, + e = MCID.isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); + i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + if (MO.isImplicit()) + break; + if (MO.isUse()) + continue; + int rx = regIndex(MO.getReg()); + if (rx < 0) + continue; + + // This instruction explicitly defines rx. + DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr + << '\t' << *MI); + + LiveRegs[rx].Def = CurInstr; + + // Kill off domains redefined by generic instructions. + if (Kill) + kill(rx); + } + + ++CurInstr; } // A hard instruction only works in one domain. All input registers will be @@ -430,7 +520,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { if (!mo.isReg()) continue; int rx = regIndex(mo.getReg()); if (rx < 0) continue; - if (DomainValue *dv = LiveRegs[rx]) { + if (DomainValue *dv = LiveRegs[rx].Value) { // Bitmask of domains that dv and available have in common. unsigned common = dv->getCommonDomains(available); // Is it possible to use this collapsed register for free? @@ -459,52 +549,53 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. 
- SmallVector doms; + SmallVector Regs; for (SmallVector::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; - DomainValue *dv = LiveRegs[rx]; + const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. - if (!dv->getCommonDomains(available)) { - kill(*i); + if (!LR.Value->getCommonDomains(available)) { + kill(rx); continue; } - // sorted, uniqued insert. - bool inserted = false; - for (SmallVector::iterator i = doms.begin(), e = doms.end(); - i != e && !inserted; ++i) { - if (dv == *i) - inserted = true; - else if (dv->Dist < (*i)->Dist) { - inserted = true; - doms.insert(i, dv); + // Sorted insertion. + bool Inserted = false; + for (SmallVector::iterator i = Regs.begin(), e = Regs.end(); + i != e && !Inserted; ++i) { + if (LR.Def < i->Def) { + Inserted = true; + Regs.insert(i, LR); } } - if (!inserted) - doms.push_back(dv); + if (!Inserted) + Regs.push_back(LR); } // doms are now sorted in order of appearance. Try to merge them all, giving // priority to the latest ones. DomainValue *dv = 0; - while (!doms.empty()) { + while (!Regs.empty()) { if (!dv) { - dv = doms.pop_back_val(); + dv = Regs.pop_back_val().Value; continue; } - DomainValue *latest = doms.pop_back_val(); - if (merge(dv, latest)) continue; + DomainValue *Latest = Regs.pop_back_val().Value; + // Skip already merged values. + if (Latest == dv || Latest->Next) + continue; + if (merge(dv, Latest)) + continue; // If latest didn't merge, it is useless now. Kill all registers using it. for (SmallVector::iterator i=used.begin(), e=used.end(); i != e; ++i) - if (LiveRegs[*i] == latest) + if (LiveRegs[*i].Value == Latest) kill(*i); } // dv is the DomainValue we are going to use for this instruction. 
if (!dv) dv = alloc(); - dv->Dist = Distance; dv->AvailableDomains = available; dv->Instrs.push_back(mi); @@ -514,32 +605,23 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { if (!mo.isReg()) continue; int rx = regIndex(mo.getReg()); if (rx < 0) continue; - if (!LiveRegs || !LiveRegs[rx] || (mo.isDef() && LiveRegs[rx]!=dv)) { + if (!LiveRegs[rx].Value || (mo.isDef() && LiveRegs[rx].Value != dv)) { kill(rx); setLiveReg(rx, dv); } } } -void ExeDepsFix::visitGenericInstr(MachineInstr *mi) { - // Process explicit defs, kill any relevant registers redefined. - for (unsigned i = 0, e = mi->getDesc().getNumDefs(); i != e; ++i) { - MachineOperand &mo = mi->getOperand(i); - if (!mo.isReg()) continue; - int rx = regIndex(mo.getReg()); - if (rx < 0) continue; - kill(rx); - } -} - bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { MF = &mf; TII = MF->getTarget().getInstrInfo(); TRI = MF->getTarget().getRegisterInfo(); LiveRegs = 0; - Distance = 0; assert(NumRegs == RC->getNumRegs() && "Bad regclass"); + DEBUG(dbgs() << "********** FIX EXECUTION DEPENDENCIES: " + << RC->getName() << " **********\n"); + // If no relevant registers are used in the function, we can skip it // completely. 
bool anyregs = false; @@ -567,7 +649,8 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { for (ReversePostOrderTraversal::rpo_iterator MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) { MachineBasicBlock *MBB = *MBBI; - if (enterBasicBlock(MBB)) + enterBasicBlock(MBB); + if (SeenUnknownBackEdge) Loops.push_back(MBB); for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) @@ -590,8 +673,8 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { if (FI == LiveOuts.end() || !FI->second) continue; for (unsigned i = 0, e = NumRegs; i != e; ++i) - if (FI->second[i]) - release(FI->second[i]); + if (FI->second[i].Value) + release(FI->second[i].Value); delete[] FI->second; } LiveOuts.clear(); -- cgit v1.1 From c2ecf3efbf375fc82bb1cea6afd7448498f9ae75 Mon Sep 17 00:00:00 2001 From: Jakob Stoklund Olesen Date: Tue, 15 Nov 2011 01:15:30 +0000 Subject: Break false dependencies before partial register updates. Two new TargetInstrInfo hooks let the target tell ExecutionDepsFix about instructions with partial register updates causing false unwanted dependencies. The ExecutionDepsFix pass will break the false dependencies if the updated register was written in the previous N instructions. The small loop added to sse-domains.ll runs twice as fast with dependency-breaking instructions inserted.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144602 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/ExecutionDepsFix.cpp | 27 +++++++++++++++++++++ lib/Target/X86/X86InstrInfo.cpp | 52 ++++++++++++++++++++++++++++++++++++++++ lib/Target/X86/X86InstrInfo.h | 5 ++++ 3 files changed, 84 insertions(+) (limited to 'lib') diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index d094411..050edce 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -471,11 +471,34 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr << '\t' << *MI); + // How many instructions since rx was last written? + unsigned Clearance = CurInstr - LiveRegs[rx].Def; LiveRegs[rx].Def = CurInstr; // Kill off domains redefined by generic instructions. if (Kill) kill(rx); + + // Verify clearance before partial register updates. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (!Pref) + continue; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + TII->breakPartialRegDependency(MI, i, TRI); + continue; + } + + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK.\n"); + continue; + } + + // A def from an unprocessed back-edge may make us break this dependency. 
+ DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); } ++CurInstr; @@ -663,6 +686,10 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { for (unsigned i = 0, e = Loops.size(); i != e; ++i) { MachineBasicBlock *MBB = Loops[i]; enterBasicBlock(MBB); + for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; + ++I) + if (!I->isDebugValue()) + processDefs(I, false); leaveBasicBlock(MBB); } diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index d9ffd81..9428fff 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -2761,6 +2761,10 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, /// static bool hasPartialRegUpdate(unsigned Opcode) { switch (Opcode) { + case X86::CVTSI2SSrr: + case X86::CVTSI2SS64rr: + case X86::CVTSI2SDrr: + case X86::CVTSI2SD64rr: case X86::CVTSD2SSrr: case X86::Int_CVTSD2SSrr: case X86::CVTSS2SDrr: @@ -2789,6 +2793,54 @@ static bool hasPartialRegUpdate(unsigned Opcode) { return false; } +/// getPartialRegUpdateClearance - Inform the ExeDepsFix pass how many idle +/// instructions we would like before a partial register update. +unsigned X86InstrInfo:: +getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + if (OpNum != 0 || !hasPartialRegUpdate(MI->getOpcode())) + return 0; + + // If MI is marked as reading Reg, the partial register update is wanted. + const MachineOperand &MO = MI->getOperand(0); + unsigned Reg = MO.getReg(); + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (MO.readsReg() || MI->readsVirtualRegister(Reg)) + return 0; + } else { + if (MI->readsRegister(Reg, TRI)) + return 0; + } + + // If any of the preceding 16 instructions are reading Reg, insert a + // dependency breaking instruction. The magic number is based on a few + // Nehalem experiments. 
+ return 16; +} + +void X86InstrInfo:: +breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const { + unsigned Reg = MI->getOperand(OpNum).getReg(); + if (X86::VR128RegClass.contains(Reg)) { + // These instructions are all floating point domain, so xorps is the best + // choice. + bool HasAVX = TM.getSubtarget().hasAVX(); + unsigned Opc = HasAVX ? X86::VXORPSrr : X86::XORPSrr; + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(Opc), Reg) + .addReg(Reg, RegState::Undef).addReg(Reg, RegState::Undef); + } else if (X86::VR256RegClass.contains(Reg)) { + // Use vxorps to clear the full ymm register. + // It wants to read and write the xmm sub-register. + unsigned XReg = TRI->getSubReg(Reg, X86::sub_xmm); + BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), get(X86::VXORPSrr), XReg) + .addReg(XReg, RegState::Undef).addReg(XReg, RegState::Undef) + .addReg(Reg, RegState::ImplicitDefine); + } else + return; + MI->addRegisterKilled(Reg, TRI, true); +} + MachineInstr* X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, const SmallVectorImpl &Ops, diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h index 97009db..ee488d8 100644 --- a/lib/Target/X86/X86InstrInfo.h +++ b/lib/Target/X86/X86InstrInfo.h @@ -345,6 +345,11 @@ public: void setExecutionDomain(MachineInstr *MI, unsigned Domain) const; + unsigned getPartialRegUpdateClearance(const MachineInstr *MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const; + void breakPartialRegDependency(MachineBasicBlock::iterator MI, unsigned OpNum, + const TargetRegisterInfo *TRI) const; + MachineInstr* foldMemoryOperandImpl(MachineFunction &MF, MachineInstr* MI, unsigned OpNum, -- cgit v1.1 From e7c1aef2b824f29ea92b2b324975915fe2115fa4 Mon Sep 17 00:00:00 2001 From: Nick Lewycky Date: Tue, 15 Nov 2011 01:23:22 +0000 Subject: Move WEAK marking to the declaration. 
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144603 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Support/Valgrind.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/lib/Support/Valgrind.cpp b/lib/Support/Valgrind.cpp index 46d93a2..078d705 100644 --- a/lib/Support/Valgrind.cpp +++ b/lib/Support/Valgrind.cpp @@ -56,10 +56,10 @@ void llvm::sys::ValgrindDiscardTranslations(const void *Addr, size_t Len) { // These functions require no implementation, tsan just looks at the arguments // they're called with. extern "C" { -LLVM_ATTRIBUTE_WEAK void AnnotateHappensBefore(const char *file, int line, - const volatile void *cv) {} -LLVM_ATTRIBUTE_WEAK void AnnotateHappensAfter(const char *file, int line, - const volatile void *cv) {} -LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesBegin(const char *file, int line){} -LLVM_ATTRIBUTE_WEAK void AnnotateIgnoreWritesEnd(const char *file, int line) {} +void AnnotateHappensBefore(const char *file, int line, + const volatile void *cv) {} +void AnnotateHappensAfter(const char *file, int line, + const volatile void *cv) {} +void AnnotateIgnoreWritesBegin(const char *file, int line) {} +void AnnotateIgnoreWritesEnd(const char *file, int line) {} } -- cgit v1.1 From bfc9429c2b814469adf3930dda31539d1c3319d8 Mon Sep 17 00:00:00 2001 From: Jim Grosbach Date: Tue, 15 Nov 2011 01:46:57 +0000 Subject: ARM parsing datatype suffix variants for fixed-writeback VLD1/VST1 instructions. 
rdar://10435076 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144606 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMInstrNEON.td | 69 ++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 3ccf992..49cc254 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -5209,6 +5209,15 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", (VLD1d32 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", (VLD1d64 VecListOneD:$Vd, addrmode6:$Rn, pred:$p)>; +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d8wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d16wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d32wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d64wb_fixed VecListOneD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; // Load two D registers. 
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", @@ -5219,6 +5228,15 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", (VLD1q32 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", (VLD1q64 VecListTwoD:$Vd, addrmode6:$Rn, pred:$p)>; +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1q8wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1q16wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1q32wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1q64wb_fixed VecListTwoD:$Vd, zero_reg, addrmode6:$Rn, pred:$p)>; // Load three D registers. defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", @@ -5229,6 +5247,20 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", (VLD1d32T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", (VLD1d64T VecListThreeD:$Vd, addrmode6:$Rn, pred:$p)>; +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d8Twb_fixed VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d16Twb_fixed VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d32Twb_fixed VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d64Twb_fixed VecListThreeD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; + // Load four D registers. 
defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn", @@ -5239,9 +5271,22 @@ defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn", (VLD1d32Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn", (VLD1d64Q VecListFourD:$Vd, addrmode6:$Rn, pred:$p)>; +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d8Qwb_fixed VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d16Qwb_fixed VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d32Qwb_fixed VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vld1${p}", "$Vd, $Rn!", + (VLD1d64Qwb_fixed VecListFourD:$Vd, zero_reg, + addrmode6:$Rn, pred:$p)>; // VST1 requires a size suffix, but also accepts type specific variants. -// Load one D register. +// Store one D register. defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", (VST1d8 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", @@ -5250,8 +5295,17 @@ defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", (VST1d32 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", (VST1d64 addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; - -// Load two D registers. +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1d8wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1d16wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1d32wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1d64wb_fixed zero_reg, addrmode6:$Rn, VecListOneD:$Vd, pred:$p)>; + +// Store two D registers. 
defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn", (VST1q8 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn", @@ -5260,6 +5314,15 @@ defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn", (VST1q32 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn", (VST1q64 addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +// with writeback, fixed stride +defm : VFPDT8ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1q8wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT16ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1q16wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT32ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1q32wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; +defm : VFPDT64ReqInstAlias<"vst1${p}", "$Vd, $Rn!", + (VST1q64wb_fixed zero_reg, addrmode6:$Rn, VecListTwoD:$Vd, pred:$p)>; // FIXME: The three and four register VST1 instructions haven't been moved // to the VecList* encoding yet, so we can't do assembly parsing support -- cgit v1.1