diff options
-rw-r--r-- | lib/CodeGen/RegisterCoalescer.cpp | 50 | ||||
-rw-r--r-- | lib/CodeGen/RegisterCoalescer.h | 3 | ||||
-rw-r--r-- | test/CodeGen/X86/crash-nosse.ll | 27 |
3 files changed, 80 insertions, 0 deletions
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index b91f92c..887954f 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -760,6 +760,49 @@ bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, return true; } +/// eliminateUndefCopy - ProcessImplicitDefs may leave some copies of <undef> +/// values; it only removes local variables. When we have a copy like: +/// +/// %vreg1 = COPY %vreg2<undef> +/// +/// We delete the copy and remove the corresponding value number from %vreg1. +/// Any uses of that value number are marked as <undef>. Returns true if the copy was handled this way, and false (changing nothing) if the interval of either register in the pair is live at the copy's instruction index. +bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, + const CoalescerPair &CP) { + SlotIndex Idx = li_->getInstructionIndex(CopyMI); + LiveInterval *SrcInt = &li_->getInterval(CP.getSrcReg()); + if (SrcInt->liveAt(Idx)) + return false; + LiveInterval *DstInt = &li_->getInterval(CP.getDstReg()); + if (DstInt->liveAt(Idx)) + return false; + + // No intervals are live-in to CopyMI - it is undef. + if (CP.isFlipped()) + DstInt = SrcInt; + SrcInt = 0; + + VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getDefIndex()); + assert(DeadVNI && "No value defined in DstInt"); + DstInt->removeValNo(DeadVNI); + + // Find new undef uses: operands of DstInt->reg that are neither defs nor already <undef>, on instructions where DstInt is no longer live after removing the dead value number. + for (MachineRegisterInfo::reg_nodbg_iterator + I = mri_->reg_nodbg_begin(DstInt->reg), E = mri_->reg_nodbg_end(); + I != E; ++I) { + MachineOperand &MO = I.getOperand(); + if (MO.isDef() || MO.isUndef()) + continue; + MachineInstr *MI = MO.getParent(); + SlotIndex Idx = li_->getInstructionIndex(MI); + if (DstInt->liveAt(Idx)) + continue; + MO.setIsUndef(true); + DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI); + } + return true; +} + /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. 
If DstReg is a /// physical register and the existing subregister number of the def / use @@ -1018,6 +1061,13 @@ bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { return false; // Not coalescable. } + // Eliminate undefs. + if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) { + markAsJoined(CopyMI); + DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n"); + return false; // Not coalescable. + } + DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_) << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx()) << "\n"); diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h index 4131d91..7ba6ef7 100644 --- a/lib/CodeGen/RegisterCoalescer.h +++ b/lib/CodeGen/RegisterCoalescer.h @@ -136,6 +136,9 @@ namespace llvm { /// markAsJoined - Remember that CopyMI has already been joined. void markAsJoined(MachineInstr *CopyMI); + /// eliminateUndefCopy - Handle copies of undef values. + bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP); + public: static char ID; // Class identification, replacement for typeinfo RegisterCoalescer() : MachineFunctionPass(ID) { diff --git a/test/CodeGen/X86/crash-nosse.ll b/test/CodeGen/X86/crash-nosse.ll new file mode 100644 index 0000000..1cec25b --- /dev/null +++ b/test/CodeGen/X86/crash-nosse.ll @@ -0,0 +1,27 @@ +; RUN: llc < %s -mattr=-sse2,-sse41 -verify-machineinstrs +target triple = "x86_64-unknown-linux-gnu" + +; PR10503 +; This test case produces INSERT_SUBREG 0, <undef> instructions that +; ProcessImplicitDefs doesn't eliminate. 
+define void @autogen_136178_500() { +BB: + %Shuff6 = shufflevector <32 x i32> undef, <32 x i32> undef, <32 x i32> <i32 27, i32 29, i32 31, i32 undef, i32 undef, i32 37, i32 39, i32 41, i32 undef, i32 45, i32 47, i32 49, i32 51, i32 53, i32 55, i32 57, i32 undef, i32 61, i32 63, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 undef, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25> + %S17 = select i1 true, <8 x float>* null, <8 x float>* null + br label %CF + +CF: ; preds = %CF, %BB + %L19 = load <8 x float>* %S17 + %BC = bitcast <32 x i32> %Shuff6 to <32 x float> + %S28 = fcmp ord double 0x3ED1A1F787BB2185, 0x3EE59DE55A8DF890 + br i1 %S28, label %CF, label %CF39 + +CF39: ; preds = %CF39, %CF + store <8 x float> %L19, <8 x float>* %S17 + %I35 = insertelement <32 x float> %BC, float 0x3EC2489F60000000, i32 9 + %S38 = fcmp ule double 0x3EE59DE55A8DF890, 0x3EC4AB0CBB986A1A + br i1 %S38, label %CF39, label %CF40 + +CF40: ; preds = %CF39 + ret void +} |