Diffstat (limited to 'lib/Target/X86/X86FastISel.cpp')
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 102
1 file changed, 73 insertions, 29 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 295a577..5bc3420 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -79,8 +79,10 @@ private:
   bool X86FastEmitLoad(EVT VT, const X86AddressMode &AM, unsigned &RR);
 
-  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM);
-  bool X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM);
+  bool X86FastEmitStore(EVT VT, const Value *Val, const X86AddressMode &AM,
+                        bool Aligned = false);
+  bool X86FastEmitStore(EVT VT, unsigned ValReg, const X86AddressMode &AM,
+                        bool Aligned = false);
 
   bool X86FastEmitExtend(ISD::NodeType Opc, EVT DstVT,
                          unsigned Src, EVT SrcVT,
                          unsigned &ResultReg);
@@ -233,7 +235,8 @@ bool X86FastISel::X86FastEmitLoad(EVT VT, const X86AddressMode &AM,
 /// and a displacement offset, or a GlobalAddress,
 /// i.e. V. Return true if it is possible.
 bool
-X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
+X86FastISel::X86FastEmitStore(EVT VT, unsigned ValReg,
+                              const X86AddressMode &AM, bool Aligned) {
   // Get opcode and regclass of the output for the given store instruction.
   unsigned Opc = 0;
   switch (VT.getSimpleVT().SimpleTy) {
@@ -243,8 +246,8 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
     // Mask out all but lowest bit.
     unsigned AndResult = createResultReg(&X86::GR8RegClass);
     BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-            TII.get(X86::AND8ri), AndResult).addReg(Val).addImm(1);
-    Val = AndResult;
+            TII.get(X86::AND8ri), AndResult).addReg(ValReg).addImm(1);
+    ValReg = AndResult;
   }
   // FALLTHROUGH, handling i1 as i8.
   case MVT::i8:  Opc = X86::MOV8mr;  break;
@@ -260,26 +263,35 @@ X86FastISel::X86FastEmitStore(EVT VT, unsigned Val, const X86AddressMode &AM) {
           (Subtarget->hasAVX() ? X86::VMOVSDmr : X86::MOVSDmr) : X86::ST_Fp64m;
     break;
   case MVT::v4f32:
-    Opc = X86::MOVAPSmr;
+    if (Aligned)
+      Opc = Subtarget->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;
+    else
+      Opc = Subtarget->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
     break;
   case MVT::v2f64:
-    Opc = X86::MOVAPDmr;
+    if (Aligned)
+      Opc = Subtarget->hasAVX() ? X86::VMOVAPDmr : X86::MOVAPDmr;
+    else
+      Opc = Subtarget->hasAVX() ? X86::VMOVUPDmr : X86::MOVUPDmr;
     break;
   case MVT::v4i32:
   case MVT::v2i64:
   case MVT::v8i16:
   case MVT::v16i8:
-    Opc = X86::MOVDQAmr;
+    if (Aligned)
+      Opc = Subtarget->hasAVX() ? X86::VMOVDQAmr : X86::MOVDQAmr;
+    else
+      Opc = Subtarget->hasAVX() ? X86::VMOVDQUmr : X86::MOVDQUmr;
     break;
   }
 
   addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt,
-                         DL, TII.get(Opc)), AM).addReg(Val);
+                         DL, TII.get(Opc)), AM).addReg(ValReg);
   return true;
 }
 
 bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
-                                   const X86AddressMode &AM) {
+                                   const X86AddressMode &AM, bool Aligned) {
   // Handle 'null' like i32/i64 0.
   if (isa<ConstantPointerNull>(Val))
     Val = Constant::getNullValue(TD.getIntPtrType(Val->getContext()));
@@ -314,7 +326,7 @@ bool X86FastISel::X86FastEmitStore(EVT VT, const Value *Val,
   if (ValReg == 0)
     return false;
 
-  return X86FastEmitStore(VT, ValReg, AM);
+  return X86FastEmitStore(VT, ValReg, AM, Aligned);
 }
 
 /// X86FastEmitExtend - Emit a machine instruction to extend a value Src of
@@ -688,6 +700,10 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
   if (S->isAtomic())
     return false;
 
+  unsigned SABIAlignment =
+    TD.getABITypeAlignment(S->getValueOperand()->getType());
+  bool Aligned = S->getAlignment() == 0 || S->getAlignment() >= SABIAlignment;
+
   MVT VT;
   if (!isTypeLegal(I->getOperand(0)->getType(), VT, /*AllowI1=*/true))
     return false;
@@ -696,7 +712,7 @@ bool X86FastISel::X86SelectStore(const Instruction *I) {
   if (!X86SelectAddress(I->getOperand(1), AM))
     return false;
 
-  return X86FastEmitStore(VT, I->getOperand(0), AM);
+  return X86FastEmitStore(VT, I->getOperand(0), AM, Aligned);
 }
 
 /// X86SelectRet - Select and emit code to implement ret instructions.
@@ -712,10 +728,11 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
   CallingConv::ID CC = F.getCallingConv();
   if (CC != CallingConv::C &&
       CC != CallingConv::Fast &&
-      CC != CallingConv::X86_FastCall)
+      CC != CallingConv::X86_FastCall &&
+      CC != CallingConv::X86_64_SysV)
     return false;
 
-  if (Subtarget->isTargetWin64())
+  if (Subtarget->isCallingConvWin64(CC))
     return false;
 
   // Don't handle popping bytes on return for now.
@@ -1376,10 +1393,37 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) {
   // Generate the DIV/IDIV instruction.
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
           TII.get(OpEntry.OpDivRem)).addReg(Op1Reg);
-  // Copy output register into result register.
-  unsigned ResultReg = createResultReg(TypeEntry.RC);
-  BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-          TII.get(Copy), ResultReg).addReg(OpEntry.DivRemResultReg);
+  // For i8 remainder, we can't reference AH directly, as we'll end
+  // up with bogus copies like %R9B = COPY %AH. Reference AX
+  // instead to prevent AH references in a REX instruction.
+  //
+  // The current assumption of the fast register allocator is that isel
+  // won't generate explicit references to the GPR8_NOREX registers. If
+  // the allocator and/or the backend get enhanced to be more robust in
+  // that regard, this can be, and should be, removed.
+  unsigned ResultReg = 0;
+  if ((I->getOpcode() == Instruction::SRem ||
+       I->getOpcode() == Instruction::URem) &&
+      OpEntry.DivRemResultReg == X86::AH && Subtarget->is64Bit()) {
+    unsigned SourceSuperReg = createResultReg(&X86::GR16RegClass);
+    unsigned ResultSuperReg = createResultReg(&X86::GR16RegClass);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+            TII.get(Copy), SourceSuperReg).addReg(X86::AX);
+
+    // Shift AX right by 8 bits instead of using AH.
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(X86::SHR16ri),
+            ResultSuperReg).addReg(SourceSuperReg).addImm(8);
+
+    // Now reference the 8-bit subreg of the result.
+    ResultReg = FastEmitInst_extractsubreg(MVT::i8, ResultSuperReg,
+                                           /*Kill=*/true, X86::sub_8bit);
+  }
+  // Copy the result out of the physreg if we haven't already.
+  if (!ResultReg) {
+    ResultReg = createResultReg(TypeEntry.RC);
+    BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Copy), ResultReg)
+      .addReg(OpEntry.DivRemResultReg);
+  }
   UpdateValueMap(I, ResultReg);
 
   return true;
@@ -1678,9 +1722,6 @@ bool X86FastISel::FastLowerArguments() {
   if (!FuncInfo.CanLowerReturn)
     return false;
 
-  if (Subtarget->isTargetWin64())
-    return false;
-
   const Function *F = FuncInfo.Fn;
   if (F->isVarArg())
     return false;
@@ -1688,7 +1729,10 @@
 
   CallingConv::ID CC = F->getCallingConv();
   if (CC != CallingConv::C)
     return false;
-  
+
+  if (Subtarget->isCallingConvWin64(CC))
+    return false;
+
   if (!Subtarget->is64Bit())
     return false;
@@ -1732,8 +1776,6 @@
   const TargetRegisterClass *RC64 = TLI.getRegClassFor(MVT::i64);
   for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end();
        I != E; ++I, ++Idx) {
-    if (I->use_empty())
-      continue;
     bool is32Bit = TLI.getValueType(I->getType()) == MVT::i32;
     const TargetRegisterClass *RC = is32Bit ? RC32 : RC64;
     unsigned SrcReg = is32Bit ? GPR32ArgRegs[Idx] : GPR64ArgRegs[Idx];
@@ -1792,8 +1834,10 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   // Handle only C and fastcc calling conventions for now.
   ImmutableCallSite CS(CI);
   CallingConv::ID CC = CS.getCallingConv();
+  bool isWin64 = Subtarget->isCallingConvWin64(CC);
   if (CC != CallingConv::C && CC != CallingConv::Fast &&
-      CC != CallingConv::X86_FastCall)
+      CC != CallingConv::X86_FastCall && CC != CallingConv::X86_64_Win64 &&
+      CC != CallingConv::X86_64_SysV)
     return false;
 
   // fastcc with -tailcallopt is intended to provide a guaranteed
@@ -1807,7 +1851,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
 
   // Don't know how to handle Win64 varargs yet. Nothing special needed for
   // x86-32. Special handling for x86-64 is implemented.
-  if (isVarArg && Subtarget->isTargetWin64())
+  if (isVarArg && isWin64)
     return false;
 
   // Fast-isel doesn't know about callee-pop yet.
@@ -1937,7 +1981,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
                  I->getParent()->getContext());
 
   // Allocate shadow area for Win64
-  if (Subtarget->isTargetWin64())
+  if (isWin64)
     CCInfo.AllocateStack(32, 8);
 
   CCInfo.AnalyzeCallOperands(ArgVTs, ArgFlags, CC_X86);
@@ -2053,7 +2097,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
             X86::EBX).addReg(Base);
   }
 
-  if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64()) {
+  if (Subtarget->is64Bit() && isVarArg && !isWin64) {
     // Count the number of XMM registers allocated.
     static const uint16_t XMMArgRegs[] = {
       X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
@@ -2122,7 +2166,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   if (Subtarget->isPICStyleGOT())
     MIB.addReg(X86::EBX, RegState::Implicit);
 
-  if (Subtarget->is64Bit() && isVarArg && !Subtarget->isTargetWin64())
+  if (Subtarget->is64Bit() && isVarArg && !isWin64)
     MIB.addReg(X86::AL, RegState::Implicit);
 
   // Add implicit physical register uses to the call.
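Note on the store-alignment change above: X86SelectStore now treats a store as "Aligned" when the IR store either carries no explicit alignment (0 means "use the ABI alignment") or its alignment is at least the ABI alignment of the stored type; only then may X86FastEmitStore pick the aligned vector moves (MOVAPS/MOVAPD/MOVDQA, or their VEX forms with AVX), falling back to the unaligned forms otherwise. Below is a minimal standalone sketch of that decision for the v4f32 case; the struct and function names are illustrative stand-ins, not LLVM APIs.

#include <cstdio>

// Hypothetical stand-ins for what fast-isel reads from the StoreInst,
// the DataLayout, and the subtarget; names are illustrative only.
struct StoreQuery {
  unsigned ExplicitAlign; // S->getAlignment(): 0 means "ABI alignment"
  unsigned ABITypeAlign;  // TD.getABITypeAlignment(stored type)
  bool HasAVX;            // Subtarget->hasAVX()
};

// Mirrors the v4f32 arm of the switch in X86FastEmitStore: use an aligned
// move only when the store is known to be at least ABI-aligned.
static const char *pickV4F32StoreOpcode(const StoreQuery &Q) {
  bool Aligned = Q.ExplicitAlign == 0 || Q.ExplicitAlign >= Q.ABITypeAlign;
  if (Aligned)
    return Q.HasAVX ? "VMOVAPSmr" : "MOVAPSmr";
  return Q.HasAVX ? "VMOVUPSmr" : "MOVUPSmr";
}

int main() {
  StoreQuery Underaligned = {4, 16, false}; // e.g. store <4 x float>, align 4
  StoreQuery DefaultAlign = {0, 16, true};  // no explicit alignment, AVX
  std::printf("%s\n", pickV4F32StoreOpcode(Underaligned)); // MOVUPSmr
  std::printf("%s\n", pickV4F32StoreOpcode(DefaultAlign)); // VMOVAPSmr
  return 0;
}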
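Note on the X86SelectDivRem change above: an 8-bit srem/urem produces its remainder in AH, but AH cannot be encoded in an instruction that also carries a REX prefix, so on x86-64 fast-isel copies AX into a virtual 16-bit register, shifts it right by 8, and extracts the low 8-bit subregister instead of referencing AH. The shift is value-preserving because AH is simply bits 15:8 of AX. The sketch below demonstrates that equivalence as plain arithmetic; it models the x86 8-bit DIV convention and is not the MachineInstr-level code from the diff.

#include <cassert>
#include <cstdint>
#include <cstdio>

// Model the x86 8-bit DIV: AX is divided by an 8-bit divisor, the quotient
// lands in AL (bits 7:0 of AX) and the remainder in AH (bits 15:8 of AX).
static uint16_t emulateDiv8(uint16_t ax, uint8_t divisor) {
  uint8_t quotient = static_cast<uint8_t>(ax / divisor);  // -> AL
  uint8_t remainder = static_cast<uint8_t>(ax % divisor); // -> AH
  return static_cast<uint16_t>((remainder << 8) | quotient);
}

int main() {
  uint16_t ax = emulateDiv8(200, 23); // 200 = 8 * 23 + 16
  // Shifting the full AX right by 8 yields the same value as reading AH,
  // which is why the SHR16ri + sub_8bit extraction in the diff is safe.
  uint8_t viaShift = static_cast<uint8_t>(ax >> 8);
  assert(viaShift == 16);
  std::printf("remainder = %u\n", viaShift);
  return 0;
}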