From 6202e45d53d49e0f973a8d3b362941411b7604b5 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 9 Jan 2012 02:28:15 +0000
Subject: Change some places that were checking for AVX OR SSE1/2 to use
 hasXMM/hasXMMInt instead. Also fix one place that checked SSE3 but
 accidentally excluded AVX, to use hasSSE3orAVX.

This is a step towards removing the AVX hack from X86Subtarget.h.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147764 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'lib/Target/X86/X86FastISel.cpp')

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 1589439..cca2fd8 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -60,8 +60,8 @@ public:
   explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
     Subtarget = &TM.getSubtarget<X86Subtarget>();
     StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
-    X86ScalarSSEf64 = Subtarget->hasSSE2() || Subtarget->hasAVX();
-    X86ScalarSSEf32 = Subtarget->hasSSE1() || Subtarget->hasAVX();
+    X86ScalarSSEf64 = Subtarget->hasXMMInt();
+    X86ScalarSSEf32 = Subtarget->hasXMM();
   }

   virtual bool TargetSelectInstruction(const Instruction *I);
@@ -837,8 +837,8 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {

 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
   bool HasAVX = Subtarget->hasAVX();
-  bool X86ScalarSSEf32 = HasAVX || Subtarget->hasSSE1();
-  bool X86ScalarSSEf64 = HasAVX || Subtarget->hasSSE2();
+  bool X86ScalarSSEf32 = Subtarget->hasXMM();
+  bool X86ScalarSSEf64 = Subtarget->hasXMMInt();

   switch (VT.getSimpleVT().SimpleTy) {
   default: return 0;
-- 
cgit v1.1

From 1accb7ed98d823c291a4d5df172d0538451aba9e Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 10 Jan 2012 06:54:16 +0000
Subject: Remove hasXMM/hasXMMInt functions. Move callers to hasSSE1/hasSSE2.

This is the final piece to remove the AVX hack that disabled SSE.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147843 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'lib/Target/X86/X86FastISel.cpp')

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index cca2fd8..bae5ede 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -60,8 +60,8 @@ public:
   explicit X86FastISel(FunctionLoweringInfo &funcInfo) : FastISel(funcInfo) {
     Subtarget = &TM.getSubtarget<X86Subtarget>();
     StackPtr = Subtarget->is64Bit() ? X86::RSP : X86::ESP;
-    X86ScalarSSEf64 = Subtarget->hasXMMInt();
-    X86ScalarSSEf32 = Subtarget->hasXMM();
+    X86ScalarSSEf64 = Subtarget->hasSSE2();
+    X86ScalarSSEf32 = Subtarget->hasSSE1();
   }

   virtual bool TargetSelectInstruction(const Instruction *I);
@@ -837,8 +837,8 @@ bool X86FastISel::X86SelectLoad(const Instruction *I) {

 static unsigned X86ChooseCmpOpcode(EVT VT, const X86Subtarget *Subtarget) {
   bool HasAVX = Subtarget->hasAVX();
-  bool X86ScalarSSEf32 = Subtarget->hasXMM();
-  bool X86ScalarSSEf64 = Subtarget->hasXMMInt();
+  bool X86ScalarSSEf32 = Subtarget->hasSSE1();
+  bool X86ScalarSSEf64 = Subtarget->hasSSE2();

   switch (VT.getSimpleVT().SimpleTy) {
   default: return 0;
-- 
cgit v1.1

From 9a2478ac1a9aafcd5e89808868e170cfdfefcdc1 Mon Sep 17 00:00:00 2001
From: Eli Friedman
Date: Fri, 20 Jan 2012 00:05:46 +0000
Subject: Support MSVC x86-32 sret convention. PR11688. Patch by Joe Groff.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@148513 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'lib/Target/X86/X86FastISel.cpp')

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index bae5ede..13e20f4 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1855,7 +1855,8 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   // Issue CALLSEQ_END
   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   unsigned NumBytesCallee = 0;
-  if (!Subtarget->is64Bit() && CS.paramHasAttr(1, Attribute::StructRet))
+  if (!Subtarget->is64Bit() && !Subtarget->isTargetWindows() &&
+      CS.paramHasAttr(1, Attribute::StructRet))
     NumBytesCallee = 4;
   BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(AdjStackUp))
     .addImm(NumBytes).addImm(NumBytesCallee);
-- 
cgit v1.1

From 15b4497333430fbc3e20e5fb7718d4e0d838168b Mon Sep 17 00:00:00 2001
From: Chad Rosier
Date: Wed, 15 Feb 2012 00:36:26 +0000
Subject: Use a temporary variable, rather than a series of redundant calls.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@150538 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'lib/Target/X86/X86FastISel.cpp')

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 13e20f4..952cd60 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1576,10 +1576,11 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   SmallVector Args;
   SmallVector ArgVTs;
   SmallVector ArgFlags;
-  Args.reserve(CS.arg_size());
-  ArgVals.reserve(CS.arg_size());
-  ArgVTs.reserve(CS.arg_size());
-  ArgFlags.reserve(CS.arg_size());
+  unsigned arg_size = CS.arg_size();
+  Args.reserve(arg_size);
+  ArgVals.reserve(arg_size);
+  ArgVTs.reserve(arg_size);
+  ArgFlags.reserve(arg_size);
   for (ImmutableCallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
        i != e; ++i) {
     // If we're lowering a mem intrinsic instead of a regular call, skip the
-- 
cgit v1.1

From 8bcde2aa66415ba2e63b22d2a3c1750bfb25b550 Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen
Date: Thu, 16 Feb 2012 00:02:50 +0000
Subject: Enable register mask operands for x86 calls.

Call instructions no longer have a list of 43 call-clobbered registers.
Instead, they get a single register mask operand with a bit vector of
call-preserved registers.

This saves a lot of memory, 42 x 32 bytes = 1344 bytes per call
instruction, and it speeds up building call instructions because those
43 imp-def operands no longer need to be added to use-def lists (and
removed and shifted and re-added for every explicit call operand).

Passes like LiveVariables, LiveIntervals, RAGreedy, PEI, and
BranchFolding are significantly faster because they can deal with call
clobbers in bulk.

Overall, clang -O2 is between 0% and 8% faster, uniformly distributed
depending on call density in the compiled code. Debug builds using
clang -O0 are 0% - 3% faster.

I have verified that this patch doesn't change the assembly generated
for the LLVM nightly test suite when building with -disable-copyprop
and -disable-branch-fold.

Branch folding behaves slightly differently in a few cases because call
instructions have different hash values now.

Copy propagation flushes its data structures when it crosses a register
mask operand.
This causes it to leave a few dead copies behind, on the order of 20
instructions across the entire nightly test suite, including SPEC.
Fixing this properly would require the pass to use different data
structures.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@150638 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'lib/Target/X86/X86FastISel.cpp')

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 952cd60..6ab218a 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1853,6 +1853,10 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   for (unsigned i = 0, e = RegArgs.size(); i != e; ++i)
     MIB.addReg(RegArgs[i]);

+  // Add a register mask with the call-preserved registers.
+  // Proper defs for return values will be added by setPhysRegsDeadExcept().
+  MIB.addRegMask(TRI.getCallPreservedMask(CS.getCallingConv()));
+
   // Issue CALLSEQ_END
   unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
   unsigned NumBytesCallee = 0;
-- 
cgit v1.1

From 527a08b253795cf09de41c289c9dc071f00b1d4a Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen
Date: Thu, 16 Feb 2012 17:56:02 +0000
Subject: Use the same CALL instructions for Windows as for everything else.

The different calling conventions and call-preserved registers are
represented with regmask operands that are added dynamically.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@150708 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'lib/Target/X86/X86FastISel.cpp')

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 6ab218a..efc4c06 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1793,9 +1793,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
   if (CalleeOp) {
     // Register-indirect call.
     unsigned CallOpc;
-    if (Subtarget->isTargetWin64())
-      CallOpc = X86::WINCALL64r;
-    else if (Subtarget->is64Bit())
+    if (Subtarget->is64Bit())
       CallOpc = X86::CALL64r;
     else
       CallOpc = X86::CALL32r;
@@ -1806,9 +1804,7 @@ bool X86FastISel::DoSelectCall(const Instruction *I, const char *MemIntName) {
     // Direct call.
     assert(GV && "Not a direct call");
     unsigned CallOpc;
-    if (Subtarget->isTargetWin64())
-      CallOpc = X86::WINCALL64pcrel32;
-    else if (Subtarget->is64Bit())
+    if (Subtarget->is64Bit())
       CallOpc = X86::CALL64pcrel32;
     else
       CallOpc = X86::CALLpcrel32;
-- 
cgit v1.1

From 44d23825d61d530b8d562329ec8fc2d4f843bb8d Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Wed, 22 Feb 2012 05:59:10 +0000
Subject: Make all pointers to TargetRegisterClass const since they are all
 pointers to static data that should not be modified.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@151134 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/X86FastISel.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/Target/X86/X86FastISel.cpp')

diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index efc4c06..f90764e 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -2104,7 +2104,7 @@ unsigned X86FastISel::TargetMaterializeAlloca(const AllocaInst *C) {
   if (!X86SelectAddress(C, AM))
     return 0;
   unsigned Opc = Subtarget->is64Bit() ? X86::LEA64r : X86::LEA32r;
-  TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
+  const TargetRegisterClass* RC = TLI.getRegClassFor(TLI.getPointerTy());
   unsigned ResultReg = createResultReg(RC);
   addFullAddress(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
                          TII.get(Opc), ResultReg), AM);
-- 
cgit v1.1
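
Editorial note on the register mask change in r150638 above (this is not part of any patch in this log): once calls carry a register mask operand, target-independent code can query that one operand instead of scanning a long list of implicit clobber operands. The sketch below is a minimal illustration of such a query, assuming the LLVM C++ API of that period (MachineOperand::isRegMask, getRegMask, and the static MachineOperand::clobbersPhysReg helper); the function name callPreservesReg is hypothetical and does not appear in X86FastISel.cpp.

// Sketch only: locate a call instruction's register mask operand and ask
// whether the given physical register is preserved across the call.
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"

static bool callPreservesReg(const llvm::MachineInstr &CallMI, unsigned PhysReg) {
  for (unsigned i = 0, e = CallMI.getNumOperands(); i != e; ++i) {
    const llvm::MachineOperand &MO = CallMI.getOperand(i);
    if (!MO.isRegMask())
      continue;
    // The mask is a bit vector of call-preserved registers; clobbersPhysReg
    // returns true when the register's bit is clear (i.e. it may be clobbered).
    return !llvm::MachineOperand::clobbersPhysReg(MO.getRegMask(), PhysReg);
  }
  return false; // No register mask operand: conservatively assume a clobber.
}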