diff options
author | Tim Northover <tnorthover@apple.com> | 2013-05-30 13:19:42 +0000 |
---|---|---|
committer | Tim Northover <tnorthover@apple.com> | 2013-05-30 13:19:42 +0000 |
commit | 15983b80a0ceb224b74d2ee5ef53d3eed37dc03b (patch) | |
tree | 04df22eecc061cf808aa3a7a8a33ba02dc2b8ebf /lib/Target/X86/X86FastISel.cpp | |
parent | 52d65ab72dcdb3b5de8b84743537355067819c31 (diff) | |
download | external_llvm-15983b80a0ceb224b74d2ee5ef53d3eed37dc03b.zip external_llvm-15983b80a0ceb224b74d2ee5ef53d3eed37dc03b.tar.gz external_llvm-15983b80a0ceb224b74d2ee5ef53d3eed37dc03b.tar.bz2 |
X86: use sub-register sequences for MOV*r0 operations
Instead of having a bunch of separate MOV8r0, MOV16r0, ... pseudo-instructions,
it's better to use a single MOV32r0 (which will expand to "xorl %reg, %reg")
and obtain other sizes with EXTRACT_SUBREG and SUBREG_TO_REG. The encoding is
smaller and partial register updates can sometimes be avoided.
Until recently, though, this sequence was a barrier to rematerialization. That
should now be fixed, so it's an appropriate time to make the change.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182928 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86/X86FastISel.cpp')
-rw-r--r-- | lib/Target/X86/X86FastISel.cpp | 31 |
1 files changed, 25 insertions, 6 deletions
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp index eeb934f..d5423ce 100644 --- a/lib/Target/X86/X86FastISel.cpp +++ b/lib/Target/X86/X86FastISel.cpp @@ -1294,8 +1294,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { { &X86::GR16RegClass, X86::AX, X86::DX, { { X86::IDIV16r, X86::CWD, Copy, X86::AX, S }, // SDiv { X86::IDIV16r, X86::CWD, Copy, X86::DX, S }, // SRem - { X86::DIV16r, X86::MOV16r0, Copy, X86::AX, U }, // UDiv - { X86::DIV16r, X86::MOV16r0, Copy, X86::DX, U }, // URem + { X86::DIV16r, X86::MOV32r0, Copy, X86::AX, U }, // UDiv + { X86::DIV16r, X86::MOV32r0, Copy, X86::DX, U }, // URem } }, // i16 { &X86::GR32RegClass, X86::EAX, X86::EDX, { @@ -1308,8 +1308,8 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { { &X86::GR64RegClass, X86::RAX, X86::RDX, { { X86::IDIV64r, X86::CQO, Copy, X86::RAX, S }, // SDiv { X86::IDIV64r, X86::CQO, Copy, X86::RDX, S }, // SRem - { X86::DIV64r, X86::MOV64r0, Copy, X86::RAX, U }, // UDiv - { X86::DIV64r, X86::MOV64r0, Copy, X86::RDX, U }, // URem + { X86::DIV64r, X86::MOV32r0, Copy, X86::RAX, U }, // UDiv + { X86::DIV64r, X86::MOV32r0, Copy, X86::RDX, U }, // URem } }, // i64 }; @@ -1355,9 +1355,28 @@ bool X86FastISel::X86SelectDivRem(const Instruction *I) { if (OpEntry.IsOpSigned) BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(OpEntry.OpSignExtend)); - else + else { + unsigned Zero32 = createResultReg(&X86::GR32RegClass); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(OpEntry.OpSignExtend), TypeEntry.HighInReg); + TII.get(X86::MOV32r0), Zero32); + + // Copy the zero into the appropriate sub/super/identical physical + // register. Unfortunately the operations needed are not uniform enough to + // fit neatly into the table above. 
+ if (VT.SimpleTy == MVT::i16) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), TypeEntry.HighInReg) + .addReg(Zero32, 0, X86::sub_16bit); + } else if (VT.SimpleTy == MVT::i32) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), TypeEntry.HighInReg) + .addReg(Zero32); + } else if (VT.SimpleTy == MVT::i64) { + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::SUBREG_TO_REG), TypeEntry.HighInReg) + .addImm(0).addReg(Zero32).addImm(X86::sub_32bit); + } + } } // Generate the DIV/IDIV instruction. BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, |