diff options
author | Chris Lattner <sabre@nondot.org> | 2004-05-04 19:33:58 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2004-05-04 19:33:58 +0000 |
commit | c8af02c403c1547c7d66aa3add3c8a2116d3d26e (patch) | |
tree | 8e146b2601df112722e2fd9106bc089b7dd5dc51 /lib/Target | |
parent | 2dd5c96866406711cf20a6bb677a7d147ad3ac3d (diff) | |
download | external_llvm-c8af02c403c1547c7d66aa3add3c8a2116d3d26e.zip external_llvm-c8af02c403c1547c7d66aa3add3c8a2116d3d26e.tar.gz external_llvm-c8af02c403c1547c7d66aa3add3c8a2116d3d26e.tar.bz2 |
Improve signed division by power of 2 *dramatically* from this:
div:
mov %EDX, DWORD PTR [%ESP + 4]
mov %ECX, 64
mov %EAX, %EDX
sar %EDX, 31
idiv %ECX
ret
to this:
div:
mov %EAX, DWORD PTR [%ESP + 4]
mov %ECX, %EAX
sar %ECX, 5
shr %ECX, 26
mov %EDX, %EAX
add %EDX, %ECX
sar %EAX, 6
ret
Note that the intel compiler is currently making this:
div:
movl 4(%esp), %edx #3.5
movl %edx, %eax #4.14
sarl $5, %eax #4.14
shrl $26, %eax #4.14
addl %edx, %eax #4.14
sarl $6, %eax #4.14
ret #4.14
Which has one less register->register copy. (hint hint alkis :)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@13354 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r-- | lib/Target/X86/InstSelectSimple.cpp | 72 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelSimple.cpp | 72 |
2 files changed, 126 insertions, 18 deletions
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp index bf98466..da96813 100644 --- a/lib/Target/X86/InstSelectSimple.cpp +++ b/lib/Target/X86/InstSelectSimple.cpp @@ -2177,7 +2177,7 @@ void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, // ExactLog2 - This function solves for (Val == 1 << (N-1)) and returns N. It // returns zero when the input is not exactly a power of two. static unsigned ExactLog2(unsigned Val) { - if (Val == 0) return 0; + if (Val == 0 || (Val & (Val-1))) return 0; unsigned Count = 0; while (Val != 1) { if (Val & 1) return 0; @@ -2488,9 +2488,61 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB, default: assert(0 && "Unknown class!"); } - static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX }; static const unsigned MovOpcode[]={ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr }; - static const unsigned SarOpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri }; + static const unsigned NEGOpcode[] = { X86::NEG8r, X86::NEG16r, X86::NEG32r }; + static const unsigned SAROpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri }; + static const unsigned SHROpcode[]={ X86::SHR8ri, X86::SHR16ri, X86::SHR32ri }; + static const unsigned ADDOpcode[]={ X86::ADD8rr, X86::ADD16rr, X86::ADD32rr }; + + // Special case signed division by power of 2. + if (isDiv) + if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1)) { + assert(Class != cLong && "This doesn't handle 64-bit divides!"); + int V = CI->getValue(); + + if (V == 1) { // X /s 1 => X + unsigned Op0Reg = getReg(Op0, BB, IP); + BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(Op0Reg); + return; + } + + if (V == -1) { // X /s -1 => -X + unsigned Op0Reg = getReg(Op0, BB, IP); + BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(Op0Reg); + return; + } + + bool isNeg = false; + if (V < 0) { // Not a positive power of 2? + V = -V; + isNeg = true; // Maybe it's a negative power of 2. + } + if (unsigned Log = ExactLog2(V)) { + --Log; + unsigned Op0Reg = getReg(Op0, BB, IP); + unsigned TmpReg = makeAnotherReg(Op0->getType()); + if (Log != 1) + BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg) + .addReg(Op0Reg).addImm(Log-1); + else + BuildMI(*BB, IP, MovOpcode[Class], 1, TmpReg).addReg(Op0Reg); + unsigned TmpReg2 = makeAnotherReg(Op0->getType()); + BuildMI(*BB, IP, SHROpcode[Class], 2, TmpReg2) + .addReg(TmpReg).addImm(32-Log); + unsigned TmpReg3 = makeAnotherReg(Op0->getType()); + BuildMI(*BB, IP, ADDOpcode[Class], 2, TmpReg3) + .addReg(Op0Reg).addReg(TmpReg2); + + unsigned TmpReg4 = isNeg ? makeAnotherReg(Op0->getType()) : ResultReg; + BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg4) + .addReg(Op0Reg).addImm(Log); + if (isNeg) + BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg4); + return; + } + } + + static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX }; static const unsigned ClrOpcode[]={ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri }; static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX }; @@ -2499,7 +2551,6 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB, { X86::IDIV8r, X86::IDIV16r, X86::IDIV32r, 0 }, // Signed division }; - bool isSigned = Ty->isSigned(); unsigned Reg = Regs[Class]; unsigned ExtReg = ExtRegs[Class]; @@ -2508,18 +2559,21 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB, unsigned Op1Reg = getReg(Op1, BB, IP); BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg); - if (isSigned) { + if (Ty->isSigned()) { // Emit a sign extension instruction... unsigned ShiftResult = makeAnotherReg(Op0->getType()); - BuildMI(*BB, IP, SarOpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31); + BuildMI(*BB, IP, SAROpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31); BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult); + + // Emit the appropriate divide or remainder instruction... + BuildMI(*BB, IP, DivOpcode[1][Class], 1).addReg(Op1Reg); } else { // If unsigned, emit a zeroing instruction... (reg = 0) BuildMI(*BB, IP, ClrOpcode[Class], 2, ExtReg).addImm(0); - } - // Emit the appropriate divide or remainder instruction... - BuildMI(*BB, IP, DivOpcode[isSigned][Class], 1).addReg(Op1Reg); + // Emit the appropriate divide or remainder instruction... + BuildMI(*BB, IP, DivOpcode[0][Class], 1).addReg(Op1Reg); + } // Figure out which register we want to pick the result out of... unsigned DestReg = isDiv ? Reg : ExtReg; diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp index bf98466..da96813 100644 --- a/lib/Target/X86/X86ISelSimple.cpp +++ b/lib/Target/X86/X86ISelSimple.cpp @@ -2177,7 +2177,7 @@ void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI, // ExactLog2 - This function solves for (Val == 1 << (N-1)) and returns N. It // returns zero when the input is not exactly a power of two. static unsigned ExactLog2(unsigned Val) { - if (Val == 0) return 0; + if (Val == 0 || (Val & (Val-1))) return 0; unsigned Count = 0; while (Val != 1) { if (Val & 1) return 0; @@ -2488,9 +2488,61 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB, default: assert(0 && "Unknown class!"); } - static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX }; static const unsigned MovOpcode[]={ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr }; - static const unsigned SarOpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri }; + static const unsigned NEGOpcode[] = { X86::NEG8r, X86::NEG16r, X86::NEG32r }; + static const unsigned SAROpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri }; + static const unsigned SHROpcode[]={ X86::SHR8ri, X86::SHR16ri, X86::SHR32ri }; + static const unsigned ADDOpcode[]={ X86::ADD8rr, X86::ADD16rr, X86::ADD32rr }; + + // Special case signed division by power of 2. + if (isDiv) + if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1)) { + assert(Class != cLong && "This doesn't handle 64-bit divides!"); + int V = CI->getValue(); + + if (V == 1) { // X /s 1 => X + unsigned Op0Reg = getReg(Op0, BB, IP); + BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(Op0Reg); + return; + } + + if (V == -1) { // X /s -1 => -X + unsigned Op0Reg = getReg(Op0, BB, IP); + BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(Op0Reg); + return; + } + + bool isNeg = false; + if (V < 0) { // Not a positive power of 2? + V = -V; + isNeg = true; // Maybe it's a negative power of 2. + } + if (unsigned Log = ExactLog2(V)) { + --Log; + unsigned Op0Reg = getReg(Op0, BB, IP); + unsigned TmpReg = makeAnotherReg(Op0->getType()); + if (Log != 1) + BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg) + .addReg(Op0Reg).addImm(Log-1); + else + BuildMI(*BB, IP, MovOpcode[Class], 1, TmpReg).addReg(Op0Reg); + unsigned TmpReg2 = makeAnotherReg(Op0->getType()); + BuildMI(*BB, IP, SHROpcode[Class], 2, TmpReg2) + .addReg(TmpReg).addImm(32-Log); + unsigned TmpReg3 = makeAnotherReg(Op0->getType()); + BuildMI(*BB, IP, ADDOpcode[Class], 2, TmpReg3) + .addReg(Op0Reg).addReg(TmpReg2); + + unsigned TmpReg4 = isNeg ? makeAnotherReg(Op0->getType()) : ResultReg; + BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg4) + .addReg(Op0Reg).addImm(Log); + if (isNeg) + BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg4); + return; + } + } + + static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX }; static const unsigned ClrOpcode[]={ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri }; static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX }; @@ -2499,7 +2551,6 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB, { X86::IDIV8r, X86::IDIV16r, X86::IDIV32r, 0 }, // Signed division }; - bool isSigned = Ty->isSigned(); unsigned Reg = Regs[Class]; unsigned ExtReg = ExtRegs[Class]; @@ -2508,18 +2559,21 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB, unsigned Op1Reg = getReg(Op1, BB, IP); BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg); - if (isSigned) { + if (Ty->isSigned()) { // Emit a sign extension instruction... unsigned ShiftResult = makeAnotherReg(Op0->getType()); - BuildMI(*BB, IP, SarOpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31); + BuildMI(*BB, IP, SAROpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31); BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult); + + // Emit the appropriate divide or remainder instruction... + BuildMI(*BB, IP, DivOpcode[1][Class], 1).addReg(Op1Reg); } else { // If unsigned, emit a zeroing instruction... (reg = 0) BuildMI(*BB, IP, ClrOpcode[Class], 2, ExtReg).addImm(0); - } - // Emit the appropriate divide or remainder instruction... - BuildMI(*BB, IP, DivOpcode[isSigned][Class], 1).addReg(Op1Reg); + // Emit the appropriate divide or remainder instruction... + BuildMI(*BB, IP, DivOpcode[0][Class], 1).addReg(Op1Reg); + } // Figure out which register we want to pick the result out of... unsigned DestReg = isDiv ? Reg : ExtReg; |