aboutsummaryrefslogtreecommitdiffstats
path: root/lib/Target
diff options
context:
space:
mode:
authorChris Lattner <sabre@nondot.org>2004-05-04 19:33:58 +0000
committerChris Lattner <sabre@nondot.org>2004-05-04 19:33:58 +0000
commitc8af02c403c1547c7d66aa3add3c8a2116d3d26e (patch)
tree8e146b2601df112722e2fd9106bc089b7dd5dc51 /lib/Target
parent2dd5c96866406711cf20a6bb677a7d147ad3ac3d (diff)
downloadexternal_llvm-c8af02c403c1547c7d66aa3add3c8a2116d3d26e.zip
external_llvm-c8af02c403c1547c7d66aa3add3c8a2116d3d26e.tar.gz
external_llvm-c8af02c403c1547c7d66aa3add3c8a2116d3d26e.tar.bz2
Improve signed division by power of 2 *dramatically* from this:
div: mov %EDX, DWORD PTR [%ESP + 4] mov %ECX, 64 mov %EAX, %EDX sar %EDX, 31 idiv %ECX ret to this: div: mov %EAX, DWORD PTR [%ESP + 4] mov %ECX, %EAX sar %ECX, 5 shr %ECX, 26 mov %EDX, %EAX add %EDX, %ECX sar %EAX, 6 ret Note that the intel compiler is currently making this: div: movl 4(%esp), %edx #3.5 movl %edx, %eax #4.14 sarl $5, %eax #4.14 shrl $26, %eax #4.14 addl %edx, %eax #4.14 sarl $6, %eax #4.14 ret #4.14 Which has one less register->register copy. (hint hint alkis :) git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@13354 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target')
-rw-r--r--lib/Target/X86/InstSelectSimple.cpp72
-rw-r--r--lib/Target/X86/X86ISelSimple.cpp72
2 files changed, 126 insertions, 18 deletions
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp
index bf98466..da96813 100644
--- a/lib/Target/X86/InstSelectSimple.cpp
+++ b/lib/Target/X86/InstSelectSimple.cpp
@@ -2177,7 +2177,7 @@ void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
// ExactLog2 - This function solves for (Val == 1 << (N-1)) and returns N. It
// returns zero when the input is not exactly a power of two.
static unsigned ExactLog2(unsigned Val) {
- if (Val == 0) return 0;
+ if (Val == 0 || (Val & (Val-1))) return 0;
unsigned Count = 0;
while (Val != 1) {
if (Val & 1) return 0;
@@ -2488,9 +2488,61 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB,
default: assert(0 && "Unknown class!");
}
- static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
static const unsigned MovOpcode[]={ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr };
- static const unsigned SarOpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri };
+ static const unsigned NEGOpcode[] = { X86::NEG8r, X86::NEG16r, X86::NEG32r };
+ static const unsigned SAROpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri };
+ static const unsigned SHROpcode[]={ X86::SHR8ri, X86::SHR16ri, X86::SHR32ri };
+ static const unsigned ADDOpcode[]={ X86::ADD8rr, X86::ADD16rr, X86::ADD32rr };
+
+ // Special case signed division by power of 2.
+ if (isDiv)
+ if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1)) {
+ assert(Class != cLong && "This doesn't handle 64-bit divides!");
+ int V = CI->getValue();
+
+ if (V == 1) { // X /s 1 => X
+ unsigned Op0Reg = getReg(Op0, BB, IP);
+ BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(Op0Reg);
+ return;
+ }
+
+ if (V == -1) { // X /s -1 => -X
+ unsigned Op0Reg = getReg(Op0, BB, IP);
+ BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(Op0Reg);
+ return;
+ }
+
+ bool isNeg = false;
+ if (V < 0) { // Not a positive power of 2?
+ V = -V;
+ isNeg = true; // Maybe it's a negative power of 2.
+ }
+ if (unsigned Log = ExactLog2(V)) {
+ --Log;
+ unsigned Op0Reg = getReg(Op0, BB, IP);
+ unsigned TmpReg = makeAnotherReg(Op0->getType());
+ if (Log != 1)
+ BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg)
+ .addReg(Op0Reg).addImm(Log-1);
+ else
+ BuildMI(*BB, IP, MovOpcode[Class], 1, TmpReg).addReg(Op0Reg);
+ unsigned TmpReg2 = makeAnotherReg(Op0->getType());
+ BuildMI(*BB, IP, SHROpcode[Class], 2, TmpReg2)
+ .addReg(TmpReg).addImm(32-Log);
+ unsigned TmpReg3 = makeAnotherReg(Op0->getType());
+ BuildMI(*BB, IP, ADDOpcode[Class], 2, TmpReg3)
+ .addReg(Op0Reg).addReg(TmpReg2);
+
+ unsigned TmpReg4 = isNeg ? makeAnotherReg(Op0->getType()) : ResultReg;
+ BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg4)
+ .addReg(Op0Reg).addImm(Log);
+ if (isNeg)
+ BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg4);
+ return;
+ }
+ }
+
+ static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
static const unsigned ClrOpcode[]={ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri };
static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX };
@@ -2499,7 +2551,6 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB,
{ X86::IDIV8r, X86::IDIV16r, X86::IDIV32r, 0 }, // Signed division
};
- bool isSigned = Ty->isSigned();
unsigned Reg = Regs[Class];
unsigned ExtReg = ExtRegs[Class];
@@ -2508,18 +2559,21 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB,
unsigned Op1Reg = getReg(Op1, BB, IP);
BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
- if (isSigned) {
+ if (Ty->isSigned()) {
// Emit a sign extension instruction...
unsigned ShiftResult = makeAnotherReg(Op0->getType());
- BuildMI(*BB, IP, SarOpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31);
+ BuildMI(*BB, IP, SAROpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31);
BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult);
+
+ // Emit the appropriate divide or remainder instruction...
+ BuildMI(*BB, IP, DivOpcode[1][Class], 1).addReg(Op1Reg);
} else {
// If unsigned, emit a zeroing instruction... (reg = 0)
BuildMI(*BB, IP, ClrOpcode[Class], 2, ExtReg).addImm(0);
- }
- // Emit the appropriate divide or remainder instruction...
- BuildMI(*BB, IP, DivOpcode[isSigned][Class], 1).addReg(Op1Reg);
+ // Emit the appropriate divide or remainder instruction...
+ BuildMI(*BB, IP, DivOpcode[0][Class], 1).addReg(Op1Reg);
+ }
// Figure out which register we want to pick the result out of...
unsigned DestReg = isDiv ? Reg : ExtReg;
diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp
index bf98466..da96813 100644
--- a/lib/Target/X86/X86ISelSimple.cpp
+++ b/lib/Target/X86/X86ISelSimple.cpp
@@ -2177,7 +2177,7 @@ void ISel::doMultiply(MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI,
// ExactLog2 - This function solves for (Val == 1 << (N-1)) and returns N. It
// returns zero when the input is not exactly a power of two.
static unsigned ExactLog2(unsigned Val) {
- if (Val == 0) return 0;
+ if (Val == 0 || (Val & (Val-1))) return 0;
unsigned Count = 0;
while (Val != 1) {
if (Val & 1) return 0;
@@ -2488,9 +2488,61 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB,
default: assert(0 && "Unknown class!");
}
- static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
static const unsigned MovOpcode[]={ X86::MOV8rr, X86::MOV16rr, X86::MOV32rr };
- static const unsigned SarOpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri };
+ static const unsigned NEGOpcode[] = { X86::NEG8r, X86::NEG16r, X86::NEG32r };
+ static const unsigned SAROpcode[]={ X86::SAR8ri, X86::SAR16ri, X86::SAR32ri };
+ static const unsigned SHROpcode[]={ X86::SHR8ri, X86::SHR16ri, X86::SHR32ri };
+ static const unsigned ADDOpcode[]={ X86::ADD8rr, X86::ADD16rr, X86::ADD32rr };
+
+ // Special case signed division by power of 2.
+ if (isDiv)
+ if (ConstantSInt *CI = dyn_cast<ConstantSInt>(Op1)) {
+ assert(Class != cLong && "This doesn't handle 64-bit divides!");
+ int V = CI->getValue();
+
+ if (V == 1) { // X /s 1 => X
+ unsigned Op0Reg = getReg(Op0, BB, IP);
+ BuildMI(*BB, IP, MovOpcode[Class], 1, ResultReg).addReg(Op0Reg);
+ return;
+ }
+
+ if (V == -1) { // X /s -1 => -X
+ unsigned Op0Reg = getReg(Op0, BB, IP);
+ BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(Op0Reg);
+ return;
+ }
+
+ bool isNeg = false;
+ if (V < 0) { // Not a positive power of 2?
+ V = -V;
+ isNeg = true; // Maybe it's a negative power of 2.
+ }
+ if (unsigned Log = ExactLog2(V)) {
+ --Log;
+ unsigned Op0Reg = getReg(Op0, BB, IP);
+ unsigned TmpReg = makeAnotherReg(Op0->getType());
+ if (Log != 1)
+ BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg)
+ .addReg(Op0Reg).addImm(Log-1);
+ else
+ BuildMI(*BB, IP, MovOpcode[Class], 1, TmpReg).addReg(Op0Reg);
+ unsigned TmpReg2 = makeAnotherReg(Op0->getType());
+ BuildMI(*BB, IP, SHROpcode[Class], 2, TmpReg2)
+ .addReg(TmpReg).addImm(32-Log);
+ unsigned TmpReg3 = makeAnotherReg(Op0->getType());
+ BuildMI(*BB, IP, ADDOpcode[Class], 2, TmpReg3)
+ .addReg(Op0Reg).addReg(TmpReg2);
+
+ unsigned TmpReg4 = isNeg ? makeAnotherReg(Op0->getType()) : ResultReg;
+ BuildMI(*BB, IP, SAROpcode[Class], 2, TmpReg4)
+ .addReg(Op0Reg).addImm(Log);
+ if (isNeg)
+ BuildMI(*BB, IP, NEGOpcode[Class], 1, ResultReg).addReg(TmpReg4);
+ return;
+ }
+ }
+
+ static const unsigned Regs[] ={ X86::AL , X86::AX , X86::EAX };
static const unsigned ClrOpcode[]={ X86::MOV8ri, X86::MOV16ri, X86::MOV32ri };
static const unsigned ExtRegs[] ={ X86::AH , X86::DX , X86::EDX };
@@ -2499,7 +2551,6 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB,
{ X86::IDIV8r, X86::IDIV16r, X86::IDIV32r, 0 }, // Signed division
};
- bool isSigned = Ty->isSigned();
unsigned Reg = Regs[Class];
unsigned ExtReg = ExtRegs[Class];
@@ -2508,18 +2559,21 @@ void ISel::emitDivRemOperation(MachineBasicBlock *BB,
unsigned Op1Reg = getReg(Op1, BB, IP);
BuildMI(*BB, IP, MovOpcode[Class], 1, Reg).addReg(Op0Reg);
- if (isSigned) {
+ if (Ty->isSigned()) {
// Emit a sign extension instruction...
unsigned ShiftResult = makeAnotherReg(Op0->getType());
- BuildMI(*BB, IP, SarOpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31);
+ BuildMI(*BB, IP, SAROpcode[Class], 2,ShiftResult).addReg(Op0Reg).addImm(31);
BuildMI(*BB, IP, MovOpcode[Class], 1, ExtReg).addReg(ShiftResult);
+
+ // Emit the appropriate divide or remainder instruction...
+ BuildMI(*BB, IP, DivOpcode[1][Class], 1).addReg(Op1Reg);
} else {
// If unsigned, emit a zeroing instruction... (reg = 0)
BuildMI(*BB, IP, ClrOpcode[Class], 2, ExtReg).addImm(0);
- }
- // Emit the appropriate divide or remainder instruction...
- BuildMI(*BB, IP, DivOpcode[isSigned][Class], 1).addReg(Op1Reg);
+ // Emit the appropriate divide or remainder instruction...
+ BuildMI(*BB, IP, DivOpcode[0][Class], 1).addReg(Op1Reg);
+ }
// Figure out which register we want to pick the result out of...
unsigned DestReg = isDiv ? Reg : ExtReg;