diff options
author | Chris Lattner <sabre@nondot.org> | 2004-04-06 04:29:36 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2004-04-06 04:29:36 +0000 |
commit | 028adc422da9eaa4fd16df839c89c1c343894ef6 (patch) | |
tree | d5d4bd5d19fd6a6466383ddf981dca7f7d9faeef /lib/Target/X86 | |
parent | 1e07e0c0c9a0fff3f667327037b05fe9b7523fcc (diff) | |
download | external_llvm-028adc422da9eaa4fd16df839c89c1c343894ef6.zip external_llvm-028adc422da9eaa4fd16df839c89c1c343894ef6.tar.gz external_llvm-028adc422da9eaa4fd16df839c89c1c343894ef6.tar.bz2 |
Efficiently handle a long multiplication by a constant. For this testcase:
long %test(long %X) {
%Y = mul long %X, 123
ret long %Y
}
we used to generate:
test:
sub %ESP, 12
mov DWORD PTR [%ESP + 8], %ESI
mov DWORD PTR [%ESP + 4], %EDI
mov DWORD PTR [%ESP], %EBX
mov %ECX, DWORD PTR [%ESP + 16]
mov %ESI, DWORD PTR [%ESP + 20]
mov %EDI, 123
mov %EBX, 0
mov %EAX, %ECX
mul %EDI
imul %ESI, %EDI
add %ESI, %EDX
imul %ECX, %EBX
add %ESI, %ECX
mov %EDX, %ESI
mov %EBX, DWORD PTR [%ESP]
mov %EDI, DWORD PTR [%ESP + 4]
mov %ESI, DWORD PTR [%ESP + 8]
add %ESP, 12
ret
Now we emit:
test:
mov %EAX, DWORD PTR [%ESP + 4]
mov %ECX, DWORD PTR [%ESP + 8]
mov %EDX, 123
mul %EDX
imul %ECX, %ECX, 123
add %ECX, %EDX
mov %EDX, %ECX
ret
Which, incidentally, is substantially nicer than what GCC manages:
T:
sub %esp, 8
mov %eax, 123
mov DWORD PTR [%esp], %ebx
mov %ebx, DWORD PTR [%esp+16]
mov DWORD PTR [%esp+4], %esi
mov %esi, DWORD PTR [%esp+12]
imul %ecx, %ebx, 123
mov %ebx, DWORD PTR [%esp]
mul %esi
mov %esi, DWORD PTR [%esp+4]
add %esp, 8
lea %edx, [%ecx+%edx]
ret
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@12692 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/Target/X86')
-rw-r--r-- | lib/Target/X86/InstSelectSimple.cpp | 83 | ||||
-rw-r--r-- | lib/Target/X86/X86ISelSimple.cpp | 83 |
2 files changed, 116 insertions, 50 deletions
diff --git a/lib/Target/X86/InstSelectSimple.cpp b/lib/Target/X86/InstSelectSimple.cpp index 4e59ed1..63117c1 100644 --- a/lib/Target/X86/InstSelectSimple.cpp +++ b/lib/Target/X86/InstSelectSimple.cpp @@ -1940,7 +1940,7 @@ void ISel::visitMul(BinaryOperator &I) { unsigned DestReg = getReg(I); // Simple scalar multiply? - if (I.getType() != Type::LongTy && I.getType() != Type::ULongTy) { + if (getClass(I.getType()) != cLong) { if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) { unsigned Val = (unsigned)CI->getRawValue(); // Cannot be 64-bit constant MachineBasicBlock::iterator MBBI = BB->end(); @@ -1951,31 +1951,64 @@ void ISel::visitMul(BinaryOperator &I) { doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg); } } else { - unsigned Op1Reg = getReg(I.getOperand(1)); - // Long value. We have to do things the hard way... - // Multiply the two low parts... capturing carry into EDX - BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); - BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL - - unsigned OverflowReg = makeAnotherReg(Type::UIntTy); - BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL - BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32 - - MachineBasicBlock::iterator MBBI = BB->end(); - unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL - BuildMI(*BB, MBBI, X86::IMUL32rr,2,AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg); - - unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); - BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) - AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); - - MBBI = BB->end(); - unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH - BuildMI(*BB, MBBI, X86::IMUL32rr,2,ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1); - - BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) - DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); + if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) { + unsigned CLow = CI->getRawValue(); + 
unsigned CHi = CI->getRawValue() >> 32; + + // Multiply the two low parts... capturing carry into EDX + unsigned Op1RegL = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::MOV32ri, 1, Op1RegL).addImm(CLow); + BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); + BuildMI(BB, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL + + unsigned OverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL + BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32 + + unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL + BuildMI(BB, X86::IMUL32rri, 2, AHBLReg).addReg(Op0Reg+1).addImm(CLow); + + unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) + AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); + + if (CHi != 0) { + unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH + BuildMI(BB, X86::IMUL32rri, 2, ALBHReg).addReg(Op0Reg).addImm(CHi); + + BuildMI(BB, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) + DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); + } else { + BuildMI(BB, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg); + } + } else { + unsigned Op1Reg = getReg(I.getOperand(1)); + // Multiply the two low parts... 
capturing carry into EDX + BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); + BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL + + unsigned OverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL + BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32 + + MachineBasicBlock::iterator MBBI = BB->end(); + unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL + BuildMI(*BB, MBBI, X86::IMUL32rr, 2, + AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg); + + unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) + AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); + + MBBI = BB->end(); + unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH + BuildMI(*BB, MBBI, X86::IMUL32rr, 2, + ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1); + + BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) + DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); + } } } diff --git a/lib/Target/X86/X86ISelSimple.cpp b/lib/Target/X86/X86ISelSimple.cpp index 4e59ed1..63117c1 100644 --- a/lib/Target/X86/X86ISelSimple.cpp +++ b/lib/Target/X86/X86ISelSimple.cpp @@ -1940,7 +1940,7 @@ void ISel::visitMul(BinaryOperator &I) { unsigned DestReg = getReg(I); // Simple scalar multiply? - if (I.getType() != Type::LongTy && I.getType() != Type::ULongTy) { + if (getClass(I.getType()) != cLong) { if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) { unsigned Val = (unsigned)CI->getRawValue(); // Cannot be 64-bit constant MachineBasicBlock::iterator MBBI = BB->end(); @@ -1951,31 +1951,64 @@ void ISel::visitMul(BinaryOperator &I) { doMultiply(BB, MBBI, DestReg, I.getType(), Op0Reg, Op1Reg); } } else { - unsigned Op1Reg = getReg(I.getOperand(1)); - // Long value. We have to do things the hard way... - // Multiply the two low parts... 
capturing carry into EDX - BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); - BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL - - unsigned OverflowReg = makeAnotherReg(Type::UIntTy); - BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL - BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32 - - MachineBasicBlock::iterator MBBI = BB->end(); - unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL - BuildMI(*BB, MBBI, X86::IMUL32rr,2,AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg); - - unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); - BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) - AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); - - MBBI = BB->end(); - unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH - BuildMI(*BB, MBBI, X86::IMUL32rr,2,ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1); - - BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) - DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); + if (ConstantInt *CI = dyn_cast<ConstantInt>(I.getOperand(1))) { + unsigned CLow = CI->getRawValue(); + unsigned CHi = CI->getRawValue() >> 32; + + // Multiply the two low parts... 
capturing carry into EDX + unsigned Op1RegL = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::MOV32ri, 1, Op1RegL).addImm(CLow); + BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); + BuildMI(BB, X86::MUL32r, 1).addReg(Op1RegL); // AL*BL + + unsigned OverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL + BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32 + + unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL + BuildMI(BB, X86::IMUL32rri, 2, AHBLReg).addReg(Op0Reg+1).addImm(CLow); + + unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) + AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); + + if (CHi != 0) { + unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH + BuildMI(BB, X86::IMUL32rri, 2, ALBHReg).addReg(Op0Reg).addImm(CHi); + + BuildMI(BB, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) + DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); + } else { + BuildMI(BB, X86::MOV32rr, 1, DestReg+1).addReg(AHBLplusOverflowReg); + } + } else { + unsigned Op1Reg = getReg(I.getOperand(1)); + // Multiply the two low parts... 
capturing carry into EDX + BuildMI(BB, X86::MOV32rr, 1, X86::EAX).addReg(Op0Reg); + BuildMI(BB, X86::MUL32r, 1).addReg(Op1Reg); // AL*BL + + unsigned OverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(BB, X86::MOV32rr, 1, DestReg).addReg(X86::EAX); // AL*BL + BuildMI(BB, X86::MOV32rr, 1, OverflowReg).addReg(X86::EDX); // AL*BL >> 32 + + MachineBasicBlock::iterator MBBI = BB->end(); + unsigned AHBLReg = makeAnotherReg(Type::UIntTy); // AH*BL + BuildMI(*BB, MBBI, X86::IMUL32rr, 2, + AHBLReg).addReg(Op0Reg+1).addReg(Op1Reg); + + unsigned AHBLplusOverflowReg = makeAnotherReg(Type::UIntTy); + BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AH*BL+(AL*BL >> 32) + AHBLplusOverflowReg).addReg(AHBLReg).addReg(OverflowReg); + + MBBI = BB->end(); + unsigned ALBHReg = makeAnotherReg(Type::UIntTy); // AL*BH + BuildMI(*BB, MBBI, X86::IMUL32rr, 2, + ALBHReg).addReg(Op0Reg).addReg(Op1Reg+1); + + BuildMI(*BB, MBBI, X86::ADD32rr, 2, // AL*BH + AH*BL + (AL*BL >> 32) + DestReg+1).addReg(AHBLplusOverflowReg).addReg(ALBHReg); + } } } |