-rw-r--r--   include/llvm/Target/TargetLowering.h                 22
-rw-r--r--   include/llvm/Transforms/Utils/BypassSlowDivision.h   58
-rw-r--r--   lib/CodeGen/SelectionDAG/TargetLowering.cpp           1
-rw-r--r--   lib/Target/X86/X86.td                                  6
-rw-r--r--   lib/Target/X86/X86ISelLowering.cpp                     4
-rw-r--r--   lib/Target/X86/X86Subtarget.cpp                        1
-rw-r--r--   lib/Target/X86/X86Subtarget.h                          5
-rw-r--r--   lib/Transforms/Scalar/CodeGenPrepare.cpp              15
-rw-r--r--   lib/Transforms/Utils/BypassSlowDivision.cpp          251
-rw-r--r--   lib/Transforms/Utils/CMakeLists.txt                    1
-rw-r--r--   test/CodeGen/X86/atom-bypass-slow-division.ll         112
11 files changed, 473 insertions, 3 deletions
diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h
index b8c070b..ef63422 100644
--- a/include/llvm/Target/TargetLowering.h
+++ b/include/llvm/Target/TargetLowering.h
@@ -25,6 +25,7 @@
 #include "llvm/CallingConv.h"
 #include "llvm/InlineAsm.h"
 #include "llvm/Attributes.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/Support/CallSite.h"
 #include "llvm/CodeGen/SelectionDAGNodes.h"
 #include "llvm/CodeGen/RuntimeLibcalls.h"
@@ -154,6 +155,16 @@ public:
   /// a sequence of several shifts, adds, and multiplies for this target.
   bool isIntDivCheap() const { return IntDivIsCheap; }
 
+  /// isSlowDivBypassed - Returns true if target has indicated at least one
+  /// type should be bypassed.
+  bool isSlowDivBypassed() const { return !BypassSlowDivTypes.empty(); }
+
+  /// getBypassSlowDivTypes - Returns map of slow types for division or
+  /// remainder with corresponding fast types
+  const DenseMap<Type *, Type *> &getBypassSlowDivTypes() const {
+    return BypassSlowDivTypes;
+  }
+
   /// isPow2DivCheap() - Return true if pow2 div is cheaper than a chain of
   /// srl/add/sra.
   bool isPow2DivCheap() const { return Pow2DivIsCheap; }
@@ -1055,6 +1066,11 @@ protected:
   /// of instructions not containing an integer divide.
   void setIntDivIsCheap(bool isCheap = true) { IntDivIsCheap = isCheap; }
 
+  /// addBypassSlowDivType - Tells the code generator which types to bypass.
+  void addBypassSlowDivType(Type *slow_type, Type *fast_type) {
+    BypassSlowDivTypes[slow_type] = fast_type;
+  }
+
   /// setPow2DivIsCheap - Tells the code generator that it shouldn't generate
   /// srl/add/sra for a signed divide by power of two, and let the target handle
   /// it.
@@ -1772,6 +1788,12 @@ private:
   /// set to true unconditionally.
   bool IntDivIsCheap;
 
+  /// BypassSlowDivTypes - Tells the code generator to bypass slow divide or
+  /// remainder instructions. For example, SlowDivBypass[i32,u8] tells the code
+  /// generator to bypass 32-bit signed integer div/rem with an 8-bit unsigned
+  /// integer div/rem when the operands are positive and less than 256.
+  DenseMap <Type *, Type *> BypassSlowDivTypes;
+
   /// Pow2DivIsCheap - Tells the code generator that it shouldn't generate
   /// srl/add/sra for a signed divide by power of two, and let the target handle
   /// it.
diff --git a/include/llvm/Transforms/Utils/BypassSlowDivision.h b/include/llvm/Transforms/Utils/BypassSlowDivision.h
new file mode 100644
index 0000000..4a9838a
--- /dev/null
+++ b/include/llvm/Transforms/Utils/BypassSlowDivision.h
@@ -0,0 +1,58 @@
+//===- llvm/Transforms/Utils/BypassSlowDivision.h --------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an optimization for div and rem on architectures that
+// execute short instructions significantly faster than longer instructions.
+// For example, on Intel Atom 32-bit divides are slow enough that during
+// runtime it is profitable to check the value of the operands, and if they are
+// positive and less than 256 use an unsigned 8-bit divide.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
+#define TRANSFORMS_UTILS_BYPASSSLOWDIVISION_H
+
+#include "llvm/Function.h"
+
+/// This optimization identifies DIV instructions that can be
+/// profitably bypassed and carried out with a shorter, faster divide.
+bool bypassSlowDivision(llvm::Function &F,
+                        llvm::Function::iterator &I,
+                        const llvm::DenseMap<llvm::Type *, llvm::Type *> &BypassTypeMap);
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 6820175..10a534f 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -898,7 +898,6 @@ const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
   return NULL;
 }
 
-
 EVT TargetLowering::getSetCCResultType(EVT VT) const {
   assert(!VT.isVector() && "No default SetCC type for vectors!");
   return PointerTy.SimpleTy;
diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
index 18e6b7c..d078a7b 100644
--- a/lib/Target/X86/X86.td
+++ b/lib/Target/X86/X86.td
@@ -120,6 +120,9 @@ def FeatureBMI2 : SubtargetFeature<"bmi2", "HasBMI2", "true",
                                    "Support BMI2 instructions">;
 def FeatureLeaForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true",
                                      "Use LEA for adjusting the stack pointer">;
+def FeatureSlowDivide : SubtargetFeature<"idiv-to-divb",
+                                     "HasSlowDivide", "true",
+                                     "Use small divide for positive values less than 256">;
 
 //===----------------------------------------------------------------------===//
 // X86 processors supported.
@@ -160,7 +163,8 @@ def : Proc<"core2",            [FeatureSSSE3, FeatureCMPXCHG16B,
 def : Proc<"penryn",           [FeatureSSE41, FeatureCMPXCHG16B,
                                 FeatureSlowBTMem]>;
 def : AtomProc<"atom",         [ProcIntelAtom, FeatureSSE3, FeatureCMPXCHG16B,
-                                FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP]>;
+                                FeatureMOVBE, FeatureSlowBTMem, FeatureLeaForSP,
+                                FeatureSlowDivide]>;
 // "Arrandale" along with corei3 and corei5
 def : Proc<"corei7",           [FeatureSSE42, FeatureCMPXCHG16B,
                                 FeatureSlowBTMem, FeatureFastUAMem,
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9f487a6..67ad99d 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -182,6 +182,10 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
   setSchedulingPreference(Sched::RegPressure);
   setStackPointerRegisterToSaveRestore(X86StackPtr);
 
+  // Bypass i32 with i8 on Atom when compiling with O2
+  if (Subtarget->hasSlowDivide() && TM.getOptLevel() >= CodeGenOpt::Default)
+    addBypassSlowDivType(Type::getInt32Ty(getGlobalContext()), Type::getInt8Ty(getGlobalContext()));
+
   if (Subtarget->isTargetWindows() && !Subtarget->isTargetCygMing()) {
     // Setup Windows compiler runtime calls.
     setLibcallName(RTLIB::SDIV_I64, "_alldiv");
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 9087852..0d7b664 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -346,6 +346,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
   , HasVectorUAMem(false)
   , HasCmpxchg16b(false)
   , UseLeaForSP(false)
+  , HasSlowDivide(false)
   , PostRAScheduler(false)
   , stackAlignment(4)
   // FIXME: this is a known good value for Yonah. How about others?
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 33608bb..dde7e24 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -136,6 +136,10 @@ protected:
   /// the stack pointer. This is an optimization for Intel Atom processors.
   bool UseLeaForSP;
 
+  /// HasSlowDivide - True if smaller divides are significantly faster than
+  /// full divides and should be used when possible.
+  bool HasSlowDivide;
+
   /// PostRAScheduler - True if using post-register-allocation scheduler.
   bool PostRAScheduler;
 
@@ -221,6 +225,7 @@
   bool hasVectorUAMem() const { return HasVectorUAMem; }
   bool hasCmpxchg16b() const { return HasCmpxchg16b; }
   bool useLeaForSP() const { return UseLeaForSP; }
+  bool hasSlowDivide() const { return HasSlowDivide; }
 
   bool isAtom() const { return X86ProcFamily == IntelAtom; }
 
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 57a648f..5912107 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -43,6 +43,7 @@
 #include "llvm/Transforms/Utils/AddrModeMatcher.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/BuildLibCalls.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
 #include "llvm/Transforms/Utils/Local.h"
 using namespace llvm;
 using namespace llvm::PatternMatch;
@@ -148,7 +149,19 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
   PFI = getAnalysisIfAvailable<ProfileInfo>();
   OptSize = F.hasFnAttr(Attribute::OptimizeForSize);
 
-  // First pass, eliminate blocks that contain only PHI nodes and an
+  /// This optimization identifies DIV instructions that can be
+  /// profitably bypassed and carried out with a shorter, faster divide.
+  if (TLI && TLI->isSlowDivBypassed()) {
+    const DenseMap<Type *, Type *> &BypassTypeMap = TLI->getBypassSlowDivTypes();
+
+    for (Function::iterator I = F.begin(); I != F.end(); I++) {
+      EverMadeChange |= bypassSlowDivision(F,
+                                           I,
+                                           BypassTypeMap);
+    }
+  }
+
+  // Eliminate blocks that contain only PHI nodes and an
   // unconditional branch.
   EverMadeChange |= EliminateMostlyEmptyBlocks(F);
 
diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp
new file mode 100644
index 0000000..1c58bec
--- /dev/null
+++ b/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -0,0 +1,251 @@
+//===-- BypassSlowDivision.cpp - Bypass slow division ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains an optimization for div and rem on architectures that
+// execute short instructions significantly faster than longer instructions.
+// For example, on Intel Atom 32-bit divides are slow enough that during
+// runtime it is profitable to check the value of the operands, and if they are
+// positive and less than 256 use an unsigned 8-bit divide.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "bypass-slow-division"
+#include "llvm/Instructions.h"
+#include "llvm/Function.h"
+#include "llvm/IRBuilder.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Transforms/Utils/BypassSlowDivision.h"
+
+using namespace llvm;
+
+namespace llvm {
+  struct DivOpInfo {
+    bool SignedOp;
+    Value *Dividend;
+    Value *Divisor;
+
+    DivOpInfo(bool InSignedOp, Value *InDividend, Value *InDivisor)
+      : SignedOp(InSignedOp), Dividend(InDividend), Divisor(InDivisor) {}
+  };
+
+  struct DivPhiNodes {
+    PHINode *Quotient;
+    PHINode *Remainder;
+
+    DivPhiNodes(PHINode *InQuotient, PHINode *InRemainder)
+      : Quotient(InQuotient), Remainder(InRemainder) {}
+  };
+
+  template<>
+  struct DenseMapInfo<DivOpInfo> {
+    static bool isEqual(const DivOpInfo &Val1, const DivOpInfo &Val2) {
+      return Val1.SignedOp == Val2.SignedOp &&
+             Val1.Dividend == Val2.Dividend &&
+             Val1.Divisor == Val2.Divisor;
+    }
+
+    static DivOpInfo getEmptyKey() {
+      return DivOpInfo(false, 0, 0);
+    }
+
+    static DivOpInfo getTombstoneKey() {
+      return DivOpInfo(true, 0, 0);
+    }
+
+    static unsigned getHashValue(const DivOpInfo &Val) {
+      return (unsigned)(reinterpret_cast<uintptr_t>(Val.Dividend) ^
+                        reinterpret_cast<uintptr_t>(Val.Divisor)) ^
+             (unsigned)Val.SignedOp;
+    }
+  };
+
+  typedef DenseMap<DivOpInfo, DivPhiNodes> DivCacheTy;
+}
+
+// insertFastDiv - Substitutes the div/rem instruction with code that checks the
+// value of the operands and uses a shorter-faster div/rem instruction when
+// possible and the longer-slower div/rem instruction otherwise.
+static void insertFastDiv(Function &F,
+                          Function::iterator &I,
+                          BasicBlock::iterator &J,
+                          IntegerType *BypassType,
+                          bool UseDivOp,
+                          bool UseSignedOp,
+                          DivCacheTy &PerBBDivCache)
+{
+  // Get instruction operands
+  Instruction *Instr = J;
+  Value *Dividend = Instr->getOperand(0);
+  Value *Divisor = Instr->getOperand(1);
+
+  if (dyn_cast<ConstantInt>(Divisor) != 0 ||
+      (dyn_cast<ConstantInt>(Dividend) != 0 &&
+       dyn_cast<ConstantInt>(Divisor) != 0)) {
+    // Operations with immediate values should have
+    // been solved and replaced during compile time.
+    return;
+  }
+
+  // Basic Block is split before divide
+  BasicBlock *MainBB = I;
+  BasicBlock *SuccessorBB = I->splitBasicBlock(J);
+  I++; //advance iterator I to successorBB
+
+  // Add new basic block for slow divide operation
+  BasicBlock *SlowBB = BasicBlock::Create(F.getContext(), "",
+                                          MainBB->getParent(), SuccessorBB);
+  SlowBB->moveBefore(SuccessorBB);
+  IRBuilder<> SlowBuilder(SlowBB, SlowBB->begin());
+  Value *SlowQuotientV;
+  Value *SlowRemainderV;
+  if (UseSignedOp) {
+    SlowQuotientV = SlowBuilder.CreateSDiv(Dividend, Divisor);
+    SlowRemainderV = SlowBuilder.CreateSRem(Dividend, Divisor);
+  } else {
+    SlowQuotientV = SlowBuilder.CreateUDiv(Dividend, Divisor);
+    SlowRemainderV = SlowBuilder.CreateURem(Dividend, Divisor);
+  }
+  SlowBuilder.CreateBr(SuccessorBB);
+
+  // Add new basic block for fast divide operation
+  BasicBlock *FastBB = BasicBlock::Create(F.getContext(), "",
+                                          MainBB->getParent(), SuccessorBB);
+  FastBB->moveBefore(SlowBB);
+  IRBuilder<> FastBuilder(FastBB, FastBB->begin());
+  Value *ShortDivisorV = FastBuilder.CreateCast(Instruction::Trunc, Divisor, BypassType);
+  Value *ShortDividendV = FastBuilder.CreateCast(Instruction::Trunc, Dividend, BypassType);
+
+  // udiv/urem because optimization only handles positive numbers
+  Value *ShortQuotientV = FastBuilder.CreateExactUDiv(ShortDividendV,
+                                                      ShortDivisorV);
+  Value *ShortRemainderV = FastBuilder.CreateURem(ShortDividendV,
+                                                  ShortDivisorV);
+  Value *FastQuotientV = FastBuilder.CreateCast(Instruction::ZExt,
+                                                ShortQuotientV,
+                                                Dividend->getType());
+  Value *FastRemainderV = FastBuilder.CreateCast(Instruction::ZExt,
+                                                 ShortRemainderV,
+                                                 Dividend->getType());
+  FastBuilder.CreateBr(SuccessorBB);
+
+  // Phi nodes for result of div and rem
+  IRBuilder<> SuccessorBuilder(SuccessorBB, SuccessorBB->begin());
+  PHINode *QuoPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+  QuoPhi->addIncoming(SlowQuotientV, SlowBB);
+  QuoPhi->addIncoming(FastQuotientV, FastBB);
+  PHINode *RemPhi = SuccessorBuilder.CreatePHI(Instr->getType(), 2);
+  RemPhi->addIncoming(SlowRemainderV, SlowBB);
+  RemPhi->addIncoming(FastRemainderV, FastBB);
+
+  // Replace Instr with appropriate phi node
+  if (UseDivOp) {
+    Instr->replaceAllUsesWith(QuoPhi);
+  } else {
+    Instr->replaceAllUsesWith(RemPhi);
+  }
+  Instr->eraseFromParent();
+
+  // Combine operands into a single value with OR for value testing below
+  MainBB->getInstList().back().eraseFromParent();
+  IRBuilder<> MainBuilder(MainBB, MainBB->end());
+  Value *OrV = MainBuilder.CreateOr(Dividend, Divisor);
+
+  // BitMask is inverted to check if the operands are
+  // larger than the bypass type
+  uint64_t BitMask = ~BypassType->getBitMask();
+  Value *AndV = MainBuilder.CreateAnd(OrV, BitMask);
+
+  // Compare operand values and branch
+  Value *ZeroV = MainBuilder.getInt32(0);
+  Value *CmpV = MainBuilder.CreateICmpEQ(AndV, ZeroV);
+  MainBuilder.CreateCondBr(CmpV, FastBB, SlowBB);
+
+  // point iterator J at first instruction of successorBB
+  J = I->begin();
+
+  // Cache phi nodes to be used later in place of other instances
+  // of div or rem with the same sign, dividend, and divisor
+  DivOpInfo Key(UseSignedOp, Dividend, Divisor);
+  DivPhiNodes Value(QuoPhi, RemPhi);
+  PerBBDivCache.insert(std::pair<DivOpInfo, DivPhiNodes>(Key, Value));
+}
+
+// reuseOrInsertFastDiv - Reuses the previously computed quotient or remainder
+// if operands and operation are identical. Otherwise calls insertFastDiv to
+// perform the optimization and caches the resulting quotient and remainder.
+static void reuseOrInsertFastDiv(Function &F,
+                                 Function::iterator &I,
+                                 BasicBlock::iterator &J,
+                                 IntegerType *BypassType,
+                                 bool UseDivOp,
+                                 bool UseSignedOp,
+                                 DivCacheTy &PerBBDivCache)
+{
+  // Get instruction operands
+  Instruction *Instr = J;
+  DivOpInfo Key(UseSignedOp, Instr->getOperand(0), Instr->getOperand(1));
+  DivCacheTy::const_iterator CacheI = PerBBDivCache.find(Key);
+
+  if (CacheI == PerBBDivCache.end()) {
+    // If previous instance does not exist, insert fast div
+    insertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, PerBBDivCache);
+    return;
+  }
+
+  // Replace operation value with previously generated phi node
+  DivPhiNodes Value = CacheI->second;
+  if (UseDivOp) {
+    // Replace all uses of div instruction with quotient phi node
+    J->replaceAllUsesWith(Value.Quotient);
+  } else {
+    // Replace all uses of rem instruction with remainder phi node
+    J->replaceAllUsesWith(Value.Remainder);
+  }
+
+  // Advance to next operation
+  J++;
+
+  // Remove redundant operation
+  Instr->eraseFromParent();
+}
+
+// bypassSlowDivision - This optimization identifies DIV instructions that can
+// be profitably bypassed and carried out with a shorter, faster divide.
+bool bypassSlowDivision(Function &F,
+                        Function::iterator &I,
+                        const llvm::DenseMap<Type *, Type *> &BypassTypeMap)
+{
+  DivCacheTy DivCache;
+
+  bool MadeChange = false;
+  for (BasicBlock::iterator J = I->begin(); J != I->end(); J++) {
+
+    // Get instruction details
+    unsigned Opcode = J->getOpcode();
+    bool UseDivOp = Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
+    bool UseRemOp = Opcode == Instruction::SRem || Opcode == Instruction::URem;
+    bool UseSignedOp = Opcode == Instruction::SDiv || Opcode == Instruction::SRem;
+
+    // Only optimize div or rem ops
+    if (!UseDivOp && !UseRemOp) {
+      continue;
+    }
+    // Continue if div/rem type is not bypassed
+    DenseMap<Type *, Type *>::const_iterator BT = BypassTypeMap.find(J->getType());
+    if (BT == BypassTypeMap.end()) {
+      continue;
+    }
+
+    IntegerType *BypassType = (IntegerType *)BT->second;
+    reuseOrInsertFastDiv(F, I, J, BypassType, UseDivOp, UseSignedOp, DivCache);
+    MadeChange = true;
+  }
+
+  return MadeChange;
+}
diff --git a/lib/Transforms/Utils/CMakeLists.txt b/lib/Transforms/Utils/CMakeLists.txt
index 4ff31ca..215a16f 100644
--- a/lib/Transforms/Utils/CMakeLists.txt
+++ b/lib/Transforms/Utils/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_library(LLVMTransformUtils
   BasicBlockUtils.cpp
   BreakCriticalEdges.cpp
   BuildLibCalls.cpp
+  BypassSlowDivision.cpp
   CloneFunction.cpp
   CloneModule.cpp
   CmpInstAnalysis.cpp
diff --git a/test/CodeGen/X86/atom-bypass-slow-division.ll b/test/CodeGen/X86/atom-bypass-slow-division.ll
new file mode 100644
index 0000000..e7c9605
--- /dev/null
+++ b/test/CodeGen/X86/atom-bypass-slow-division.ll
@@ -0,0 +1,112 @@
+; RUN: llc < %s -mcpu=atom -mtriple=i686-linux | FileCheck %s
+
+define i32 @test_get_quotient(i32 %a, i32 %b) nounwind {
+; CHECK: test_get_quotient
+; CHECK: orl %ecx, %edx
+; CHECK-NEXT: testl $-256, %edx
+; CHECK-NEXT: je
+; CHECK: idivl
+; CHECK: ret
+; CHECK: divb
+; CHECK: ret
+  %result = sdiv i32 %a, %b
+  ret i32 %result
+}
+
+define i32 @test_get_remainder(i32 %a, i32 %b) nounwind {
+; CHECK: test_get_remainder
+; CHECK: orl %ecx, %edx
+; CHECK-NEXT: testl $-256, %edx
+; CHECK-NEXT: je
+; CHECK: idivl
+; CHECK: ret
+; CHECK: divb
+; CHECK: ret
+  %result = srem i32 %a, %b
+  ret i32 %result
+}
+
+define i32 @test_get_quotient_and_remainder(i32 %a, i32 %b) nounwind {
+; CHECK: test_get_quotient_and_remainder
+; CHECK: orl %ecx, %edx
+; CHECK-NEXT: testl $-256, %edx
+; CHECK-NEXT: je
+; CHECK: idivl
+; CHECK: divb
+; CHECK: addl
+; CHECK: ret
+; CEECK-NOT: idivl
+; CHECK-NOT: divb
+  %resultdiv = sdiv i32 %a, %b
+  %resultrem = srem i32 %a, %b
+  %result = add i32 %resultdiv, %resultrem
+  ret i32 %result
+}
+
+define i32 @test_use_div_and_idiv(i32 %a, i32 %b) nounwind {
+; CHECK: test_use_div_and_idiv
+; CHECK: idivl
+; CHECK: divb
+; CHECK: divl
+; CHECK: divb
+; CHECK: addl
+; CHECK: ret
+  %resultidiv = sdiv i32 %a, %b
+  %resultdiv = udiv i32 %a, %b
+  %result = add i32 %resultidiv, %resultdiv
+  ret i32 %result
+}
+
+define i32 @test_use_div_imm_imm() nounwind {
+; CHECK: test_use_div_imm_imm
+; CHECK: movl $64
+  %resultdiv = sdiv i32 256, 4
+  ret i32 %resultdiv
+}
+
+define i32 @test_use_div_reg_imm(i32 %a) nounwind {
+; CHECK: test_use_div_reg_imm
+; CEHCK-NOT: test
+; CHECK-NOT: idiv
+; CHECK-NOT: divb
+  %resultdiv = sdiv i32 %a, 33
+  ret i32 %resultdiv
+}
+
+define i32 @test_use_rem_reg_imm(i32 %a) nounwind {
+; CHECK: test_use_rem_reg_imm
+; CEHCK-NOT: test
+; CHECK-NOT: idiv
+; CHECK-NOT: divb
+  %resultrem = srem i32 %a, 33
+  ret i32 %resultrem
+}
+
+define i32 @test_use_divrem_reg_imm(i32 %a) nounwind {
+; CHECK: test_use_divrem_reg_imm
+; CEHCK-NOT: test
+; CHECK-NOT: idiv
+; CHECK-NOT: divb
+  %resultdiv = sdiv i32 %a, 33
+  %resultrem = srem i32 %a, 33
+  %result = add i32 %resultdiv, %resultrem
+  ret i32 %result
+}
+
+define i32 @test_use_div_imm_reg(i32 %a) nounwind {
+; CHECK: test_use_div_imm_reg
+; CHECK: test
+; CHECK: idiv
+; CHECK: divb
+  %resultdiv = sdiv i32 4, %a
+  ret i32 %resultdiv
+}
+
+define i32 @test_use_rem_imm_reg(i32 %a) nounwind {
+; CHECK: test_use_rem_imm_reg
+; CHECK: test
+; CHECK: idiv
+; CHECK: divb
+  %resultdiv = sdiv i32 4, %a
+  ret i32 %resultdiv
+}
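
For reference, the IR that insertFastDiv produces for a case like test_get_quotient above looks roughly like the following hand-written sketch. This is not part of the patch: block and value names are illustrative, and the unused remainder computation that the pass also emits in both arms (so div and rem can share the same phi nodes) is omitted for brevity.

define i32 @sample_quotient(i32 %a, i32 %b) {
entry:
  ; MainBB: test whether any bit above the low 8 is set in either operand
  %or = or i32 %a, %b
  %hi = and i32 %or, -256            ; -256 == ~0xFF, the inverted bypass-type mask
  %cmp = icmp eq i32 %hi, 0
  br i1 %cmp, label %fast, label %slow

fast:                                ; both operands are positive and < 256
  %a8 = trunc i32 %a to i8
  %b8 = trunc i32 %b to i8
  %q8 = udiv i8 %a8, %b8             ; short unsigned divide (divb on x86)
  %fast.q = zext i8 %q8 to i32
  br label %join

slow:                                ; full-width divide (idivl on x86)
  %slow.q = sdiv i32 %a, %b
  br label %join

join:                                ; SuccessorBB: merge the two results
  %q = phi i32 [ %fast.q, %fast ], [ %slow.q, %slow ]
  ret i32 %q
}

The "testl $-256" / "je" / "idivl" / "divb" sequences checked in the test file above correspond to this control flow after instruction selection.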