author     Jim Grosbach <grosbach@apple.com>   2010-07-16 23:05:05 +0000
committer  Jim Grosbach <grosbach@apple.com>   2010-07-16 23:05:05 +0000
commit     469bbdb597f27d6900c95b6d8ae20a45b79ce91b (patch)
tree       3972372fc21435a8b44745ef8375f13d90c10521
parent     6dd26ba4bab4e3ebb1545e7e2211297f66e61e0b (diff)
Add basic support to code-gen the ARM/Thumb2 bit-field insert (BFI) instruction
and a combine pattern to use it for setting a bit-field to a constant
value. More to come for non-constant stores.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108570 91177308-0d34-0410-b5e6-96231b3b80d8
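
The combine rewrites "or (and A, mask), val" into "ARMbfi A, val>>lsb, mask" only when mask clears one contiguous run of bits and val fits entirely inside that run. As a quick illustration, here is a standalone C++ sketch using the constants from the new bfi.ll tests; the helper names are made up for this sketch (not the in-tree ARM::isBitFieldInvertedMask or PerformORCombine), and plain uint32_t arithmetic stands in for the SelectionDAG nodes:

// Standalone model of the OR-combine's legality check and operand computation.
#include <cassert>
#include <cstdint>
#include <cstdio>

// True when ~mask is one contiguous run of set bits (and mask != ~0u),
// e.g. mask = 0xf000ffff: ones on the outside, a single hole of zeros inside.
static bool isInvertedBitFieldMask(uint32_t mask) {
  if (mask == 0xffffffffu)
    return false;
  uint32_t field = ~mask;                 // the bits a BFI would overwrite
  uint32_t low = field & (0u - field);    // lowest set bit of the hole
  return ((field + low) & field) == 0;    // contiguous iff the carry clears it
}

static unsigned countTrailingZeros(uint32_t v) { // v != 0 by construction here
  unsigned n = 0;
  for (; !(v & 1u); v >>= 1) ++n;
  return n;
}

static unsigned popCount(uint32_t v) {
  unsigned n = 0;
  for (; v; v &= v - 1) ++n;
  return n;
}

int main() {
  // From the tests: %1 = and i32 %0, -62914561 ; %2 = or i32 %1, 41943040
  uint32_t mask = 0xfc3fffffu;  // -62914561: a 4-bit hole at bits 25..22
  uint32_t val  = 0x02800000u;  // 41943040: the constant being OR'd in

  // The combine only fires when the mask really is an inverted bit-field
  // mask and the value lies entirely inside the hole.
  assert(isInvertedBitFieldMask(mask) && (val & ~mask) == val);

  unsigned lsb   = countTrailingZeros(~mask);  // 22
  unsigned width = popCount(~mask);            // 4
  uint32_t imm   = val >> lsb;                 // 10, materialized into a register

  // Matches the expected output: "mov r2, #10" / "bfi r1, r2, #22, #4".
  std::printf("mov rX, #%u ; bfi rD, rX, #%u, #%u\n", imm, lsb, width);
  return 0;
}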
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp                  | 66
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h                    |  6
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td                      | 28
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td                    | 10
-rw-r--r--  lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp |  8
-rw-r--r--  lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h | 22
-rw-r--r--  test/CodeGen/ARM/bfi.ll                             | 17
-rw-r--r--  test/CodeGen/Thumb2/bfi.ll                          | 17
8 files changed, 138 insertions(+), 36 deletions(-)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 444e30e..34b662a 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -530,6 +530,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setTargetDAGCombine(ISD::SUB);
   setTargetDAGCombine(ISD::MUL);
 
+  if (Subtarget->hasV6T2Ops())
+    setTargetDAGCombine(ISD::OR);
+
   setStackPointerRegisterToSaveRestore(ARM::SP);
 
   if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
@@ -4232,6 +4235,53 @@ static SDValue PerformMULCombine(SDNode *N,
   return SDValue();
 }
 
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+                                TargetLowering::DAGCombinerInfo &DCI,
+                                const ARMSubtarget *Subtarget) {
+  // BFI is only available on V6T2+
+  if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+  // or (and A, mask), val => ARMbfi A, val, mask
+  // iff (val & mask) == val
+  if (N0->getOpcode() != ISD::AND)
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  if (VT != MVT::i32)
+    return SDValue();
+
+  // The value and the mask need to be constants so we can verify this is
+  // actually a bitfield set. If the mask is 0xffff, we can do better
+  // via a movt instruction, so don't use BFI in that case.
+  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  if (!C)
+    return SDValue();
+  unsigned Mask = C->getZExtValue();
+  if (Mask == 0xffff)
+    return SDValue();
+  C = dyn_cast<ConstantSDNode>(N1);
+  if (!C)
+    return SDValue();
+  unsigned Val = C->getZExtValue();
+  if (ARM::isBitFieldInvertedMask(Mask) && (Val & ~Mask) != Val)
+    return SDValue();
+  Val >>= CountTrailingZeros_32(~Mask);
+
+  DebugLoc DL = N->getDebugLoc();
+  SDValue Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
+                            DAG.getConstant(Val, MVT::i32),
+                            DAG.getConstant(Mask, MVT::i32));
+
+  // Do not add new nodes to DAG combiner worklist.
+  DCI.CombineTo(N, Res, false);
+
+  return SDValue();
+}
+
 /// PerformVMOVRRDCombine - Target-specific dag combine xforms for
 /// ARMISD::VMOVRRD.
 static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -4649,6 +4699,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
   case ISD::ADD:        return PerformADDCombine(N, DCI);
   case ISD::SUB:        return PerformSUBCombine(N, DCI);
   case ISD::MUL:        return PerformMULCombine(N, DCI, Subtarget);
+  case ISD::OR:         return PerformORCombine(N, DCI, Subtarget);
   case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
   case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
   case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
@@ -5380,6 +5431,21 @@ int ARM::getVFPf64Imm(const APFloat &FPImm) {
   return ((int)Sign << 7) | (Exp << 4) | Mantissa;
 }
 
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+  if (v == 0xffffffff)
+    return 0;
+  // there can be 1's on either or both "outsides", all the "inside"
+  // bits must be 0's
+  unsigned int lsb = 0, msb = 31;
+  while (v & (1 << msb)) --msb;
+  while (v & (1 << lsb)) ++lsb;
+  for (unsigned int i = lsb; i <= msb; ++i) {
+    if (v & (1 << i))
+      return 0;
+  }
+  return 1;
+}
+
 /// isFPImmLegal - Returns true if the target can instruction select the
 /// specified FP immediate natively. If false, the legalizer will
 /// materialize the FP immediate as a load from a constant pool.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 128b72e..1c178b3 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -150,7 +150,10 @@ namespace llvm {
 
       // Floating-point max and min:
       FMAX,
-      FMIN
+      FMIN,
+
+      // Bit-field insert
+      BFI
     };
   }
 
@@ -162,6 +165,7 @@ namespace llvm {
     /// returns -1.
     int getVFPf32Imm(const APFloat &FPImm);
     int getVFPf64Imm(const APFloat &FPImm);
+    bool isBitFieldInvertedMask(unsigned v);
   }
 
  //===--------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 51fc152..7e00b4b 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -61,6 +61,9 @@ def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
 
 def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
 
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+                                      SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
 // Node definitions.
 def ARMWrapper       : SDNode<"ARMISD::Wrapper",     SDTIntUnaryOp>;
 def ARMWrapperJT     : SDNode<"ARMISD::WrapperJT",   SDTIntBinOp>;
@@ -131,6 +134,9 @@ def ARMrbit          : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
 
 def ARMtcret         : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
                         [SDNPHasChain,  SDNPOptInFlag, SDNPVariadic]>;
+
+def ARMbfi           : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
 //===----------------------------------------------------------------------===//
 // ARM Instruction Predicate Definitions.
 //
@@ -221,19 +227,7 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
 /// e.g., 0xf000ffff
 def bf_inv_mask_imm : Operand<i32>,
                       PatLeaf<(imm), [{
-  uint32_t v = (uint32_t)N->getZExtValue();
-  if (v == 0xffffffff)
-    return 0;
-  // there can be 1's on either or both "outsides", all the "inside"
-  // bits must be 0's
-  unsigned int lsb = 0, msb = 31;
-  while (v & (1 << msb)) --msb;
-  while (v & (1 << lsb)) ++lsb;
-  for (unsigned int i = lsb; i <= msb; ++i) {
-    if (v & (1 << i))
-      return 0;
-  }
-  return 1;
+  return ARM::isBitFieldInvertedMask(N->getZExtValue());
 }] > {
   let PrintMethod = "printBitfieldInvMaskImmOperand";
 }
@@ -1858,11 +1852,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
 }
 
 // A8.6.18  BFI - Bitfield insert (Encoding A1)
-// Added for disassembler with the pattern field purposely left blank.
-def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
             AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
-            "bfi", "\t$dst, $src, $imm", "",
-            [/* For disassembly only; pattern left blank */]>,
+            "bfi", "\t$dst, $val, $imm", "$src = $dst",
+            [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+                             bf_inv_mask_imm:$imm))]>,
             Requires<[IsARM, HasV6T2]> {
   let Inst{27-21} = 0b0111110;
   let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index bbe675e..d082a6b 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1666,10 +1666,12 @@ def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
 }
 
 // A8.6.18  BFI - Bitfield insert (Encoding T1)
-// Added for disassembler with the pattern field purposely left blank.
-// FIXME: Utilize this instruction in codgen.
-def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
-                IIC_iALUi, "bfi", "\t$dst, $src, $lsb, $width", []> {
+let Constraints = "$src = $dst" in
+def t2BFI : T2I<(outs GPR:$dst),
+                (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
+                IIC_iALUi, "bfi", "\t$dst, $val, $imm",
+                [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+                                 bf_inv_mask_imm:$imm))]> {
   let Inst{31-27} = 0b11110;
   let Inst{25} = 1;
   let Inst{24-20} = 0b10110;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index a07ff28..57972d1 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -989,10 +989,12 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
 
   // Special-case handling of BFC/BFI/SBFX/UBFX.
   if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
-    // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI.
-    MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0
-                                       : getRegisterEnum(B, ARM::GPRRegClassID,
+    MI.addOperand(MCOperand::CreateReg(0));
+    if (Opcode == ARM::BFI) {
+      MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
                                          decodeRm(insn))));
+      ++OpIdx;
+    }
     uint32_t mask = 0;
     if (!getBFCInvMask(insn, mask))
       return false;
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 4b7a0bf..3e7d4d6 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -1555,6 +1555,13 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
     ++OpIdx;
   }
 
+  if (Opcode == ARM::t2BFI) {
+    // Add val reg operand.
+    MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+                                                       decodeRn(insn))));
+    ++OpIdx;
+  }
+
   assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
          && !OpInfo[OpIdx].isOptionalDef()
          && "Pure imm operand expected");
@@ -1567,7 +1574,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
     MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
   else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16)
     MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
-  else if (Opcode == ARM::t2BFC) {
+  else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
     uint32_t mask = 0;
     if (getBitfieldInvMask(insn, mask))
       MI.addOperand(MCOperand::CreateImm(mask));
@@ -1575,17 +1582,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
       return false;
   } else {
     // Handle the case of: lsb width
-    assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX ||
-            Opcode == ARM::t2BFI) && "Unexpected opcode");
+    assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX)
+           && "Unexpected opcode");
     MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
-    if (Opcode == ARM::t2BFI) {
-      if (getMsb(insn) < getLsb(insn)) {
-        DEBUG(errs() << "Encoding error: msb < lsb\n");
-        return false;
-      }
-      MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1));
-    } else
-      MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
+    MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
     ++OpIdx;
   }
 
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
new file mode 100644
index 0000000..48ef437
--- /dev/null
+++ b/test/CodeGen/ARM/bfi.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=arm -mattr=+v6t2 < %s | FileCheck %s
+
+%struct.F = type { [3 x i8], i8 }
+
+@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1]
+
+define void @f1([1 x i32] %f.coerce0) nounwind {
+entry:
+; CHECK: f1
+; CHECK: mov r2, #10
+; CHECK: bfi r1, r2, #22, #4
+  %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
+  %1 = and i32 %0, -62914561 ; <i32> [#uses=1]
+  %2 = or i32 %1, 41943040 ; <i32> [#uses=1]
+  store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
+  ret void
+}
diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll
new file mode 100644
index 0000000..a256d67
--- /dev/null
+++ b/test/CodeGen/Thumb2/bfi.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=thumb -mattr=+v6t2 < %s | FileCheck %s
+
+%struct.F = type { [3 x i8], i8 }
+
+@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1]
+
+define void @f1([1 x i32] %f.coerce0) nounwind {
+entry:
+; CHECK: f1
+; CHECK: movs r2, #10
+; CHECK: bfi r1, r2, #22, #4
+  %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
+  %1 = and i32 %0, -62914561 ; <i32> [#uses=1]
+  %2 = or i32 %1, 41943040 ; <i32> [#uses=1]
+  store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
+  ret void
+}
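
For reference, the expected "mov r2, #10" / "bfi r1, r2, #22, #4" output can be checked against the IR arithmetic with a small model of the BFI write-back described in ARM ARM A8.6.18. This is a hedged standalone C++ sketch, not code from this patch:

// BFI write-back semantics: copy `width` low bits of the source into the
// destination at bit position `lsb`, leaving all other bits unchanged.
// Assumes 1 <= width and lsb + width <= 31 for the shift below.
#include <cassert>
#include <cstdint>
#include <initializer_list>

static uint32_t bfi(uint32_t rd, uint32_t rn, unsigned lsb, unsigned width) {
  uint32_t fieldMask = ((1u << width) - 1u) << lsb;  // bits being replaced
  return (rd & ~fieldMask) | ((rn << lsb) & fieldMask);
}

int main() {
  // For any starting value X, "mov r2, #10; bfi r1, r2, #22, #4" produces the
  // same result as the IR sequence in the tests: (X & -62914561) | 41943040.
  for (uint32_t x : {0u, 0xffffffffu, 0x12345678u, 0x03c00001u}) {
    uint32_t viaIR  = (x & 0xfc3fffffu) | 0x02800000u;
    uint32_t viaBFI = bfi(x, 10u, 22u, 4u);
    assert(viaIR == viaBFI);
  }
  return 0;
}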