aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJim Grosbach <grosbach@apple.com>2010-07-16 23:05:05 +0000
committerJim Grosbach <grosbach@apple.com>2010-07-16 23:05:05 +0000
commit469bbdb597f27d6900c95b6d8ae20a45b79ce91b (patch)
tree3972372fc21435a8b44745ef8375f13d90c10521
parent6dd26ba4bab4e3ebb1545e7e2211297f66e61e0b (diff)
downloadexternal_llvm-469bbdb597f27d6900c95b6d8ae20a45b79ce91b.zip
external_llvm-469bbdb597f27d6900c95b6d8ae20a45b79ce91b.tar.gz
external_llvm-469bbdb597f27d6900c95b6d8ae20a45b79ce91b.tar.bz2
Add basic support to code-gen the ARM/Thumb2 bit-field insert (BFI) instruction
and a combine pattern to use it for setting a bit-field to a constant value. More to come for non-constant stores. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108570 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp66
-rw-r--r--lib/Target/ARM/ARMISelLowering.h6
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td28
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td10
-rw-r--r--lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp8
-rw-r--r--lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h22
-rw-r--r--test/CodeGen/ARM/bfi.ll17
-rw-r--r--test/CodeGen/Thumb2/bfi.ll17
8 files changed, 138 insertions, 36 deletions
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 444e30e..34b662a 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -530,6 +530,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SUB);
setTargetDAGCombine(ISD::MUL);
+ if (Subtarget->hasV6T2Ops())
+ setTargetDAGCombine(ISD::OR);
+
setStackPointerRegisterToSaveRestore(ARM::SP);
if (UseSoftFloat || Subtarget->isThumb1Only() || !Subtarget->hasVFP2())
@@ -4232,6 +4235,53 @@ static SDValue PerformMULCombine(SDNode *N,
return SDValue();
}
+/// PerformORCombine - Target-specific dag combine xforms for ISD::OR
+static SDValue PerformORCombine(SDNode *N,
+ TargetLowering::DAGCombinerInfo &DCI,
+ const ARMSubtarget *Subtarget) {
+ // BFI is only available on V6T2+
+ if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
+ // or (and A, mask), val => ARMbfi A, val, mask
+ // iff (val & mask) == val
+ if (N0->getOpcode() != ISD::AND)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ if (VT != MVT::i32)
+ return SDValue();
+
+ // The value and the mask need to be constants so we can verify this is
+ // actually a bitfield set. If the mask is 0xffff, we can do better
+ // via a movt instruction, so don't use BFI in that case.
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!C)
+ return SDValue();
+ unsigned Mask = C->getZExtValue();
+ if (Mask == 0xffff)
+ return SDValue();
+ C = dyn_cast<ConstantSDNode>(N1);
+ if (!C)
+ return SDValue();
+ unsigned Val = C->getZExtValue();
+ if (ARM::isBitFieldInvertedMask(Mask) && (Val & ~Mask) != Val)
+ return SDValue();
+ Val >>= CountTrailingZeros_32(~Mask);
+
+ DebugLoc DL = N->getDebugLoc();
+ SDValue Res = DAG.getNode(ARMISD::BFI, DL, VT, N0.getOperand(0),
+ DAG.getConstant(Val, MVT::i32),
+ DAG.getConstant(Mask, MVT::i32));
+
+ // Do not add new nodes to DAG combiner worklist.
+ DCI.CombineTo(N, Res, false);
+
+ return SDValue();
+}
+
/// PerformVMOVRRDCombine - Target-specific dag combine xforms for
/// ARMISD::VMOVRRD.
static SDValue PerformVMOVRRDCombine(SDNode *N,
@@ -4649,6 +4699,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ISD::MUL: return PerformMULCombine(N, DCI, Subtarget);
+ case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
@@ -5380,6 +5431,21 @@ int ARM::getVFPf64Imm(const APFloat &FPImm) {
return ((int)Sign << 7) | (Exp << 4) | Mantissa;
}
+bool ARM::isBitFieldInvertedMask(unsigned v) {
+ if (v == 0xffffffff)
+ return 0;
+ // there can be 1's on either or both "outsides", all the "inside"
+ // bits must be 0's
+ unsigned int lsb = 0, msb = 31;
+ while (v & (1 << msb)) --msb;
+ while (v & (1 << lsb)) ++lsb;
+ for (unsigned int i = lsb; i <= msb; ++i) {
+ if (v & (1 << i))
+ return 0;
+ }
+ return 1;
+}
+
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 128b72e..1c178b3 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -150,7 +150,10 @@ namespace llvm {
// Floating-point max and min:
FMAX,
- FMIN
+ FMIN,
+
+ // Bit-field insert
+ BFI
};
}
@@ -162,6 +165,7 @@ namespace llvm {
/// returns -1.
int getVFPf32Imm(const APFloat &FPImm);
int getVFPf64Imm(const APFloat &FPImm);
+ bool isBitFieldInvertedMask(unsigned v);
}
//===--------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 51fc152..7e00b4b 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -61,6 +61,9 @@ def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>;
+def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>,
+ SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
@@ -131,6 +134,9 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>;
def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET,
[SDNPHasChain, SDNPOptInFlag, SDNPVariadic]>;
+
+def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -221,19 +227,7 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
/// e.g., 0xf000ffff
def bf_inv_mask_imm : Operand<i32>,
PatLeaf<(imm), [{
- uint32_t v = (uint32_t)N->getZExtValue();
- if (v == 0xffffffff)
- return 0;
- // there can be 1's on either or both "outsides", all the "inside"
- // bits must be 0's
- unsigned int lsb = 0, msb = 31;
- while (v & (1 << msb)) --msb;
- while (v & (1 << lsb)) ++lsb;
- for (unsigned int i = lsb; i <= msb; ++i) {
- if (v & (1 << i))
- return 0;
- }
- return 1;
+ return ARM::isBitFieldInvertedMask(N->getZExtValue());
}] > {
let PrintMethod = "printBitfieldInvMaskImmOperand";
}
@@ -1858,11 +1852,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
}
// A8.6.18 BFI - Bitfield insert (Encoding A1)
-// Added for disassembler with the pattern field purposely left blank.
-def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+def BFI : I<(outs GPR:$dst), (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
- "bfi", "\t$dst, $src, $imm", "",
- [/* For disassembly only; pattern left blank */]>,
+ "bfi", "\t$dst, $val, $imm", "$src = $dst",
+ [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+ bf_inv_mask_imm:$imm))]>,
Requires<[IsARM, HasV6T2]> {
let Inst{27-21} = 0b0111110;
let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index bbe675e..d082a6b 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1666,10 +1666,12 @@ def t2UBFX : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
}
// A8.6.18 BFI - Bitfield insert (Encoding T1)
-// Added for disassembler with the pattern field purposely left blank.
-// FIXME: Utilize this instruction in codgen.
-def t2BFI : T2I<(outs GPR:$dst), (ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
- IIC_iALUi, "bfi", "\t$dst, $src, $lsb, $width", []> {
+let Constraints = "$src = $dst" in
+def t2BFI : T2I<(outs GPR:$dst),
+ (ins GPR:$src, GPR:$val, bf_inv_mask_imm:$imm),
+ IIC_iALUi, "bfi", "\t$dst, $val, $imm",
+ [(set GPR:$dst, (ARMbfi GPR:$src, GPR:$val,
+ bf_inv_mask_imm:$imm))]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
let Inst{24-20} = 0b10110;
diff --git a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
index a07ff28..57972d1 100644
--- a/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
+++ b/lib/Target/ARM/Disassembler/ARMDisassemblerCore.cpp
@@ -989,10 +989,12 @@ static bool DisassembleDPFrm(MCInst &MI, unsigned Opcode, uint32_t insn,
// Special-case handling of BFC/BFI/SBFX/UBFX.
if (Opcode == ARM::BFC || Opcode == ARM::BFI) {
- // TIED_TO operand skipped for BFC and Inst{3-0} (Reg) for BFI.
- MI.addOperand(MCOperand::CreateReg(Opcode == ARM::BFC ? 0
- : getRegisterEnum(B, ARM::GPRRegClassID,
+ MI.addOperand(MCOperand::CreateReg(0));
+ if (Opcode == ARM::BFI) {
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
decodeRm(insn))));
+ ++OpIdx;
+ }
uint32_t mask = 0;
if (!getBFCInvMask(insn, mask))
return false;
diff --git a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
index 4b7a0bf..3e7d4d6 100644
--- a/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
+++ b/lib/Target/ARM/Disassembler/ThumbDisassemblerCore.h
@@ -1555,6 +1555,13 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
++OpIdx;
}
+ if (Opcode == ARM::t2BFI) {
+ // Add val reg operand.
+ MI.addOperand(MCOperand::CreateReg(getRegisterEnum(B, ARM::GPRRegClassID,
+ decodeRn(insn))));
+ ++OpIdx;
+ }
+
assert(OpInfo[OpIdx].RegClass < 0 && !OpInfo[OpIdx].isPredicate()
&& !OpInfo[OpIdx].isOptionalDef()
&& "Pure imm operand expected");
@@ -1567,7 +1574,7 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
MI.addOperand(MCOperand::CreateImm(getIImm3Imm8(insn)));
else if (Opcode == ARM::t2MOVi16 || Opcode == ARM::t2MOVTi16)
MI.addOperand(MCOperand::CreateImm(getImm16(insn)));
- else if (Opcode == ARM::t2BFC) {
+ else if (Opcode == ARM::t2BFC || Opcode == ARM::t2BFI) {
uint32_t mask = 0;
if (getBitfieldInvMask(insn, mask))
MI.addOperand(MCOperand::CreateImm(mask));
@@ -1575,17 +1582,10 @@ static bool DisassembleThumb2DPBinImm(MCInst &MI, unsigned Opcode,
return false;
} else {
// Handle the case of: lsb width
- assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX ||
- Opcode == ARM::t2BFI) && "Unexpected opcode");
+ assert((Opcode == ARM::t2SBFX || Opcode == ARM::t2UBFX)
+ && "Unexpected opcode");
MI.addOperand(MCOperand::CreateImm(getLsb(insn)));
- if (Opcode == ARM::t2BFI) {
- if (getMsb(insn) < getLsb(insn)) {
- DEBUG(errs() << "Encoding error: msb < lsb\n");
- return false;
- }
- MI.addOperand(MCOperand::CreateImm(getMsb(insn) - getLsb(insn) + 1));
- } else
- MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
+ MI.addOperand(MCOperand::CreateImm(getWidthMinus1(insn) + 1));
++OpIdx;
}
diff --git a/test/CodeGen/ARM/bfi.ll b/test/CodeGen/ARM/bfi.ll
new file mode 100644
index 0000000..48ef437
--- /dev/null
+++ b/test/CodeGen/ARM/bfi.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=arm -mattr=+v6t2 < %s | FileCheck %s
+
+%struct.F = type { [3 x i8], i8 }
+
+@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1]
+
+define void @f1([1 x i32] %f.coerce0) nounwind {
+entry:
+; CHECK: f1
+; CHECK: mov r2, #10
+; CHECK: bfi r1, r2, #22, #4
+ %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
+ %1 = and i32 %0, -62914561 ; <i32> [#uses=1]
+ %2 = or i32 %1, 41943040 ; <i32> [#uses=1]
+ store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
+ ret void
+}
diff --git a/test/CodeGen/Thumb2/bfi.ll b/test/CodeGen/Thumb2/bfi.ll
new file mode 100644
index 0000000..a256d67
--- /dev/null
+++ b/test/CodeGen/Thumb2/bfi.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=thumb -mattr=+v6t2 < %s | FileCheck %s
+
+%struct.F = type { [3 x i8], i8 }
+
+@X = common global %struct.F zeroinitializer, align 4 ; <%struct.F*> [#uses=1]
+
+define void @f1([1 x i32] %f.coerce0) nounwind {
+entry:
+; CHECK: f1
+; CHECK: movs r2, #10
+; CHECK: bfi r1, r2, #22, #4
+ %0 = load i32* bitcast (%struct.F* @X to i32*), align 4 ; <i32> [#uses=1]
+ %1 = and i32 %0, -62914561 ; <i32> [#uses=1]
+ %2 = or i32 %1, 41943040 ; <i32> [#uses=1]
+ store i32 %2, i32* bitcast (%struct.F* @X to i32*), align 4
+ ret void
+}