aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/IR/IntrinsicsMips.td12
-rw-r--r--lib/Target/Mips/MSA.txt17
-rw-r--r--lib/Target/Mips/MipsMSAInstrInfo.td49
-rw-r--r--lib/Target/Mips/MipsSEISelLowering.cpp108
-rw-r--r--test/CodeGen/Mips/msa/bitwise.ll51
-rw-r--r--test/CodeGen/Mips/msa/compare.ll10
-rw-r--r--test/CodeGen/Mips/msa/i8.ll64
-rw-r--r--test/CodeGen/Mips/msa/vec.ll336
8 files changed, 471 insertions, 176 deletions
diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td
index 6276646..6c8aa4c 100644
--- a/include/llvm/IR/IntrinsicsMips.td
+++ b/include/llvm/IR/IntrinsicsMips.td
@@ -610,16 +610,20 @@ def int_mips_binsri_d : GCCBuiltin<"__builtin_msa_binsri_d">,
[IntrNoMem]>;
def int_mips_bmnz_v : GCCBuiltin<"__builtin_msa_bmnz_v">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
def int_mips_bmnzi_b : GCCBuiltin<"__builtin_msa_bmnzi_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
def int_mips_bmz_v : GCCBuiltin<"__builtin_msa_bmz_v">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
+ [IntrNoMem]>;
def int_mips_bmzi_b : GCCBuiltin<"__builtin_msa_bmzi_b">,
- Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>;
+ Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
+ [IntrNoMem]>;
def int_mips_bneg_b : GCCBuiltin<"__builtin_msa_bneg_b">,
Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>;
diff --git a/lib/Target/Mips/MSA.txt b/lib/Target/Mips/MSA.txt
index 802217a..270a723 100644
--- a/lib/Target/Mips/MSA.txt
+++ b/lib/Target/Mips/MSA.txt
@@ -54,3 +54,20 @@ binsri.[bhwd], binsli.[bhwd]:
appropriate.
Furthermore, the compiler may use bsel.[bhwd] for some masks that do
not survive the legalization process (this is a bug and will be fixed).
+
+bmnz.v, bmz.v, bsel.v:
+ These three operations differ only in the operand that is tied to the
+ result.
+  It is (currently) not possible to emit bmz.v or bsel.v, since bmnz.v is
+  the same operation and will be emitted instead.
+ In future, the compiler may choose between these three instructions
+ according to register allocation.
+
+bmnzi.b, bmzi.b:
+  Like their non-immediate counterparts, bmnzi.b and bmzi.b are the same
+  operation with the operands swapped. bmnzi.b will (currently) be emitted
+ for both cases.
+
+bseli.b:
+  Unlike the non-immediate versions, bseli.b is distinguishable from
+ bmnzi.b and bmzi.b and can be emitted.
diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td
index a393fdc..60cd44b 100644
--- a/lib/Target/Mips/MipsMSAInstrInfo.td
+++ b/lib/Target/Mips/MipsMSAInstrInfo.td
@@ -1538,14 +1538,53 @@ class BINSRI_H_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.h", v8i16, MSA128HOpnd>;
class BINSRI_W_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.w", v4i32, MSA128WOpnd>;
class BINSRI_D_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.d", v2i64, MSA128DOpnd>;
-class BMNZ_V_DESC : MSA_VEC_DESC_BASE<"bmnz.v", int_mips_bmnz_v, MSA128BOpnd>;
+class BMNZ_V_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt);
+ string AsmString = "bmnz.v\t$wd, $ws, $wt";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect MSA128BOpnd:$wt,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wd_in))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
-class BMNZI_B_DESC : MSA_I8_X_DESC_BASE<"bmnzi.b", int_mips_bmnzi_b,
- MSA128BOpnd>;
+class BMNZI_B_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ vsplat_uimm8:$u8);
+ string AsmString = "bmnzi.b\t$wd, $ws, $u8";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect vsplati8_uimm8:$u8,
+ MSA128BOpnd:$ws,
+ MSA128BOpnd:$wd_in))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
-class BMZ_V_DESC : MSA_VEC_DESC_BASE<"bmz.v", int_mips_bmz_v, MSA128BOpnd>;
+class BMZ_V_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ MSA128BOpnd:$wt);
+ string AsmString = "bmz.v\t$wd, $ws, $wt";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect MSA128BOpnd:$wt,
+ MSA128BOpnd:$wd_in,
+ MSA128BOpnd:$ws))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
-class BMZI_B_DESC : MSA_I8_X_DESC_BASE<"bmzi.b", int_mips_bmzi_b, MSA128BOpnd>;
+class BMZI_B_DESC {
+ dag OutOperandList = (outs MSA128BOpnd:$wd);
+ dag InOperandList = (ins MSA128BOpnd:$wd_in, MSA128BOpnd:$ws,
+ vsplat_uimm8:$u8);
+ string AsmString = "bmzi.b\t$wd, $ws, $u8";
+ list<dag> Pattern = [(set MSA128BOpnd:$wd, (vselect vsplati8_uimm8:$u8,
+ MSA128BOpnd:$wd_in,
+ MSA128BOpnd:$ws))];
+ InstrItinClass Itinerary = NoItinerary;
+ string Constraints = "$wd = $wd_in";
+}
class BNEG_B_DESC : MSA_3R_DESC_BASE<"bneg.b", int_mips_bneg_b, MSA128BOpnd>;
class BNEG_H_DESC : MSA_3R_DESC_BASE<"bneg.h", int_mips_bneg_h, MSA128HOpnd>;
diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp
index 9c54333..c6ca87c 100644
--- a/lib/Target/Mips/MipsSEISelLowering.cpp
+++ b/lib/Target/Mips/MipsSEISelLowering.cpp
@@ -516,6 +516,44 @@ static bool isVSplat(SDValue N, APInt &Imm, bool IsLittleEndian) {
return true;
}
+// Test whether the given node is an all-ones build_vector.
+static bool isVectorAllOnes(SDValue N) {
+ // Look through bitcasts. Endianness doesn't matter because we are looking
+ // for an all-ones value.
+ if (N->getOpcode() == ISD::BITCAST)
+ N = N->getOperand(0);
+
+ BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(N);
+
+ if (!BVN)
+ return false;
+
+ APInt SplatValue, SplatUndef;
+ unsigned SplatBitSize;
+ bool HasAnyUndefs;
+
+ // Endianness doesn't matter in this context because we are looking for
+ // an all-ones value.
+ if (BVN->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, HasAnyUndefs))
+ return SplatValue.isAllOnesValue();
+
+ return false;
+}
+
+// Test whether N is the bitwise inverse of OfNode.
+static bool isBitwiseInverse(SDValue N, SDValue OfNode) {
+ if (N->getOpcode() != ISD::XOR)
+ return false;
+
+ if (isVectorAllOnes(N->getOperand(0)))
+ return N->getOperand(1) == OfNode;
+
+ if (isVectorAllOnes(N->getOperand(1)))
+ return N->getOperand(0) == OfNode;
+
+ return false;
+}
+
// Perform combines where ISD::OR is the root node.
//
// Performs the following transformations:
@@ -544,6 +582,7 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
bool IsLittleEndian = !Subtarget->isLittle();
SDValue IfSet, IfClr, Cond;
+ bool IsConstantMask = false;
APInt Mask, InvMask;
// If Op0Op0 is an appropriate mask, try to find it's inverse in either
@@ -558,6 +597,8 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
IfClr = Op1Op1;
else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && Mask == ~InvMask)
IfClr = Op1Op0;
+
+ IsConstantMask = true;
}
// If IfClr is not yet set, and Op0Op1 is an appropriate mask, try the same
@@ -571,6 +612,47 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
IfClr = Op1Op1;
else if (isVSplat(Op1Op1, InvMask, IsLittleEndian) && Mask == ~InvMask)
IfClr = Op1Op0;
+
+ IsConstantMask = true;
+ }
+
+ // If IfClr is not yet set, try looking for a non-constant match.
+ // IfClr will be set if we find a valid match amongst the eight
+ // possibilities.
+ if (!IfClr.getNode()) {
+ if (isBitwiseInverse(Op0Op0, Op1Op0)) {
+ Cond = Op1Op0;
+ IfSet = Op1Op1;
+ IfClr = Op0Op1;
+ } else if (isBitwiseInverse(Op0Op1, Op1Op0)) {
+ Cond = Op1Op0;
+ IfSet = Op1Op1;
+ IfClr = Op0Op0;
+ } else if (isBitwiseInverse(Op0Op0, Op1Op1)) {
+ Cond = Op1Op1;
+ IfSet = Op1Op0;
+ IfClr = Op0Op1;
+ } else if (isBitwiseInverse(Op0Op1, Op1Op1)) {
+ Cond = Op1Op1;
+ IfSet = Op1Op0;
+ IfClr = Op0Op0;
+ } else if (isBitwiseInverse(Op1Op0, Op0Op0)) {
+ Cond = Op0Op0;
+ IfSet = Op0Op1;
+ IfClr = Op1Op1;
+ } else if (isBitwiseInverse(Op1Op1, Op0Op0)) {
+ Cond = Op0Op0;
+ IfSet = Op0Op1;
+ IfClr = Op1Op0;
+ } else if (isBitwiseInverse(Op1Op0, Op0Op1)) {
+ Cond = Op0Op1;
+ IfSet = Op0Op0;
+ IfClr = Op1Op1;
+ } else if (isBitwiseInverse(Op1Op1, Op0Op1)) {
+ Cond = Op0Op1;
+ IfSet = Op0Op0;
+ IfClr = Op1Op0;
+ }
}
// At this point, IfClr will be set if we have a valid match.
@@ -580,10 +662,12 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
assert(Cond.getNode() && IfSet.getNode());
// Fold degenerate cases.
- if (Mask.isAllOnesValue())
- return IfSet;
- else if (Mask == 0)
- return IfClr;
+ if (IsConstantMask) {
+ if (Mask.isAllOnesValue())
+ return IfSet;
+ else if (Mask == 0)
+ return IfClr;
+ }
// Transform the DAG into an equivalent VSELECT.
return DAG.getNode(ISD::VSELECT, SDLoc(N), Ty, Cond, IfClr, IfSet);
@@ -1284,6 +1368,20 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
DAG.getConstant(Mask, VecTy, true), Op->getOperand(1),
Op->getOperand(2));
}
+ case Intrinsic::mips_bmnz_v:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
+ Op->getOperand(2), Op->getOperand(1));
+ case Intrinsic::mips_bmnzi_b:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
+ lowerMSASplatImm(Op, 3, DAG), Op->getOperand(2),
+ Op->getOperand(1));
+ case Intrinsic::mips_bmz_v:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0), Op->getOperand(3),
+ Op->getOperand(1), Op->getOperand(2));
+ case Intrinsic::mips_bmzi_b:
+ return DAG.getNode(ISD::VSELECT, DL, Op->getValueType(0),
+ lowerMSASplatImm(Op, 3, DAG), Op->getOperand(1),
+ Op->getOperand(2));
case Intrinsic::mips_bnz_b:
case Intrinsic::mips_bnz_h:
case Intrinsic::mips_bnz_w:
@@ -1872,7 +1970,7 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_VOID(SDValue Op,
/// true.
static bool isSplatVector(const BuildVectorSDNode *N) {
unsigned int nOps = N->getNumOperands();
- assert(nOps > 1 && "isSplat has 0 or 1 sized build vector");
+ assert(nOps > 1 && "isSplatVector has 0 or 1 sized build vector");
SDValue Operand0 = N->getOperand(0);
diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll
index d0b13f6..1ec373b 100644
--- a/test/CodeGen/Mips/msa/bitwise.ll
+++ b/test/CodeGen/Mips/msa/bitwise.ll
@@ -972,29 +972,56 @@ define void @ctlz_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind {
; CHECK: .size ctlz_v2i64
}
-define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind {
+define void @bsel_v16i8(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b, <16 x i8>* %m) nounwind {
; CHECK: bsel_v16i8:
%1 = load <16 x i8>* %a
; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
%2 = load <16 x i8>* %b
; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6)
- %3 = and <16 x i8> %1, <i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6,
- i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6, i8 6>
- %4 = and <16 x i8> %2, <i8 249, i8 249, i8 249, i8 249,
- i8 249, i8 249, i8 249, i8 249,
- i8 249, i8 249, i8 249, i8 249,
- i8 249, i8 249, i8 249, i8 249>
- %5 = or <16 x i8> %3, %4
- ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 6
- ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]]
- store <16 x i8> %5, <16 x i8>* %c
- ; CHECK-DAG: st.b [[R3]], 0($4)
+ %3 = load <16 x i8>* %m
+ ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7)
+ %4 = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1>
+ %5 = and <16 x i8> %1, %3
+ %6 = and <16 x i8> %2, %4
+ %7 = or <16 x i8> %5, %6
+ ; bmnz is the same operation
+ ; CHECK-DAG: bmnz.v [[R1]], [[R2]], [[R3]]
+ store <16 x i8> %7, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R1]], 0($4)
ret void
; CHECK: .size bsel_v16i8
}
+define void @bsel_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %m) nounwind {
+ ; CHECK: bsel_v16i8_i:
+
+ %1 = load <16 x i8>* %a
+ ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
+ %2 = load <16 x i8>* %m
+ ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($6)
+ %3 = xor <16 x i8> %2, <i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1,
+ i8 -1, i8 -1, i8 -1, i8 -1>
+ %4 = and <16 x i8> %1, %3
+ %5 = and <16 x i8> <i8 6, i8 6, i8 6, i8 6,
+ i8 6, i8 6, i8 6, i8 6,
+ i8 6, i8 6, i8 6, i8 6,
+ i8 6, i8 6, i8 6, i8 6>, %2
+ %6 = or <16 x i8> %4, %5
+ ; CHECK-DAG: bseli.b [[R3]], [[R1]], 6
+ store <16 x i8> %6, <16 x i8>* %c
+ ; CHECK-DAG: st.b [[R3]], 0($4)
+
+ ret void
+ ; CHECK: .size bsel_v16i8_i
+}
+
define void @bsel_v8i16(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind {
; CHECK: bsel_v8i16:
diff --git a/test/CodeGen/Mips/msa/compare.ll b/test/CodeGen/Mips/msa/compare.ll
index e45e849..a7c704e 100644
--- a/test/CodeGen/Mips/msa/compare.ll
+++ b/test/CodeGen/Mips/msa/compare.ll
@@ -653,9 +653,10 @@ define void @bsel_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
%4 = icmp sgt <16 x i8> %1, %2
; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
%5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3
- ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+ ; bmnz.v is the same operation
+ ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]]
store <16 x i8> %5, <16 x i8>* %d
- ; CHECK-DAG: st.b [[R4]], 0($4)
+ ; CHECK-DAG: st.b [[R3]], 0($4)
ret void
; CHECK: .size bsel_s_v16i8
@@ -737,9 +738,10 @@ define void @bsel_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b,
%4 = icmp ugt <16 x i8> %1, %2
; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]]
%5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3
- ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]]
+ ; bmnz.v is the same operation
+ ; CHECK-DAG: bmnz.v [[R3]], [[R1]], [[R4]]
store <16 x i8> %5, <16 x i8>* %d
- ; CHECK-DAG: st.b [[R4]], 0($4)
+ ; CHECK-DAG: st.b [[R3]], 0($4)
ret void
; CHECK: .size bsel_u_v16i8
diff --git a/test/CodeGen/Mips/msa/i8.ll b/test/CodeGen/Mips/msa/i8.ll
index 1406588..f3e8dfc 100644
--- a/test/CodeGen/Mips/msa/i8.ll
+++ b/test/CodeGen/Mips/msa/i8.ll
@@ -20,64 +20,80 @@ declare <16 x i8> @llvm.mips.andi.b(<16 x i8>, i32) nounwind
; CHECK: andi.b
; CHECK: st.b
; CHECK: .size llvm_mips_andi_b_test
-;
-@llvm_mips_bmnzi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+
+@llvm_mips_bmnzi_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+@llvm_mips_bmnzi_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_bmnzi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
define void @llvm_mips_bmnzi_b_test() nounwind {
entry:
%0 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG1
- %1 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, i32 25)
- store <16 x i8> %1, <16 x i8>* @llvm_mips_bmnzi_b_RES
+ %1 = load <16 x i8>* @llvm_mips_bmnzi_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.bmnzi.b(<16 x i8> %0, <16 x i8> %1, i32 25)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_bmnzi_b_RES
ret void
}
-declare <16 x i8> @llvm.mips.bmnzi.b(<16 x i8>, i32) nounwind
+declare <16 x i8> @llvm.mips.bmnzi.b(<16 x i8>, <16 x i8>, i32) nounwind
; CHECK: llvm_mips_bmnzi_b_test:
-; CHECK: ld.b
-; CHECK: bmnzi.b
-; CHECK: st.b
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnzi_b_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnzi_b_ARG2)(
+; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: bmnzi.b [[R3]], [[R4]], 25
+; CHECK-DAG: st.b [[R3]], 0(
; CHECK: .size llvm_mips_bmnzi_b_test
-;
-@llvm_mips_bmzi_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+
+@llvm_mips_bmzi_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+@llvm_mips_bmzi_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_bmzi_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
define void @llvm_mips_bmzi_b_test() nounwind {
entry:
%0 = load <16 x i8>* @llvm_mips_bmzi_b_ARG1
- %1 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, i32 25)
- store <16 x i8> %1, <16 x i8>* @llvm_mips_bmzi_b_RES
+ %1 = load <16 x i8>* @llvm_mips_bmzi_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.bmzi.b(<16 x i8> %0, <16 x i8> %1, i32 25)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_bmzi_b_RES
ret void
}
-declare <16 x i8> @llvm.mips.bmzi.b(<16 x i8>, i32) nounwind
+declare <16 x i8> @llvm.mips.bmzi.b(<16 x i8>, <16 x i8>, i32) nounwind
; CHECK: llvm_mips_bmzi_b_test:
-; CHECK: ld.b
-; CHECK: bmzi.b
-; CHECK: st.b
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmzi_b_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmzi_b_ARG2)(
+; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]])
+; bmnzi.b is the same as bmzi.b with ws and wd_in swapped
+; CHECK-DAG: bmnzi.b [[R4]], [[R3]], 25
+; CHECK-DAG: st.b [[R4]], 0(
; CHECK: .size llvm_mips_bmzi_b_test
-;
-@llvm_mips_bseli_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
+
+@llvm_mips_bseli_b_ARG1 = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
+@llvm_mips_bseli_b_ARG2 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_bseli_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
define void @llvm_mips_bseli_b_test() nounwind {
entry:
%0 = load <16 x i8>* @llvm_mips_bseli_b_ARG1
- %1 = tail call <16 x i8> @llvm.mips.bseli.b(<16 x i8> %0, <16 x i8> %0, i32 25)
- store <16 x i8> %1, <16 x i8>* @llvm_mips_bseli_b_RES
+ %1 = load <16 x i8>* @llvm_mips_bseli_b_ARG2
+ %2 = tail call <16 x i8> @llvm.mips.bseli.b(<16 x i8> %0, <16 x i8> %1, i32 25)
+ store <16 x i8> %2, <16 x i8>* @llvm_mips_bseli_b_RES
ret void
}
declare <16 x i8> @llvm.mips.bseli.b(<16 x i8>, <16 x i8>, i32) nounwind
; CHECK: llvm_mips_bseli_b_test:
-; CHECK: ld.b
-; CHECK: bseli.b
-; CHECK: st.b
+; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bseli_b_ARG1)(
+; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bseli_b_ARG2)(
+; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]])
+; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]])
+; CHECK-DAG: bseli.b [[R3]], [[R4]], 25
+; CHECK-DAG: st.b [[R3]], 0(
; CHECK: .size llvm_mips_bseli_b_test
-;
+
@llvm_mips_nori_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_nori_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
diff --git a/test/CodeGen/Mips/msa/vec.ll b/test/CodeGen/Mips/msa/vec.ll
index c26144e..5bddf5a 100644
--- a/test/CodeGen/Mips/msa/vec.ll
+++ b/test/CodeGen/Mips/msa/vec.ll
@@ -163,280 +163,372 @@ entry:
;
@llvm_mips_bmnz_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_bmnz_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bmnz_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_bmnz_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
define void @llvm_mips_bmnz_v_b_test() nounwind {
entry:
%0 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG1
%1 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG2
- %2 = bitcast <16 x i8> %0 to <16 x i8>
- %3 = bitcast <16 x i8> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <16 x i8>
- store <16 x i8> %5, <16 x i8>* @llvm_mips_bmnz_v_b_RES
+ %2 = load <16 x i8>* @llvm_mips_bmnz_v_b_ARG3
+ %3 = bitcast <16 x i8> %0 to <16 x i8>
+ %4 = bitcast <16 x i8> %1 to <16 x i8>
+ %5 = bitcast <16 x i8> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <16 x i8>
+ store <16 x i8> %7, <16 x i8>* @llvm_mips_bmnz_v_b_RES
ret void
}
; ANYENDIAN: llvm_mips_bmnz_v_b_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bmnz.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_b_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R4]], 0(
; ANYENDIAN: .size llvm_mips_bmnz_v_b_test
-;
+
@llvm_mips_bmnz_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
@llvm_mips_bmnz_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bmnz_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
@llvm_mips_bmnz_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
define void @llvm_mips_bmnz_v_h_test() nounwind {
entry:
%0 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG1
%1 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG2
- %2 = bitcast <8 x i16> %0 to <16 x i8>
- %3 = bitcast <8 x i16> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <8 x i16>
- store <8 x i16> %5, <8 x i16>* @llvm_mips_bmnz_v_h_RES
+ %2 = load <8 x i16>* @llvm_mips_bmnz_v_h_ARG3
+ %3 = bitcast <8 x i16> %0 to <16 x i8>
+ %4 = bitcast <8 x i16> %1 to <16 x i8>
+ %5 = bitcast <8 x i16> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <8 x i16>
+ store <8 x i16> %7, <8 x i16>* @llvm_mips_bmnz_v_h_RES
ret void
}
; ANYENDIAN: llvm_mips_bmnz_v_h_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bmnz.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_h_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_h_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_h_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R4]], 0(
; ANYENDIAN: .size llvm_mips_bmnz_v_h_test
-;
+
@llvm_mips_bmnz_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@llvm_mips_bmnz_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bmnz_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@llvm_mips_bmnz_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
define void @llvm_mips_bmnz_v_w_test() nounwind {
entry:
%0 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG1
%1 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG2
- %2 = bitcast <4 x i32> %0 to <16 x i8>
- %3 = bitcast <4 x i32> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <4 x i32>
- store <4 x i32> %5, <4 x i32>* @llvm_mips_bmnz_v_w_RES
+ %2 = load <4 x i32>* @llvm_mips_bmnz_v_w_ARG3
+ %3 = bitcast <4 x i32> %0 to <16 x i8>
+ %4 = bitcast <4 x i32> %1 to <16 x i8>
+ %5 = bitcast <4 x i32> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <4 x i32>
+ store <4 x i32> %7, <4 x i32>* @llvm_mips_bmnz_v_w_RES
ret void
}
; ANYENDIAN: llvm_mips_bmnz_v_w_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bmnz.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_w_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R4]], 0(
; ANYENDIAN: .size llvm_mips_bmnz_v_w_test
-;
+
@llvm_mips_bmnz_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
@llvm_mips_bmnz_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bmnz_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16
@llvm_mips_bmnz_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
define void @llvm_mips_bmnz_v_d_test() nounwind {
entry:
%0 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG1
%1 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG2
- %2 = bitcast <2 x i64> %0 to <16 x i8>
- %3 = bitcast <2 x i64> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <2 x i64>
- store <2 x i64> %5, <2 x i64>* @llvm_mips_bmnz_v_d_RES
+ %2 = load <2 x i64>* @llvm_mips_bmnz_v_d_ARG3
+ %3 = bitcast <2 x i64> %0 to <16 x i8>
+ %4 = bitcast <2 x i64> %1 to <16 x i8>
+ %5 = bitcast <2 x i64> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bmnz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <2 x i64>
+ store <2 x i64> %7, <2 x i64>* @llvm_mips_bmnz_v_d_RES
ret void
}
; ANYENDIAN: llvm_mips_bmnz_v_d_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bmnz.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmnz_v_d_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmnz_v_d_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmnz_v_d_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; ANYENDIAN-DAG: bmnz.v [[R4]], [[R5]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R4]], 0(
; ANYENDIAN: .size llvm_mips_bmnz_v_d_test
-;
+
@llvm_mips_bmz_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_bmz_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bmz_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_bmz_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
define void @llvm_mips_bmz_v_b_test() nounwind {
entry:
%0 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG1
%1 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG2
- %2 = bitcast <16 x i8> %0 to <16 x i8>
- %3 = bitcast <16 x i8> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <16 x i8>
- store <16 x i8> %5, <16 x i8>* @llvm_mips_bmz_v_b_RES
+ %2 = load <16 x i8>* @llvm_mips_bmz_v_b_ARG3
+ %3 = bitcast <16 x i8> %0 to <16 x i8>
+ %4 = bitcast <16 x i8> %1 to <16 x i8>
+ %5 = bitcast <16 x i8> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <16 x i8>
+ store <16 x i8> %7, <16 x i8>* @llvm_mips_bmz_v_b_RES
ret void
}
; ANYENDIAN: llvm_mips_bmz_v_b_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bmz.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_b_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_b_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_b_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; bmnz.v is the same as bmz.v with ws and wd_in swapped
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
; ANYENDIAN: .size llvm_mips_bmz_v_b_test
-;
+
@llvm_mips_bmz_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
@llvm_mips_bmz_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bmz_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
@llvm_mips_bmz_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
define void @llvm_mips_bmz_v_h_test() nounwind {
entry:
%0 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG1
%1 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG2
- %2 = bitcast <8 x i16> %0 to <16 x i8>
- %3 = bitcast <8 x i16> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <8 x i16>
- store <8 x i16> %5, <8 x i16>* @llvm_mips_bmz_v_h_RES
+ %2 = load <8 x i16>* @llvm_mips_bmz_v_h_ARG3
+ %3 = bitcast <8 x i16> %0 to <16 x i8>
+ %4 = bitcast <8 x i16> %1 to <16 x i8>
+ %5 = bitcast <8 x i16> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <8 x i16>
+ store <8 x i16> %7, <8 x i16>* @llvm_mips_bmz_v_h_RES
ret void
}
; ANYENDIAN: llvm_mips_bmz_v_h_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bmz.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_h_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_h_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_h_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; bmnz.v is the same as bmz.v with ws and wd_in swapped
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
; ANYENDIAN: .size llvm_mips_bmz_v_h_test
-;
+
@llvm_mips_bmz_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@llvm_mips_bmz_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bmz_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@llvm_mips_bmz_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
define void @llvm_mips_bmz_v_w_test() nounwind {
entry:
%0 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG1
%1 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG2
- %2 = bitcast <4 x i32> %0 to <16 x i8>
- %3 = bitcast <4 x i32> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <4 x i32>
- store <4 x i32> %5, <4 x i32>* @llvm_mips_bmz_v_w_RES
+ %2 = load <4 x i32>* @llvm_mips_bmz_v_w_ARG3
+ %3 = bitcast <4 x i32> %0 to <16 x i8>
+ %4 = bitcast <4 x i32> %1 to <16 x i8>
+ %5 = bitcast <4 x i32> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <4 x i32>
+ store <4 x i32> %7, <4 x i32>* @llvm_mips_bmz_v_w_RES
ret void
}
; ANYENDIAN: llvm_mips_bmz_v_w_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bmz.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_w_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; bmnz.v is the same as bmz.v with ws and wd_in swapped
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
; ANYENDIAN: .size llvm_mips_bmz_v_w_test
-;
+
@llvm_mips_bmz_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
@llvm_mips_bmz_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bmz_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16
@llvm_mips_bmz_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
define void @llvm_mips_bmz_v_d_test() nounwind {
entry:
%0 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG1
%1 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG2
- %2 = bitcast <2 x i64> %0 to <16 x i8>
- %3 = bitcast <2 x i64> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <2 x i64>
- store <2 x i64> %5, <2 x i64>* @llvm_mips_bmz_v_d_RES
+ %2 = load <2 x i64>* @llvm_mips_bmz_v_d_ARG3
+ %3 = bitcast <2 x i64> %0 to <16 x i8>
+ %4 = bitcast <2 x i64> %1 to <16 x i8>
+ %5 = bitcast <2 x i64> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bmz.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <2 x i64>
+ store <2 x i64> %7, <2 x i64>* @llvm_mips_bmz_v_d_RES
ret void
}
; ANYENDIAN: llvm_mips_bmz_v_d_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bmz.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bmz_v_d_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; The compiler emits bmnz.v here: bmnz.v equals bmz.v with the ws and wd_in operands swapped
+; ANYENDIAN-DAG: bmnz.v [[R5]], [[R4]], [[R6]]
+; ANYENDIAN-DAG: st.b [[R5]], 0(
; ANYENDIAN: .size llvm_mips_bmz_v_d_test
-;
+
@llvm_mips_bsel_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_bsel_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
+@llvm_mips_bsel_v_b_ARG3 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_bsel_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
define void @llvm_mips_bsel_v_b_test() nounwind {
entry:
%0 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG1
%1 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG2
- %2 = bitcast <16 x i8> %0 to <16 x i8>
- %3 = bitcast <16 x i8> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <16 x i8>
- store <16 x i8> %5, <16 x i8>* @llvm_mips_bsel_v_b_RES
+ %2 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG3
+ %3 = bitcast <16 x i8> %0 to <16 x i8>
+ %4 = bitcast <16 x i8> %1 to <16 x i8>
+ %5 = bitcast <16 x i8> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <16 x i8>
+ store <16 x i8> %7, <16 x i8>* @llvm_mips_bsel_v_b_RES
ret void
}
; ANYENDIAN: llvm_mips_bsel_v_b_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bsel.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_b_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; The compiler emits bmnz.v here: bmnz.v equals bsel.v with the wt and wd_in operands swapped
+; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R6]], 0(
; ANYENDIAN: .size llvm_mips_bsel_v_b_test
-;
+
@llvm_mips_bsel_v_h_ARG1 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
@llvm_mips_bsel_v_h_ARG2 = global <8 x i16> <i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, align 16
+@llvm_mips_bsel_v_h_ARG3 = global <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, align 16
@llvm_mips_bsel_v_h_RES = global <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, align 16
define void @llvm_mips_bsel_v_h_test() nounwind {
entry:
%0 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG1
%1 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG2
- %2 = bitcast <8 x i16> %0 to <16 x i8>
- %3 = bitcast <8 x i16> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <8 x i16>
- store <8 x i16> %5, <8 x i16>* @llvm_mips_bsel_v_h_RES
+ %2 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG3
+ %3 = bitcast <8 x i16> %0 to <16 x i8>
+ %4 = bitcast <8 x i16> %1 to <16 x i8>
+ %5 = bitcast <8 x i16> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <8 x i16>
+ store <8 x i16> %7, <8 x i16>* @llvm_mips_bsel_v_h_RES
ret void
}
; ANYENDIAN: llvm_mips_bsel_v_h_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bsel.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_h_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; The compiler emits bmnz.v here: bmnz.v equals bsel.v with the wt and wd_in operands swapped
+; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R6]], 0(
; ANYENDIAN: .size llvm_mips_bsel_v_h_test
-;
+
@llvm_mips_bsel_v_w_ARG1 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@llvm_mips_bsel_v_w_ARG2 = global <4 x i32> <i32 4, i32 5, i32 6, i32 7>, align 16
+@llvm_mips_bsel_v_w_ARG3 = global <4 x i32> <i32 0, i32 1, i32 2, i32 3>, align 16
@llvm_mips_bsel_v_w_RES = global <4 x i32> <i32 0, i32 0, i32 0, i32 0>, align 16
define void @llvm_mips_bsel_v_w_test() nounwind {
entry:
%0 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG1
%1 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG2
- %2 = bitcast <4 x i32> %0 to <16 x i8>
- %3 = bitcast <4 x i32> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <4 x i32>
- store <4 x i32> %5, <4 x i32>* @llvm_mips_bsel_v_w_RES
+ %2 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG3
+ %3 = bitcast <4 x i32> %0 to <16 x i8>
+ %4 = bitcast <4 x i32> %1 to <16 x i8>
+ %5 = bitcast <4 x i32> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <4 x i32>
+ store <4 x i32> %7, <4 x i32>* @llvm_mips_bsel_v_w_RES
ret void
}
; ANYENDIAN: llvm_mips_bsel_v_w_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bsel.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_w_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; The compiler emits bmnz.v here: bmnz.v equals bsel.v with the wt and wd_in operands swapped
+; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R6]], 0(
; ANYENDIAN: .size llvm_mips_bsel_v_w_test
-;
+
@llvm_mips_bsel_v_d_ARG1 = global <2 x i64> <i64 0, i64 1>, align 16
@llvm_mips_bsel_v_d_ARG2 = global <2 x i64> <i64 2, i64 3>, align 16
+@llvm_mips_bsel_v_d_ARG3 = global <2 x i64> <i64 0, i64 1>, align 16
@llvm_mips_bsel_v_d_RES = global <2 x i64> <i64 0, i64 0>, align 16
define void @llvm_mips_bsel_v_d_test() nounwind {
entry:
%0 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG1
%1 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG2
- %2 = bitcast <2 x i64> %0 to <16 x i8>
- %3 = bitcast <2 x i64> %1 to <16 x i8>
- %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3)
- %5 = bitcast <16 x i8> %4 to <2 x i64>
- store <2 x i64> %5, <2 x i64>* @llvm_mips_bsel_v_d_RES
+ %2 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG3
+ %3 = bitcast <2 x i64> %0 to <16 x i8>
+ %4 = bitcast <2 x i64> %1 to <16 x i8>
+ %5 = bitcast <2 x i64> %2 to <16 x i8>
+ %6 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %3, <16 x i8> %4, <16 x i8> %5)
+ %7 = bitcast <16 x i8> %6 to <2 x i64>
+ store <2 x i64> %7, <2 x i64>* @llvm_mips_bsel_v_d_RES
ret void
}
; ANYENDIAN: llvm_mips_bsel_v_d_test:
-; ANYENDIAN: ld.b
-; ANYENDIAN: ld.b
-; ANYENDIAN: bsel.v
-; ANYENDIAN: st.b
+; ANYENDIAN-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG1)(
+; ANYENDIAN-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG2)(
+; ANYENDIAN-DAG: lw [[R3:\$[0-9]+]], %got(llvm_mips_bsel_v_d_ARG3)(
+; ANYENDIAN-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R1]])
+; ANYENDIAN-DAG: ld.b [[R5:\$w[0-9]+]], 0([[R2]])
+; ANYENDIAN-DAG: ld.b [[R6:\$w[0-9]+]], 0([[R3]])
+; The compiler emits bmnz.v here: bmnz.v equals bsel.v with the wt and wd_in operands swapped
+; ANYENDIAN-DAG: bmnz.v [[R6]], [[R5]], [[R4]]
+; ANYENDIAN-DAG: st.b [[R6]], 0(
; ANYENDIAN: .size llvm_mips_bsel_v_d_test
-;
+
@llvm_mips_nor_v_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16
@llvm_mips_nor_v_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16
@llvm_mips_nor_v_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16
@@ -846,8 +938,8 @@ entry:
; CHECK: .size xor_v_d_test
;
declare <16 x i8> @llvm.mips.and.v(<16 x i8>, <16 x i8>) nounwind
-declare <16 x i8> @llvm.mips.bmnz.v(<16 x i8>, <16 x i8>) nounwind
-declare <16 x i8> @llvm.mips.bmz.v(<16 x i8>, <16 x i8>) nounwind
+declare <16 x i8> @llvm.mips.bmnz.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
+declare <16 x i8> @llvm.mips.bmz.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
declare <16 x i8> @llvm.mips.bsel.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
declare <16 x i8> @llvm.mips.nor.v(<16 x i8>, <16 x i8>) nounwind
declare <16 x i8> @llvm.mips.or.v(<16 x i8>, <16 x i8>) nounwind