diff options
author | Bob Wilson <bob.wilson@apple.com> | 2010-08-29 05:57:34 +0000 |
---|---|---|
committer | Bob Wilson <bob.wilson@apple.com> | 2010-08-29 05:57:34 +0000 |
commit | 04d6c289ab28114af5471c4dc38cbf7b7127d3c3 (patch) | |
tree | 60f08d20f5b2177290252000aa305b07a43930d8 /lib | |
parent | 63a75c13b110b89435ee5e30f1ce9ed57019ac5a (diff) | |
download | external_llvm-04d6c289ab28114af5471c4dc38cbf7b7127d3c3.zip external_llvm-04d6c289ab28114af5471c4dc38cbf7b7127d3c3.tar.gz external_llvm-04d6c289ab28114af5471c4dc38cbf7b7127d3c3.tar.bz2 |
Remove NEON vaddl, vaddw, vsubl, and vsubw intrinsics. Instead, use LLVM
IR add/sub operations with one or both operands sign- or zero-extended.
Auto-upgrade the old intrinsics.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112416 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Target/ARM/ARMInstrNEON.td | 91 | ||||
-rw-r--r-- | lib/VMCore/AutoUpgrade.cpp | 34 |
2 files changed, 93 insertions, 32 deletions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index 88d606c..741df7e 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ b/lib/Target/ARM/ARMInstrNEON.td @@ -1294,6 +1294,19 @@ class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let isCommutable = Commutable; } +// Long 3-register operations. +class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType TyQ, ValueType TyD, SDNode OpNode, SDNode ExtOp, + bit Commutable> + : N3V<op24, op23, op21_20, op11_8, 0, op4, + (outs QPR:$dst), (ins DPR:$src1, DPR:$src2), N3RegFrm, itin, + OpcodeStr, Dt, "$dst, $src1, $src2", "", + [(set QPR:$dst, (OpNode (TyQ (ExtOp (TyD DPR:$src1))), + (TyQ (ExtOp (TyD DPR:$src2)))))]> { + let isCommutable = Commutable; +} + // Long 3-register intrinsics. class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, @@ -1325,14 +1338,15 @@ class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, (OpTy (NEONvduplane (OpTy DPR_8:$src2), imm:$lane)))))]>; -// Wide 3-register intrinsics. -class N3VWInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, - Intrinsic IntOp, bit Commutable> +// Wide 3-register operations. 
+class N3VW<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, + SDNode OpNode, SDNode ExtOp, bit Commutable> : N3V<op24, op23, op21_20, op11_8, 0, op4, (outs QPR:$dst), (ins QPR:$src1, DPR:$src2), N3RegFrm, IIC_VSUBiD, OpcodeStr, Dt, "$dst, $src1, $src2", "", - [(set QPR:$dst, (TyQ (IntOp (TyQ QPR:$src1), (TyD DPR:$src2))))]> { + [(set QPR:$dst, (OpNode (TyQ QPR:$src1), + (TyQ (ExtOp (TyD DPR:$src2)))))]> { let isCommutable = Commutable; } @@ -1684,6 +1698,23 @@ multiclass N3VNInt_HSD<bit op24, bit op23, bits<4> op11_8, bit op4, } +// Neon Long 3-register vector operations. + +multiclass N3VL_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + InstrItinClass itin16, InstrItinClass itin32, + string OpcodeStr, string Dt, + SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { + def v4i32 : N3VL<op24, op23, 0b01, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, OpNode, ExtOp, Commutable>; + def v2i64 : N3VL<op24, op23, 0b10, op11_8, op4, itin32, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, OpNode, ExtOp, Commutable>; + def v8i16 : N3VL<op24, op23, 0b00, op11_8, op4, itin16, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, OpNode, ExtOp, Commutable>; +} + // Neon Long 3-register vector intrinsics. 
// First with only element sizes of 16 and 32 bits: @@ -1723,18 +1754,18 @@ multiclass N3VLInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, // Neon Wide 3-register vector intrinsics, // source operand element sizes of 8, 16 and 32 bits: -multiclass N3VWInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, - string OpcodeStr, string Dt, - Intrinsic IntOp, bit Commutable = 0> { - def v8i16 : N3VWInt<op24, op23, 0b00, op11_8, op4, - OpcodeStr, !strconcat(Dt, "8"), - v8i16, v8i8, IntOp, Commutable>; - def v4i32 : N3VWInt<op24, op23, 0b01, op11_8, op4, - OpcodeStr, !strconcat(Dt, "16"), - v4i32, v4i16, IntOp, Commutable>; - def v2i64 : N3VWInt<op24, op23, 0b10, op11_8, op4, - OpcodeStr, !strconcat(Dt, "32"), - v2i64, v2i32, IntOp, Commutable>; +multiclass N3VW_QHS<bit op24, bit op23, bits<4> op11_8, bit op4, + string OpcodeStr, string Dt, + SDNode OpNode, SDNode ExtOp, bit Commutable = 0> { + def v8i16 : N3VW<op24, op23, 0b00, op11_8, op4, + OpcodeStr, !strconcat(Dt, "8"), + v8i16, v8i8, OpNode, ExtOp, Commutable>; + def v4i32 : N3VW<op24, op23, 0b01, op11_8, op4, + OpcodeStr, !strconcat(Dt, "16"), + v4i32, v4i16, OpNode, ExtOp, Commutable>; + def v2i64 : N3VW<op24, op23, 0b10, op11_8, op4, + OpcodeStr, !strconcat(Dt, "32"), + v2i64, v2i32, OpNode, ExtOp, Commutable>; } @@ -2073,13 +2104,13 @@ def VADDfd : N3VD<0, 0, 0b00, 0b1101, 0, IIC_VBIND, "vadd", "f32", def VADDfq : N3VQ<0, 0, 0b00, 0b1101, 0, IIC_VBINQ, "vadd", "f32", v4f32, v4f32, fadd, 1>; // VADDL : Vector Add Long (Q = D + D) -defm VADDLs : N3VLInt_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "s", int_arm_neon_vaddls, 1>; -defm VADDLu : N3VLInt_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, - "vaddl", "u", int_arm_neon_vaddlu, 1>; +defm VADDLs : N3VL_QHS<0,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "s", add, sext, 1>; +defm VADDLu : N3VL_QHS<1,1,0b0000,0, IIC_VSHLiD, IIC_VSHLiD, + "vaddl", "u", add, zext, 1>; // VADDW : Vector Add Wide (Q = Q + D) -defm VADDWs : N3VWInt_QHS<0,1,0b0001,0, "vaddw", 
"s", int_arm_neon_vaddws, 0>; -defm VADDWu : N3VWInt_QHS<1,1,0b0001,0, "vaddw", "u", int_arm_neon_vaddwu, 0>; +defm VADDWs : N3VW_QHS<0,1,0b0001,0, "vaddw", "s", add, sext, 0>; +defm VADDWu : N3VW_QHS<1,1,0b0001,0, "vaddw", "u", add, zext, 0>; // VHADD : Vector Halving Add defm VHADDs : N3VInt_QHS<0, 0, 0b0000, 0, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, @@ -2324,13 +2355,13 @@ def VSUBfd : N3VD<0, 0, 0b10, 0b1101, 0, IIC_VBIND, "vsub", "f32", def VSUBfq : N3VQ<0, 0, 0b10, 0b1101, 0, IIC_VBINQ, "vsub", "f32", v4f32, v4f32, fsub, 0>; // VSUBL : Vector Subtract Long (Q = D - D) -defm VSUBLs : N3VLInt_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "s", int_arm_neon_vsubls, 1>; -defm VSUBLu : N3VLInt_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, - "vsubl", "u", int_arm_neon_vsublu, 1>; +defm VSUBLs : N3VL_QHS<0,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "s", sub, sext, 0>; +defm VSUBLu : N3VL_QHS<1,1,0b0010,0, IIC_VSHLiD, IIC_VSHLiD, + "vsubl", "u", sub, zext, 0>; // VSUBW : Vector Subtract Wide (Q = Q - D) -defm VSUBWs : N3VWInt_QHS<0,1,0b0011,0, "vsubw", "s", int_arm_neon_vsubws, 0>; -defm VSUBWu : N3VWInt_QHS<1,1,0b0011,0, "vsubw", "u", int_arm_neon_vsubwu, 0>; +defm VSUBWs : N3VW_QHS<0,1,0b0011,0, "vsubw", "s", sub, sext, 0>; +defm VSUBWu : N3VW_QHS<1,1,0b0011,0, "vsubw", "u", sub, zext, 0>; // VHSUB : Vector Halving Subtract defm VHSUBs : N3VInt_QHS<0, 0, 0b0010, 0, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -2559,7 +2590,7 @@ def VABDfq : N3VQInt<1, 0, 0b10, 0b1101, 0, N3RegFrm, IIC_VBINQ, defm VABDLs : N3VLInt_QHS<0,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, "vabdl", "s", int_arm_neon_vabdls, 0>; defm VABDLu : N3VLInt_QHS<1,1,0b0111,0, IIC_VSUBi4Q, IIC_VSUBi4Q, - "vabdl", "u", int_arm_neon_vabdlu, 0>; + "vabdl", "u", int_arm_neon_vabdlu, 0>; // VABA : Vector Absolute Difference and Accumulate defm VABAs : N3VInt3_QHS<0,0,0b0111,1, IIC_VABAD, IIC_VABAQ, diff --git a/lib/VMCore/AutoUpgrade.cpp 
b/lib/VMCore/AutoUpgrade.cpp index 052fd2d..62a4625 100644 --- a/lib/VMCore/AutoUpgrade.cpp +++ b/lib/VMCore/AutoUpgrade.cpp @@ -79,8 +79,17 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return true; } } else if (Name.compare(5, 9, "arm.neon.", 9) == 0) { - if (Name.compare(14, 7, "vmovls.", 7) == 0 || - Name.compare(14, 7, "vmovlu.", 7) == 0) { + if (((Name.compare(14, 5, "vmovl", 5) == 0 || + Name.compare(14, 5, "vaddl", 5) == 0 || + Name.compare(14, 5, "vsubl", 5) == 0) && + (Name.compare(19, 2, "s.", 2) == 0 || + Name.compare(19, 2, "u.", 2) == 0)) || + + ((Name.compare(14, 5, "vaddw", 5) == 0 || + Name.compare(14, 5, "vsubw", 5) == 0) && + (Name.compare(19, 2, "s.", 2) == 0 || + Name.compare(19, 2, "u.", 2) == 0))) { + // Calls to these are transformed into IR without intrinsics. NewFn = 0; return true; @@ -371,6 +380,27 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } else if (Name.compare(14, 7, "vmovlu.", 7) == 0) { NewI = new ZExtInst(CI->getArgOperand(0), CI->getType(), "upgraded." + CI->getName(), CI); + + } else if (Name.compare(14, 4, "vadd", 4) == 0 || + Name.compare(14, 4, "vsub", 4) == 0) { + // Extend one (vaddw/vsubw) or both (vaddl/vsubl) operands. 
+ Value *V0 = CI->getArgOperand(0); + Value *V1 = CI->getArgOperand(1); + if (Name.at(19) == 's') { + if (Name.at(18) == 'l') + V0 = new SExtInst(CI->getArgOperand(0), CI->getType(), "", CI); + V1 = new SExtInst(CI->getArgOperand(1), CI->getType(), "", CI); + } else { + assert(Name.at(19) == 'u' && "unexpected vadd/vsub intrinsic"); + if (Name.at(18) == 'l') + V0 = new ZExtInst(CI->getArgOperand(0), CI->getType(), "", CI); + V1 = new ZExtInst(CI->getArgOperand(1), CI->getType(), "", CI); + } + if (Name.compare(14, 4, "vadd", 4) == 0) + NewI = BinaryOperator::CreateAdd(V0, V1,"upgraded."+CI->getName(),CI); + else + NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI); + } else { llvm_unreachable("Unknown arm.neon function for CallInst upgrade."); } |