diff options
-rw-r--r-- | docs/LangRef.html | 29 | ||||
-rw-r--r-- | lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 27 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_anyext.ll | 77 | ||||
-rw-r--r-- | test/CodeGen/X86/vec_sext.ll | 69 |
4 files changed, 179 insertions, 23 deletions
diff --git a/docs/LangRef.html b/docs/LangRef.html index 81fa8cc..fc581f1 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -4575,12 +4575,12 @@ entry: type <tt>ty2</tt>.</p> <h5>Arguments:</h5> -<p>The '<tt>trunc</tt>' instruction takes a <tt>value</tt> to trunc, which must - be an <a href="#t_integer">integer</a> type, and a type that specifies the - size and type of the result, which must be - an <a href="#t_integer">integer</a> type. The bit size of <tt>value</tt> must - be larger than the bit size of <tt>ty2</tt>. Equal sized types are not - allowed.</p> +<p>The '<tt>trunc</tt>' instruction takes a value to trunc, and a type to trunc it to. + Both types must be of <a href="#t_integer">integer</a> types, or vectors + of the same number of integers. + The bit size of the <tt>value</tt> must be larger than + the bit size of the destination type, <tt>ty2</tt>. + Equal sized types are not allowed.</p> <h5>Semantics:</h5> <p>The '<tt>trunc</tt>' instruction truncates the high order bits @@ -4590,9 +4590,10 @@ entry: <h5>Example:</h5> <pre> - %X = trunc i32 257 to i8 <i>; yields i8:1</i> - %Y = trunc i32 123 to i1 <i>; yields i1:true</i> - %Z = trunc i32 122 to i1 <i>; yields i1:false</i> + %X = trunc i32 257 to i8 <i>; yields i8:1</i> + %Y = trunc i32 123 to i1 <i>; yields i1:true</i> + %Z = trunc i32 122 to i1 <i>; yields i1:false</i> + %W = trunc <2 x i16> <i16 8, i16 7> to <2 x i8> <i>; yields <i8 8, i8 7></i> </pre> </div> @@ -4651,10 +4652,11 @@ entry: <p>The '<tt>sext</tt>' sign extends <tt>value</tt> to the type <tt>ty2</tt>.</p> <h5>Arguments:</h5> -<p>The '<tt>sext</tt>' instruction takes a value to cast, which must be of - <a href="#t_integer">integer</a> type, and a type to cast it to, which must - also be of <a href="#t_integer">integer</a> type. The bit size of the - <tt>value</tt> must be smaller than the bit size of the destination type, +<p>The '<tt>sext</tt>' instruction takes a value to cast, and a type to cast it to. + Both types must be of <a href="#t_integer">integer</a> types, or vectors + of the same number of integers. + The bit size of the <tt>value</tt> must be smaller than + the bit size of the destination type, <tt>ty2</tt>.</p> <h5>Semantics:</h5> @@ -4668,6 +4670,7 @@ entry: <pre> %X = sext i8 -1 to i16 <i>; yields i16 :65535</i> %Y = sext i1 true to i32 <i>; yields i32:-1</i> + %Z = sext <2 x i16> <i16 8, i16 7> to <2 x i32> <i>; yields <i32 8, i32 7></i> </pre> </div> diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index c5f0324..911dbfd 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3685,7 +3685,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { } // fold (sext (load x)) -> (sext (truncate (sextload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && + // None of the supported targets knows how to perform load and sign extend + // in one instruction. We only perform this transformation on scalars. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -4096,7 +4098,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // fold (aext (load x)) -> (aext (truncate (extload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && + // None of the supported targets knows how to perform load and any_ext + // in one instruction. We only perform this transformation on scalars. + if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() && ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { bool DoXform = true; @@ -4506,14 +4510,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } // See if we can simplify the input to this truncate through knowledge that - // only the low bits are being used. For example "trunc (or (shl x, 8), y)" - // -> trunc y - SDValue Shorter = - GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), - VT.getSizeInBits())); - if (Shorter.getNode()) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); - + // only the low bits are being used. + // For example "trunc (or (shl x, 8), y)" // -> trunc y + // Currenly we only perform this optimization on scalars because vectors + // may have different active low bits. + if (!VT.isVector()) { + SDValue Shorter = + GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), + VT.getSizeInBits())); + if (Shorter.getNode()) + return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); + } // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { diff --git a/test/CodeGen/X86/vec_anyext.ll b/test/CodeGen/X86/vec_anyext.ll new file mode 100644 index 0000000..d2a4c7f --- /dev/null +++ b/test/CodeGen/X86/vec_anyext.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -march=x86-64 +; PR 9267 + +define<4 x i16> @func_16_32() { + %F = load <4 x i32>* undef + %G = trunc <4 x i32> %F to <4 x i16> + %H = load <4 x i32>* undef + %Y = trunc <4 x i32> %H to <4 x i16> + %T = add <4 x i16> %Y, %G + store <4 x i16>%T , <4 x i16>* undef + ret <4 x i16> %T +} + +define<4 x i16> @func_16_64() { + %F = load <4 x i64>* undef + %G = trunc <4 x i64> %F to <4 x i16> + %H = load <4 x i64>* undef + %Y = trunc <4 x i64> %H to <4 x i16> + %T = xor <4 x i16> %Y, %G + store <4 x i16>%T , <4 x i16>* undef + ret <4 x i16> %T +} + +define<4 x i32> @func_32_64() { + %F = load <4 x i64>* undef + %G = trunc <4 x i64> %F to <4 x i32> + %H = load <4 x i64>* undef + %Y = trunc <4 x i64> %H to <4 x i32> + %T = or <4 x i32> %Y, %G + ret <4 x i32> %T +} + +define<4 x i8> @func_8_16() { + %F = load <4 x i16>* undef + %G = trunc <4 x i16> %F to <4 x i8> + %H = load <4 x i16>* undef + %Y = trunc <4 x i16> %H to <4 x i8> + %T = add <4 x i8> %Y, %G + ret <4 x i8> %T +} + +define<4 x i8> @func_8_32() { + %F = load <4 x i32>* undef + %G = trunc <4 x i32> %F to <4 x i8> + %H = load <4 x i32>* undef + %Y = trunc <4 x i32> %H to <4 x i8> + %T = sub <4 x i8> %Y, %G + ret <4 x i8> %T +} + +define<4 x i8> @func_8_64() { + %F = load <4 x i64>* undef + %G = trunc <4 x i64> %F to <4 x i8> + %H = load <4 x i64>* undef + %Y = trunc <4 x i64> %H to <4 x i8> + %T = add <4 x i8> %Y, %G + ret <4 x i8> %T +} + +define<4 x i16> @const_16_32() { + %G = trunc <4 x i32> <i32 0, i32 3, i32 8, i32 7> to <4 x i16> + ret <4 x i16> %G +} + +define<4 x i16> @const_16_64() { + %G = trunc <4 x i64> <i64 0, i64 3, i64 8, i64 7> to <4 x i16> + ret <4 x i16> %G +} + +define void @bugOnTruncBitwidthReduce() nounwind { +meh: + %0 = xor <4 x i64> zeroinitializer, zeroinitializer + %1 = trunc <4 x i64> %0 to <4 x i32> + %2 = lshr <4 x i32> %1, <i32 18, i32 18, i32 18, i32 18> + %3 = xor <4 x i32> %2, %1 + ret void +} diff --git a/test/CodeGen/X86/vec_sext.ll b/test/CodeGen/X86/vec_sext.ll new file mode 100644 index 0000000..776ddec --- /dev/null +++ b/test/CodeGen/X86/vec_sext.ll @@ -0,0 +1,69 @@ +; RUN: llc < %s -march=x86-64 +; PR 9267 + +define<4 x i32> @func_16_32() { + %F = load <4 x i16>* undef + %G = sext <4 x i16> %F to <4 x i32> + %H = load <4 x i16>* undef + %Y = sext <4 x i16> %H to <4 x i32> + %T = add <4 x i32> %Y, %G + store <4 x i32>%T , <4 x i32>* undef + ret <4 x i32> %T +} + +define<4 x i64> @func_16_64() { + %F = load <4 x i16>* undef + %G = sext <4 x i16> %F to <4 x i64> + %H = load <4 x i16>* undef + %Y = sext <4 x i16> %H to <4 x i64> + %T = xor <4 x i64> %Y, %G + store <4 x i64>%T , <4 x i64>* undef + ret <4 x i64> %T +} + +define<4 x i64> @func_32_64() { + %F = load <4 x i32>* undef + %G = sext <4 x i32> %F to <4 x i64> + %H = load <4 x i32>* undef + %Y = sext <4 x i32> %H to <4 x i64> + %T = or <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i16> @func_8_16() { + %F = load <4 x i8>* undef + %G = sext <4 x i8> %F to <4 x i16> + %H = load <4 x i8>* undef + %Y = sext <4 x i8> %H to <4 x i16> + %T = add <4 x i16> %Y, %G + ret <4 x i16> %T +} + +define<4 x i32> @func_8_32() { + %F = load <4 x i8>* undef + %G = sext <4 x i8> %F to <4 x i32> + %H = load <4 x i8>* undef + %Y = sext <4 x i8> %H to <4 x i32> + %T = sub <4 x i32> %Y, %G + ret <4 x i32> %T +} + +define<4 x i64> @func_8_64() { + %F = load <4 x i8>* undef + %G = sext <4 x i8> %F to <4 x i64> + %H = load <4 x i8>* undef + %Y = sext <4 x i8> %H to <4 x i64> + %T = add <4 x i64> %Y, %G + ret <4 x i64> %T +} + +define<4 x i32> @const_16_32() { + %G = sext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i32> + ret <4 x i32> %G +} + +define<4 x i64> @const_16_64() { + %G = sext <4 x i16> <i16 0, i16 3, i16 8, i16 7> to <4 x i64> + ret <4 x i64> %G +} + |