diff options
author | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-11-15 12:56:49 +0000 |
---|---|---|
committer | Daniel Sanders <daniel.sanders@imgtec.com> | 2013-11-15 12:56:49 +0000 |
commit | ea28aafa83fc2b6dd632041278c9a18e5a2b2b41 (patch) | |
tree | dc86c580e1a9b669264065be2c0fa94ca4da0d02 /test/CodeGen/Mips/msa | |
parent | 4d748eb0e4b55262619305c96a89c55c30bffe6c (diff) | |
download | external_llvm-ea28aafa83fc2b6dd632041278c9a18e5a2b2b41.zip external_llvm-ea28aafa83fc2b6dd632041278c9a18e5a2b2b41.tar.gz external_llvm-ea28aafa83fc2b6dd632041278c9a18e5a2b2b41.tar.bz2 |
Fix illegal DAG produced by SelectionDAG::getConstant() for v2i64 type
Summary:
When getConstant() is called for an expanded vector type, it is split into
multiple scalar constants which are then combined using appropriate build_vector
and bitcast operations.
In addition to the usual big/little endian differences, the case where the
element-order of the vector does not have the same endianness as the elements
themselves is also accounted for. For example, for v4i32 on big-endian MIPS,
the byte-order of the vector is <3210,7654,BA98,FEDC>. For little-endian, it is
<0123,4567,89AB,CDEF>.
Handling this case turns out to be a nop since getConstant() returns a splatted
vector (so reversing the element order doesn't change the value).
This fixes a number of cases in MIPS MSA where calling getConstant() during
operation legalization introduces illegal types (e.g. to legalize v2i64 UNDEF
into a v2i64 BUILD_VECTOR of illegal i64 zeros). It should also handle bigger
differences between illegal and legal types such as legalizing v2i64 into v8i16.
lowerMSASplatImm() in the MIPS backend no longer needs to avoid calling
getConstant() so this function has been updated in the same patch.
For the sake of transparency, the steps I've taken since the review are:
* Added 'virtual' to isVectorEltOrderLittleEndian() as requested. This revealed
that the MIPS tests were falsely passing because a polymorphic function was
not actually polymorphic in the reviewed patch.
* Fixed the tests that were now failing. This involved deleting the code to
handle the MIPS MSA element-order (which was previously doing a byte-order
swap instead of an element-order swap). This left
isVectorEltOrderLittleEndian() unused and it was deleted.
* Fixed build failures caused by rebasing beyond r194467-r194472. These build
failures involved the bset, bneg, and bclr instructions added in these commits
using lowerMSASplatImm() in a way that was no longer valid after this patch.
Some of these were fixed by calling SelectionDAG::getConstant() instead,
others were fixed by a new function getBuildVectorSplat() that provided the
removed functionality of lowerMSASplatImm() in a more sensible way.
Reviewers: bkramer
Reviewed By: bkramer
CC: llvm-commits
Differential Revision: http://llvm-reviews.chandlerc.com/D1973
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@194811 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'test/CodeGen/Mips/msa')
-rw-r--r-- | test/CodeGen/Mips/msa/3r-b.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/bit.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/elm_shift_slide.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/i5-a.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/i5-b.ll | 1 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/i5-c.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/i5-m.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/i5-s.ll | 4 | ||||
-rw-r--r-- | test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll | 134 |
9 files changed, 134 insertions, 26 deletions
diff --git a/test/CodeGen/Mips/msa/3r-b.ll b/test/CodeGen/Mips/msa/3r-b.ll index 7c45b2b..a05d19b 100644 --- a/test/CodeGen/Mips/msa/3r-b.ll +++ b/test/CodeGen/Mips/msa/3r-b.ll @@ -3,7 +3,6 @@ ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s -; XFAIL: * @llvm_mips_bclr_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_bclr_b_ARG2 = global <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, align 16 diff --git a/test/CodeGen/Mips/msa/bit.ll b/test/CodeGen/Mips/msa/bit.ll index dc8bb8f..59ddbe1 100644 --- a/test/CodeGen/Mips/msa/bit.ll +++ b/test/CodeGen/Mips/msa/bit.ll @@ -1,7 +1,3 @@ -; Both endians should emit the same output for immediate instructions. -; This is not currently true. -; XFAIL: * - ; Test the MSA intrinsics that are encoded with the BIT instruction format. ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s diff --git a/test/CodeGen/Mips/msa/elm_shift_slide.ll b/test/CodeGen/Mips/msa/elm_shift_slide.ll index 3b3504b..39d670d 100644 --- a/test/CodeGen/Mips/msa/elm_shift_slide.ll +++ b/test/CodeGen/Mips/msa/elm_shift_slide.ll @@ -1,7 +1,3 @@ -; Both endians should emit the same output for immediate instructions. -; This is not currently true. -; XFAIL: * - ; Test the MSA intrinsics that are encoded with the ELM instruction format and ; are either shifts or slides. diff --git a/test/CodeGen/Mips/msa/i5-a.ll b/test/CodeGen/Mips/msa/i5-a.ll index f45df07..0b50720 100644 --- a/test/CodeGen/Mips/msa/i5-a.ll +++ b/test/CodeGen/Mips/msa/i5-a.ll @@ -1,7 +1,3 @@ -; Both endians should emit the same output for immediate instructions. -; This is not currently true. -; XFAIL: * - ; Test the MSA intrinsics that are encoded with the I5 instruction format. 
; There are lots of these so this covers those beginning with 'a' diff --git a/test/CodeGen/Mips/msa/i5-b.ll b/test/CodeGen/Mips/msa/i5-b.ll index f4477a0..da6be66 100644 --- a/test/CodeGen/Mips/msa/i5-b.ll +++ b/test/CodeGen/Mips/msa/i5-b.ll @@ -3,7 +3,6 @@ ; RUN: llc -march=mips -mattr=+msa,+fp64 < %s | FileCheck %s ; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s | FileCheck %s -; XFAIL: * @llvm_mips_bclri_b_ARG1 = global <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, align 16 @llvm_mips_bclri_b_RES = global <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, align 16 diff --git a/test/CodeGen/Mips/msa/i5-c.ll b/test/CodeGen/Mips/msa/i5-c.ll index d63b3cc..bf1578f 100644 --- a/test/CodeGen/Mips/msa/i5-c.ll +++ b/test/CodeGen/Mips/msa/i5-c.ll @@ -1,7 +1,3 @@ -; Both endians should emit the same output for immediate instructions. -; This is not currently true. -; XFAIL: * - ; Test the MSA intrinsics that are encoded with the I5 instruction format. ; There are lots of these so this covers those beginning with 'c' diff --git a/test/CodeGen/Mips/msa/i5-m.ll b/test/CodeGen/Mips/msa/i5-m.ll index 74e698b..2766349 100644 --- a/test/CodeGen/Mips/msa/i5-m.ll +++ b/test/CodeGen/Mips/msa/i5-m.ll @@ -1,7 +1,3 @@ -; Both endians should emit the same output for immediate instructions. -; This is not currently true. -; XFAIL: * - ; Test the MSA intrinsics that are encoded with the I5 instruction format. ; There are lots of these so this covers those beginning with 'm' diff --git a/test/CodeGen/Mips/msa/i5-s.ll b/test/CodeGen/Mips/msa/i5-s.ll index 60ba8e1..184172f 100644 --- a/test/CodeGen/Mips/msa/i5-s.ll +++ b/test/CodeGen/Mips/msa/i5-s.ll @@ -1,7 +1,3 @@ -; Both endians should emit the same output for immediate instructions. -; This is not currently true. -; XFAIL: * - ; Test the MSA intrinsics that are encoded with the I5 instruction format. 
; There are lots of these so this covers those beginning with 's' diff --git a/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll new file mode 100644 index 0000000..4beaaa9 --- /dev/null +++ b/test/CodeGen/Mips/msa/llvm-stress-s1704963983.ll @@ -0,0 +1,134 @@ +; RUN: llc -march=mips < %s +; RUN: llc -march=mips -mattr=+msa,+fp64 < %s +; RUN: llc -march=mipsel < %s +; RUN: llc -march=mipsel -mattr=+msa,+fp64 < %s + +; This test originally failed for MSA with a +; "Unexpected illegal type!" assertion. +; It should at least successfully build. + +define void @autogen_SD1704963983(i8*, i32*, i64*, i32, i64, i8) { +BB: + %A4 = alloca <4 x double> + %A3 = alloca <8 x i64> + %A2 = alloca <1 x double> + %A1 = alloca double + %A = alloca i32 + %L = load i8* %0 + store i8 77, i8* %0 + %E = extractelement <8 x i64> zeroinitializer, i32 2 + %Shuff = shufflevector <8 x i64> zeroinitializer, <8 x i64> zeroinitializer, <8 x i32> <i32 5, i32 7, i32 undef, i32 undef, i32 13, i32 15, i32 1, i32 3> + %I = insertelement <8 x i64> zeroinitializer, i64 %E, i32 7 + %Sl = select i1 false, i8* %0, i8* %0 + %Cmp = icmp eq i32 434069, 272505 + br label %CF + +CF: ; preds = %CF, %CF78, %BB + %L5 = load i8* %Sl + store i8 %L, i8* %Sl + %E6 = extractelement <8 x i32> zeroinitializer, i32 2 + %Shuff7 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef> + %I8 = insertelement <8 x i64> zeroinitializer, i64 %4, i32 7 + %B = shl <1 x i16> zeroinitializer, zeroinitializer + %FC = sitofp <8 x i64> zeroinitializer to <8 x float> + %Sl9 = select i1 %Cmp, i8 77, i8 77 + %Cmp10 = icmp uge <8 x i64> %Shuff, zeroinitializer + %L11 = load i8* %0 + store i8 %Sl9, i8* %0 + %E12 = extractelement <1 x i16> zeroinitializer, i32 0 + %Shuff13 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 undef, i32 3, i32 5, i32 7> + %I14 = 
insertelement <4 x i32> zeroinitializer, i32 %3, i32 3 + %B15 = udiv <1 x i16> %B, zeroinitializer + %Tr = trunc <8 x i64> %Shuff to <8 x i32> + %Sl16 = select i1 %Cmp, i8 77, i8 %5 + %Cmp17 = icmp ult <8 x i1> %Cmp10, %Cmp10 + %L18 = load i8* %Sl + store i8 -1, i8* %Sl + %E19 = extractelement <8 x i32> zeroinitializer, i32 3 + %Shuff20 = shufflevector <8 x float> %FC, <8 x float> %FC, <8 x i32> <i32 6, i32 8, i32 undef, i32 12, i32 14, i32 0, i32 2, i32 undef> + %I21 = insertelement <8 x i64> %Shuff13, i64 %E, i32 0 + %B22 = urem <8 x i64> %Shuff7, %I21 + %FC23 = sitofp i32 50347 to float + %Sl24 = select i1 %Cmp, double 0.000000e+00, double 0.000000e+00 + %Cmp25 = icmp ugt i32 465489, 47533 + br i1 %Cmp25, label %CF, label %CF78 + +CF78: ; preds = %CF + %L26 = load i8* %Sl + store i32 50347, i32* %A + %E27 = extractelement <8 x i1> %Cmp10, i32 2 + br i1 %E27, label %CF, label %CF77 + +CF77: ; preds = %CF77, %CF81, %CF78 + %Shuff28 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff, <8 x i32> <i32 13, i32 15, i32 1, i32 3, i32 5, i32 7, i32 9, i32 undef> + %I29 = insertelement <1 x i16> zeroinitializer, i16 -1, i32 0 + %B30 = urem <8 x i32> %Tr, zeroinitializer + %Tr31 = trunc i32 0 to i16 + %Sl32 = select i1 %Cmp, <2 x i1> zeroinitializer, <2 x i1> zeroinitializer + %L33 = load i8* %Sl + store i8 %L26, i8* %Sl + %E34 = extractelement <4 x i32> zeroinitializer, i32 0 + %Shuff35 = shufflevector <1 x i16> zeroinitializer, <1 x i16> %B, <1 x i32> undef + %I36 = insertelement <8 x i64> %Shuff28, i64 %E, i32 7 + %B37 = srem <1 x i16> %I29, zeroinitializer + %FC38 = sitofp <8 x i32> %B30 to <8 x double> + %Sl39 = select i1 %Cmp, double 0.000000e+00, double %Sl24 + %L40 = load i8* %Sl + store i8 %Sl16, i8* %Sl + %E41 = extractelement <1 x i16> zeroinitializer, i32 0 + %Shuff42 = shufflevector <8 x i1> %Cmp17, <8 x i1> %Cmp10, <8 x i32> <i32 14, i32 undef, i32 2, i32 4, i32 undef, i32 8, i32 10, i32 12> + %I43 = insertelement <4 x i32> zeroinitializer, i32 
272505, i32 0 + %B44 = urem <8 x i32> %B30, %Tr + %PC = bitcast i8* %0 to i64* + %Sl45 = select i1 %Cmp, <8 x i1> %Cmp10, <8 x i1> %Shuff42 + %Cmp46 = fcmp ugt float 0xB856238A00000000, 0x47DA795E40000000 + br i1 %Cmp46, label %CF77, label %CF80 + +CF80: ; preds = %CF80, %CF77 + %L47 = load i64* %PC + store i8 77, i8* %Sl + %E48 = extractelement <8 x i64> zeroinitializer, i32 2 + %Shuff49 = shufflevector <8 x i64> zeroinitializer, <8 x i64> %Shuff7, <8 x i32> <i32 5, i32 7, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 3> + %I50 = insertelement <8 x i64> zeroinitializer, i64 %L47, i32 7 + %B51 = fdiv float 0x46CC2D8000000000, %FC23 + %PC52 = bitcast <8 x i64>* %A3 to i64* + %Sl53 = select i1 %Cmp, <8 x i64> %Shuff, <8 x i64> %Shuff + %Cmp54 = fcmp ole float 0x47DA795E40000000, 0xB856238A00000000 + br i1 %Cmp54, label %CF80, label %CF81 + +CF81: ; preds = %CF80 + %L55 = load i8* %Sl + store i8 %Sl16, i8* %Sl + %E56 = extractelement <1 x i16> %B, i32 0 + %Shuff57 = shufflevector <1 x i16> zeroinitializer, <1 x i16> zeroinitializer, <1 x i32> <i32 1> + %I58 = insertelement <8 x i64> zeroinitializer, i64 %L47, i32 7 + %B59 = srem i32 %E19, %E19 + %Sl60 = select i1 %Cmp, i8 77, i8 77 + %Cmp61 = icmp ult <1 x i16> zeroinitializer, %B + %L62 = load i8* %Sl + store i64 %L47, i64* %PC52 + %E63 = extractelement <4 x i32> %I43, i32 2 + %Shuff64 = shufflevector <4 x i1> zeroinitializer, <4 x i1> zeroinitializer, <4 x i32> <i32 undef, i32 undef, i32 1, i32 3> + %I65 = insertelement <8 x i64> %B22, i64 %L47, i32 7 + %B66 = add <8 x i64> %I50, %I65 + %FC67 = uitofp i16 %E12 to float + %Sl68 = select i1 %Cmp, <8 x i32> %B30, <8 x i32> zeroinitializer + %Cmp69 = fcmp ord double 0.000000e+00, 0.000000e+00 + br i1 %Cmp69, label %CF77, label %CF79 + +CF79: ; preds = %CF81 + %L70 = load i32* %A + store i64 %4, i64* %PC + %E71 = extractelement <4 x i32> zeroinitializer, i32 0 + %Shuff72 = shufflevector <8 x i32> zeroinitializer, <8 x i32> %B44, <8 x i32> <i32 11, i32 undef, i32 
15, i32 1, i32 3, i32 undef, i32 7, i32 9> + %I73 = insertelement <8 x i16> zeroinitializer, i16 %E12, i32 5 + %B74 = fsub double 0.000000e+00, 0.000000e+00 + %Sl75 = select i1 %Cmp46, i32 %E6, i32 %E19 + %Cmp76 = icmp ugt <4 x i32> %I43, zeroinitializer + store i8 %L, i8* %Sl + store i64 %L47, i64* %PC + store i64 %L47, i64* %PC + store i8 %L5, i8* %Sl + store i8 %L5, i8* %0 + ret void +} |