Use vAny type to get rid of Neon intrinsics that differed only in whether

the overloaded vector types allowed floating-point or integer vector elements. Most of these operations actually depend on the element type, so bitcasting was not an option. If you include the vpadd intrinsics that I updated earlier, this gets rid of 20 intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78646 91177308-0d34-0410-b5e6-96231b3b80d8
author: Bob Wilson <bob.wilson@apple.com> 2009-08-11 05:39:44 +0000
committer: Bob Wilson <bob.wilson@apple.com> 2009-08-11 05:39:44 +0000
commit: 8f10b3f2521e99168e7fb89310270d821a0b00fb (patch)
tree: da1622ea7dce5183b471d7df05bcc92522bfe737 /test
parent: 7b70d4fdd784e412864279c6bf86b81f52c41719 (diff)
download: external_llvm-8f10b3f2521e99168e7fb89310270d821a0b00fb.zip
external_llvm-8f10b3f2521e99168e7fb89310270d821a0b00fb.tar.gz
external_llvm-8f10b3f2521e99168e7fb89310270d821a0b00fb.tar.bz2
19 files changed, 164 insertions, 164 deletions
diff --git a/test/CodeGen/ARM/vabd.ll b/test/CodeGen/ARM/vabd.ll
index c0497f9..e764840 100644
--- a/test/CodeGen/ARM/vabd.ll
+++ b/test/CodeGen/ARM/vabd.ll
@@ -59,7 +59,7 @@ define <2 x float> @vabdf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK: vabd.f32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
 
@@ -122,7 +122,7 @@ define <4 x float> @vabdQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: vabd.f32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = call <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	%tmp3 = call <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
 
@@ -134,7 +134,7 @@ declare <8 x i8>  @llvm.arm.neon.vabdu.v8i8(<8 x i8>, <8 x i8>) nounwind readnon
 declare <4 x i16> @llvm.arm.neon.vabdu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vabdu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 
-declare <2 x float> @llvm.arm.neon.vabdf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabds.v2f32(<2 x float>, <2 x float>) nounwind readnone
 
 declare <16 x i8> @llvm.arm.neon.vabds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vabds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@@ -144,4 +144,4 @@ declare <16 x i8> @llvm.arm.neon.vabdu.v16i8(<16 x i8>, <16 x i8>) nounwind read
 declare <8 x i16> @llvm.arm.neon.vabdu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vabdu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
-declare <4 x float> @llvm.arm.neon.vabdf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabds.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vabs.ll b/test/CodeGen/ARM/vabs.ll
index a7979ee..1195f08 100644
--- a/test/CodeGen/ARM/vabs.ll
+++ b/test/CodeGen/ARM/vabs.ll
@@ -28,7 +28,7 @@ define <2 x float> @vabsf32(<2 x float>* %A) nounwind {
 ;CHECK: vabsf32:
 ;CHECK: vabs.f32
 	%tmp1 = load <2 x float>* %A
-	%tmp2 = call <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float> %tmp1)
+	%tmp2 = call <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp2
 }
 
@@ -60,17 +60,17 @@ define <4 x float> @vabsQf32(<4 x float>* %A) nounwind {
 ;CHECK: vabsQf32:
 ;CHECK: vabs.f32
 	%tmp1 = load <4 x float>* %A
-	%tmp2 = call <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float> %tmp1)
+	%tmp2 = call <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp2
 }
 
 declare <8 x i8>  @llvm.arm.neon.vabs.v8i8(<8 x i8>) nounwind readnone
 declare <4 x i16> @llvm.arm.neon.vabs.v4i16(<4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vabs.v2i32(<2 x i32>) nounwind readnone
-declare <2 x float> @llvm.arm.neon.vabsf.v2f32(<2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vabs.v2f32(<2 x float>) nounwind readnone
 
 declare <16 x i8> @llvm.arm.neon.vabs.v16i8(<16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vabs.v8i16(<8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vabs.v4i32(<4 x i32>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vabsf.v4f32(<4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vabs.v4f32(<4 x float>) nounwind readnone
 
diff --git a/test/CodeGen/ARM/vld1.ll b/test/CodeGen/ARM/vld1.ll
index d519133..81f1bde 100644
--- a/test/CodeGen/ARM/vld1.ll
+++ b/test/CodeGen/ARM/vld1.ll
@@ -3,81 +3,81 @@
 define <8 x i8> @vld1i8(i8* %A) nounwind {
 ;CHECK: vld1i8:
 ;CHECK: vld1.8
-	%tmp1 = call <8 x i8> @llvm.arm.neon.vld1i.v8i8(i8* %A)
+	%tmp1 = call <8 x i8> @llvm.arm.neon.vld1.v8i8(i8* %A)
 	ret <8 x i8> %tmp1
 }
 
 define <4 x i16> @vld1i16(i16* %A) nounwind {
 ;CHECK: vld1i16:
 ;CHECK: vld1.16
-	%tmp1 = call <4 x i16> @llvm.arm.neon.vld1i.v4i16(i16* %A)
+	%tmp1 = call <4 x i16> @llvm.arm.neon.vld1.v4i16(i16* %A)
 	ret <4 x i16> %tmp1
 }
 
 define <2 x i32> @vld1i32(i32* %A) nounwind {
 ;CHECK: vld1i32:
 ;CHECK: vld1.32
-	%tmp1 = call <2 x i32> @llvm.arm.neon.vld1i.v2i32(i32* %A)
+	%tmp1 = call <2 x i32> @llvm.arm.neon.vld1.v2i32(i32* %A)
 	ret <2 x i32> %tmp1
 }
 
 define <2 x float> @vld1f(float* %A) nounwind {
 ;CHECK: vld1f:
 ;CHECK: vld1.32
-	%tmp1 = call <2 x float> @llvm.arm.neon.vld1f.v2f32(float* %A)
+	%tmp1 = call <2 x float> @llvm.arm.neon.vld1.v2f32(float* %A)
 	ret <2 x float> %tmp1
 }
 
 define <1 x i64> @vld1i64(i64* %A) nounwind {
 ;CHECK: vld1i64:
 ;CHECK: vld1.64
-	%tmp1 = call <1 x i64> @llvm.arm.neon.vld1i.v1i64(i64* %A)
+	%tmp1 = call <1 x i64> @llvm.arm.neon.vld1.v1i64(i64* %A)
 	ret <1 x i64> %tmp1
 }
 
 define <16 x i8> @vld1Qi8(i8* %A) nounwind {
 ;CHECK: vld1Qi8:
 ;CHECK: vld1.8
-	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1i.v16i8(i8* %A)
+	%tmp1 = call <16 x i8> @llvm.arm.neon.vld1.v16i8(i8* %A)
 	ret <16 x i8> %tmp1
 }
 
 define <8 x i16> @vld1Qi16(i16* %A) nounwind {
 ;CHECK: vld1Qi16:
 ;CHECK: vld1.16
-	%tmp1 = call <8 x i16> @llvm.arm.neon.vld1i.v8i16(i16* %A)
+	%tmp1 = call <8 x i16> @llvm.arm.neon.vld1.v8i16(i16* %A)
 	ret <8 x i16> %tmp1
 }
 
 define <4 x i32> @vld1Qi32(i32* %A) nounwind {
 ;CHECK: vld1Qi32:
 ;CHECK: vld1.32
-	%tmp1 = call <4 x i32> @llvm.arm.neon.vld1i.v4i32(i32* %A)
+	%tmp1 = call <4 x i32> @llvm.arm.neon.vld1.v4i32(i32* %A)
 	ret <4 x i32> %tmp1
 }
 
 define <4 x float> @vld1Qf(float* %A) nounwind {
 ;CHECK: vld1Qf:
 ;CHECK: vld1.32
-	%tmp1 = call <4 x float> @llvm.arm.neon.vld1f.v4f32(float* %A)
+	%tmp1 = call <4 x float> @llvm.arm.neon.vld1.v4f32(float* %A)
 	ret <4 x float> %tmp1
 }
 
 define <2 x i64> @vld1Qi64(i64* %A) nounwind {
 ;CHECK: vld1Qi64:
 ;CHECK: vld1.64
-	%tmp1 = call <2 x i64> @llvm.arm.neon.vld1i.v2i64(i64* %A)
+	%tmp1 = call <2 x i64> @llvm.arm.neon.vld1.v2i64(i64* %A)
 	ret <2 x i64> %tmp1
 }
 
-declare <8 x i8>  @llvm.arm.neon.vld1i.v8i8(i8*) nounwind readonly
-declare <4 x i16> @llvm.arm.neon.vld1i.v4i16(i8*) nounwind readonly
-declare <2 x i32> @llvm.arm.neon.vld1i.v2i32(i8*) nounwind readonly
-declare <2 x float> @llvm.arm.neon.vld1f.v2f32(i8*) nounwind readonly
-declare <1 x i64> @llvm.arm.neon.vld1i.v1i64(i8*) nounwind readonly
+declare <8 x i8>  @llvm.arm.neon.vld1.v8i8(i8*) nounwind readonly
+declare <4 x i16> @llvm.arm.neon.vld1.v4i16(i8*) nounwind readonly
+declare <2 x i32> @llvm.arm.neon.vld1.v2i32(i8*) nounwind readonly
+declare <2 x float> @llvm.arm.neon.vld1.v2f32(i8*) nounwind readonly
+declare <1 x i64> @llvm.arm.neon.vld1.v1i64(i8*) nounwind readonly
 
-declare <16 x i8> @llvm.arm.neon.vld1i.v16i8(i8*) nounwind readonly
-declare <8 x i16> @llvm.arm.neon.vld1i.v8i16(i8*) nounwind readonly
-declare <4 x i32> @llvm.arm.neon.vld1i.v4i32(i8*) nounwind readonly
-declare <4 x float> @llvm.arm.neon.vld1f.v4f32(i8*) nounwind readonly
-declare <2 x i64> @llvm.arm.neon.vld1i.v2i64(i8*) nounwind readonly
+declare <16 x i8> @llvm.arm.neon.vld1.v16i8(i8*) nounwind readonly
+declare <8 x i16> @llvm.arm.neon.vld1.v8i16(i8*) nounwind readonly
+declare <4 x i32> @llvm.arm.neon.vld1.v4i32(i8*) nounwind readonly
+declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
+declare <2 x i64> @llvm.arm.neon.vld1.v2i64(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld2.ll b/test/CodeGen/ARM/vld2.ll
index 2c16ac1..168b62b 100644
--- a/test/CodeGen/ARM/vld2.ll
+++ b/test/CodeGen/ARM/vld2.ll
@@ -8,7 +8,7 @@
 define <8 x i8> @vld2i8(i8* %A) nounwind {
 ;CHECK: vld2i8:
 ;CHECK: vld2.8
-	%tmp1 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2i.v8i8(i8* %A)
+	%tmp1 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2.v8i8(i8* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v8qi2 %tmp1, 1
         %tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -18,7 +18,7 @@ define <8 x i8> @vld2i8(i8* %A) nounwind {
 define <4 x i16> @vld2i16(i16* %A) nounwind {
 ;CHECK: vld2i16:
 ;CHECK: vld2.16
-	%tmp1 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2i.v4i16(i16* %A)
+	%tmp1 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2.v4i16(i16* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v4hi2 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v4hi2 %tmp1, 1
         %tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -28,7 +28,7 @@ define <4 x i16> @vld2i16(i16* %A) nounwind {
 define <2 x i32> @vld2i32(i32* %A) nounwind {
 ;CHECK: vld2i32:
 ;CHECK: vld2.32
-	%tmp1 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2i.v2i32(i32* %A)
+	%tmp1 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2.v2i32(i32* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v2si2 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v2si2 %tmp1, 1
         %tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -38,14 +38,14 @@ define <2 x i32> @vld2i32(i32* %A) nounwind {
 define <2 x float> @vld2f(float* %A) nounwind {
 ;CHECK: vld2f:
 ;CHECK: vld2.32
-	%tmp1 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2f.v2f32(float* %A)
+	%tmp1 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2.v2f32(float* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v2sf2 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v2sf2 %tmp1, 1
         %tmp4 = add <2 x float> %tmp2, %tmp3
 	ret <2 x float> %tmp4
 }
 
-declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2i.v8i8(i8*) nounwind readonly
-declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2i.v4i16(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2i.v2i32(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2f.v2f32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vld2.v8i8(i8*) nounwind readonly
+declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vld2.v4i16(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vld2.v2i32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vld2.v2f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld3.ll b/test/CodeGen/ARM/vld3.ll
index 49665f6..5e528c0 100644
--- a/test/CodeGen/ARM/vld3.ll
+++ b/test/CodeGen/ARM/vld3.ll
@@ -8,7 +8,7 @@
 define <8 x i8> @vld3i8(i8* %A) nounwind {
 ;CHECK: vld3i8:
 ;CHECK: vld3.8
-	%tmp1 = call %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3i.v8i8(i8* %A)
+	%tmp1 = call %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3.v8i8(i8* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v8qi3 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v8qi3 %tmp1, 2
         %tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -18,7 +18,7 @@ define <8 x i8> @vld3i8(i8* %A) nounwind {
 define <4 x i16> @vld3i16(i16* %A) nounwind {
 ;CHECK: vld3i16:
 ;CHECK: vld3.16
-	%tmp1 = call %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3i.v4i16(i16* %A)
+	%tmp1 = call %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3.v4i16(i16* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v4hi3 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v4hi3 %tmp1, 2
         %tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -28,7 +28,7 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
 define <2 x i32> @vld3i32(i32* %A) nounwind {
 ;CHECK: vld3i32:
 ;CHECK: vld3.32
-	%tmp1 = call %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3i.v2i32(i32* %A)
+	%tmp1 = call %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3.v2i32(i32* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v2si3 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v2si3 %tmp1, 2
         %tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -38,14 +38,14 @@ define <2 x i32> @vld3i32(i32* %A) nounwind {
 define <2 x float> @vld3f(float* %A) nounwind {
 ;CHECK: vld3f:
 ;CHECK: vld3.32
-	%tmp1 = call %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3f.v2f32(float* %A)
+	%tmp1 = call %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3.v2f32(float* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v2sf3 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v2sf3 %tmp1, 2
         %tmp4 = add <2 x float> %tmp2, %tmp3
 	ret <2 x float> %tmp4
 }
 
-declare %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3i.v8i8(i8*) nounwind readonly
-declare %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3i.v4i16(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3i.v2i32(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3f.v2f32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v8qi3 @llvm.arm.neon.vld3.v8i8(i8*) nounwind readonly
+declare %struct.__builtin_neon_v4hi3 @llvm.arm.neon.vld3.v4i16(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2si3 @llvm.arm.neon.vld3.v2i32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2sf3 @llvm.arm.neon.vld3.v2f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vld4.ll b/test/CodeGen/ARM/vld4.ll
index a0f41cf..48125be 100644
--- a/test/CodeGen/ARM/vld4.ll
+++ b/test/CodeGen/ARM/vld4.ll
@@ -8,7 +8,7 @@
 define <8 x i8> @vld4i8(i8* %A) nounwind {
 ;CHECK: vld4i8:
 ;CHECK: vld4.8
-	%tmp1 = call %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4i.v8i8(i8* %A)
+	%tmp1 = call %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4.v8i8(i8* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v8qi4 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v8qi4 %tmp1, 2
         %tmp4 = add <8 x i8> %tmp2, %tmp3
@@ -18,7 +18,7 @@ define <8 x i8> @vld4i8(i8* %A) nounwind {
 define <4 x i16> @vld4i16(i16* %A) nounwind {
 ;CHECK: vld4i16:
 ;CHECK: vld4.16
-	%tmp1 = call %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4i.v4i16(i16* %A)
+	%tmp1 = call %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4.v4i16(i16* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v4hi4 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v4hi4 %tmp1, 2
         %tmp4 = add <4 x i16> %tmp2, %tmp3
@@ -28,7 +28,7 @@ define <4 x i16> @vld4i16(i16* %A) nounwind {
 define <2 x i32> @vld4i32(i32* %A) nounwind {
 ;CHECK: vld4i32:
 ;CHECK: vld4.32
-	%tmp1 = call %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4i.v2i32(i32* %A)
+	%tmp1 = call %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4.v2i32(i32* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v2si4 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v2si4 %tmp1, 2
         %tmp4 = add <2 x i32> %tmp2, %tmp3
@@ -38,14 +38,14 @@ define <2 x i32> @vld4i32(i32* %A) nounwind {
 define <2 x float> @vld4f(float* %A) nounwind {
 ;CHECK: vld4f:
 ;CHECK: vld4.32
-	%tmp1 = call %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4f.v2f32(float* %A)
+	%tmp1 = call %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4.v2f32(float* %A)
         %tmp2 = extractvalue %struct.__builtin_neon_v2sf4 %tmp1, 0
         %tmp3 = extractvalue %struct.__builtin_neon_v2sf4 %tmp1, 2
         %tmp4 = add <2 x float> %tmp2, %tmp3
 	ret <2 x float> %tmp4
 }
 
-declare %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4i.v8i8(i8*) nounwind readonly
-declare %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4i.v4i16(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4i.v2i32(i8*) nounwind readonly
-declare %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4f.v2f32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v8qi4 @llvm.arm.neon.vld4.v8i8(i8*) nounwind readonly
+declare %struct.__builtin_neon_v4hi4 @llvm.arm.neon.vld4.v4i16(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2si4 @llvm.arm.neon.vld4.v2i32(i8*) nounwind readonly
+declare %struct.__builtin_neon_v2sf4 @llvm.arm.neon.vld4.v2f32(i8*) nounwind readonly
diff --git a/test/CodeGen/ARM/vmax.ll b/test/CodeGen/ARM/vmax.ll
index 60322f8..65f6076 100644
--- a/test/CodeGen/ARM/vmax.ll
+++ b/test/CodeGen/ARM/vmax.ll
@@ -52,7 +52,7 @@ define <2 x i32> @vmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 define <2 x float> @vmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x float> @llvm.arm.neon.vmaxf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
 
@@ -101,7 +101,7 @@ define <4 x i32> @vmaxQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 define <4 x float> @vmaxQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = call <4 x float> @llvm.arm.neon.vmaxf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	%tmp3 = call <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
 
@@ -113,7 +113,7 @@ declare <8 x i8>  @llvm.arm.neon.vmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readnon
 declare <4 x i16> @llvm.arm.neon.vmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 
-declare <2 x float> @llvm.arm.neon.vmaxf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
 
 declare <16 x i8> @llvm.arm.neon.vmaxs.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vmaxs.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@@ -123,4 +123,4 @@ declare <16 x i8> @llvm.arm.neon.vmaxu.v16i8(<16 x i8>, <16 x i8>) nounwind read
 declare <8 x i16> @llvm.arm.neon.vmaxu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vmaxu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
-declare <4 x float> @llvm.arm.neon.vmaxf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vmaxs.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vmin.ll b/test/CodeGen/ARM/vmin.ll
index a693693..08a3f09 100644
--- a/test/CodeGen/ARM/vmin.ll
+++ b/test/CodeGen/ARM/vmin.ll
@@ -52,7 +52,7 @@ define <2 x i32> @vminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 define <2 x float> @vminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x float> @llvm.arm.neon.vminf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
 
@@ -101,7 +101,7 @@ define <4 x i32> @vminQu32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 define <4 x float> @vminQf32(<4 x float>* %A, <4 x float>* %B) nounwind {
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = call <4 x float> @llvm.arm.neon.vminf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	%tmp3 = call <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
 	ret <4 x float> %tmp3
 }
 
@@ -113,7 +113,7 @@ declare <8 x i8>  @llvm.arm.neon.vminu.v8i8(<8 x i8>, <8 x i8>) nounwind readnon
 declare <4 x i16> @llvm.arm.neon.vminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 
-declare <2 x float> @llvm.arm.neon.vminf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
 
 declare <16 x i8> @llvm.arm.neon.vmins.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
 declare <8 x i16> @llvm.arm.neon.vmins.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
@@ -123,4 +123,4 @@ declare <16 x i8> @llvm.arm.neon.vminu.v16i8(<16 x i8>, <16 x i8>) nounwind read
 declare <8 x i16> @llvm.arm.neon.vminu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vminu.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
 
-declare <4 x float> @llvm.arm.neon.vminf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vmins.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpmax.ll b/test/CodeGen/ARM/vpmax.ll
index 9878ca8..90ae70f 100644
--- a/test/CodeGen/ARM/vpmax.ll
+++ b/test/CodeGen/ARM/vpmax.ll
@@ -52,7 +52,7 @@ define <2 x i32> @vpmaxu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 define <2 x float> @vpmaxf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x float> @llvm.arm.neon.vpmaxf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
 
@@ -64,4 +64,4 @@ declare <8 x i8>  @llvm.arm.neon.vpmaxu.v8i8(<8 x i8>, <8 x i8>) nounwind readno
 declare <4 x i16> @llvm.arm.neon.vpmaxu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vpmaxu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 
-declare <2 x float> @llvm.arm.neon.vpmaxf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vpmaxs.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vpmin.ll b/test/CodeGen/ARM/vpmin.ll
index 7b5348b..0f982f4 100644
--- a/test/CodeGen/ARM/vpmin.ll
+++ b/test/CodeGen/ARM/vpmin.ll
@@ -52,7 +52,7 @@ define <2 x i32> @vpminu32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 define <2 x float> @vpminf32(<2 x float>* %A, <2 x float>* %B) nounwind {
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call <2 x float> @llvm.arm.neon.vpminf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
 	ret <2 x float> %tmp3
 }
 
@@ -64,4 +64,4 @@ declare <8 x i8>  @llvm.arm.neon.vpminu.v8i8(<8 x i8>, <8 x i8>) nounwind readno
 declare <4 x i16> @llvm.arm.neon.vpminu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
 declare <2 x i32> @llvm.arm.neon.vpminu.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
 
-declare <2 x float> @llvm.arm.neon.vpminf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vpmins.v2f32(<2 x float>, <2 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vrecpe.ll b/test/CodeGen/ARM/vrecpe.ll
index 79cb595..622725b 100644
--- a/test/CodeGen/ARM/vrecpe.ll
+++ b/test/CodeGen/ARM/vrecpe.ll
@@ -16,18 +16,18 @@ define <4 x i32> @vrecpeQi32(<4 x i32>* %A) nounwind {
 
 define <2 x float> @vrecpef32(<2 x float>* %A) nounwind {
 	%tmp1 = load <2 x float>* %A
-	%tmp2 = call <2 x float> @llvm.arm.neon.vrecpef.v2f32(<2 x float> %tmp1)
+	%tmp2 = call <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp2
 }
 
 define <4 x float> @vrecpeQf32(<4 x float>* %A) nounwind {
 	%tmp1 = load <4 x float>* %A
-	%tmp2 = call <4 x float> @llvm.arm.neon.vrecpef.v4f32(<4 x float> %tmp1)
+	%tmp2 = call <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp2
 }
 
 declare <2 x i32> @llvm.arm.neon.vrecpe.v2i32(<2 x i32>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vrecpe.v4i32(<4 x i32>) nounwind readnone
 
-declare <2 x float> @llvm.arm.neon.vrecpef.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vrecpef.v4f32(<4 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vrecpe.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrecpe.v4f32(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vrsqrte.ll b/test/CodeGen/ARM/vrsqrte.ll
index 10529f6..4f11977 100644
--- a/test/CodeGen/ARM/vrsqrte.ll
+++ b/test/CodeGen/ARM/vrsqrte.ll
@@ -16,18 +16,18 @@ define <4 x i32> @vrsqrteQi32(<4 x i32>* %A) nounwind {
 
 define <2 x float> @vrsqrtef32(<2 x float>* %A) nounwind {
 	%tmp1 = load <2 x float>* %A
-	%tmp2 = call <2 x float> @llvm.arm.neon.vrsqrtef.v2f32(<2 x float> %tmp1)
+	%tmp2 = call <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float> %tmp1)
 	ret <2 x float> %tmp2
 }
 
 define <4 x float> @vrsqrteQf32(<4 x float>* %A) nounwind {
 	%tmp1 = load <4 x float>* %A
-	%tmp2 = call <4 x float> @llvm.arm.neon.vrsqrtef.v4f32(<4 x float> %tmp1)
+	%tmp2 = call <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float> %tmp1)
 	ret <4 x float> %tmp2
 }
 
 declare <2 x i32> @llvm.arm.neon.vrsqrte.v2i32(<2 x i32>) nounwind readnone
 declare <4 x i32> @llvm.arm.neon.vrsqrte.v4i32(<4 x i32>) nounwind readnone
 
-declare <2 x float> @llvm.arm.neon.vrsqrtef.v2f32(<2 x float>) nounwind readnone
-declare <4 x float> @llvm.arm.neon.vrsqrtef.v4f32(<4 x float>) nounwind readnone
+declare <2 x float> @llvm.arm.neon.vrsqrte.v2f32(<2 x float>) nounwind readnone
+declare <4 x float> @llvm.arm.neon.vrsqrte.v4f32(<4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vst1.ll b/test/CodeGen/ARM/vst1.ll
index d84f758..8fbae12 100644
--- a/test/CodeGen/ARM/vst1.ll
+++ b/test/CodeGen/ARM/vst1.ll
@@ -4,7 +4,7 @@ define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst1i8:
 ;CHECK: vst1.8
 	%tmp1 = load <8 x i8>* %B
-	call void @llvm.arm.neon.vst1i.v8i8(i8* %A, <8 x i8> %tmp1)
+	call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1)
 	ret void
 }
 
@@ -12,7 +12,7 @@ define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst1i16:
 ;CHECK: vst1.16
 	%tmp1 = load <4 x i16>* %B
-	call void @llvm.arm.neon.vst1i.v4i16(i16* %A, <4 x i16> %tmp1)
+	call void @llvm.arm.neon.vst1.v4i16(i16* %A, <4 x i16> %tmp1)
 	ret void
 }
 
@@ -20,7 +20,7 @@ define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vst1i32:
 ;CHECK: vst1.32
 	%tmp1 = load <2 x i32>* %B
-	call void @llvm.arm.neon.vst1i.v2i32(i32* %A, <2 x i32> %tmp1)
+	call void @llvm.arm.neon.vst1.v2i32(i32* %A, <2 x i32> %tmp1)
 	ret void
 }
 
@@ -28,7 +28,7 @@ define void @vst1f(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vst1f:
 ;CHECK: vst1.32
 	%tmp1 = load <2 x float>* %B
-	call void @llvm.arm.neon.vst1f.v2f32(float* %A, <2 x float> %tmp1)
+	call void @llvm.arm.neon.vst1.v2f32(float* %A, <2 x float> %tmp1)
 	ret void
 }
 
@@ -36,7 +36,7 @@ define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
 ;CHECK: vst1i64:
 ;CHECK: vst1.64
 	%tmp1 = load <1 x i64>* %B
-	call void @llvm.arm.neon.vst1i.v1i64(i64* %A, <1 x i64> %tmp1)
+	call void @llvm.arm.neon.vst1.v1i64(i64* %A, <1 x i64> %tmp1)
 	ret void
 }
 
@@ -44,7 +44,7 @@ define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vst1Qi8:
 ;CHECK: vst1.8
 	%tmp1 = load <16 x i8>* %B
-	call void @llvm.arm.neon.vst1i.v16i8(i8* %A, <16 x i8> %tmp1)
+	call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1)
 	ret void
 }
 
@@ -52,7 +52,7 @@ define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vst1Qi16:
 ;CHECK: vst1.16
 	%tmp1 = load <8 x i16>* %B
-	call void @llvm.arm.neon.vst1i.v8i16(i16* %A, <8 x i16> %tmp1)
+	call void @llvm.arm.neon.vst1.v8i16(i16* %A, <8 x i16> %tmp1)
 	ret void
 }
 
@@ -60,7 +60,7 @@ define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vst1Qi32:
 ;CHECK: vst1.32
 	%tmp1 = load <4 x i32>* %B
-	call void @llvm.arm.neon.vst1i.v4i32(i32* %A, <4 x i32> %tmp1)
+	call void @llvm.arm.neon.vst1.v4i32(i32* %A, <4 x i32> %tmp1)
 	ret void
 }
 
@@ -68,7 +68,7 @@ define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
 ;CHECK: vst1Qf:
 ;CHECK: vst1.32
 	%tmp1 = load <4 x float>* %B
-	call void @llvm.arm.neon.vst1f.v4f32(float* %A, <4 x float> %tmp1)
+	call void @llvm.arm.neon.vst1.v4f32(float* %A, <4 x float> %tmp1)
 	ret void
 }
 
@@ -76,18 +76,18 @@ define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
 ;CHECK: vst1Qi64:
 ;CHECK: vst1.64
 	%tmp1 = load <2 x i64>* %B
-	call void @llvm.arm.neon.vst1i.v2i64(i64* %A, <2 x i64> %tmp1)
+	call void @llvm.arm.neon.vst1.v2i64(i64* %A, <2 x i64> %tmp1)
 	ret void
 }
 
-declare void @llvm.arm.neon.vst1i.v8i8(i8*, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst1i.v4i16(i8*, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst1i.v2i32(i8*, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst1f.v2f32(i8*, <2 x float>) nounwind
-declare void @llvm.arm.neon.vst1i.v1i64(i8*, <1 x i64>) nounwind
+declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>) nounwind
 
-declare void @llvm.arm.neon.vst1i.v16i8(i8*, <16 x i8>) nounwind
-declare void @llvm.arm.neon.vst1i.v8i16(i8*, <8 x i16>) nounwind
-declare void @llvm.arm.neon.vst1i.v4i32(i8*, <4 x i32>) nounwind
-declare void @llvm.arm.neon.vst1f.v4f32(i8*, <4 x float>) nounwind
-declare void @llvm.arm.neon.vst1i.v2i64(i8*, <2 x i64>) nounwind
+declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>) nounwind
+declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>) nounwind
+declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>) nounwind
+declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>) nounwind
+declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>) nounwind
diff --git a/test/CodeGen/ARM/vst2.ll b/test/CodeGen/ARM/vst2.ll
index f8f34f4..3e2d028 100644
--- a/test/CodeGen/ARM/vst2.ll
+++ b/test/CodeGen/ARM/vst2.ll
@@ -4,7 +4,7 @@ define void @vst2i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst2i8:
 ;CHECK: vst2.8
 	%tmp1 = load <8 x i8>* %B
-	call void @llvm.arm.neon.vst2i.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1)
+	call void @llvm.arm.neon.vst2.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1)
 	ret void
 }
 
@@ -12,7 +12,7 @@ define void @vst2i16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst2i16:
 ;CHECK: vst2.16
 	%tmp1 = load <4 x i16>* %B
-	call void @llvm.arm.neon.vst2i.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1)
+	call void @llvm.arm.neon.vst2.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1)
 	ret void
 }
 
@@ -20,7 +20,7 @@ define void @vst2i32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vst2i32:
 ;CHECK: vst2.32
 	%tmp1 = load <2 x i32>* %B
-	call void @llvm.arm.neon.vst2i.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1)
+	call void @llvm.arm.neon.vst2.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1)
 	ret void
 }
 
@@ -28,11 +28,11 @@ define void @vst2f(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vst2f:
 ;CHECK: vst2.32
 	%tmp1 = load <2 x float>* %B
-	call void @llvm.arm.neon.vst2f.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1)
+	call void @llvm.arm.neon.vst2.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1)
 	ret void
 }
 
-declare void @llvm.arm.neon.vst2i.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst2i.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst2i.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst2f.v2f32(i8*, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst2.v8i8(i8*, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst2.v4i16(i8*, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst2.v2i32(i8*, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst2.v2f32(i8*, <2 x float>, <2 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst3.ll b/test/CodeGen/ARM/vst3.ll
index c1a6ce8..0a47efa 100644
--- a/test/CodeGen/ARM/vst3.ll
+++ b/test/CodeGen/ARM/vst3.ll
@@ -4,7 +4,7 @@ define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst3i8:
 ;CHECK: vst3.8
 	%tmp1 = load <8 x i8>* %B
-	call void @llvm.arm.neon.vst3i.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+	call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
 	ret void
 }
 
@@ -12,7 +12,7 @@ define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst3i16:
 ;CHECK: vst3.16
 	%tmp1 = load <4 x i16>* %B
-	call void @llvm.arm.neon.vst3i.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+	call void @llvm.arm.neon.vst3.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
 	ret void
 }
 
@@ -20,7 +20,7 @@ define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vst3i32:
 ;CHECK: vst3.32
 	%tmp1 = load <2 x i32>* %B
-	call void @llvm.arm.neon.vst3i.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+	call void @llvm.arm.neon.vst3.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
 	ret void
 }
 
@@ -28,11 +28,11 @@ define void @vst3f(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vst3f:
 ;CHECK: vst3.32
 	%tmp1 = load <2 x float>* %B
-	call void @llvm.arm.neon.vst3f.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+	call void @llvm.arm.neon.vst3.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
 	ret void
 }
 
-declare void @llvm.arm.neon.vst3i.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst3i.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst3i.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst3f.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>) nounwind
diff --git a/test/CodeGen/ARM/vst4.ll b/test/CodeGen/ARM/vst4.ll
index 1d6f109..fa745eb 100644
--- a/test/CodeGen/ARM/vst4.ll
+++ b/test/CodeGen/ARM/vst4.ll
@@ -4,7 +4,7 @@ define void @vst4i8(i8* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vst4i8:
 ;CHECK: vst4.8
 	%tmp1 = load <8 x i8>* %B
-	call void @llvm.arm.neon.vst4i.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
+	call void @llvm.arm.neon.vst4.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1)
 	ret void
 }
 
@@ -12,7 +12,7 @@ define void @vst4i16(i16* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vst4i16:
 ;CHECK: vst4.16
 	%tmp1 = load <4 x i16>* %B
-	call void @llvm.arm.neon.vst4i.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
+	call void @llvm.arm.neon.vst4.v4i16(i16* %A, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1)
 	ret void
 }
 
@@ -20,7 +20,7 @@ define void @vst4i32(i32* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vst4i32:
 ;CHECK: vst4.32
 	%tmp1 = load <2 x i32>* %B
-	call void @llvm.arm.neon.vst4i.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
+	call void @llvm.arm.neon.vst4.v2i32(i32* %A, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1)
 	ret void
 }
 
@@ -28,11 +28,11 @@ define void @vst4f(float* %A, <2 x float>* %B) nounwind {
 ;CHECK: vst4f:
 ;CHECK: vst4.32
 	%tmp1 = load <2 x float>* %B
-	call void @llvm.arm.neon.vst4f.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
+	call void @llvm.arm.neon.vst4.v2f32(float* %A, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1)
 	ret void
 }
 
-declare void @llvm.arm.neon.vst4i.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
-declare void @llvm.arm.neon.vst4i.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
-declare void @llvm.arm.neon.vst4i.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
-declare void @llvm.arm.neon.vst4f.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind
+declare void @llvm.arm.neon.vst4.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8>) nounwind
+declare void @llvm.arm.neon.vst4.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16>) nounwind
+declare void @llvm.arm.neon.vst4.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>) nounwind
+declare void @llvm.arm.neon.vst4.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, <2 x float>) nounwind
diff --git a/test/CodeGen/ARM/vtrn.ll b/test/CodeGen/ARM/vtrn.ll
index 205052c..36a0561 100644
--- a/test/CodeGen/ARM/vtrn.ll
+++ b/test/CodeGen/ARM/vtrn.ll
@@ -15,7 +15,7 @@ define <8 x i8> @vtrni8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vtrn.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vtrni.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vtrn.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 1
         %tmp6 = add <8 x i8> %tmp4, %tmp5
@@ -27,7 +27,7 @@ define <4 x i16> @vtrni16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vtrn.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vtrni.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vtrn.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 1
         %tmp6 = add <4 x i16> %tmp4, %tmp5
@@ -39,7 +39,7 @@ define <2 x i32> @vtrni32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vtrn.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vtrni.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vtrn.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 1
         %tmp6 = add <2 x i32> %tmp4, %tmp5
@@ -51,7 +51,7 @@ define <2 x float> @vtrnf(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK: vtrn.32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vtrnf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vtrn.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 1
         %tmp6 = add <2 x float> %tmp4, %tmp5
@@ -63,7 +63,7 @@ define <16 x i8> @vtrnQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vtrn.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vtrni.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vtrn.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 1
         %tmp6 = add <16 x i8> %tmp4, %tmp5
@@ -75,7 +75,7 @@ define <8 x i16> @vtrnQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vtrn.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vtrni.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vtrn.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 1
         %tmp6 = add <8 x i16> %tmp4, %tmp5
@@ -87,7 +87,7 @@ define <4 x i32> @vtrnQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vtrn.32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vtrni.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vtrn.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 1
         %tmp6 = add <4 x i32> %tmp4, %tmp5
@@ -99,19 +99,19 @@ define <4 x float> @vtrnQf(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: vtrn.32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vtrnf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vtrn.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 1
         %tmp6 = add <4 x float> %tmp4, %tmp5
 	ret <4 x float> %tmp6
 }
 
-declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vtrni.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vtrni.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vtrni.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vtrnf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vtrn.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vtrn.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vtrn.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vtrn.v2f32(<2 x float>, <2 x float>) nounwind readnone
 
-declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vtrni.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vtrni.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vtrni.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vtrnf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vtrn.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vtrn.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vtrn.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vtrn.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vuzp.ll b/test/CodeGen/ARM/vuzp.ll
index 508ae14..883e072 100644
--- a/test/CodeGen/ARM/vuzp.ll
+++ b/test/CodeGen/ARM/vuzp.ll
@@ -15,7 +15,7 @@ define <8 x i8> @vuzpi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vuzp.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vuzpi.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vuzp.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 1
         %tmp6 = add <8 x i8> %tmp4, %tmp5
@@ -27,7 +27,7 @@ define <4 x i16> @vuzpi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vuzp.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vuzpi.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vuzp.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 1
         %tmp6 = add <4 x i16> %tmp4, %tmp5
@@ -39,7 +39,7 @@ define <2 x i32> @vuzpi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vuzp.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vuzpi.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vuzp.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 1
         %tmp6 = add <2 x i32> %tmp4, %tmp5
@@ -51,7 +51,7 @@ define <2 x float> @vuzpf(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK: vuzp.32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vuzpf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vuzp.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 1
         %tmp6 = add <2 x float> %tmp4, %tmp5
@@ -63,7 +63,7 @@ define <16 x i8> @vuzpQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vuzp.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vuzpi.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vuzp.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 1
         %tmp6 = add <16 x i8> %tmp4, %tmp5
@@ -75,7 +75,7 @@ define <8 x i16> @vuzpQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vuzp.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vuzpi.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vuzp.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 1
         %tmp6 = add <8 x i16> %tmp4, %tmp5
@@ -87,7 +87,7 @@ define <4 x i32> @vuzpQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vuzp.32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vuzpi.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vuzp.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 1
         %tmp6 = add <4 x i32> %tmp4, %tmp5
@@ -99,19 +99,19 @@ define <4 x float> @vuzpQf(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: vuzp.32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vuzpf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vuzp.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 1
         %tmp6 = add <4 x float> %tmp4, %tmp5
 	ret <4 x float> %tmp6
 }
 
-declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vuzpi.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vuzpi.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vuzpi.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vuzpf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vuzp.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vuzp.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vuzp.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vuzp.v2f32(<2 x float>, <2 x float>) nounwind readnone
 
-declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vuzpi.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vuzpi.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vuzpi.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vuzpf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vuzp.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vuzp.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vuzp.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vuzp.v4f32(<4 x float>, <4 x float>) nounwind readnone
diff --git a/test/CodeGen/ARM/vzip.ll b/test/CodeGen/ARM/vzip.ll
index ede5ab6..0485b30 100644
--- a/test/CodeGen/ARM/vzip.ll
+++ b/test/CodeGen/ARM/vzip.ll
@@ -15,7 +15,7 @@ define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
 ;CHECK: vzip.8
 	%tmp1 = load <8 x i8>* %A
 	%tmp2 = load <8 x i8>* %B
-	%tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vzipi.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vzip.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v8qi2 %tmp3, 1
         %tmp6 = add <8 x i8> %tmp4, %tmp5
@@ -27,7 +27,7 @@ define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
 ;CHECK: vzip.16
 	%tmp1 = load <4 x i16>* %A
 	%tmp2 = load <4 x i16>* %B
-	%tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vzipi.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vzip.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v4hi2 %tmp3, 1
         %tmp6 = add <4 x i16> %tmp4, %tmp5
@@ -39,7 +39,7 @@ define <2 x i32> @vzipi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
 ;CHECK: vzip.32
 	%tmp1 = load <2 x i32>* %A
 	%tmp2 = load <2 x i32>* %B
-	%tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vzipi.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v2si2 @llvm.arm.neon.vzip.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v2si2 %tmp3, 1
         %tmp6 = add <2 x i32> %tmp4, %tmp5
@@ -51,7 +51,7 @@ define <2 x float> @vzipf(<2 x float>* %A, <2 x float>* %B) nounwind {
 ;CHECK: vzip.32
 	%tmp1 = load <2 x float>* %A
 	%tmp2 = load <2 x float>* %B
-	%tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vzipf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vzip.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v2sf2 %tmp3, 1
         %tmp6 = add <2 x float> %tmp4, %tmp5
@@ -63,7 +63,7 @@ define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
 ;CHECK: vzip.8
 	%tmp1 = load <16 x i8>* %A
 	%tmp2 = load <16 x i8>* %B
-	%tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vzipi.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vzip.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v16qi2 %tmp3, 1
         %tmp6 = add <16 x i8> %tmp4, %tmp5
@@ -75,7 +75,7 @@ define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
 ;CHECK: vzip.16
 	%tmp1 = load <8 x i16>* %A
 	%tmp2 = load <8 x i16>* %B
-	%tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vzipi.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vzip.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v8hi2 %tmp3, 1
         %tmp6 = add <8 x i16> %tmp4, %tmp5
@@ -87,7 +87,7 @@ define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
 ;CHECK: vzip.32
 	%tmp1 = load <4 x i32>* %A
 	%tmp2 = load <4 x i32>* %B
-	%tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vzipi.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v4si2 @llvm.arm.neon.vzip.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v4si2 %tmp3, 1
         %tmp6 = add <4 x i32> %tmp4, %tmp5
@@ -99,19 +99,19 @@ define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
 ;CHECK: vzip.32
 	%tmp1 = load <4 x float>* %A
 	%tmp2 = load <4 x float>* %B
-	%tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vzipf.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
+	%tmp3 = call %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vzip.v4f32(<4 x float> %tmp1, <4 x float> %tmp2)
         %tmp4 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 0
         %tmp5 = extractvalue %struct.__builtin_neon_v4sf2 %tmp3, 1
         %tmp6 = add <4 x float> %tmp4, %tmp5
 	ret <4 x float> %tmp6
 }
 
-declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vzipi.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vzipi.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vzipi.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vzipf.v2f32(<2 x float>, <2 x float>) nounwind readnone
+declare %struct.__builtin_neon_v8qi2 @llvm.arm.neon.vzip.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v4hi2 @llvm.arm.neon.vzip.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v2si2 @llvm.arm.neon.vzip.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v2sf2 @llvm.arm.neon.vzip.v2f32(<2 x float>, <2 x float>) nounwind readnone
 
-declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vzipi.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
-declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vzipi.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
-declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vzipi.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
-declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vzipf.v4f32(<4 x float>, <4 x float>) nounwind readnone
+declare %struct.__builtin_neon_v16qi2 @llvm.arm.neon.vzip.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
+declare %struct.__builtin_neon_v8hi2 @llvm.arm.neon.vzip.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
+declare %struct.__builtin_neon_v4si2 @llvm.arm.neon.vzip.v4i32(<4 x i32>, <4 x i32>) nounwind readnone
+declare %struct.__builtin_neon_v4sf2 @llvm.arm.neon.vzip.v4f32(<4 x float>, <4 x float>) nounwind readnone
author	Bob Wilson <bob.wilson@apple.com>	2009-08-11 05:39:44 +0000
committer	Bob Wilson <bob.wilson@apple.com>	2009-08-11 05:39:44 +0000
commit	8f10b3f2521e99168e7fb89310270d821a0b00fb (patch)
tree	da1622ea7dce5183b471d7df05bcc92522bfe737 /test
parent	7b70d4fdd784e412864279c6bf86b81f52c41719 (diff)
download	external_llvm-8f10b3f2521e99168e7fb89310270d821a0b00fb.zip external_llvm-8f10b3f2521e99168e7fb89310270d821a0b00fb.tar.gz external_llvm-8f10b3f2521e99168e7fb89310270d821a0b00fb.tar.bz2