; Origin: test/Transforms/SLPVectorizer/X86/phi.ll
; (blob f77e945aad98f88bad47af50d31903e02e207793)
; Regression test: the SLP vectorizer is expected to vectorize scalar chains
; that flow through PHI nodes (see the vector phi checks on each function).
; The negative -slp-threshold presumably forces vectorization even where the
; cost model would reject it -- confirm against the pass's option docs.
; RUN: opt < %s -basicaa -slp-vectorizer -slp-threshold=-100 -dce -S -mtriple=i386-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s

; 32-bit pointers (p:32:32:32); x86_fp80 is aligned to 128 bits (f80:128:128),
; which is what the @test function in this file relies on.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32-S128"
; NOTE(review): the -mtriple on the opt command line says macosx10.8.0 while
; this says macosx10.9.0 -- looks harmless, but confirm it is intentional.
target triple = "i386-apple-macosx10.9.0"

;int foo(double *A, int k) {
;  double A0;
;  double A1;
;  if (k) {
;    A0 = 3;
;    A1 = 5;
;  } else {
;    A0 = A[10];
;    A1 = A[11];
;  }
;  A[0] = A0;
;  A[1] = A1;
;}


; @foo: an if/else diamond whose two results are merged by PHIs in %if.end.
; One incoming pair is the adjacent loads A[10]/A[11]; the other is the
; constant pair 3.0/5.0. The merged values are stored to the adjacent slots
; A[0]/A[1]. Expected after SLP vectorization: one <2 x double> load, one
; <2 x double> phi and one <2 x double> store.
;
; The function name is matched with a label directive (as @test in this file
; does) so the match is pinned to this function's define line; the pattern
; "@foo(" cannot match "@foo2(".
;CHECK-LABEL: @foo(
;CHECK: load <2 x double>
;CHECK: phi <2 x double>
;CHECK: store <2 x double>
;CHECK: ret i32 undef
define i32 @foo(double* nocapture %A, i32 %k) {
entry:
  ; k == 0 takes the load path (%if.else); k != 0 keeps the constants.
  %tobool = icmp eq i32 %k, 0
  br i1 %tobool, label %if.else, label %if.end

if.else:                                          ; preds = %entry
  ; Two consecutive doubles, A[10] and A[11]: the vectorizable load pair.
  %arrayidx = getelementptr inbounds double* %A, i64 10
  %0 = load double* %arrayidx, align 8
  %arrayidx1 = getelementptr inbounds double* %A, i64 11
  %1 = load double* %arrayidx1, align 8
  br label %if.end

if.end:                                           ; preds = %entry, %if.else
  ; Lane 0 / lane 1 of the expected vector phi.
  %A0.0 = phi double [ %0, %if.else ], [ 3.000000e+00, %entry ]
  %A1.0 = phi double [ %1, %if.else ], [ 5.000000e+00, %entry ]
  ; Consecutive stores to A[0] and A[1]: the expected vector store.
  store double %A0.0, double* %A, align 8
  %arrayidx3 = getelementptr inbounds double* %A, i64 1
  store double %A1.0, double* %arrayidx3, align 8
  ; The C source has no return statement, hence undef.
  ret i32 undef
}


;int foo2(double * restrict B, double * restrict A, int n, int m) {
;  double R=A[1];
;  double G=A[0];
;  for (int i=0; i < 100; i++) {
;    R += 10;
;    G += 10;
;    R *= 4;
;    G *= 4;
;    R += 4;
;    G += 4;
;  }
;  B[0] = G;
;  B[1] = R;
;  return 0;
;}

; @foo2: two loop-carried PHIs (%G.018, %R.017) seeded from the adjacent loads
; A[0]/A[1]. Each iteration applies the same fadd/fmul/fadd chain to both
; values, and after the loop they are stored to the adjacent slots B[0]/B[1].
; Expected after SLP vectorization: a <2 x double> load, a <2 x double> phi,
; a <2 x double> fmul and a <2 x double> store.
;
; The function name is matched with a label directive (as @test in this file
; does) so the checks below are confined to the output between this
; function's define line and the next label.
;
; NOTE(review): attribute group #0 is referenced on the define line but is not
; defined in the visible part of this file -- confirm it exists or drop it.
;CHECK-LABEL: @foo2(
;CHECK: load <2 x double>
;CHECK: phi <2 x double>
;CHECK: fmul <2 x double>
;CHECK: store <2 x double>
;CHECK: ret
define i32 @foo2(double* noalias nocapture %B, double* noalias nocapture %A, i32 %n, i32 %m) #0 {
entry:
  ; %0 = A[1] (initial R), %1 = A[0] (initial G): adjacent loads.
  %arrayidx = getelementptr inbounds double* %A, i64 1
  %0 = load double* %arrayidx, align 8
  %1 = load double* %A, align 8
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.019 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  ; The two scalar recurrences that should become one vector phi.
  %G.018 = phi double [ %1, %entry ], [ %add5, %for.body ]
  %R.017 = phi double [ %0, %entry ], [ %add4, %for.body ]
  ; Identical per-lane computation: x = (x + 10) * 4 + 4.
  %add = fadd double %R.017, 1.000000e+01
  %add2 = fadd double %G.018, 1.000000e+01
  %mul = fmul double %add, 4.000000e+00
  %mul3 = fmul double %add2, 4.000000e+00
  %add4 = fadd double %mul, 4.000000e+00
  %add5 = fadd double %mul3, 4.000000e+00
  ; Fixed trip count of 100.
  %inc = add nsw i32 %i.019, 1
  %exitcond = icmp eq i32 %inc, 100
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ; B[0] = G, B[1] = R: adjacent stores of the final recurrence values.
  store double %add5, double* %B, align 8
  %arrayidx7 = getelementptr inbounds double* %B, i64 1
  store double %add4, double* %arrayidx7, align 8
  ret i32 0
}

define void @test(x86_fp80* %i1, x86_fp80* %i2, x86_fp80* %o) {
; CHECK-LABEL: @test(
;
; Test that we correctly recognize the discontiguous memory in arrays where the
; size is less than the alignment, and through various different GEP formations.
;
; x86_fp80 holds 80 bits of data but the module datalayout aligns it to 128
; bits (f80:128:128), so element N and element N+1 are not back-to-back in
; memory. The checks below expect every load to stay scalar, the loaded pairs
; to be packed with insertelement, merged by a single vector phi, and unpacked
; with extractelement afterwards.

entry:
  ; First pair: element 0 is loaded straight from %i1, element 1 through a
  ; plain (non-inbounds) GEP.
  %i1.0 = load x86_fp80* %i1, align 16
  %i1.gep1 = getelementptr x86_fp80* %i1, i64 1
  %i1.1 = load x86_fp80* %i1.gep1, align 16
; CHECK: load x86_fp80*
; CHECK: load x86_fp80*
; CHECK: insertelement <2 x x86_fp80>
; CHECK: insertelement <2 x x86_fp80>
  br i1 undef, label %then, label %end

then:
  ; Second pair: both elements go through inbounds GEPs, including an explicit
  ; zero-index GEP for element 0 (a different GEP formation from %entry).
  %i2.gep0 = getelementptr inbounds x86_fp80* %i2, i64 0
  %i2.0 = load x86_fp80* %i2.gep0, align 16
  %i2.gep1 = getelementptr inbounds x86_fp80* %i2, i64 1
  %i2.1 = load x86_fp80* %i2.gep1, align 16
; CHECK: load x86_fp80*
; CHECK: load x86_fp80*
; CHECK: insertelement <2 x x86_fp80>
; CHECK: insertelement <2 x x86_fp80>
  br label %end

end:
  ; The two scalar PHIs are expected to be replaced by one vector phi.
  %phi0 = phi x86_fp80 [ %i1.0, %entry ], [ %i2.0, %then ]
  %phi1 = phi x86_fp80 [ %i1.1, %entry ], [ %i2.1, %then ]
; CHECK: phi <2 x x86_fp80>
; CHECK: extractelement <2 x x86_fp80>
; CHECK: extractelement <2 x x86_fp80>
  ; The lanes are extracted again before use, presumably so the two stores
  ; below stay scalar (no check pins the store form -- confirm if it matters).
  store x86_fp80 %phi0, x86_fp80* %o, align 16
  %o.gep1 = getelementptr inbounds x86_fp80* %o, i64 1
  store x86_fp80 %phi1, x86_fp80* %o.gep1, align 16
  ret void
}