aboutsummaryrefslogtreecommitdiff
path: root/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
blob: f7758fa22008cdadc44ef9e029a0387feb4b2371 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"

@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16

; We should be able to propagate constant data through comparisons.
; For example, in this test we have a load, which becomes constant after
; unrolling, making comparison with 0 also known to be 0 (false) - and that
; will trigger further simplifications.
;
; We expect this loop to be unrolled, because in this case load would become
; constant, which is always 1, and which, in its turn, helps to simplify
; following comparison, zero-extension, and addition. In total, unrolling should help to
; optimize more than 50% of all instructions in this case.
;
; CHECK-LABEL: @const_compare
; CHECK-NOT: br i1 %
; CHECK: ret i32
define i32 @const_compare(i32* noalias nocapture readonly %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
  %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.body ]
  %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
  %x1 = load i32, i32* %arrayidx1, align 4
  %cmp = icmp eq i32 %x1, 0
  %cast = zext i1 %cmp to i32
  %iv.1 = add nuw nsw i64 %iv.0, 1
  %r.1 = add i32 %r.0, %cast
  %exitcond = icmp eq i64 %iv.1, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret i32 %r.1
}

; If we can figure out result of comparison on each iteration, we can resolve
; the depending branch. That means, that the unrolled version of the loop would
; have less code, because we don't need not-taken basic blocks there.
; This test checks that this is taken into consideration.
; We expect this loop to be unrolled, because the most complicated part of its
; body (if.then block) is never actually executed.
; CHECK-LABEL: @branch_folded
; CHECK-NOT: br i1 %
; CHECK: ret i32
define i32 @branch_folded(i32* noalias nocapture readonly %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.inc ]
  %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ]
  %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
  %x1 = load i32, i32* %arrayidx1, align 4
  %cmp = icmp eq i32 %x1, 0
  %iv.1 = add nuw nsw i64 %iv.0, 1
  br i1 %cmp, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv.0
  %x2 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %x2, %r.0
  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then
  %r.1 = phi i32 [ %add, %if.then ], [ %x1, %for.body ]
  %exitcond = icmp eq i64 %iv.1, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret i32 %r.1
}

; This test is similar to the previous one, but in this we use IV in comparison
; (not a loaded value as we did there).
; CHECK-LABEL: @branch_iv
; CHECK-NOT: br i1 %
; CHECK: ret i64
define i64 @branch_iv(i64* noalias nocapture readonly %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ]
  %r.030 = phi i64 [ 0, %entry ], [ %r.1, %for.inc ]
  %cmp3 = icmp eq i64 %indvars.iv, 5
  %tmp3 = add nuw nsw i64 %indvars.iv, 1
  br i1 %cmp3, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 %tmp3
  %tmp1 = load i64, i64* %arrayidx2, align 4
  %add = add nsw i64 %tmp1, %r.030
  br label %for.inc

for.inc:                                          ; preds = %if.then, %for.body
  %r.1 = phi i64 [ %add, %if.then ], [ %r.030, %for.body ]
  %exitcond = icmp eq i64 %tmp3, 20
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret i64 %r.1
}

; Induction variables are often casted to another type, and that shouldn't
; prevent us from folding branches. Tthis test specifically checks if we can
; handle this. Other than thatm it's similar to the previous test.
; CHECK-LABEL: @branch_iv_trunc
; CHECK-NOT:   br i1 %
; CHECK:   ret i32
define i32 @branch_iv_trunc(i32* noalias nocapture readonly %b) {
entry:
  br label %for.body

for.body:                                         ; preds = %for.inc, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ]
  %r.030 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ]
  %tmp2 = trunc i64 %indvars.iv to i32
  %cmp3 = icmp eq i32 %tmp2, 5
  %tmp3 = add nuw nsw i64 %indvars.iv, 1
  br i1 %cmp3, label %if.then, label %for.inc

if.then:                                          ; preds = %for.body
  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %tmp3
  %tmp1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %tmp1, %r.030
  br label %for.inc

for.inc:                                          ; preds = %if.then, %for.body
  %r.1 = phi i32 [ %add, %if.then ], [ %r.030, %for.body ]
  %exitcond = icmp eq i64 %tmp3, 10
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.inc
  ret i32 %r.1
}

; Check that we don't crash when we analyze icmp with pointer-typed IV and a
; pointer.
; CHECK-LABEL: @ptr_cmp_crash
; CHECK:   ret void
define void @ptr_cmp_crash() {
entry:
  br label %while.body

while.body:
  %iv.0 = phi i32* [ getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 0), %entry ], [ %iv.1, %while.body ]
  %iv.1 = getelementptr inbounds i32, i32* %iv.0, i64 1
  %exitcond = icmp eq i32* %iv.1, getelementptr inbounds ([10 x i32], [10 x i32]* @known_constant, i64 0, i64 9)
  br i1 %exitcond, label %loop.exit, label %while.body

loop.exit:
  ret void
}

; Check that we don't crash when we analyze ptrtoint cast.
; CHECK-LABEL: @ptrtoint_cast_crash
; CHECK:   ret void
define void @ptrtoint_cast_crash(i8 * %a) {
entry:
  %limit = getelementptr i8, i8* %a, i64 512
  br label %loop.body

loop.body:
  %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]
  %cast = ptrtoint i8* %iv.0 to i64
  %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1
  %exitcond = icmp ne i8* %iv.1, %limit
  br i1 %exitcond, label %loop.body, label %loop.exit

loop.exit:
  ret void
}

; Loop unroller should be able to predict that a comparison would become
; constant if the operands are pointers with the same base and constant
; offsets.
; We expect this loop to be unrolled, since most of its instructions would
; become constant after it.
; CHECK-LABEL: @ptr_cmp
; CHECK-NOT:   br i1 %
; CHECK:   ret i64
define i64 @ptr_cmp(i8 * %a) {
entry:
  %limit = getelementptr i8, i8* %a, i64 40
  %start.iv2 = getelementptr i8, i8* %a, i64 7
  br label %loop.body

loop.body:
  %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]
  %iv2.0 = phi i8* [ %start.iv2, %entry ], [ %iv2.1, %loop.body ]
  %r.0 = phi i64 [ 0, %entry ], [ %r.1, %loop.body ]
  %cast = ptrtoint i8* %iv.0 to i64
  %cmp = icmp eq i8* %iv2.0, %iv.0
  %sub = sext i1 %cmp to i64
  %mul = mul i64 %sub, %cast
  %r.1 = add i64 %r.0, %mul
  %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1
  %iv2.1 = getelementptr inbounds i8, i8* %iv2.0, i64 1
  %exitcond = icmp ne i8* %iv.1, %limit
  br i1 %exitcond, label %loop.body, label %loop.exit

loop.exit:
  ret i64 %r.1
}