; test/Transforms/ScalarRepl/dynamic-vector-gep.ll

; Tests for the -scalarrepl pass on allocas that are addressed through
; getelementptr instructions whose vector element index is a function
; argument rather than a constant. The command below pipes this file through
; opt and matches the textual output against the FileCheck patterns that
; precede each function.
; RUN: opt < %s -scalarrepl -S | FileCheck %s

; Data layout and target triple shared by every function in this module.
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "x86_64-apple-darwin10.0.0"

; CHECK: @test1
; CHECK: %[[alloc:[\.a-z0-9]*]] = alloca <4 x float>
; CHECK: store <4 x float> zeroinitializer, <4 x float>* %[[alloc]]
; CHECK: memset
; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2

; Split the array, but don't replace the memset with an insertelement,
; as it is not at a constant offset.
; The load, however, can be replaced with an extractelement.
;
; The memset destination is lane %idx1 of array element 0, while the load
; reads lane %idx2 of array element 1, so the two accesses touch different
; <4 x float> elements of the alloca.
define float @test1(i32 %idx1, i32 %idx2) {
entry:
  ; Zero-initialized array of four <4 x float> vectors.
  %0 = alloca [4 x <4 x float>]
  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
  ; Address of lane %idx1 inside array element 0.
  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
  %cast = bitcast float* %ptr1 to i8*
  ; memset with byte value 0 starting at that dynamically indexed lane.
  ; NOTE(review): the two "i32 4" operands are presumably length and
  ; alignment (one float's worth of bytes) -- confirm against the intrinsic.
  call void @llvm.memset.p0i8.i32(i8* %cast, i8 0, i32 4, i32 4, i1 false)
  ; Address of lane %idx2 inside array element 1; this is the value returned.
  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 1, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: @test2
; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <4 x float> %[[ins]], i32 %idx2

; Do SROA on the array when it has dynamic vector reads and writes.
;
; Both the store and the load address array element 0, so the expected
; output extracts from the vector produced by the insertelement.
define float @test2(i32 %idx1, i32 %idx2) {
entry:
  ; Zero-initialized array of four <4 x float> vectors.
  %0 = alloca [4 x <4 x float>]
  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
  ; Write 1.0 to lane %idx1 of array element 0.
  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
  store float 1.0, float* %ptr1
  ; Read back lane %idx2 of the same array element.
  %ptr2 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: test3
; CHECK: %0 = alloca [4 x <4 x float>]
; CHECK-NOT: alloca

; Don't do SROA on a dynamically indexed vector when it spans
; more than one array element of the alloca array it is within.
;
; The patterns above require the original alloca to remain and no further
; alloca to appear after it.
define float @test3(i32 %idx1, i32 %idx2) {
entry:
  ; The initializing store uses the array type, not the wide vector type.
  %0 = alloca [4 x <4 x float>]
  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
  ; Reinterpret the whole array as a single <16 x float>, which covers all
  ; four <4 x float> array elements.
  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
  ; Dynamic store and load through the <16 x float> view.
  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
  store float 1.0, float* %ptr1
  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: test4
; CHECK: insertelement <16 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <16 x float> %0, i32 %idx2

; Don't do SROA on a dynamically indexed vector when it spans
; more than one array element of the alloca array it is within.
; However, unlike test3, the store is on the vector type,
; so SROA will convert the large alloca into the large vector
; type and do all accesses with insert/extract element.
define float @test4(i32 %idx1, i32 %idx2) {
entry:
  %0 = alloca [4 x <4 x float>]
  ; Reinterpret the whole array as a single <16 x float>.
  %bigvec = bitcast [4 x <4 x float>]* %0 to <16 x float>*
  ; The initializing store is performed through the <16 x float> pointer;
  ; this is the difference from test3.
  store <16 x float> zeroinitializer, <16 x float>* %bigvec
  ; Dynamic store and load through the <16 x float> view.
  %ptr1 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx1
  store float 1.0, float* %ptr1
  %ptr2 = getelementptr <16 x float>* %bigvec, i32 0, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: @test5
; CHECK: %0 = alloca [4 x <4 x float>]
; CHECK-NOT: alloca

; Don't do SROA as there is a second dynamically indexed array
; which may span multiple elements of the alloca.
;
; The patterns above require the original alloca to remain and no further
; alloca to appear after it.
define float @test5(i32 %idx1, i32 %idx2) {
entry:
  ; Zero-initialized array of four <4 x float> vectors.
  %0 = alloca [4 x <4 x float>]
  store [4 x <4 x float>] zeroinitializer, [4 x <4 x float>]* %0
  ; Address of lane %idx1 inside array element 0.
  %ptr1 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx1
  ; Reinterpret that lane address as a [1 x <2 x float>] and index it
  ; dynamically a second time. %ptr3 has no users in this function.
  %ptr2 = bitcast float* %ptr1 to [1 x <2 x float>]*
  %ptr3 = getelementptr [1 x <2 x float>]* %ptr2, i32 0, i32 0, i32 %idx1
  ; The store goes through %ptr1, not %ptr3.
  store float 1.0, float* %ptr1
  ; Read back lane %idx2 of array element 0.
  %ptr4 = getelementptr [4 x <4 x float>]* %0, i32 0, i32 0, i32 %idx2
  %ret = load float* %ptr4
  ret float %ret
}

; CHECK: test6
; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2

; Nested single-field struct wrappers around a <4 x float>:
;   %vector      wraps the vector itself,
;   %vector.anon wraps one %vector,
;   %vector.pair holds two %vector.anon fields.
%vector.pair = type { %vector.anon, %vector.anon }
%vector.anon = type { %vector }
%vector = type { <4 x float> }

; Dynamic GEPs on vectors were crashing when the vector was inside a struct,
; as the new GEP for the new alloca might not include all the indices from
; the original GEP, just the indices it needs to get to the correct offset of
; some type, not necessarily the dynamic vector.
; This test makes sure we don't have this crash.
define float @test6(i32 %idx1, i32 %idx2) {
entry:
  ; Zero-initialized pair of wrapped vectors.
  %0 = alloca %vector.pair
  store %vector.pair zeroinitializer, %vector.pair* %0
  ; Write 1.0 to lane %idx1 of the vector nested in field 0 of the pair.
  %ptr1 = getelementptr %vector.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
  store float 1.0, float* %ptr1
  ; Read lane %idx2 of the vector nested in field 1 of the pair, i.e. a
  ; different vector from the one written above.
  %ptr2 = getelementptr %vector.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: test7
; CHECK: insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %idx1
; CHECK: extractelement <4 x float> zeroinitializer, i32 %idx2

; Array-based counterparts of the struct wrappers:
;   %array.anon wraps an array of two %vector,
;   %array.pair holds an array of two %array.anon followed by one %array.anon.
%array.pair = type { [2 x %array.anon], %array.anon }
%array.anon = type { [2 x %vector] }

; This is the same as test6 and tests the same crash, but on arrays.
define float @test7(i32 %idx1, i32 %idx2) {
entry:
  ; Zero-initialized pair.
  %0 = alloca %array.pair
  store %array.pair zeroinitializer, %array.pair* %0
  ; Field 0 of the pair -> array element 0 -> its [2 x %vector] field ->
  ; element 0 -> the <4 x float> -> lane %idx1.
  %ptr1 = getelementptr %array.pair* %0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 %idx1
  store float 1.0, float* %ptr1
  ; Field 1 of the pair -> its [2 x %vector] field -> element 0 ->
  ; the <4 x float> -> lane %idx2. This is a different vector from the one
  ; written above.
  %ptr2 = getelementptr %array.pair* %0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 %idx2
  %ret = load float* %ptr2
  ret float %ret
}

; CHECK: test8
; CHECK: %[[offset1:[\.a-z0-9]*]] = add i32 %idx1, 1
; CHECK: %[[ins:[\.a-z0-9]*]] = insertelement <4 x float> zeroinitializer, float 1.000000e+00, i32 %[[offset1]]
; CHECK: %[[offset2:[\.a-z0-9]*]] = add i32 %idx2, 2
; CHECK: extractelement <4 x float> %[[ins]], i32 %[[offset2]]

; Do SROA on the vector when it has dynamic vector reads and writes
; from a non-zero offset.
;
; The expected output adds the constant starting lane (1 for the store,
; 2 for the load) to the dynamic index before the insert/extract.
define float @test8(i32 %idx1, i32 %idx2) {
entry:
  ; Zero-initialized <4 x float>.
  %0 = alloca <4 x float>
  store <4 x float> zeroinitializer, <4 x float>* %0
  ; Start at lane 1, view the remainder as <3 x float>, then index it by
  ; %idx1 and store 1.0 there.
  %ptr1 = getelementptr <4 x float>* %0, i32 0, i32 1
  %ptr2 = bitcast float* %ptr1 to <3 x float>*
  %ptr3 = getelementptr <3 x float>* %ptr2, i32 0, i32 %idx1
  store float 1.0, float* %ptr3
  ; Start at lane 2, view the remainder as <2 x float>, then index it by
  ; %idx2 and load from there.
  %ptr4 = getelementptr <4 x float>* %0, i32 0, i32 2
  %ptr5 = bitcast float* %ptr4 to <2 x float>*
  %ptr6 = getelementptr <2 x float>* %ptr5, i32 0, i32 %idx2
  %ret = load float* %ptr6
  ret float %ret
}

; Declaration of the memset intrinsic called by @test1.
declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)