// Origin: contrib/llvm-project/compiler-rt/lib/builtins/aarch64/lse.S
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "assembly.h"

// Out-of-line LSE atomics helpers. Ported from libgcc library.
// N = {1, 2, 4, 8}
// M = {1, 2, 4, 8, 16}
// ORDER = {'relax', 'acq', 'rel', 'acq_rel'}
// Routines implemented:
//
//  iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
//  iN __aarch64_swpN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldaddN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldclrN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldeorN_ORDER(iN val, iN *ptr)
//  iN __aarch64_ldsetN_ORDER(iN val, iN *ptr)
//
// Routines may modify only the temporary registers tmp0, tmp1 and tmp2
// (x16, x17 and x15), the return value (x0, plus x1 for the 16-byte cas)
// and the condition flags.

#ifdef __aarch64__

// Select the architecture level handed to the assembler. The LSE extension is
// only enabled when HAS_ASM_LSE is defined (presumably set by the build when
// the assembler accepts LSE mnemonics -- confirm against the build files).
// Without it, the LSE instructions below are emitted as raw .inst encodings.
#ifndef HAS_ASM_LSE
.arch armv8-a
#else
.arch armv8-a+lse
#endif

// Apply HIDDEN() (from assembly.h) to the flag that JUMP_IF_NOT_LSE reads.
// Apple targets spell the symbol with one extra leading underscore.
#ifdef __APPLE__
HIDDEN(___aarch64_have_lse_atomics)
#else
HIDDEN(__aarch64_have_lse_atomics)
#endif

// Generate mnemonics for
// L_cas:                                 SIZE: 1,2,4,8,16 MODEL: 1,2,3,4
// L_swp L_ldadd L_ldclr L_ldeor L_ldset: SIZE: 1,2,4,8    MODEL: 1,2,3,4

// Per-SIZE settings (SIZE is the operand width in bytes):
//   S    suffix appended to the exclusive load/store and LSE mnemonics
//   UXT  instruction used by the cas fallback to copy the expected value;
//        it zero-extends for 1- and 2-byte operands and is a plain mov otherwise
//   B    size bits added to the raw .inst encodings of cas/swp/ld<op>.
//        B is intentionally left undefined for SIZE == 16: the only 16-byte
//        routine is cas, whose casp encoding does not use B.
#if SIZE == 1
#define S b
#define UXT uxtb
#define B 0x00000000
#elif SIZE == 2
#define S h
#define UXT uxth
#define B 0x40000000
#elif SIZE == 4 || SIZE == 8 || SIZE == 16
#define S
#define UXT mov
#if SIZE == 4
#define B 0x80000000
#elif SIZE == 8
#define B 0xc0000000
#endif
#else
#error "SIZE must be defined as 1, 2, 4, 8 or 16"
#endif // SIZE

// Per-MODEL (memory order) settings:
//   SUFF  suffix of the generated function name
//   A, L  acquire / release letters spliced into the instruction mnemonics
//         (empty when that ordering is not requested)
//   M     ordering bits added to the raw cas/casp .inst encodings
//   N     ordering bits added to the raw swp/ld<op> .inst encodings
#if MODEL == 1
#define SUFF _relax
#define A
#define L
#define M 0x000000
#define N 0x000000
#elif MODEL == 2
#define SUFF _acq
#define A a
#define L
#define M 0x400000
#define N 0x800000
#elif MODEL == 3
#define SUFF _rel
#define A
#define L l
#define M 0x008000
#define N 0x400000
#elif MODEL == 4
#define SUFF _acq_rel
#define A a
#define L l
#define M 0x408000
#define N 0xc00000
#else
#error "MODEL must be defined as 1 (relax), 2 (acq), 3 (rel) or 4 (acq_rel)"
#endif // MODEL

// Register-name helpers. w(R) and x(R) paste the 32-bit / 64-bit prefix onto
// register number R; s(R) selects the view that matches the operand SIZE.
// (The parameter is called R so it cannot be mistaken for the ordering-bits
// macro N.)
#define w(R) GLUE2(w, R)
#define x(R) GLUE2(x, R)
#if SIZE >= 8
#define s(R) x(R)
#else
#define s(R) w(R)
#endif

// Function name and exclusive load/store mnemonics for the selected
// SIZE and MODEL.
#define NAME(STEM) GLUE4(__aarch64_, STEM, SIZE, SUFF)
#define LDXR GLUE4(ld, A, xr, S)
#define STXR GLUE4(st, L, xr, S)

// Scratch register numbers, always used through w()/x()/s().
#define tmp0 16
#define tmp1 17
#define tmp2 15

// JUMP_IF_NOT_LSE label
// Loads the byte-sized flag __aarch64_have_lse_atomics (page address via adrp,
// low 12 bits folded into the ldrb) and branches to label when it is zero.
// Falls through when the flag is non-zero. Clobbers tmp0 only.
.macro JUMP_IF_NOT_LSE label
#ifdef __APPLE__
        adrp    x(tmp0), ___aarch64_have_lse_atomics@page
        ldrb    w(tmp0), [x(tmp0), ___aarch64_have_lse_atomics@pageoff]
#else
        adrp    x(tmp0), __aarch64_have_lse_atomics
        ldrb    w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]
#endif
        cbz     w(tmp0), \label
.endm

#ifdef L_cas
// iM __aarch64_casM_ORDER(iM expected, iM desired, iM *ptr)
//
// Compare-and-swap. When the runtime flag reports LSE support a single
// cas/casp instruction is used; otherwise control goes to label 8, which
// implements the same operation with a load-exclusive/store-exclusive loop.
// The value loaded from *ptr is returned in the register(s) that carried
// expected on entry.
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(cas))
        JUMP_IF_NOT_LSE 8f
#if SIZE < 16
// 1/2/4/8-byte variant: expected in s(0), desired in s(1), ptr in x2.
// With an LSE-capable assembler the mnemonic is spelled out; otherwise the
// instruction is emitted as a raw encoding built from the B and M bits.
#ifdef HAS_ASM_LSE
#define CAS GLUE4(cas, A, L, S) s(0), s(1), [x2]
#else
#define CAS .inst 0x08a07c41 + B + M
#endif
        CAS    // s(0), s(1), [x2]
        ret
8:
        // Fallback. Keep a copy of expected in tmp0 because s(0) is reused
        // for the loaded value. For 1- and 2-byte operands UXT zero-extends
        // the copy so the full-register compare below is exact.
        UXT    s(tmp0), s(0)
0:
        LDXR   s(0), [x2]
        cmp    s(0), s(tmp0)
        // Mismatch: return the loaded value without storing.
        bne    1f
        // Match: try to store desired; tmp1 is non-zero if the exclusive
        // store failed, in which case the whole sequence is retried.
        STXR   w(tmp1), s(1), [x2]
        cbnz   w(tmp1), 0b
1:
        ret
#else
// 16-byte variant: expected in x0:x1, desired in x2:x3, ptr in x4.
#define LDXP GLUE3(ld, A, xp)
#define STXP GLUE3(st, L, xp)
#ifdef HAS_ASM_LSE
#define CASP GLUE3(casp, A, L)  x0, x1, x2, x3, [x4]
#else
#define CASP .inst 0x48207c82 + M
#endif

        CASP   // x0, x1, x2, x3, [x4]
        ret
8:
        // Fallback. Save both halves of expected; x0:x1 receive the loaded
        // pair.
        mov    x(tmp0), x0
        mov    x(tmp1), x1
0:
        LDXP   x0, x1, [x4]
        cmp    x0, x(tmp0)
        // Compare the second half only if the first half matched; otherwise
        // the flags are forced to #0, which reads as "ne" below.
        ccmp   x1, x(tmp1), #0, eq
        bne    1f
        // Both halves matched: try to store desired, retry on failure.
        STXP   w(tmp2), x2, x3, [x4]
        cbnz   w(tmp2), 0b
1:
        ret
#endif
END_COMPILERRT_OUTLINE_FUNCTION(NAME(cas))
#endif // L_cas

#ifdef L_swp
// iN __aarch64_swpN_ORDER(iN val, iN *ptr)
//
// Atomic exchange: stores val (s(0)) to *ptr (x1) and returns the previous
// contents of *ptr in s(0). Uses a single LSE swp when the runtime flag
// reports LSE support, otherwise a load-exclusive/store-exclusive loop.
//
// Only SIZE 1, 2, 4 and 8 are valid here. Reject SIZE 16 explicitly: the
// SIZE settings accept it (for cas), but there is no 16-byte swp. Without
// this check an LSE-capable assembler would silently produce an 8-byte swap
// under a 16-byte name, and the raw-encoding path would reference the
// undefined B.
#if SIZE == 16
#error "swp is only provided for SIZE 1, 2, 4 and 8"
#endif
// With an LSE-capable assembler the mnemonic is spelled out; otherwise the
// instruction is emitted as a raw encoding built from the B and N bits.
#ifdef HAS_ASM_LSE
#define SWP GLUE4(swp, A, L, S)  s(0), s(0), [x1]
#else
#define SWP .inst 0x38208020 + B + N
#endif
DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(swp))
        JUMP_IF_NOT_LSE 8f
        SWP    // s(0), s(0), [x1]
        ret
8:
        // Fallback. Move val out of s(0), which will receive the old value.
        mov    s(tmp0), s(0)
0:
        LDXR   s(0), [x1]
        // tmp1 is non-zero if the exclusive store failed; retry in that case.
        STXR   w(tmp1), s(tmp0), [x1]
        cbnz   w(tmp1), 0b
        ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(swp))
#endif // L_swp

#if defined(L_ldadd) || defined(L_ldclr) ||                                    \
    defined(L_ldeor) || defined(L_ldset)

// iN __aarch64_ld<op>N_ORDER(iN val, iN *ptr)    <op> = add, clr, eor, set
//
// Atomic fetch-and-modify: replaces *ptr (x1) with (*ptr OP val) and returns
// the previous contents of *ptr in s(0); val arrives in s(0). Uses a single
// LSE ld<op> when the runtime flag reports LSE support, otherwise a
// load-exclusive/store-exclusive loop.
//
// Only SIZE 1, 2, 4 and 8 are valid here. Reject SIZE 16 explicitly: the
// SIZE settings accept it (for cas), but there is no 16-byte ld<op>. Without
// this check an LSE-capable assembler would silently produce an 8-byte
// operation under a 16-byte name, and the raw-encoding path would reference
// the undefined B.
#if SIZE == 16
#error "ldadd/ldclr/ldeor/ldset are only provided for SIZE 1, 2, 4 and 8"
#endif

// Per-operation settings:
//   LDNM  stem of the LSE mnemonic and of the function name
//   OP    instruction that computes the new value in the fallback loop
//         (bic for ldclr: old AND NOT val)
//   OPN   operation bits added to the raw .inst encoding
#ifdef L_ldadd
#define LDNM ldadd
#define OP add
#define OPN 0x0000
#elif defined(L_ldclr)
#define LDNM ldclr
#define OP bic
#define OPN 0x1000
#elif defined(L_ldeor)
#define LDNM ldeor
#define OP eor
#define OPN 0x2000
#elif defined(L_ldset)
#define LDNM ldset
#define OP orr
#define OPN 0x3000
#else
#error "one of L_ldadd, L_ldclr, L_ldeor or L_ldset must be defined"
#endif

// With an LSE-capable assembler the mnemonic is spelled out; otherwise the
// instruction is emitted as a raw encoding built from the OPN, B and N bits.
#ifdef HAS_ASM_LSE
#define LDOP GLUE4(LDNM, A, L, S) s(0), s(0), [x1]
#else
#define LDOP .inst 0x38200020 + OPN + B + N
#endif

DEFINE_COMPILERRT_OUTLINE_FUNCTION_UNMANGLED(NAME(LDNM))
        JUMP_IF_NOT_LSE 8f
        LDOP // s(0), s(0), [x1]
        ret
8:
        // Fallback. Move val out of s(0), which will receive the old value.
        mov    s(tmp0), s(0)
0:
        LDXR   s(0), [x1]
        // New value goes to tmp1 so the old value in s(0) is preserved for
        // the return.
        OP     s(tmp1), s(0), s(tmp0)
        // tmp2 is non-zero if the exclusive store failed; retry in that case.
        STXR   w(tmp2), s(tmp1), [x1]
        cbnz   w(tmp2), 0b
        ret
END_COMPILERRT_OUTLINE_FUNCTION(NAME(LDNM))
#endif // L_ldadd L_ldclr L_ldeor L_ldset

// NO_EXEC_STACK_DIRECTIVE is provided by assembly.h; presumably it marks this
// object as not requiring an executable stack (TODO confirm in assembly.h).
NO_EXEC_STACK_DIRECTIVE

// GNU property note for BTI and PAC
GNU_PROPERTY_BTI_PAC

#endif // __aarch64__