aboutsummaryrefslogtreecommitdiff
path: root/share/man/man9/epoch.9
blob: 8a5008a6b238041b78f3648e9e680781d1262358 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
.\"
.\" Copyright (C) 2018 Matthew Macy <mmacy@FreeBSD.org>.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\"    notice(s), this list of conditions and the following disclaimer as
.\"    the first lines of this file unmodified other than the possible
.\"    addition of one or more copyright notices.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\"    notice(s), this list of conditions and the following disclaimer in the
.\"    documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
.\" EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
.\" WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
.\" DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
.\" DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
.\" (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
.\" SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
.\" CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
.\" DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd April 30, 2020
.Dt EPOCH 9
.Os
.Sh NAME
.Nm epoch ,
.Nm epoch_context ,
.Nm epoch_alloc ,
.Nm epoch_free ,
.Nm epoch_enter ,
.Nm epoch_exit ,
.Nm epoch_wait ,
.Nm epoch_enter_preempt ,
.Nm epoch_exit_preempt ,
.Nm epoch_wait_preempt ,
.Nm epoch_call ,
.Nm epoch_drain_callbacks ,
.Nm in_epoch ,
.Nm in_epoch_verbose
.Nd kernel epoch based reclamation
.Sh SYNOPSIS
.In sys/param.h
.In sys/proc.h
.In sys/epoch.h
.\" Types
.Bd -literal
struct epoch;		/* Opaque */
.Ed
.Vt typedef "struct epoch *epoch_t" ;
.Bd -literal
struct epoch_context {
	void	*data[2];
};
.Ed
.Vt typedef "struct epoch_context *epoch_context_t" ;
.Vt typedef "void epoch_callback_t(epoch_context_t)" ;
.Bd -literal
struct epoch_tracker;	/* Opaque */
.Ed
.Vt typedef "struct epoch_tracker *epoch_tracker_t" ;
.\" Declarations
.Ft epoch_t
.Fn epoch_alloc "const char *name" "int flags"
.Ft void
.Fn epoch_free "epoch_t epoch"
.Ft void
.Fn epoch_enter "epoch_t epoch"
.Ft void
.Fn epoch_exit "epoch_t epoch"
.Ft void
.Fn epoch_wait "epoch_t epoch"
.Ft void
.Fn epoch_enter_preempt "epoch_t epoch" "epoch_tracker_t et"
.Ft void
.Fn epoch_exit_preempt "epoch_t epoch" "epoch_tracker_t et"
.Ft void
.Fn epoch_wait_preempt "epoch_t epoch"
.Ft void
.Fn epoch_call "epoch_t epoch" "epoch_callback_t callback" "epoch_context_t ctx"
.Ft void
.Fn epoch_drain_callbacks "epoch_t epoch"
.Ft int
.Fn in_epoch "epoch_t epoch"
.Ft int
.Fn in_epoch_verbose "epoch_t epoch" "int dump_onfail"
.Sh DESCRIPTION
Epochs are used to guarantee liveness and immutability of data by
deferring reclamation and mutation until a grace period has elapsed.
Epochs do not have any lock ordering issues.
Entering and leaving an epoch section will never block.
.Pp
Epochs are allocated with
.Fn epoch_alloc .
The
.Fa name
argument is used for debugging convenience when the
.Cd EPOCH_TRACE
kernel option is configured.
By default, epochs do not allow preemption during sections.
By default mutexes cannot be held across
.Fn epoch_wait_preempt .
The
.Fa flags
specified are formed by
.Em OR Ns 'ing
the following values:
.Bl -tag -offset indent -width Ds
.It Dv EPOCH_LOCKED
Permit holding mutexes across
.Fn epoch_wait_preempt
(requires
.Dv EPOCH_PREEMPT ) .
When doing this one must be cautious of creating a situation where a deadlock
is possible.
.It Dv EPOCH_PREEMPT
The
.Vt epoch
will allow preemption during sections.
Only non-sleepable locks may be acquired during a preemptible epoch.
The functions
.Fn epoch_enter_preempt ,
.Fn epoch_exit_preempt ,
and
.Fn epoch_wait_preempt
must be used in place of
.Fn epoch_enter ,
.Fn epoch_exit ,
and
.Fn epoch_wait ,
respectively.
.El
.Pp
.Vt epoch Ns s
are freed with
.Fn epoch_free .
.Pp
Threads indicate the start of an epoch critical section by calling
.Fn epoch_enter
(or
.Fn epoch_enter_preempt
for preemptible epochs).
Threads call
.Fn epoch_exit
(or
.Fn epoch_exit_preempt
for preemptible epochs)
to indicate the end of a critical section.
.Vt struct epoch_tracker Ns s
are stack objects whose pointers are passed to
.Fn epoch_enter_preempt
and
.Fn epoch_exit_preempt
(much like
.Vt struct rm_priotracker ) .
.Pp
Threads can defer work until a grace period has expired since any thread has
entered the epoch either synchronously or asynchronously.
.Fn epoch_call
defers work asynchronously by invoking the provided
.Fa callback
at a later time.
.Fn epoch_wait
(or
.Fn epoch_wait_preempt )
blocks the current thread until the grace period has expired and the work can be
done safely.
.Pp
Default, non-preemptible epoch wait
.Fn ( epoch_wait )
is guaranteed to have much shorter completion times relative to
preemptible epoch wait
.Fn ( epoch_wait_preempt ) .
(In the default type, no thread in an epoch section will be preempted
before completing its section.)
.Pp
INVARIANTS can assert that a thread is in an epoch by using
.Fn in_epoch .
.Fn in_epoch "epoch"
is equivalent to invoking
.Fn in_epoch_verbose "epoch" "0" .
If
.Cd EPOCH_TRACE
is enabled,
.Fn in_epoch_verbose "epoch" "1"
provides additional verbose debugging information.
.Pp
The epoch API currently does not support sleeping in epoch_preempt sections.
A caller should never call
.Fn epoch_wait
in the middle of an epoch section for the same epoch, as this will lead to a
deadlock.
.Pp
The
.Fn epoch_drain_callbacks
function is used to drain all pending callbacks scheduled by prior
.Fn epoch_call
invocations on the same epoch.
This function is useful when there are shared memory structure(s)
referred to by the epoch callback(s) which are not refcounted and are
rarely freed.
The typical place for calling this function is right before freeing or
invalidating the shared resource(s) used by the epoch callback(s).
This function can sleep and is not optimized for performance.
.Sh RETURN VALUES
.Fn in_epoch curepoch
will return 1 if curthread is in curepoch, 0 otherwise.
.Sh EXAMPLES
Async free example:
Thread 1:
.Bd -literal
int
in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
    struct ucred *cred)
{
    /* ... */
    epoch_enter(net_epoch);
    CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
        sa = ifa->ifa_addr;
	if (sa->sa_family != AF_INET)
	    continue;
	sin = (struct sockaddr_in *)sa;
	if (prison_check_ip4(cred, &sin->sin_addr) == 0) {
	     ia = (struct in_ifaddr *)ifa;
	     break;
	}
    }
    epoch_exit(net_epoch);
    /* ... */
}
.Ed
Thread 2:
.Bd -literal
void
ifa_free(struct ifaddr *ifa)
{

    if (refcount_release(&ifa->ifa_refcnt))
        epoch_call(net_epoch, ifa_destroy, &ifa->ifa_epoch_ctx);
}

void
if_purgeaddrs(struct ifnet *ifp)
{

    /* .... */
    IF_ADDR_WLOCK(ifp);
    CK_STAILQ_REMOVE(&ifp->if_addrhead, ifa, ifaddr, ifa_link);
    IF_ADDR_WUNLOCK(ifp);
    ifa_free(ifa);
}
.Ed
.Pp
Thread 1 traverses the ifaddr list in an epoch.
Thread 2 unlinks it with the corresponding epoch-safe macro, marks it as
logically free, and then defers deletion.
More general mutation or a synchronous
free would have to follow a call to
.Fn epoch_wait .
.Sh NOTES
The
.Nm
kernel programming interface is under development and is subject to change.
.Sh SEE ALSO
.Xr callout 9 ,
.Xr locking 9 ,
.Xr mtx_pool 9 ,
.Xr mutex 9 ,
.Xr rwlock 9 ,
.Xr sema 9 ,
.Xr sleep 9 ,
.Xr sx 9
.Sh HISTORY
The
.Nm
framework first appeared in
.Fx 11.0 .
.Sh CAVEATS
One must be cautious when using
.Fn epoch_wait_preempt .
Threads are pinned during epoch sections, so if a thread in a section is then
preempted by a higher priority compute bound thread on that CPU, it can be
prevented from leaving the section indefinitely.
.Pp
Epochs are not a straight replacement for read locks.
Callers must use safe list and tailq traversal routines in an epoch (see ck_queue).
When modifying a list referenced from an epoch section safe removal
routines must be used and the caller can no longer modify a list entry
in place.
An item to be modified must be handled with copy on write
and frees must be deferred until after a grace period has elapsed.