aboutsummaryrefslogtreecommitdiff
path: root/sys/sys/smp.h
blob: cee1199015a7efd1ca0a4d28b3b4de77cd69f184 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
/*-
 * SPDX-License-Identifier: Beerware
 *
 * ----------------------------------------------------------------------------
 * "THE BEER-WARE LICENSE" (Revision 42):
 * <phk@FreeBSD.org> wrote this file.  As long as you retain this notice you
 * can do whatever you want with this stuff. If we meet some day, and you think
 * this stuff is worth it, you can buy me a beer in return.   Poul-Henning Kamp
 * ----------------------------------------------------------------------------
 *
 * $FreeBSD$
 */

#ifndef _SYS_SMP_H_
#define _SYS_SMP_H_

#ifdef _KERNEL

#ifndef LOCORE

#include <sys/cpuset.h>
#include <sys/queue.h>

/*
 * Types of nodes in the topological tree.
 */
typedef enum {
	/* No node has this type; can be used in topo API calls. */
	TOPO_TYPE_DUMMY,
	/* Processing unit aka computing unit aka logical CPU. */
	TOPO_TYPE_PU,
	/* Physical subdivision of a package. */
	TOPO_TYPE_CORE,
	/* CPU L1/L2/L3 cache. */
	TOPO_TYPE_CACHE,
	/* Package aka chip, equivalent to socket. */
	TOPO_TYPE_PKG,
	/* NUMA node. */
	TOPO_TYPE_NODE,
	/* Other logical or physical grouping of PUs. */
	/* E.g. PUs on the same dye, or PUs sharing an FPU. */
	TOPO_TYPE_GROUP,
	/* The whole system. */
	TOPO_TYPE_SYSTEM
} topo_node_type;

/* Hardware indenitifier of a topology component. */
typedef	unsigned int hwid_t;
/* Logical CPU idenitifier. */
typedef	int cpuid_t;

/* A node in the topology. */
struct topo_node {
	struct topo_node			*parent;
	TAILQ_HEAD(topo_children, topo_node)	children;
	TAILQ_ENTRY(topo_node)			siblings;
	cpuset_t				cpuset;
	topo_node_type				type;
	uintptr_t				subtype;
	hwid_t					hwid;
	cpuid_t					id;
	int					nchildren;
	int					cpu_count;
};

/*
 * Scheduling topology of a NUMA or SMP system.
 *
 * The top level topology is an array of pointers to groups.  Each group
 * contains a bitmask of cpus in its group or subgroups.  It may also
 * contain a pointer to an array of child groups.
 *
 * The bitmasks at non leaf groups may be used by consumers who support
 * a smaller depth than the hardware provides.
 *
 * The topology may be omitted by systems where all CPUs are equal.
 */

struct cpu_group {
	struct cpu_group *cg_parent;	/* Our parent group. */
	struct cpu_group *cg_child;	/* Optional children groups. */
	cpuset_t	cg_mask;	/* Mask of cpus in this group. */
	int32_t		cg_count;	/* Count of cpus in this group. */
	int32_t		cg_first;	/* First cpu in this group. */
	int32_t		cg_last;	/* Last cpu in this group. */
	int16_t		cg_children;	/* Number of children groups. */
	int8_t		cg_level;	/* Shared cache level. */
	int8_t		cg_flags;	/* Traversal modifiers. */
};

typedef struct cpu_group *cpu_group_t;

/*
 * Defines common resources for CPUs in the group.  The highest level
 * resource should be used when multiple are shared.
 */
#define	CG_SHARE_NONE	0
#define	CG_SHARE_L1	1
#define	CG_SHARE_L2	2
#define	CG_SHARE_L3	3

#define MAX_CACHE_LEVELS	CG_SHARE_L3

/*
 * Behavior modifiers for load balancing and affinity.
 */
#define	CG_FLAG_HTT	0x01		/* Schedule the alternate core last. */
#define	CG_FLAG_SMT	0x02		/* New age htt, less crippled. */
#define	CG_FLAG_THREAD	(CG_FLAG_HTT | CG_FLAG_SMT)	/* Any threading. */

/*
 * Convenience routines for building and traversing topologies.
 */
#ifdef SMP
void topo_init_node(struct topo_node *node);
void topo_init_root(struct topo_node *root);
struct topo_node * topo_add_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
struct topo_node * topo_find_node_by_hwid(struct topo_node *parent, int hwid,
    topo_node_type type, uintptr_t subtype);
void topo_promote_child(struct topo_node *child);
struct topo_node * topo_next_node(struct topo_node *top,
    struct topo_node *node);
struct topo_node * topo_next_nonchild_node(struct topo_node *top,
    struct topo_node *node);
void topo_set_pu_id(struct topo_node *node, cpuid_t id);

enum topo_level {
	TOPO_LEVEL_PKG = 0,
	/*
	 * Some systems have useful sub-package core organizations.  On these,
	 * a package has one or more subgroups.  Each subgroup contains one or
	 * more cache groups (cores that share a last level cache).
	 */
	TOPO_LEVEL_GROUP,
	TOPO_LEVEL_CACHEGROUP,
	TOPO_LEVEL_CORE,
	TOPO_LEVEL_THREAD,
	TOPO_LEVEL_COUNT	/* Must be last */
};
struct topo_analysis {
	int entities[TOPO_LEVEL_COUNT];
};
int topo_analyze(struct topo_node *topo_root, int all,
    struct topo_analysis *results);

#define	TOPO_FOREACH(i, root)	\
	for (i = root; i != NULL; i = topo_next_node(root, i))

struct cpu_group *smp_topo(void);
struct cpu_group *smp_topo_alloc(u_int count);
struct cpu_group *smp_topo_none(void);
struct cpu_group *smp_topo_1level(int l1share, int l1count, int l1flags);
struct cpu_group *smp_topo_2level(int l2share, int l2count, int l1share,
    int l1count, int l1flags);
struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);

extern void (*cpustop_restartfunc)(void);
/* The suspend/resume cpusets are x86 only, but minimize ifdefs. */
extern volatile cpuset_t resuming_cpus;	/* woken up cpus in suspend pen */
extern volatile cpuset_t started_cpus;	/* cpus to let out of stop pen */
extern volatile cpuset_t stopped_cpus;	/* cpus in stop pen */
extern volatile cpuset_t suspended_cpus; /* cpus [near] sleeping in susp pen */
extern volatile cpuset_t toresume_cpus;	/* cpus to let out of suspend pen */
extern cpuset_t hlt_cpus_mask;		/* XXX 'mask' is detail in old impl */
extern cpuset_t logical_cpus_mask;
#endif /* SMP */

extern u_int mp_maxid;
extern int mp_maxcpus;
extern int mp_ncores;
extern int mp_ncpus;
extern int smp_cpus;
extern volatile int smp_started;
extern int smp_threads_per_core;

extern cpuset_t all_cpus;
extern cpuset_t cpuset_domain[MAXMEMDOM]; 	/* CPUs in each NUMA domain. */

/*
 * Macro allowing us to determine whether a CPU is absent at any given
 * time, thus permitting us to configure sparse maps of cpuid-dependent
 * (per-CPU) structures.
 */
#define	CPU_ABSENT(x_cpu)	(!CPU_ISSET(x_cpu, &all_cpus))

/*
 * Macros to iterate over non-absent CPUs.  CPU_FOREACH() takes an
 * integer iterator and iterates over the available set of CPUs.
 * CPU_FIRST() returns the id of the first non-absent CPU.  CPU_NEXT()
 * returns the id of the next non-absent CPU.  It will wrap back to
 * CPU_FIRST() once the end of the list is reached.  The iterators are
 * currently implemented via inline functions.
 */
#define	CPU_FOREACH(i)							\
	for ((i) = 0; (i) <= mp_maxid; (i)++)				\
		if (!CPU_ABSENT((i)))

static __inline int
cpu_first(void)
{
	int i;

	for (i = 0;; i++)
		if (!CPU_ABSENT(i))
			return (i);
}

static __inline int
cpu_next(int i)
{

	for (;;) {
		i++;
		if (i > mp_maxid)
			i = 0;
		if (!CPU_ABSENT(i))
			return (i);
	}
}

#define	CPU_FIRST()	cpu_first()
#define	CPU_NEXT(i)	cpu_next((i))

#ifdef SMP
/*
 * Machine dependent functions used to initialize MP support.
 *
 * The cpu_mp_probe() should check to see if MP support is present and return
 * zero if it is not or non-zero if it is.  If MP support is present, then
 * cpu_mp_start() will be called so that MP can be enabled.  This function
 * should do things such as startup secondary processors.  It should also
 * setup mp_ncpus, all_cpus, and smp_cpus.  It should also ensure that
 * smp_started is initialized at the appropriate time.
 * Once cpu_mp_start() returns, machine independent MP startup code will be
 * executed and a simple message will be output to the console.  Finally,
 * cpu_mp_announce() will be called so that machine dependent messages about
 * the MP support may be output to the console if desired.
 *
 * The cpu_setmaxid() function is called very early during the boot process
 * so that the MD code may set mp_maxid to provide an upper bound on CPU IDs
 * that other subsystems may use.  If a platform is not able to determine
 * the exact maximum ID that early, then it may set mp_maxid to MAXCPU - 1.
 */
struct thread;

struct cpu_group *cpu_topo(void);
void	cpu_mp_announce(void);
int	cpu_mp_probe(void);
void	cpu_mp_setmaxid(void);
void	cpu_mp_start(void);

void	forward_signal(struct thread *);
int	restart_cpus(cpuset_t);
int	stop_cpus(cpuset_t);
int	stop_cpus_hard(cpuset_t);
#if defined(__amd64__) || defined(__i386__)
int	suspend_cpus(cpuset_t);
int	resume_cpus(cpuset_t);
#endif

void	smp_rendezvous_action(void);
extern	struct mtx smp_ipi_mtx;

#endif /* SMP */

int	quiesce_all_cpus(const char *, int);
int	quiesce_cpus(cpuset_t, const char *, int);
void	quiesce_all_critical(void);
void	cpus_fence_seq_cst(void);
void	smp_no_rendezvous_barrier(void *);
void	smp_rendezvous(void (*)(void *), 
		       void (*)(void *),
		       void (*)(void *),
		       void *arg);
void	smp_rendezvous_cpus(cpuset_t,
		       void (*)(void *), 
		       void (*)(void *),
		       void (*)(void *),
		       void *arg);

struct smp_rendezvous_cpus_retry_arg {
	cpuset_t cpus;
};
void	smp_rendezvous_cpus_retry(cpuset_t,
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *),
		       void (*)(void *, int),
		       struct smp_rendezvous_cpus_retry_arg *);

void	smp_rendezvous_cpus_done(struct smp_rendezvous_cpus_retry_arg *);

#endif /* !LOCORE */
#endif /* _KERNEL */
#endif /* _SYS_SMP_H_ */