aboutsummaryrefslogtreecommitdiff
path: root/sys/net/netmap.h
blob: 0ba1537b4e1ab594043534e1430a3ef8558699fd (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
/*
 * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 * 
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 * 
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the
 *      distribution.
 * 
 *   3. Neither the name of the authors nor the names of their contributors
 *      may be used to endorse or promote products derived from this
 *      software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
 * THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * $FreeBSD$
 * $Id: netmap.h 9753 2011-11-28 15:10:43Z luigi $
 *
 * This header contains the definitions of the constants and the
 * structures needed by the ``netmap'' module, both kernel and
 * userspace.
 */

#ifndef _NET_NETMAP_H_
#define _NET_NETMAP_H_

/*
 * --- Netmap data structures ---
 *
 * The data structures used by netmap are shown below. Those in
 * capital letters are in an mmapp()ed area shared with userspace,
 * while others are private to the kernel.
 * Shared structures do not contain pointers but only relative
 * offsets, so that addressing is portable between kernel and userspace.
 *
 * The 'softc' of each interface is extended with a struct netmap_adapter
 * containing information to support netmap operation. In addition to
 * the fixed fields, it has two pointers to reach the arrays of
 * 'struct netmap_kring' which in turn reaches the various
 * struct netmap_ring, shared with userspace.


 softc
+----------------+
| standard fields|
| if_pspare[0] ----------+
+----------------+       |
                         |
+----------------+<------+
|(netmap_adapter)|
|                |                             netmap_kring
| tx_rings *--------------------------------->+-------------+
|                |       netmap_kring         | ring *---------> ...
| rx_rings *---------->+--------------+       | nr_hwcur    |
+----------------+     | ring    *-------+    | nr_hwavail  |
                       | nr_hwcur     |  |    | selinfo     |
                       | nr_hwavail   |  |    +-------------+
                       | selinfo      |  |    |     ...     |
                       +--------------+  |  (na_num_rings+1 entries)
                       |    ....      |  |    |             |
                    (na_num_rings+1 entries)  +-------------+
                       |              |  |
                       +--------------+  |
                                         |      NETMAP_RING
                                         +---->+-------------+
                                             / | cur         |
   NETMAP_IF  (nifp, one per file desc.)    /  | avail       |
    +---------------+                      /   | buf_ofs     |
    | ni_num_queues |                     /    +=============+
    |               |                    /     | buf_idx     | slot[0]
    |               |                   /      | len, flags  |
    |               |                  /       +-------------+
    +===============+                 /        | buf_idx     | slot[1]
    | txring_ofs[0] | (rel.to nifp)--'         | len, flags  |
    | txring_ofs[1] |                          +-------------+
  (num_rings+1 entries)                     (nr_num_slots entries)
    | txring_ofs[n] |                          | buf_idx     | slot[n-1]
    +---------------+                          | len, flags  |
    | rxring_ofs[0] |                          +-------------+
    | rxring_ofs[1] |
  (num_rings+1 entries)
    | txring_ofs[n] |
    +---------------+

 * The NETMAP_RING is the shadow ring that mirrors the NIC rings.
 * Each slot has the index of a buffer, its length and some flags.
 * In user space, the buffer address is computed as
 *	(char *)ring + buf_ofs + index*MAX_BUF_SIZE
 * In the kernel, buffers do not necessarily need to be contiguous,
 * and the virtual and physical addresses are derived through
 * a lookup table. When userspace wants to use a different buffer
 * in a location, it must set the NS_BUF_CHANGED flag to make
 * sure that the kernel recomputes updates the hardware ring and
 * other fields (bus_dmamap, etc.) as needed.
 *
 * Normally the driver is not requested to report the result of
 * transmissions (this can dramatically speed up operation).
 * However the user may request to report completion by setting
 * NS_REPORT.
 */
struct netmap_slot {
	uint32_t buf_idx; /* buffer index */
	uint16_t len;	/* packet length, to be copied to/from the hw ring */
	uint16_t flags;	/* buf changed, etc. */
#define	NS_BUF_CHANGED	0x0001	/* must resync the map, buffer changed */
#define	NS_REPORT	0x0002	/* ask the hardware to report results
				 * e.g. by generating an interrupt
				 */
};

/*
 * Netmap representation of a TX or RX ring (also known as "queue").
 * This is a queue implemented as a fixed-size circular array.
 * At the software level, two fields are important: avail and cur.
 *
 * In TX rings:
 *	avail	indicates the number of slots available for transmission.
 *		It is decremented by the application when it appends a
 *		packet, and set to nr_hwavail (see below) on a
 *		NIOCTXSYNC to reflect the actual state of the queue
 *		(keeping track of completed transmissions).
 *	cur	indicates the empty slot to use for the next packet
 *		to send (i.e. the "tail" of the queue).
 *		It is incremented by the application.
 *
 *   The kernel side of netmap uses two additional fields in its own
 *   private ring structure, netmap_kring:
 *	nr_hwcur is a copy of nr_cur on an NIOCTXSYNC.
 *	nr_hwavail is the number of slots known as available by the
 *		hardware. It is updated on an INTR (inc by the
 *		number of packets sent) and on a NIOCTXSYNC
 *		(decrease by nr_cur - nr_hwcur)
 *		A special case, nr_hwavail is -1 if the transmit
 *		side is idle (no pending transmits).
 *
 * In RX rings:
 *	avail	is the number of packets available (possibly 0).
 *		It is decremented by the software when it consumes
 *		a packet, and set to nr_hwavail on a NIOCRXSYNC
 *	cur	indicates the first slot that contains a packet
 *		(the "head" of the queue).
 *		It is incremented by the software when it consumes
 *		a packet.
 *
 *   The kernel side of netmap uses two additional fields in the kring:
 *	nr_hwcur is a copy of nr_cur on an NIOCRXSYNC
 *	nr_hwavail is the number of packets available. It is updated
 *		on INTR (inc by the number of new packets arrived)
 *		and on NIOCRXSYNC (decreased by nr_cur - nr_hwcur).
 *
 * DATA OWNERSHIP/LOCKING:
 *	The netmap_ring is owned by the user program and it is only
 *	accessed or modified in the upper half of the kernel during
 *	a system call.
 *
 *	The netmap_kring is only modified by the upper half of the kernel.
 */
struct netmap_ring {
	/*
	 * nr_buf_base_ofs is meant to be used through macros.
	 * It contains the offset of the buffer region from this
	 * descriptor.
	 */
	const ssize_t	buf_ofs;
	const uint32_t	num_slots;	/* number of slots in the ring. */
	uint32_t	avail;		/* number of usable slots */
	uint32_t	cur;		/* 'current' r/w position */

	const uint16_t	nr_buf_size;
	uint16_t	flags;
#define	NR_TIMESTAMP	0x0002		/* set timestamp on *sync() */

	struct timeval	ts;		/* time of last *sync() */

	/* the slots follow. This struct has variable size */
	struct netmap_slot slot[0]; /* array of slots. */
};


/*
 * Netmap representation of an interface and its queue(s).
 * There is one netmap_if for each file descriptor on which we want
 * to select/poll.  We assume that on each interface has the same number
 * of receive and transmit queues.
 * select/poll operates on one or all pairs depending on the value of
 * nmr_queueid passed on the ioctl.
 */
struct netmap_if {
	char ni_name[IFNAMSIZ]; /* name of the interface. */
	const u_int ni_version;	/* API version, currently unused */
	const u_int ni_num_queues; /* number of queue pairs (TX/RX). */
	const u_int ni_rx_queues;  /* if zero, use ni_num_queues */
	/*
	 * the following array contains the offset of the
	 * each netmap ring from this structure. The first num_queues+1
	 * refer to the tx rings, the next n+1 refer to the rx rings.
	 * The area is filled up by the kernel on NIOCREG,
	 * and then only read by userspace code.
	 * entries 0..ni_num_queues-1 indicate the hardware queues,
	 * entry ni_num_queues is the queue from/to the stack.
	 */
	const ssize_t	ring_ofs[0];
};

#ifndef IFCAP_NETMAP	/* this should go in net/if.h */
#define	IFCAP_NETMAP	0x100000
#endif

#ifndef NIOCREGIF	
/*
 * ioctl names and related fields
 *
 * NIOCGINFO takes a struct ifreq, the interface name is the input,
 *	the outputs are number of queues and number of descriptor
 *	for each queue (useful to set number of threads etc.).
 *
 * NIOCREGIF takes an interface name within a struct ifreq,
 *	and activates netmap mode on the interface (if possible).
 *
 * NIOCUNREGIF unregisters the interface associated to the fd.
 *
 * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
 *	whose identity is set in NIOCREGIF through nr_ringid
 */

/*
 * struct nmreq overlays a struct ifreq
 */
struct nmreq {
	char		nr_name[IFNAMSIZ];
	uint32_t	nr_version;	/* API version (unused) */
	uint32_t	nr_offset;	/* nifp offset in the shared region */
	uint32_t	nr_memsize;	/* size of the shared region */
	uint32_t	nr_numslots;	/* descriptors per queue */
	uint16_t	nr_numrings;
	uint16_t	nr_ringid;	/* ring(s) we care about */
#define NETMAP_HW_RING	0x4000		/* low bits indicate one hw ring */
#define NETMAP_SW_RING	0x2000		/* we process the sw ring */
#define NETMAP_NO_TX_POLL	0x1000	/* no gratuitous txsync on poll */
#define NETMAP_RING_MASK 0xfff		/* the ring number */
};

#define NIOCGINFO	_IOWR('i', 145, struct nmreq) /* return IF info */
#define NIOCREGIF	_IOWR('i', 146, struct nmreq) /* interface register */
#define NIOCUNREGIF	_IO('i', 147) /* interface unregister */
#define NIOCTXSYNC	_IO('i', 148) /* sync tx queues */
#define NIOCRXSYNC	_IO('i', 149) /* sync rx queues */
#endif /* !NIOCREGIF */

#endif /* _NET_NETMAP_H_ */