about summary refs log tree commit diff
diff options
context:
space:
mode:
author Mark Johnston <markj@FreeBSD.org> 2023-11-22 19:10:27 +0000
committer Mark Johnston <markj@FreeBSD.org> 2023-11-22 20:18:46 +0000
commit be74aede49fb480792448bf563c5079998de7cbd (patch)
tree 700b5f7f32aa48b9faf850e782e08c05de14e621
parent 6332e0f1a4b34707654d6ae2cd3c1e8799970d0b (diff)
download src-be74aede49fb480792448bf563c5079998de7cbd.tar.gz
src-be74aede49fb480792448bf563c5079998de7cbd.zip
bhyve: Split backends into separate files
Currently the net_backend structure definition is private to net_backends.c, so all of the backend definitions are there. While adding a new backend to use libslirp, it was noted that this file is somewhat cluttered. Move the netmap and netgraph backends to their own files and clean up includes a bit.

No functional change intended.

Reviewed by: corvink, jhb
MFC after: 3 weeks
Sponsored by: Innovate UK
Differential Revision: https://reviews.freebsd.org/D42689
-rw-r--r-- usr.sbin/bhyve/Makefile 5
-rw-r--r-- usr.sbin/bhyve/net_backend_netgraph.c 191
-rw-r--r-- usr.sbin/bhyve/net_backend_netmap.c 384
-rw-r--r-- usr.sbin/bhyve/net_backends.c 661
-rw-r--r-- usr.sbin/bhyve/net_backends.h 7
-rw-r--r-- usr.sbin/bhyve/net_backends_priv.h 152
6 files changed, 756 insertions, 644 deletions
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile
index de8e87d2ad49..6ce7f6c7ba62 100644
--- a/usr.sbin/bhyve/Makefile
+++ b/usr.sbin/bhyve/Makefile
@@ -33,6 +33,7 @@ SRCS= \
iov.c \
mem.c \
mevent.c \
+ net_backend_netmap.c \
net_backends.c \
net_utils.c \
pci_emul.c \
@@ -92,8 +93,8 @@ CFLAGS+=-DINET
CFLAGS+=-DINET6
.endif
.if ${MK_NETGRAPH_SUPPORT} != "no"
-CFLAGS+=-DNETGRAPH
-LIBADD+= netgraph
+SRCS+= net_backend_netgraph.c
+LIBADD+= netgraph
.endif
.if ${MK_OPENSSL} == "no"
CFLAGS+=-DNO_OPENSSL
diff --git a/usr.sbin/bhyve/net_backend_netgraph.c b/usr.sbin/bhyve/net_backend_netgraph.c
new file mode 100644
index 000000000000..7d1659d611e3
--- /dev/null
+++ b/usr.sbin/bhyve/net_backend_netgraph.c
@@ -0,0 +1,191 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef WITHOUT_CAPSICUM
+#include <sys/capsicum.h>
+#endif
+#include <sys/socket.h>
+#include <sys/sysctl.h>
+
+#ifndef WITHOUT_CAPSICUM
+#include <capsicum_helpers.h>
+#endif
+#include <err.h>
+#include <netgraph.h>
+#include <string.h>
+#include <sysexits.h>
+#include <unistd.h>
+
+#include "config.h"
+#include "debug.h"
+#include "net_backends.h"
+#include "net_backends_priv.h"
+
+#define NG_SBUF_MAX_SIZE (4 * 1024 * 1024)
+
+static int
+ng_init(struct net_backend *be, const char *devname __unused,
+ nvlist_t *nvl, net_be_rxeof_t cb, void *param)
+{
+ struct tap_priv *p = NET_BE_PRIV(be);
+ struct ngm_connect ngc;
+ const char *value, *nodename;
+ int sbsz;
+ int ctrl_sock;
+ int flags;
+ unsigned long maxsbsz;
+ size_t msbsz;
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_t rights;
+#endif
+
+ if (cb == NULL) {
+ EPRINTLN("Netgraph backend requires non-NULL callback");
+ return (-1);
+ }
+
+ be->fd = -1;
+
+ memset(&ngc, 0, sizeof(ngc));
+
+ value = get_config_value_node(nvl, "path");
+ if (value == NULL) {
+ EPRINTLN("path must be provided");
+ return (-1);
+ }
+ strncpy(ngc.path, value, NG_PATHSIZ - 1);
+
+ value = get_config_value_node(nvl, "hook");
+ if (value == NULL)
+ value = "vmlink";
+ strncpy(ngc.ourhook, value, NG_HOOKSIZ - 1);
+
+ value = get_config_value_node(nvl, "peerhook");
+ if (value == NULL) {
+ EPRINTLN("peer hook must be provided");
+ return (-1);
+ }
+ strncpy(ngc.peerhook, value, NG_HOOKSIZ - 1);
+
+ nodename = get_config_value_node(nvl, "socket");
+ if (NgMkSockNode(nodename,
+ &ctrl_sock, &be->fd) < 0) {
+ EPRINTLN("can't get Netgraph sockets");
+ return (-1);
+ }
+
+ if (NgSendMsg(ctrl_sock, ".",
+ NGM_GENERIC_COOKIE,
+ NGM_CONNECT, &ngc, sizeof(ngc)) < 0) {
+ EPRINTLN("can't connect to node");
+ close(ctrl_sock);
+ goto error;
+ }
+
+ close(ctrl_sock);
+
+ flags = fcntl(be->fd, F_GETFL);
+
+ if (flags < 0) {
+ EPRINTLN("can't get socket flags");
+ goto error;
+ }
+
+ if (fcntl(be->fd, F_SETFL, flags | O_NONBLOCK) < 0) {
+ EPRINTLN("can't set O_NONBLOCK flag");
+ goto error;
+ }
+
+ /*
+ * The default ng_socket(4) buffer's size is too low.
+ * Calculate the minimum value between NG_SBUF_MAX_SIZE
+ * and kern.ipc.maxsockbuf.
+ */
+ msbsz = sizeof(maxsbsz);
+ if (sysctlbyname("kern.ipc.maxsockbuf", &maxsbsz, &msbsz,
+ NULL, 0) < 0) {
+ EPRINTLN("can't get 'kern.ipc.maxsockbuf' value");
+ goto error;
+ }
+
+ /*
+ * We can't set the socket buffer size to kern.ipc.maxsockbuf value,
+ * as it takes into account the mbuf(9) overhead.
+ */
+ maxsbsz = maxsbsz * MCLBYTES / (MSIZE + MCLBYTES);
+
+ sbsz = MIN(NG_SBUF_MAX_SIZE, maxsbsz);
+
+ if (setsockopt(be->fd, SOL_SOCKET, SO_SNDBUF, &sbsz,
+ sizeof(sbsz)) < 0) {
+ EPRINTLN("can't set TX buffer size");
+ goto error;
+ }
+
+ if (setsockopt(be->fd, SOL_SOCKET, SO_RCVBUF, &sbsz,
+ sizeof(sbsz)) < 0) {
+ EPRINTLN("can't set RX buffer size");
+ goto error;
+ }
+
+#ifndef WITHOUT_CAPSICUM
+ cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
+ if (caph_rights_limit(be->fd, &rights) == -1)
+ errx(EX_OSERR, "Unable to apply rights for sandbox");
+#endif
+
+ memset(p->bbuf, 0, sizeof(p->bbuf));
+ p->bbuflen = 0;
+
+ p->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
+ if (p->mevp == NULL) {
+ EPRINTLN("Could not register event");
+ goto error;
+ }
+
+ return (0);
+
+error:
+ tap_cleanup(be);
+ return (-1);
+}
+
+static struct net_backend ng_backend = {
+ .prefix = "netgraph",
+ .priv_size = sizeof(struct tap_priv),
+ .init = ng_init,
+ .cleanup = tap_cleanup,
+ .send = tap_send,
+ .peek_recvlen = tap_peek_recvlen,
+ .recv = tap_recv,
+ .recv_enable = tap_recv_enable,
+ .recv_disable = tap_recv_disable,
+ .get_cap = tap_get_cap,
+ .set_cap = tap_set_cap,
+};
+
+DATA_SET(net_backend_set, ng_backend);
diff --git a/usr.sbin/bhyve/net_backend_netmap.c b/usr.sbin/bhyve/net_backend_netmap.c
new file mode 100644
index 000000000000..5ba11b96797c
--- /dev/null
+++ b/usr.sbin/bhyve/net_backend_netmap.c
@@ -0,0 +1,384 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <net/if.h>
+#include <net/netmap.h>
+#include <net/netmap_virt.h>
+#define NETMAP_WITH_LIBS
+#include <net/netmap_user.h>
+
+#include <assert.h>
+
+#include "debug.h"
+#include "iov.h"
+#include "mevent.h"
+#include "net_backends.h"
+#include "net_backends_priv.h"
+
+/* The virtio-net features supported by netmap. */
+#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
+ VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
+ VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
+ VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
+
+struct netmap_priv {
+ char ifname[IFNAMSIZ];
+ struct nm_desc *nmd;
+ uint16_t memid;
+ struct netmap_ring *rx;
+ struct netmap_ring *tx;
+ struct mevent *mevp;
+ net_be_rxeof_t cb;
+ void *cb_param;
+};
+
+static void
+nmreq_init(struct nmreq *req, char *ifname)
+{
+
+ memset(req, 0, sizeof(*req));
+ strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
+ req->nr_version = NETMAP_API;
+}
+
+static int
+netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
+{
+ int err;
+ struct nmreq req;
+ struct netmap_priv *priv = NET_BE_PRIV(be);
+
+ nmreq_init(&req, priv->ifname);
+ req.nr_cmd = NETMAP_BDG_VNET_HDR;
+ req.nr_arg1 = vnet_hdr_len;
+ err = ioctl(be->fd, NIOCREGIF, &req);
+ if (err) {
+ EPRINTLN("Unable to set vnet header length %d", vnet_hdr_len);
+ return (err);
+ }
+
+ be->be_vnet_hdr_len = vnet_hdr_len;
+
+ return (0);
+}
+
+static int
+netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
+{
+ unsigned prev_hdr_len = be->be_vnet_hdr_len;
+ int ret;
+
+ if (vnet_hdr_len == prev_hdr_len) {
+ return (1);
+ }
+
+ ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
+ if (ret) {
+ return (0);
+ }
+
+ netmap_set_vnet_hdr_len(be, prev_hdr_len);
+
+ return (1);
+}
+
+static uint64_t
+netmap_get_cap(struct net_backend *be)
+{
+
+ return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
+ NETMAP_FEATURES : 0);
+}
+
+static int
+netmap_set_cap(struct net_backend *be, uint64_t features __unused,
+ unsigned vnet_hdr_len)
+{
+
+ return (netmap_set_vnet_hdr_len(be, vnet_hdr_len));
+}
+
+static int
+netmap_init(struct net_backend *be, const char *devname,
+ nvlist_t *nvl __unused, net_be_rxeof_t cb, void *param)
+{
+ struct netmap_priv *priv = NET_BE_PRIV(be);
+
+ strlcpy(priv->ifname, devname, sizeof(priv->ifname));
+ priv->ifname[sizeof(priv->ifname) - 1] = '\0';
+
+ priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
+ if (priv->nmd == NULL) {
+ EPRINTLN("Unable to nm_open(): interface '%s', errno (%s)",
+ devname, strerror(errno));
+ return (-1);
+ }
+
+ priv->memid = priv->nmd->req.nr_arg2;
+ priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
+ priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
+ priv->cb = cb;
+ priv->cb_param = param;
+ be->fd = priv->nmd->fd;
+
+ priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
+ if (priv->mevp == NULL) {
+ EPRINTLN("Could not register event");
+ return (-1);
+ }
+
+ return (0);
+}
+
+static void
+netmap_cleanup(struct net_backend *be)
+{
+ struct netmap_priv *priv = NET_BE_PRIV(be);
+
+ if (priv->mevp) {
+ mevent_delete(priv->mevp);
+ }
+ if (priv->nmd) {
+ nm_close(priv->nmd);
+ }
+ be->fd = -1;
+}
+
+static ssize_t
+netmap_send(struct net_backend *be, const struct iovec *iov,
+ int iovcnt)
+{
+ struct netmap_priv *priv = NET_BE_PRIV(be);
+ struct netmap_ring *ring;
+ ssize_t totlen = 0;
+ int nm_buf_size;
+ int nm_buf_len;
+ uint32_t head;
+ uint8_t *nm_buf;
+ int j;
+
+ ring = priv->tx;
+ head = ring->head;
+ if (head == ring->tail) {
+ EPRINTLN("No space, drop %zu bytes", count_iov(iov, iovcnt));
+ goto txsync;
+ }
+ nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
+ nm_buf_size = ring->nr_buf_size;
+ nm_buf_len = 0;
+
+ for (j = 0; j < iovcnt; j++) {
+ uint8_t *iov_frag_buf = iov[j].iov_base;
+ int iov_frag_size = iov[j].iov_len;
+
+ totlen += iov_frag_size;
+
+ /*
+ * Split each iovec fragment over more netmap slots, if
+ * necessary.
+ */
+ for (;;) {
+ int copylen;
+
+ copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
+ memcpy(nm_buf, iov_frag_buf, copylen);
+
+ iov_frag_buf += copylen;
+ iov_frag_size -= copylen;
+ nm_buf += copylen;
+ nm_buf_size -= copylen;
+ nm_buf_len += copylen;
+
+ if (iov_frag_size == 0) {
+ break;
+ }
+
+ ring->slot[head].len = nm_buf_len;
+ ring->slot[head].flags = NS_MOREFRAG;
+ head = nm_ring_next(ring, head);
+ if (head == ring->tail) {
+ /*
+ * We ran out of netmap slots while
+ * splitting the iovec fragments.
+ */
+ EPRINTLN("No space, drop %zu bytes",
+ count_iov(iov, iovcnt));
+ goto txsync;
+ }
+ nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
+ nm_buf_size = ring->nr_buf_size;
+ nm_buf_len = 0;
+ }
+ }
+
+ /* Complete the last slot, which must not have NS_MOREFRAG set. */
+ ring->slot[head].len = nm_buf_len;
+ ring->slot[head].flags = 0;
+ head = nm_ring_next(ring, head);
+
+ /* Now update ring->head and ring->cur. */
+ ring->head = ring->cur = head;
+txsync:
+ ioctl(be->fd, NIOCTXSYNC, NULL);
+
+ return (totlen);
+}
+
+static ssize_t
+netmap_peek_recvlen(struct net_backend *be)
+{
+ struct netmap_priv *priv = NET_BE_PRIV(be);
+ struct netmap_ring *ring = priv->rx;
+ uint32_t head = ring->head;
+ ssize_t totlen = 0;
+
+ while (head != ring->tail) {
+ struct netmap_slot *slot = ring->slot + head;
+
+ totlen += slot->len;
+ if ((slot->flags & NS_MOREFRAG) == 0)
+ break;
+ head = nm_ring_next(ring, head);
+ }
+
+ return (totlen);
+}
+
+static ssize_t
+netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
+{
+ struct netmap_priv *priv = NET_BE_PRIV(be);
+ struct netmap_slot *slot = NULL;
+ struct netmap_ring *ring;
+ uint8_t *iov_frag_buf;
+ int iov_frag_size;
+ ssize_t totlen = 0;
+ uint32_t head;
+
+ assert(iovcnt);
+
+ ring = priv->rx;
+ head = ring->head;
+ iov_frag_buf = iov->iov_base;
+ iov_frag_size = iov->iov_len;
+
+ do {
+ uint8_t *nm_buf;
+ int nm_buf_len;
+
+ if (head == ring->tail) {
+ return (0);
+ }
+
+ slot = ring->slot + head;
+ nm_buf = NETMAP_BUF(ring, slot->buf_idx);
+ nm_buf_len = slot->len;
+
+ for (;;) {
+ int copylen = nm_buf_len < iov_frag_size ?
+ nm_buf_len : iov_frag_size;
+
+ memcpy(iov_frag_buf, nm_buf, copylen);
+ nm_buf += copylen;
+ nm_buf_len -= copylen;
+ iov_frag_buf += copylen;
+ iov_frag_size -= copylen;
+ totlen += copylen;
+
+ if (nm_buf_len == 0) {
+ break;
+ }
+
+ iov++;
+ iovcnt--;
+ if (iovcnt == 0) {
+ /* No space to receive. */
+ EPRINTLN("Short iov, drop %zd bytes",
+ totlen);
+ return (-ENOSPC);
+ }
+ iov_frag_buf = iov->iov_base;
+ iov_frag_size = iov->iov_len;
+ }
+
+ head = nm_ring_next(ring, head);
+
+ } while (slot->flags & NS_MOREFRAG);
+
+ /* Release slots to netmap. */
+ ring->head = ring->cur = head;
+
+ return (totlen);
+}
+
+static void
+netmap_recv_enable(struct net_backend *be)
+{
+ struct netmap_priv *priv = NET_BE_PRIV(be);
+
+ mevent_enable(priv->mevp);
+}
+
+static void
+netmap_recv_disable(struct net_backend *be)
+{
+ struct netmap_priv *priv = NET_BE_PRIV(be);
+
+ mevent_disable(priv->mevp);
+}
+
+static struct net_backend netmap_backend = {
+ .prefix = "netmap",
+ .priv_size = sizeof(struct netmap_priv),
+ .init = netmap_init,
+ .cleanup = netmap_cleanup,
+ .send = netmap_send,
+ .peek_recvlen = netmap_peek_recvlen,
+ .recv = netmap_recv,
+ .recv_enable = netmap_recv_enable,
+ .recv_disable = netmap_recv_disable,
+ .get_cap = netmap_get_cap,
+ .set_cap = netmap_set_cap,
+};
+
+/* A clone of the netmap backend, with a different prefix. */
+static struct net_backend vale_backend = {
+ .prefix = "vale",
+ .priv_size = sizeof(struct netmap_priv),
+ .init = netmap_init,
+ .cleanup = netmap_cleanup,
+ .send = netmap_send,
+ .peek_recvlen = netmap_peek_recvlen,
+ .recv = netmap_recv,
+ .recv_enable = netmap_recv_enable,
+ .recv_disable = netmap_recv_disable,
+ .get_cap = netmap_get_cap,
+ .set_cap = netmap_set_cap,
+};
+
+DATA_SET(net_backend_set, netmap_backend);
+DATA_SET(net_backend_set, vale_backend);
diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c
index de6afab53854..2d11c45f217a 100644
--- a/usr.sbin/bhyve/net_backends.c
+++ b/usr.sbin/bhyve/net_backends.c
@@ -32,8 +32,7 @@
* features) is exported by net_backends.h.
*/
-#include <sys/cdefs.h>
-#include <sys/types.h> /* u_short etc */
+#include <sys/types.h>
#ifndef WITHOUT_CAPSICUM
#include <sys/capsicum.h>
#endif
@@ -43,153 +42,35 @@
#include <net/if.h>
#include <net/if_tap.h>
-#include <net/netmap.h>
-#include <net/netmap_virt.h>
-#define NETMAP_WITH_LIBS
-#include <net/netmap_user.h>
+#include <assert.h>
#ifndef WITHOUT_CAPSICUM
#include <capsicum_helpers.h>
#endif
#include <err.h>
#include <errno.h>
#include <fcntl.h>
+#include <poll.h>
+#include <pthread.h>
+#include <pthread_np.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
-#include <unistd.h>
#include <sysexits.h>
-#include <assert.h>
-#include <pthread.h>
-#include <pthread_np.h>
-#include <poll.h>
-#include <assert.h>
-
-#ifdef NETGRAPH
-#include <sys/param.h>
-#include <sys/sysctl.h>
-#include <netgraph.h>
-#endif
+#include <unistd.h>
#include "config.h"
#include "debug.h"
#include "iov.h"
#include "mevent.h"
#include "net_backends.h"
+#include "net_backends_priv.h"
#include "pci_emul.h"
-#include <sys/linker_set.h>
-
-/*
- * Each network backend registers a set of function pointers that are
- * used to implement the net backends API.
- * This might need to be exposed if we implement backends in separate files.
- */
-struct net_backend {
- const char *prefix; /* prefix matching this backend */
-
- /*
- * Routines used to initialize and cleanup the resources needed
- * by a backend. The cleanup function is used internally,
- * and should not be called by the frontend.
- */
- int (*init)(struct net_backend *be, const char *devname,
- nvlist_t *nvl, net_be_rxeof_t cb, void *param);
- void (*cleanup)(struct net_backend *be);
-
- /*
- * Called to serve a guest transmit request. The scatter-gather
- * vector provided by the caller has 'iovcnt' elements and contains
- * the packet to send.
- */
- ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
- int iovcnt);
-
- /*
- * Get the length of the next packet that can be received from
- * the backend. If no packets are currently available, this
- * function returns 0.
- */
- ssize_t (*peek_recvlen)(struct net_backend *be);
-
- /*
- * Called to receive a packet from the backend. When the function
- * returns a positive value 'len', the scatter-gather vector
- * provided by the caller contains a packet with such length.
- * The function returns 0 if the backend doesn't have a new packet to
- * receive.
- */
- ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
- int iovcnt);
-
- /*
- * Ask the backend to enable or disable receive operation in the
- * backend. On return from a disable operation, it is guaranteed
- * that the receive callback won't be called until receive is
- * enabled again. Note however that it is up to the caller to make
- * sure that netbe_recv() is not currently being executed by another
- * thread.
- */
- void (*recv_enable)(struct net_backend *be);
- void (*recv_disable)(struct net_backend *be);
-
- /*
- * Ask the backend for the virtio-net features it is able to
- * support. Possible features are TSO, UFO and checksum offloading
- * in both rx and tx direction and for both IPv4 and IPv6.
- */
- uint64_t (*get_cap)(struct net_backend *be);
-
- /*
- * Tell the backend to enable/disable the specified virtio-net
- * features (capabilities).
- */
- int (*set_cap)(struct net_backend *be, uint64_t features,
- unsigned int vnet_hdr_len);
-
- struct pci_vtnet_softc *sc;
- int fd;
-
- /*
- * Length of the virtio-net header used by the backend and the
- * frontend, respectively. A zero value means that the header
- * is not used.
- */
- unsigned int be_vnet_hdr_len;
- unsigned int fe_vnet_hdr_len;
-
- /* Size of backend-specific private data. */
- size_t priv_size;
-
- /* Backend-specific private data follows. */
-};
-
-#define NET_BE_PRIV(be) ((void *)((be) + 1))
#define NET_BE_SIZE(be) (sizeof(*be) + (be)->priv_size)
-SET_DECLARE(net_backend_set, struct net_backend);
-
-#define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr)
-
-#define WPRINTF(params) PRINTLN params
-
-/*
- * The tap backend
- */
-
-struct tap_priv {
- struct mevent *mevp;
- /*
- * A bounce buffer that allows us to implement the peek_recvlen
- * callback. In the future we may get the same information from
- * the kevent data.
- */
- char bbuf[1 << 16];
- ssize_t bbuflen;
-};
-
-static void
+void
tap_cleanup(struct net_backend *be)
{
struct tap_priv *priv = NET_BE_PRIV(be);
@@ -216,7 +97,7 @@ tap_init(struct net_backend *be, const char *devname,
#endif
if (cb == NULL) {
- WPRINTF(("TAP backend requires non-NULL callback"));
+ EPRINTLN("TAP backend requires non-NULL callback");
return (-1);
}
@@ -225,7 +106,7 @@ tap_init(struct net_backend *be, const char *devname,
be->fd = open(tbuf, O_RDWR);
if (be->fd == -1) {
- WPRINTF(("open of tap device %s failed", tbuf));
+ EPRINTLN("open of tap device %s failed", tbuf);
goto error;
}
@@ -234,12 +115,12 @@ tap_init(struct net_backend *be, const char *devname,
* notifications with the event loop
*/
if (ioctl(be->fd, FIONBIO, &opt) < 0) {
- WPRINTF(("tap device O_NONBLOCK failed"));
+ EPRINTLN("tap device O_NONBLOCK failed");
goto error;
}
if (ioctl(be->fd, VMIO_SIOCSIFFLAGS, up)) {
- WPRINTF(("tap device link up failed"));
+ EPRINTLN("tap device link up failed");
goto error;
}
@@ -254,7 +135,7 @@ tap_init(struct net_backend *be, const char *devname,
priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
if (priv->mevp == NULL) {
- WPRINTF(("Could not register event"));
+ EPRINTLN("Could not register event");
goto error;
}
@@ -268,13 +149,13 @@ error:
/*
* Called to send a buffer chain out to the tap device
*/
-static ssize_t
+ssize_t
tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
return (writev(be->fd, iov, iovcnt));
}
-static ssize_t
+ssize_t
tap_peek_recvlen(struct net_backend *be)
{
struct tap_priv *priv = NET_BE_PRIV(be);
@@ -304,7 +185,7 @@ tap_peek_recvlen(struct net_backend *be)
return (ret);
}
-static ssize_t
+ssize_t
tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
{
struct tap_priv *priv = NET_BE_PRIV(be);
@@ -332,7 +213,7 @@ tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
return (ret);
}
-static void
+void
tap_recv_enable(struct net_backend *be)
{
struct tap_priv *priv = NET_BE_PRIV(be);
@@ -340,7 +221,7 @@ tap_recv_enable(struct net_backend *be)
mevent_enable(priv->mevp);
}
-static void
+void
tap_recv_disable(struct net_backend *be)
{
struct tap_priv *priv = NET_BE_PRIV(be);
@@ -348,14 +229,14 @@ tap_recv_disable(struct net_backend *be)
mevent_disable(priv->mevp);
}
-static uint64_t
+uint64_t
tap_get_cap(struct net_backend *be __unused)
{
return (0); /* no capabilities for now */
}
-static int
+int
tap_set_cap(struct net_backend *be __unused, uint64_t features,
unsigned vnet_hdr_len)
{
@@ -395,508 +276,6 @@ static struct net_backend vmnet_backend = {
DATA_SET(net_backend_set, tap_backend);
DATA_SET(net_backend_set, vmnet_backend);
-#ifdef NETGRAPH
-
-/*
- * Netgraph backend
- */
-
-#define NG_SBUF_MAX_SIZE (4 * 1024 * 1024)
-
-static int
-ng_init(struct net_backend *be, const char *devname __unused,
- nvlist_t *nvl, net_be_rxeof_t cb, void *param)
-{
- struct tap_priv *p = NET_BE_PRIV(be);
- struct ngm_connect ngc;
- const char *value, *nodename;
- int sbsz;
- int ctrl_sock;
- int flags;
- unsigned long maxsbsz;
- size_t msbsz;
-#ifndef WITHOUT_CAPSICUM
- cap_rights_t rights;
-#endif
-
- if (cb == NULL) {
- WPRINTF(("Netgraph backend requires non-NULL callback"));
- return (-1);
- }
-
- be->fd = -1;
-
- memset(&ngc, 0, sizeof(ngc));
-
- value = get_config_value_node(nvl, "path");
- if (value == NULL) {
- WPRINTF(("path must be provided"));
- return (-1);
- }
- strncpy(ngc.path, value, NG_PATHSIZ - 1);
-
- value = get_config_value_node(nvl, "hook");
- if (value == NULL)
- value = "vmlink";
- strncpy(ngc.ourhook, value, NG_HOOKSIZ - 1);
-
- value = get_config_value_node(nvl, "peerhook");
- if (value == NULL) {
- WPRINTF(("peer hook must be provided"));
- return (-1);
- }
- strncpy(ngc.peerhook, value, NG_HOOKSIZ - 1);
-
- nodename = get_config_value_node(nvl, "socket");
- if (NgMkSockNode(nodename,
- &ctrl_sock, &be->fd) < 0) {
- WPRINTF(("can't get Netgraph sockets"));
- return (-1);
- }
-
- if (NgSendMsg(ctrl_sock, ".",
- NGM_GENERIC_COOKIE,
- NGM_CONNECT, &ngc, sizeof(ngc)) < 0) {
- WPRINTF(("can't connect to node"));
- close(ctrl_sock);
- goto error;
- }
-
- close(ctrl_sock);
-
- flags = fcntl(be->fd, F_GETFL);
-
- if (flags < 0) {
- WPRINTF(("can't get socket flags"));
- goto error;
- }
-
- if (fcntl(be->fd, F_SETFL, flags | O_NONBLOCK) < 0) {
- WPRINTF(("can't set O_NONBLOCK flag"));
- goto error;
- }
-
- /*
- * The default ng_socket(4) buffer's size is too low.
- * Calculate the minimum value between NG_SBUF_MAX_SIZE
- * and kern.ipc.maxsockbuf.
- */
- msbsz = sizeof(maxsbsz);
- if (sysctlbyname("kern.ipc.maxsockbuf", &maxsbsz, &msbsz,
- NULL, 0) < 0) {
- WPRINTF(("can't get 'kern.ipc.maxsockbuf' value"));
- goto error;
- }
-
- /*
- * We can't set the socket buffer size to kern.ipc.maxsockbuf value,
- * as it takes into account the mbuf(9) overhead.
- */
- maxsbsz = maxsbsz * MCLBYTES / (MSIZE + MCLBYTES);
-
- sbsz = MIN(NG_SBUF_MAX_SIZE, maxsbsz);
-
- if (setsockopt(be->fd, SOL_SOCKET, SO_SNDBUF, &sbsz,
- sizeof(sbsz)) < 0) {
- WPRINTF(("can't set TX buffer size"));
- goto error;
- }
-
- if (setsockopt(be->fd, SOL_SOCKET, SO_RCVBUF, &sbsz,
- sizeof(sbsz)) < 0) {
- WPRINTF(("can't set RX buffer size"));
- goto error;
- }
-
-#ifndef WITHOUT_CAPSICUM
- cap_rights_init(&rights, CAP_EVENT, CAP_READ, CAP_WRITE);
- if (caph_rights_limit(be->fd, &rights) == -1)
- errx(EX_OSERR, "Unable to apply rights for sandbox");
-#endif
-
- memset(p->bbuf, 0, sizeof(p->bbuf));
- p->bbuflen = 0;
-
- p->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
- if (p->mevp == NULL) {
- WPRINTF(("Could not register event"));
- goto error;
- }
-
- return (0);
-
-error:
- tap_cleanup(be);
- return (-1);
-}
-
-static struct net_backend ng_backend = {
- .prefix = "netgraph",
- .priv_size = sizeof(struct tap_priv),
- .init = ng_init,
- .cleanup = tap_cleanup,
- .send = tap_send,
- .peek_recvlen = tap_peek_recvlen,
- .recv = tap_recv,
- .recv_enable = tap_recv_enable,
- .recv_disable = tap_recv_disable,
- .get_cap = tap_get_cap,
- .set_cap = tap_set_cap,
-};
-
-DATA_SET(net_backend_set, ng_backend);
-
-#endif /* NETGRAPH */
-
-/*
- * The netmap backend
- */
-
-/* The virtio-net features supported by netmap. */
-#define NETMAP_FEATURES (VIRTIO_NET_F_CSUM | VIRTIO_NET_F_HOST_TSO4 | \
- VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_UFO | \
- VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | \
- VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_UFO)
-
-struct netmap_priv {
- char ifname[IFNAMSIZ];
- struct nm_desc *nmd;
- uint16_t memid;
- struct netmap_ring *rx;
- struct netmap_ring *tx;
- struct mevent *mevp;
- net_be_rxeof_t cb;
- void *cb_param;
-};
-
-static void
-nmreq_init(struct nmreq *req, char *ifname)
-{
-
- memset(req, 0, sizeof(*req));
- strlcpy(req->nr_name, ifname, sizeof(req->nr_name));
- req->nr_version = NETMAP_API;
-}
-
-static int
-netmap_set_vnet_hdr_len(struct net_backend *be, int vnet_hdr_len)
-{
- int err;
- struct nmreq req;
- struct netmap_priv *priv = NET_BE_PRIV(be);
-
- nmreq_init(&req, priv->ifname);
- req.nr_cmd = NETMAP_BDG_VNET_HDR;
- req.nr_arg1 = vnet_hdr_len;
- err = ioctl(be->fd, NIOCREGIF, &req);
- if (err) {
- WPRINTF(("Unable to set vnet header length %d",
- vnet_hdr_len));
- return (err);
- }
-
- be->be_vnet_hdr_len = vnet_hdr_len;
-
- return (0);
-}
-
-static int
-netmap_has_vnet_hdr_len(struct net_backend *be, unsigned vnet_hdr_len)
-{
- unsigned prev_hdr_len = be->be_vnet_hdr_len;
- int ret;
-
- if (vnet_hdr_len == prev_hdr_len) {
- return (1);
- }
-
- ret = netmap_set_vnet_hdr_len(be, vnet_hdr_len);
- if (ret) {
- return (0);
- }
-
- netmap_set_vnet_hdr_len(be, prev_hdr_len);
-
- return (1);
-}
-
-static uint64_t
-netmap_get_cap(struct net_backend *be)
-{
-
- return (netmap_has_vnet_hdr_len(be, VNET_HDR_LEN) ?
- NETMAP_FEATURES : 0);
-}
-
-static int
-netmap_set_cap(struct net_backend *be, uint64_t features __unused,
- unsigned vnet_hdr_len)
-{
-
- return (netmap_set_vnet_hdr_len(be, vnet_hdr_len));
-}
-
-static int
-netmap_init(struct net_backend *be, const char *devname,
- nvlist_t *nvl __unused, net_be_rxeof_t cb, void *param)
-{
- struct netmap_priv *priv = NET_BE_PRIV(be);
-
- strlcpy(priv->ifname, devname, sizeof(priv->ifname));
- priv->ifname[sizeof(priv->ifname) - 1] = '\0';
-
- priv->nmd = nm_open(priv->ifname, NULL, NETMAP_NO_TX_POLL, NULL);
- if (priv->nmd == NULL) {
- WPRINTF(("Unable to nm_open(): interface '%s', errno (%s)",
- devname, strerror(errno)));
- return (-1);
- }
-
- priv->memid = priv->nmd->req.nr_arg2;
- priv->tx = NETMAP_TXRING(priv->nmd->nifp, 0);
- priv->rx = NETMAP_RXRING(priv->nmd->nifp, 0);
- priv->cb = cb;
- priv->cb_param = param;
- be->fd = priv->nmd->fd;
-
- priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param);
- if (priv->mevp == NULL) {
- WPRINTF(("Could not register event"));
- return (-1);
- }
-
- return (0);
-}
-
-static void
-netmap_cleanup(struct net_backend *be)
-{
- struct netmap_priv *priv = NET_BE_PRIV(be);
-
- if (priv->mevp) {
- mevent_delete(priv->mevp);
- }
- if (priv->nmd) {
- nm_close(priv->nmd);
- }
- be->fd = -1;
-}
-
-static ssize_t
-netmap_send(struct net_backend *be, const struct iovec *iov,
- int iovcnt)
-{
- struct netmap_priv *priv = NET_BE_PRIV(be);
- struct netmap_ring *ring;
- ssize_t totlen = 0;
- int nm_buf_size;
- int nm_buf_len;
- uint32_t head;
- uint8_t *nm_buf;
- int j;
-
- ring = priv->tx;
- head = ring->head;
- if (head == ring->tail) {
- WPRINTF(("No space, drop %zu bytes", count_iov(iov, iovcnt)));
- goto txsync;
- }
- nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
- nm_buf_size = ring->nr_buf_size;
- nm_buf_len = 0;
-
- for (j = 0; j < iovcnt; j++) {
- uint8_t *iov_frag_buf = iov[j].iov_base;
- int iov_frag_size = iov[j].iov_len;
-
- totlen += iov_frag_size;
-
- /*
- * Split each iovec fragment over more netmap slots, if
- * necessary.
- */
- for (;;) {
- int copylen;
-
- copylen = iov_frag_size < nm_buf_size ? iov_frag_size : nm_buf_size;
- memcpy(nm_buf, iov_frag_buf, copylen);
-
- iov_frag_buf += copylen;
- iov_frag_size -= copylen;
- nm_buf += copylen;
- nm_buf_size -= copylen;
- nm_buf_len += copylen;
-
- if (iov_frag_size == 0) {
- break;
- }
-
- ring->slot[head].len = nm_buf_len;
- ring->slot[head].flags = NS_MOREFRAG;
- head = nm_ring_next(ring, head);
- if (head == ring->tail) {
- /*
- * We ran out of netmap slots while
- * splitting the iovec fragments.
- */
- WPRINTF(("No space, drop %zu bytes",
- count_iov(iov, iovcnt)));
- goto txsync;
- }
- nm_buf = NETMAP_BUF(ring, ring->slot[head].buf_idx);
- nm_buf_size = ring->nr_buf_size;
- nm_buf_len = 0;
- }
- }
-
- /* Complete the last slot, which must not have NS_MOREFRAG set. */
- ring->slot[head].len = nm_buf_len;
- ring->slot[head].flags = 0;
- head = nm_ring_next(ring, head);
-
- /* Now update ring->head and ring->cur. */
- ring->head = ring->cur = head;
-txsync:
- ioctl(be->fd, NIOCTXSYNC, NULL);
-
- return (totlen);
-}
-
-static ssize_t
-netmap_peek_recvlen(struct net_backend *be)
-{
- struct netmap_priv *priv = NET_BE_PRIV(be);
- struct netmap_ring *ring = priv->rx;
- uint32_t head = ring->head;
- ssize_t totlen = 0;
-
- while (head != ring->tail) {
- struct netmap_slot *slot = ring->slot + head;
-
- totlen += slot->len;
- if ((slot->flags & NS_MOREFRAG) == 0)
- break;
- head = nm_ring_next(ring, head);
- }
-
- return (totlen);
-}
-
-static ssize_t
-netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
-{
- struct netmap_priv *priv = NET_BE_PRIV(be);
- struct netmap_slot *slot = NULL;
- struct netmap_ring *ring;
- uint8_t *iov_frag_buf;
- int iov_frag_size;
- ssize_t totlen = 0;
- uint32_t head;
-
- assert(iovcnt);
-
- ring = priv->rx;
- head = ring->head;
- iov_frag_buf = iov->iov_base;
- iov_frag_size = iov->iov_len;
-
- do {
- uint8_t *nm_buf;
- int nm_buf_len;
-
- if (head == ring->tail) {
- return (0);
- }
-
- slot = ring->slot + head;
- nm_buf = NETMAP_BUF(ring, slot->buf_idx);
- nm_buf_len = slot->len;
-
- for (;;) {
- int copylen = nm_buf_len < iov_frag_size ?
- nm_buf_len : iov_frag_size;
-
- memcpy(iov_frag_buf, nm_buf, copylen);
- nm_buf += copylen;
- nm_buf_len -= copylen;
- iov_frag_buf += copylen;
- iov_frag_size -= copylen;
- totlen += copylen;
-
- if (nm_buf_len == 0) {
- break;
- }
-
- iov++;
- iovcnt--;
- if (iovcnt == 0) {
- /* No space to receive. */
- WPRINTF(("Short iov, drop %zd bytes",
- totlen));
- return (-ENOSPC);
- }
- iov_frag_buf = iov->iov_base;
- iov_frag_size = iov->iov_len;
- }
-
- head = nm_ring_next(ring, head);
-
- } while (slot->flags & NS_MOREFRAG);
-
- /* Release slots to netmap. */
- ring->head = ring->cur = head;
-
- return (totlen);
-}
-
-static void
-netmap_recv_enable(struct net_backend *be)
-{
- struct netmap_priv *priv = NET_BE_PRIV(be);
-
- mevent_enable(priv->mevp);
-}
-
-static void
-netmap_recv_disable(struct net_backend *be)
-{
- struct netmap_priv *priv = NET_BE_PRIV(be);
-
- mevent_disable(priv->mevp);
-}
-
-static struct net_backend netmap_backend = {
- .prefix = "netmap",
- .priv_size = sizeof(struct netmap_priv),
- .init = netmap_init,
- .cleanup = netmap_cleanup,
- .send = netmap_send,
- .peek_recvlen = netmap_peek_recvlen,
- .recv = netmap_recv,
- .recv_enable = netmap_recv_enable,
- .recv_disable = netmap_recv_disable,
- .get_cap = netmap_get_cap,
- .set_cap = netmap_set_cap,
-};
-
-/* A clone of the netmap backend, with a different prefix. */
-static struct net_backend vale_backend = {
- .prefix = "vale",
- .priv_size = sizeof(struct netmap_priv),
- .init = netmap_init,
- .cleanup = netmap_cleanup,
- .send = netmap_send,
- .peek_recvlen = netmap_peek_recvlen,
- .recv = netmap_recv,
- .recv_enable = netmap_recv_enable,
- .recv_disable = netmap_recv_disable,
- .get_cap = netmap_get_cap,
- .set_cap = netmap_set_cap,
-};
-
-DATA_SET(net_backend_set, netmap_backend);
-DATA_SET(net_backend_set, vale_backend);
-
int
netbe_legacy_config(nvlist_t *nvl, const char *opts)
{
diff --git a/usr.sbin/bhyve/net_backends.h b/usr.sbin/bhyve/net_backends.h
index 6d58e04b3470..e623b7c64637 100644
--- a/usr.sbin/bhyve/net_backends.h
+++ b/usr.sbin/bhyve/net_backends.h
@@ -28,7 +28,11 @@
#ifndef __NET_BACKENDS_H__
#define __NET_BACKENDS_H__
-#include <stdint.h>
+#include <sys/nv.h>
+#include <sys/time.h>
+#include <sys/uio.h>
+
+#include "mevent.h"
/* Opaque type representing a network backend. */
typedef struct net_backend net_backend_t;
@@ -90,5 +94,6 @@ struct virtio_net_rxhdr {
uint16_t vrh_csum_offset;
uint16_t vrh_bufs;
} __packed;
+#define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr)
#endif /* __NET_BACKENDS_H__ */
diff --git a/usr.sbin/bhyve/net_backends_priv.h b/usr.sbin/bhyve/net_backends_priv.h
new file mode 100644
index 000000000000..4b3f4e4358a3
--- /dev/null
+++ b/usr.sbin/bhyve/net_backends_priv.h
@@ -0,0 +1,152 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2019 Vincenzo Maffione <vmaffione@FreeBSD.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __NET_BACKENDS_PRIV_H__
+#define __NET_BACKENDS_PRIV_H__
+
+#include <sys/linker_set.h>
+
+/*
+ * Each network backend registers a set of function pointers that are
+ * used to implement the net backends API. Frontends should not invoke
+ * these functions directly, but should instead use the interface provided by
+ * net_backends.h.
+ */
+struct net_backend {
+ const char *prefix; /* prefix matching this backend */
+
+ /*
+ * Routines used to initialize and clean up the resources needed
+ * by a backend. The cleanup function is used internally,
+ * and should not be called by the frontend.
+ */
+ int (*init)(struct net_backend *be, const char *devname,
+ nvlist_t *nvl, net_be_rxeof_t cb, void *param);
+ void (*cleanup)(struct net_backend *be);
+
+ /*
+ * Called to serve a guest transmit request. The scatter-gather
+ * vector provided by the caller has 'iovcnt' elements and contains
+ * the packet to send.
+ */
+ ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
+ int iovcnt);
+
+ /*
+ * Get the length of the next packet that can be received from
+ * the backend. If no packets are currently available, this
+ * function returns 0.
+ */
+ ssize_t (*peek_recvlen)(struct net_backend *be);
+
+ /*
+ * Called to receive a packet from the backend. When the function
+ * returns a positive value 'len', the scatter-gather vector
+ * provided by the caller ('iovcnt' elements) contains a packet of
+ * that length. The function returns 0 if the backend doesn't have
+ * a new packet to receive.
+ */
+ ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
+ int iovcnt);
+
+ /*
+ * Ask the backend to enable or disable receive operation.
+ * On return from a disable operation, it is guaranteed
+ * that the receive callback won't be called until receive is
+ * enabled again. Note however that it is up to the caller to make
+ * sure that netbe_recv() is not currently being executed by another
+ * thread.
+ */
+ void (*recv_enable)(struct net_backend *be);
+ void (*recv_disable)(struct net_backend *be);
+
+ /*
+ * Ask the backend for the virtio-net features it is able to
+ * support. Possible features are TSO, UFO and checksum offloading
+ * in both rx and tx direction and for both IPv4 and IPv6.
+ */
+ uint64_t (*get_cap)(struct net_backend *be);
+
+ /*
+ * Tell the backend to enable/disable the specified virtio-net
+ * features (capabilities).
+ */
+ int (*set_cap)(struct net_backend *be, uint64_t features,
+ unsigned int vnet_hdr_len);
+
+ /*
+ * NOTE(review): presumably the virtio-net frontend instance that owns
+ * this backend and the descriptor the backend performs I/O on; neither
+ * is described here -- confirm against net_backends.c.
+ */
+ struct pci_vtnet_softc *sc;
+ int fd;
+
+ /*
+ * Length of the virtio-net header used by the backend and the
+ * frontend, respectively. A zero value means that the header
+ * is not used.
+ */
+ unsigned int be_vnet_hdr_len;
+ unsigned int fe_vnet_hdr_len;
+
+ /* Size of backend-specific private data. */
+ size_t priv_size;
+
+ /*
+ * Backend-specific private data follows; NET_BE_PRIV() returns a
+ * pointer to it by stepping one structure past 'be'.
+ */
+};
+
+/*
+ * Return a pointer to the backend-specific private data, which is laid out
+ * immediately after the struct net_backend header. 'be' must have type
+ * struct net_backend * so that the pointer arithmetic skips exactly one
+ * header.
+ */
+#define NET_BE_PRIV(be) ((void *)((be) + 1))
+
+/* Linker set to which each backend adds its method table via DATA_SET(). */
+SET_DECLARE(net_backend_set, struct net_backend);
+
+/*
+ * Size in bytes of struct virtio_net_rxhdr.
+ * NOTE(review): net_backends.h defines this macro with an identical body;
+ * the repetition is legal C but redundant -- consider keeping only one.
+ */
+#define VNET_HDR_LEN sizeof(struct virtio_net_rxhdr)
+
+/*
+ * Export the tap backend routines for the benefit of other backends which have
+ * a similar interface to the kernel, i.e., they send and receive data using
+ * standard I/O system calls with a single file descriptor.
+ */
+
+/* Private state of the tap backend. */
+struct tap_priv {
+ /* NOTE(review): presumably the mevent handle for receive readiness. */
+ struct mevent *mevp;
+ /*
+ * A bounce buffer that allows us to implement the peek_recvlen
+ * callback. In the future we may get the same information from
+ * the kevent data.
+ */
+ char bbuf[1 << 16];
+ /* NOTE(review): presumably the number of valid bytes in bbuf -- confirm. */
+ ssize_t bbuflen;
+};
+
+/*
+ * Each prototype has the same signature as the struct net_backend callback
+ * of the corresponding name (cleanup, send, recv, peek_recvlen, recv_enable,
+ * recv_disable, get_cap, set_cap), so another backend can reference these
+ * routines directly in its own method table.
+ */
+void tap_cleanup(struct net_backend *be);
+ssize_t tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt);
+ssize_t tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt);
+ssize_t tap_peek_recvlen(struct net_backend *be);
+void tap_recv_enable(struct net_backend *be);
+void tap_recv_disable(struct net_backend *be);
+uint64_t tap_get_cap(struct net_backend *be);
+int tap_set_cap(struct net_backend *be, uint64_t features,
+ unsigned int vnet_hdr_len);
+
+#endif /* !__NET_BACKENDS_PRIV_H__ */