diff options
Diffstat (limited to 'usr.sbin/bhyve')
| -rw-r--r-- | usr.sbin/bhyve/Makefile | 2 | ||||
| -rw-r--r-- | usr.sbin/bhyve/bhyve.8 | 37 | ||||
| -rw-r--r-- | usr.sbin/bhyve/net_backend_slirp.c | 622 | ||||
| -rw-r--r-- | usr.sbin/bhyve/net_backends.c | 19 | ||||
| -rw-r--r-- | usr.sbin/bhyve/slirp/Makefile | 11 | ||||
| -rw-r--r-- | usr.sbin/bhyve/slirp/libslirp.h (renamed from usr.sbin/bhyve/libslirp.h) | 0 | ||||
| -rw-r--r-- | usr.sbin/bhyve/slirp/slirp-helper.c | 570 |
7 files changed, 718 insertions, 543 deletions
diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index e5e8bf9b5567..c454a280edba 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -77,6 +77,8 @@ SRCS+= snapshot.c .include "${MACHINE_CPUARCH}/Makefile.inc" +SUBDIR+= slirp + .if defined(BHYVE_FDT_SUPPORT) LIBADD+= fdt CFLAGS+= -I${SRCTOP}/sys/contrib/libfdt diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 index c902c265da9e..45450b5b5db5 100644 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -1,3 +1,6 @@ +.\" +.\" SPDX-License-Identifier: BSD-2-Clause +.\" .\" Copyright (c) 2013 Peter Grehan .\" All rights reserved. .\" @@ -22,7 +25,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd August 21, 2024 +.Dd October 28, 2025 .Dt BHYVE 8 .Os .Sh NAME @@ -125,7 +128,7 @@ xAPIC mode is the default setting so this option is redundant. It will be deprecated in a future version. .It Fl C Include guest memory in core files. -.It Fl c Op Ar setting ... +.It Fl c Oo Oo Cm cpus= Oc Ns Ar numcpus Oc Ns Oo Cm ,sockets= Ns Ar n Oc Ns Oo Cm ,cores= Ns Ar n Oc Ns Oo Cm ,threads= Ns Ar n Oc Number of guest virtual CPUs and/or the CPU topology. The default value for each of @@ -518,6 +521,8 @@ considered unconnected. .Op Cm \&,mtu= Ar N .Xc .It +.Cm ngd Ar N +.It .Xo .Cm netgraph,path= Ar ADDRESS Cm \&,peerhook= Ar HOOK .Op Cm \&,socket= Ar NAME @@ -527,7 +532,9 @@ considered unconnected. .Xc .It .Xo -.Cm slirp,hostfwd= Ar proto : Ar hostaddr : Ar hostport - Ar guestaddr : Ar guestport +.Cm slirp +.Op Cm \&,open +.Op Cm \&,hostfwd= Ar proto : Ar hostaddr : Ar hostport - Ar guestaddr : Ar guestport .Xc .El .Sm on @@ -542,6 +549,19 @@ The MAC address is an ASCII string in .Xr ethers 5 format. .Pp +A +.Cm ngd +device can be used to connect a guest to a +.Xr netgraph 4 +through a +.Xr ng_device 4 +node. +This can be used to run bhyve in a +.Xr VNET 9 +jail, and give it access to the host's netgraph, that cannot be reached +directly, by exposing the ng_device through +.Xr devfs 8 . +.Pp With .Cm virtio-net devices, the @@ -572,12 +592,20 @@ must comply with .Xr netgraph 4 addressing rules. .Pp -The slirp backend can be used to provide a NATed network to the guest. +The +.Cm slirp +backend can be used to provide a NATed network to the guest. This backend has poor performance but does not require any network configuration on the host system. It depends on the .Pa net/libslirp port. +If the +.Cm open +keyword is set, the guest will be able to make outbound network +connections, and +.Nm +will transparently handle the necessary address translation. The .Cm hostfwd option takes a 5-tuple describing how connections from the host are to be @@ -585,6 +613,7 @@ forwarded to the guest. Multiple rules can be specified, separated by semicolons. Note that semicolons must be escaped or quoted to prevent the shell from interpreting them. +The backend will provide DHCP and DNS service to the guest. .Ss Block storage device backends: .Bl -bullet .Sm off diff --git a/usr.sbin/bhyve/net_backend_slirp.c b/usr.sbin/bhyve/net_backend_slirp.c index 171c5b5bdbbd..c98e54b2ee88 100644 --- a/usr.sbin/bhyve/net_backend_slirp.c +++ b/usr.sbin/bhyve/net_backend_slirp.c @@ -1,7 +1,7 @@ /*- * SPDX-License-Identifier: BSD-2-Clause * - * Copyright (c) 2023 Mark Johnston <markj@FreeBSD.org> + * Copyright (c) 2023, 2025 Mark Johnston <markj@FreeBSD.org> * * This software was developed by the University of Cambridge Computer * Laboratory (Department of Computer Science and Technology) under Innovate @@ -31,13 +31,15 @@ */ /* - * The slirp backend enables unprivileged networking via libslirp, which must be - * installed on the host system via pkg or the ports tree. bhyve dlopen()s - * libslirp.so upon instantiating the slirp backend. Various network parameters - * are hard-coded in _slirp_init(). + * The slirp backend enables unprivileged userspace networking via libslirp, + * which must be installed on the host system via pkg or the ports tree. + * libslirp.so is dlopen()ed into a helper process with which this backend + * communicates. * * Packets received from the guest (i.e., transmitted by the frontend, such as a - * virtio NIC device model) are injected into the slirp backend via slirp_send(). + * virtio NIC device model) are injected into the slirp backend via slirp_send(), + * which sends the packet to the helper process. + * * Packets to be transmitted to the guest (i.e., inserted into the frontend's * receive buffers) are buffered in a per-interface socket pair and read by the * mevent loop. Sockets instantiated by libslirp are monitored by a thread @@ -46,14 +48,12 @@ */ #include <sys/socket.h> +#include <sys/wait.h> #include <assert.h> -#include <capsicum_helpers.h> -#include <dlfcn.h> #include <errno.h> -#include <poll.h> -#include <pthread.h> -#include <pthread_np.h> +#include <signal.h> +#include <spawn.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -61,537 +61,92 @@ #include "config.h" #include "debug.h" -#include "libslirp.h" #include "mevent.h" #include "net_backends.h" #include "net_backends_priv.h" -typedef int (*slirp_add_hostxfwd_p_t)(Slirp *, - const struct sockaddr *, socklen_t, const struct sockaddr *, socklen_t, - int); -typedef void (*slirp_cleanup_p_t)(Slirp *); -typedef void (*slirp_input_p_t)(Slirp *, const uint8_t *, int); -typedef Slirp *(*slirp_new_p_t)(const SlirpConfig *, const SlirpCb *, void *); -typedef void (*slirp_pollfds_fill_p_t)(Slirp *, uint32_t *timeout, - SlirpAddPollCb, void *); -typedef void (*slirp_pollfds_poll_p_t)(Slirp *, int, SlirpGetREventsCb, void *); - -/* Function pointer table, initialized by slirp_init_once(). */ -static slirp_add_hostxfwd_p_t slirp_add_hostxfwd_p; -static slirp_cleanup_p_t slirp_cleanup_p; -static slirp_input_p_t slirp_input_p; -static slirp_new_p_t slirp_new_p; -static slirp_pollfds_fill_p_t slirp_pollfds_fill_p; -static slirp_pollfds_poll_p_t slirp_pollfds_poll_p; - -static void -checked_close(int *fdp) -{ - int error; - - if (*fdp != -1) { - error = close(*fdp); - assert(error == 0); - *fdp = -1; - } -} - -static int -slirp_init_once(void) -{ - static void *handle = NULL; - - if (handle != NULL) - return (0); - handle = dlopen("libslirp.so.0", RTLD_LAZY); - if (handle == NULL) { - EPRINTLN("Unable to open libslirp.so.0: %s", dlerror()); - return (-1); - } - -#define IMPORT_SYM(sym) do { \ - sym##_p = (sym##_p_t)dlsym(handle, #sym); \ - if (sym##_p == NULL) { \ - EPRINTLN("failed to resolve %s", #sym); \ - goto err; \ - } \ -} while (0) - IMPORT_SYM(slirp_add_hostxfwd); - IMPORT_SYM(slirp_cleanup); - IMPORT_SYM(slirp_input); - IMPORT_SYM(slirp_new); - IMPORT_SYM(slirp_pollfds_fill); - IMPORT_SYM(slirp_pollfds_poll); -#undef IMPORT_SYM - - /* - * libslirp uses glib, which uses tzdata to format log messages. Help - * it out. - * - * XXX-MJ glib will also look for charset files, not sure what we can do - * about that... - */ - caph_cache_tzdata(); - - return (0); - -err: - dlclose(handle); - handle = NULL; - return (-1); -} +#define SLIRP_MTU 2048 struct slirp_priv { - Slirp *slirp; - -#define SLIRP_MTU 2048 + int s; + pid_t helper; struct mevent *mevp; - int pipe[2]; /* used to buffer data sent to the guest */ - int wakeup[2]; /* used to wake up the pollfd thread */ - - pthread_t pollfd_td; - struct pollfd *pollfds; - size_t npollfds; - - /* Serializes libslirp calls. */ - pthread_mutex_t mtx; -}; - -static void -slirp_priv_init(struct slirp_priv *priv) -{ - int error; - - memset(priv, 0, sizeof(*priv)); - priv->pipe[0] = priv->pipe[1] = -1; - priv->wakeup[0] = priv->wakeup[1] = -1; - error = pthread_mutex_init(&priv->mtx, NULL); - assert(error == 0); -} - -static void -slirp_priv_cleanup(struct slirp_priv *priv) -{ - int error; - - checked_close(&priv->pipe[0]); - checked_close(&priv->pipe[1]); - checked_close(&priv->wakeup[0]); - checked_close(&priv->wakeup[1]); - if (priv->mevp) - mevent_delete(priv->mevp); - if (priv->slirp != NULL) - slirp_cleanup_p(priv->slirp); - error = pthread_mutex_destroy(&priv->mtx); - assert(error == 0); -} - -static int64_t -slirp_cb_clock_get_ns(void *param __unused) -{ - struct timespec ts; - int error; - - error = clock_gettime(CLOCK_MONOTONIC, &ts); - assert(error == 0); - return ((int64_t)(ts.tv_sec * 1000000000L + ts.tv_nsec)); -} - -static void -slirp_cb_notify(void *param) -{ - struct slirp_priv *priv; - - /* Wake up the poll thread. We assume that priv->mtx is held here. */ - priv = param; - (void)write(priv->wakeup[1], "M", 1); -} - -static void -slirp_cb_register_poll_fd(int fd, void *param __unused) -{ - const int one = 1; - - (void)setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(int)); -} - -static ssize_t -slirp_cb_send_packet(const void *buf, size_t len, void *param) -{ - struct slirp_priv *priv; - ssize_t n; - - priv = param; - - assert(len <= SLIRP_MTU); - n = send(priv->pipe[1], buf, len, 0); - if (n < 0) { - EPRINTLN("slirp_cb_send_packet: send: %s", strerror(errno)); - return (n); - } - assert((size_t)n == len); - - return (n); -} - -static void -slirp_cb_unregister_poll_fd(int fd __unused, void *opaque __unused) -{ -} - -/* Callbacks invoked from within libslirp. */ -static const struct SlirpCb slirp_cbs = { - .clock_get_ns = slirp_cb_clock_get_ns, - .notify = slirp_cb_notify, - .register_poll_fd = slirp_cb_register_poll_fd, - .send_packet = slirp_cb_send_packet, - .unregister_poll_fd = slirp_cb_unregister_poll_fd, }; static int -slirpev2pollev(int events) -{ - int ret; - - ret = 0; - if (events & SLIRP_POLL_IN) - ret |= POLLIN; - if (events & SLIRP_POLL_OUT) - ret |= POLLOUT; - if (events & SLIRP_POLL_PRI) - ret |= POLLPRI; - if (events & SLIRP_POLL_ERR) - ret |= POLLERR; - if (events & SLIRP_POLL_HUP) - ret |= POLLHUP; - return (ret); -} - -static int -pollev2slirpev(int events) -{ - int ret; - - ret = 0; - if (events & POLLIN) - ret |= SLIRP_POLL_IN; - if (events & POLLOUT) - ret |= SLIRP_POLL_OUT; - if (events & POLLPRI) - ret |= SLIRP_POLL_PRI; - if (events & POLLERR) - ret |= SLIRP_POLL_ERR; - if (events & POLLHUP) - ret |= SLIRP_POLL_HUP; - return (ret); -} - -static int -slirp_addpoll_cb(int fd, int events, void *param) -{ - struct slirp_priv *priv; - struct pollfd *pollfd, *pollfds; - size_t i; - - priv = param; - - for (i = 0; i < priv->npollfds; i++) - if (priv->pollfds[i].fd == -1) - break; - if (i == priv->npollfds) { - const size_t POLLFD_GROW = 4; - - priv->npollfds += POLLFD_GROW; - pollfds = realloc(priv->pollfds, - sizeof(*pollfds) * priv->npollfds); - if (pollfds == NULL) - return (-1); - for (i = priv->npollfds - POLLFD_GROW; i < priv->npollfds; i++) - pollfds[i].fd = -1; - priv->pollfds = pollfds; - - i = priv->npollfds - POLLFD_GROW; - } - pollfd = &priv->pollfds[i]; - pollfd->fd = fd; - pollfd->events = slirpev2pollev(events); - pollfd->revents = 0; - - return ((int)i); -} - -static int -slirp_poll_revents(int idx, void *param) -{ - struct slirp_priv *priv; - struct pollfd *pollfd; - short revents; - - priv = param; - assert(idx >= 0); - assert((unsigned int)idx < priv->npollfds); - pollfd = &priv->pollfds[idx]; - assert(pollfd->fd != -1); - - /* The kernel may report POLLHUP even if we didn't ask for it. */ - revents = pollfd->revents; - if ((pollfd->events & POLLHUP) == 0) - revents &= ~POLLHUP; - return (pollev2slirpev(revents)); -} - -static void * -slirp_pollfd_td_loop(void *param) -{ - struct slirp_priv *priv; - struct pollfd *pollfds; - size_t npollfds; - uint32_t timeout; - int error; - - pthread_set_name_np(pthread_self(), "slirp pollfd"); - priv = param; - - pthread_mutex_lock(&priv->mtx); - for (;;) { - int wakeup; - - for (size_t i = 0; i < priv->npollfds; i++) - priv->pollfds[i].fd = -1; - - /* Register for notifications from slirp_cb_notify(). */ - wakeup = slirp_addpoll_cb(priv->wakeup[0], POLLIN, priv); - - timeout = UINT32_MAX; - slirp_pollfds_fill_p(priv->slirp, &timeout, slirp_addpoll_cb, - priv); - - pollfds = priv->pollfds; - npollfds = priv->npollfds; - pthread_mutex_unlock(&priv->mtx); - error = poll(pollfds, npollfds, timeout); - if (error == -1 && errno != EINTR) { - EPRINTLN("poll: %s", strerror(errno)); - exit(1); - } - pthread_mutex_lock(&priv->mtx); - slirp_pollfds_poll_p(priv->slirp, error == -1, - slirp_poll_revents, priv); - - /* - * If we were woken up by the notify callback, mask the - * interrupt. - */ - if ((pollfds[wakeup].revents & POLLIN) != 0) { - ssize_t n; - - do { - uint8_t b; - - n = read(priv->wakeup[0], &b, 1); - } while (n == 1); - if (n != -1 || errno != EAGAIN) { - EPRINTLN("read(wakeup): %s", strerror(errno)); - exit(1); - } - } - } -} - -static int -parse_addr(char *addr, struct sockaddr_in *sinp) -{ - char *port; - int error, porti; - - memset(sinp, 0, sizeof(*sinp)); - sinp->sin_family = AF_INET; - sinp->sin_len = sizeof(struct sockaddr_in); - - port = strchr(addr, ':'); - if (port == NULL) - return (EINVAL); - *port++ = '\0'; - - if (strlen(addr) > 0) { - error = inet_pton(AF_INET, addr, &sinp->sin_addr); - if (error != 1) - return (error == 0 ? EPFNOSUPPORT : errno); - } else { - sinp->sin_addr.s_addr = htonl(INADDR_ANY); - } - - porti = strlen(port) > 0 ? atoi(port) : 0; - if (porti < 0 || porti > UINT16_MAX) - return (EINVAL); - sinp->sin_port = htons(porti); - - return (0); -} - -static int -parse_hostfwd_rule(const char *descr, int *is_udp, struct sockaddr *hostaddr, - struct sockaddr *guestaddr) -{ - struct sockaddr_in *hostaddrp, *guestaddrp; - const char *proto; - char *p, *host, *guest; - int error; - - error = 0; - *is_udp = 0; - - p = strdup(descr); - if (p == NULL) - return (ENOMEM); - - host = strchr(p, ':'); - if (host == NULL) { - error = EINVAL; - goto out; - } - *host++ = '\0'; - - proto = p; - *is_udp = strcmp(proto, "udp") == 0; - - guest = strchr(host, '-'); - if (guest == NULL) { - error = EINVAL; - goto out; - } - *guest++ = '\0'; - - hostaddrp = (struct sockaddr_in *)hostaddr; - error = parse_addr(host, hostaddrp); - if (error != 0) - goto out; - - guestaddrp = (struct sockaddr_in *)guestaddr; - error = parse_addr(guest, guestaddrp); - if (error != 0) - goto out; - -out: - free(p); - return (error); -} - -static int -config_one_hostfwd(struct slirp_priv *priv, const char *rule) -{ - struct sockaddr hostaddr, guestaddr; - int error, is_udp; - - error = parse_hostfwd_rule(rule, &is_udp, &hostaddr, &guestaddr); - if (error != 0) { - EPRINTLN("Unable to parse hostfwd rule '%s': %s", - rule, strerror(error)); - return (error); - } - - error = slirp_add_hostxfwd_p(priv->slirp, &hostaddr, hostaddr.sa_len, - &guestaddr, guestaddr.sa_len, is_udp ? SLIRP_HOSTFWD_UDP : 0); - if (error != 0) { - EPRINTLN("Unable to add hostfwd rule '%s': %s", - rule, strerror(errno)); - return (error); - } - - return (0); -} - -static int -_slirp_init(struct net_backend *be, const char *devname __unused, +slirp_init(struct net_backend *be, const char *devname __unused, nvlist_t *nvl, net_be_rxeof_t cb, void *param) { + extern char **environ; struct slirp_priv *priv = NET_BE_PRIV(be); - SlirpConfig config = { - .version = 4, - .if_mtu = SLIRP_MTU, - .restricted = true, - .in_enabled = true, - .vnetwork.s_addr = htonl(0x0a000200), /* 10.0.2.0/24 */ - .vnetmask.s_addr = htonl(0xffffff00), - .vdhcp_start.s_addr = htonl(0x0a00020f),/* 10.0.2.15 */ - .vhost.s_addr = htonl(0x0a000202), /* 10.0.2.2 */ - .enable_emu = false, - }; - const char *hostfwd; - int error, sndbuf; - - error = slirp_init_once(); - if (error != 0) - return (error); - - slirp_priv_init(priv); + nvlist_t *config; + posix_spawn_file_actions_t fa; + pid_t child; + const char **argv; + char sockname[32]; + int error, s[2]; + + if (socketpair(PF_LOCAL, SOCK_SEQPACKET | SOCK_NONBLOCK, 0, s) != 0) { + EPRINTLN("socketpair"); + return (-1); + } - priv->slirp = slirp_new_p(&config, &slirp_cbs, priv); - if (priv->slirp == NULL) { - EPRINTLN("Unable to create slirp instance"); + /* + * The child will exit once its connection goes away, so make sure only + * one end is inherited by the child. + */ + if (posix_spawn_file_actions_init(&fa) != 0) { + EPRINTLN("posix_spawn_file_actions_init"); goto err; } - - hostfwd = get_config_value_node(nvl, "hostfwd"); - if (hostfwd != NULL) { - char *rules, *tofree; - const char *rule; - - tofree = rules = strdup(hostfwd); - if (rules == NULL) - goto err; - while ((rule = strsep(&rules, ";")) != NULL) { - error = config_one_hostfwd(priv, rule); - if (error != 0) { - free(tofree); - goto err; - } - } - free(tofree); + if (posix_spawn_file_actions_addclose(&fa, s[0]) != 0) { + EPRINTLN("posix_spawn_file_actions_addclose"); + posix_spawn_file_actions_destroy(&fa); + goto err; } - error = socketpair(PF_LOCAL, SOCK_DGRAM | SOCK_CLOEXEC, 0, priv->pipe); + (void)snprintf(sockname, sizeof(sockname), "%d", s[1]); + argv = (const char *[]){ + "/usr/libexec/bhyve-slirp-helper", "-S", sockname, NULL + }; + error = posix_spawn(&child, "/usr/libexec/bhyve-slirp-helper", + &fa, NULL, __DECONST(char **, argv), environ); + posix_spawn_file_actions_destroy(&fa); if (error != 0) { - EPRINTLN("Unable to create pipe: %s", strerror(errno)); + EPRINTLN("posix_spawn(bhyve-slirp-helper): %s", + strerror(error)); goto err; } - error = pipe2(priv->wakeup, O_CLOEXEC | O_NONBLOCK); - if (error != 0) { - EPRINTLN("Unable to create wakeup pipe: %s", strerror(errno)); + config = nvlist_clone(nvl); + if (config == NULL) { + EPRINTLN("nvlist_clone"); goto err; } - - /* - * Try to avoid dropping buffered packets in slirp_cb_send_packet(). - */ - sndbuf = 1024 * 1024; - error = setsockopt(priv->pipe[1], SOL_SOCKET, SO_SNDBUF, &sndbuf, - sizeof(sndbuf)); + nvlist_add_string(config, "vmname", get_config_value("name")); + error = nvlist_send(s[0], config); + nvlist_destroy(config); if (error != 0) { - EPRINTLN("Could not set socket buffer size: %s", - strerror(errno)); + EPRINTLN("nvlist_send"); goto err; } - be->fd = priv->pipe[0]; + be->fd = s[0]; priv->mevp = mevent_add_disabled(be->fd, EVF_READ, cb, param); if (priv->mevp == NULL) { EPRINTLN("Could not register event"); goto err; } - error = pthread_create(&priv->pollfd_td, NULL, slirp_pollfd_td_loop, - priv); - if (error != 0) { - EPRINTLN("Unable to create pollfd thread: %s", strerror(error)); - goto err; - } + priv->helper = child; + priv->s = s[0]; + (void)close(s[1]); return (0); err: - slirp_priv_cleanup(priv); + (void)close(s[0]); + (void)close(s[1]); return (-1); } @@ -599,52 +154,43 @@ static ssize_t slirp_send(struct net_backend *be, const struct iovec *iov, int iovcnt) { struct slirp_priv *priv = NET_BE_PRIV(be); + struct msghdr hdr; - if (iovcnt == 1) { - /* We can avoid copying if there's a single segment. */ - pthread_mutex_lock(&priv->mtx); - slirp_input_p(priv->slirp, iov->iov_base, - (int)iov->iov_len); - pthread_mutex_unlock(&priv->mtx); - return (iov[0].iov_len); - } else { - uint8_t *pkt; - size_t pktlen; - - pktlen = 0; - for (int i = 0; i < iovcnt; i++) - pktlen += iov[i].iov_len; - pkt = malloc(pktlen); - if (pkt == NULL) - return (-1); - pktlen = 0; - for (int i = 0; i < iovcnt; i++) { - memcpy(pkt + pktlen, iov[i].iov_base, iov[i].iov_len); - pktlen += iov[i].iov_len; - } - pthread_mutex_lock(&priv->mtx); - slirp_input_p(priv->slirp, pkt, (int)pktlen); - pthread_mutex_unlock(&priv->mtx); - free(pkt); - return (pktlen); - } + memset(&hdr, 0, sizeof(hdr)); + hdr.msg_iov = __DECONST(struct iovec *, iov); + hdr.msg_iovlen = iovcnt; + return (sendmsg(priv->s, &hdr, MSG_EOR)); } static void -_slirp_cleanup(struct net_backend *be) +slirp_cleanup(struct net_backend *be) { struct slirp_priv *priv = NET_BE_PRIV(be); - slirp_priv_cleanup(priv); + if (priv->helper > 0) { + int status; + + if (kill(priv->helper, SIGKILL) != 0) { + EPRINTLN("kill(bhyve-slirp-helper): %s", + strerror(errno)); + return; + } + (void)waitpid(priv->helper, &status, 0); + } } static ssize_t slirp_peek_recvlen(struct net_backend *be) { struct slirp_priv *priv = NET_BE_PRIV(be); + uint8_t buf[SLIRP_MTU]; ssize_t n; - n = recv(priv->pipe[0], NULL, 0, MSG_PEEK | MSG_DONTWAIT | MSG_TRUNC); + /* + * Copying into the buffer is totally unnecessary, but we don't + * implement MSG_TRUNC for SEQPACKET sockets. + */ + n = recv(priv->s, buf, sizeof(buf), MSG_PEEK | MSG_DONTWAIT); if (n < 0) return (errno == EWOULDBLOCK ? 0 : -1); assert((size_t)n <= SLIRP_MTU); @@ -665,7 +211,7 @@ slirp_recv(struct net_backend *be, const struct iovec *iov, int iovcnt) hdr.msg_control = NULL; hdr.msg_controllen = 0; hdr.msg_flags = 0; - n = recvmsg(priv->pipe[0], &hdr, MSG_DONTWAIT); + n = recvmsg(priv->s, &hdr, MSG_DONTWAIT); if (n < 0) { if (errno == EWOULDBLOCK) return (0); @@ -707,8 +253,8 @@ slirp_set_cap(struct net_backend *be __unused, uint64_t features __unused, static struct net_backend slirp_backend = { .prefix = "slirp", .priv_size = sizeof(struct slirp_priv), - .init = _slirp_init, - .cleanup = _slirp_cleanup, + .init = slirp_init, + .cleanup = slirp_cleanup, .send = slirp_send, .peek_recvlen = slirp_peek_recvlen, .recv = slirp_recv, diff --git a/usr.sbin/bhyve/net_backends.c b/usr.sbin/bhyve/net_backends.c index 2d11c45f217a..95909d1f8ea2 100644 --- a/usr.sbin/bhyve/net_backends.c +++ b/usr.sbin/bhyve/net_backends.c @@ -119,7 +119,8 @@ tap_init(struct net_backend *be, const char *devname, goto error; } - if (ioctl(be->fd, VMIO_SIOCSIFFLAGS, up)) { + if (strncmp("ngd", be->prefix, 3) && + ioctl(be->fd, VMIO_SIOCSIFFLAGS, up)) { EPRINTLN("tap device link up failed"); goto error; } @@ -273,8 +274,24 @@ static struct net_backend vmnet_backend = { .set_cap = tap_set_cap, }; +/* A clone of the tap backend, with a different prefix. */ +static struct net_backend ngd_backend = { + .prefix = "ngd", + .priv_size = sizeof(struct tap_priv), + .init = tap_init, + .cleanup = tap_cleanup, + .send = tap_send, + .peek_recvlen = tap_peek_recvlen, + .recv = tap_recv, + .recv_enable = tap_recv_enable, + .recv_disable = tap_recv_disable, + .get_cap = tap_get_cap, + .set_cap = tap_set_cap, +}; + DATA_SET(net_backend_set, tap_backend); DATA_SET(net_backend_set, vmnet_backend); +DATA_SET(net_backend_set, ngd_backend); int netbe_legacy_config(nvlist_t *nvl, const char *opts) diff --git a/usr.sbin/bhyve/slirp/Makefile b/usr.sbin/bhyve/slirp/Makefile new file mode 100644 index 000000000000..da76fda45e86 --- /dev/null +++ b/usr.sbin/bhyve/slirp/Makefile @@ -0,0 +1,11 @@ +.PATH: ${.CURDIR}/../ + +PROG= bhyve-slirp-helper +SRCS= slirp-helper.c config.c +CFLAGS+= -I${.CURDIR}/../ +LIBADD= nv +MAN= + +BINDIR?= /usr/libexec + +.include <bsd.prog.mk> diff --git a/usr.sbin/bhyve/libslirp.h b/usr.sbin/bhyve/slirp/libslirp.h index a679c4db7913..a679c4db7913 100644 --- a/usr.sbin/bhyve/libslirp.h +++ b/usr.sbin/bhyve/slirp/libslirp.h diff --git a/usr.sbin/bhyve/slirp/slirp-helper.c b/usr.sbin/bhyve/slirp/slirp-helper.c new file mode 100644 index 000000000000..ee62dd212369 --- /dev/null +++ b/usr.sbin/bhyve/slirp/slirp-helper.c @@ -0,0 +1,570 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2023, 2025 Mark Johnston <markj@FreeBSD.org> + * + * This software was developed by the University of Cambridge Computer + * Laboratory (Department of Computer Science and Technology) under Innovate + * UK project 105694, "Digital Security by Design (DSbD) Technology Platform + * Prototype". + */ + +/* + * A helper process which lets bhyve's libslirp-based network backend work + * outside bhyve's Capsicum sandbox. We are started with a SOCK_SEQPACKET + * socket through which we pass and receive packets from the guest's frontend. + * + * At initialization time, we receive an nvlist over the socket which describes + * the desired slirp configuration. + */ + +#include <sys/nv.h> +#include <sys/socket.h> + +#include <assert.h> +#include <capsicum_helpers.h> +#include <dlfcn.h> +#include <err.h> +#include <errno.h> +#include <fcntl.h> +#include <poll.h> +#include <pwd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include <unistd.h> + +#include "config.h" +#include "libslirp.h" + +#define SLIRP_MTU 2048 + +struct slirp_priv { + Slirp *slirp; /* libslirp handle */ + int sock; /* data and control socket */ + int wakeup[2]; /* used to wake up the pollfd thread */ + struct pollfd *pollfds; + size_t npollfds; + size_t lastpollfd; +}; + +typedef int (*slirp_add_hostxfwd_p_t)(Slirp *, + const struct sockaddr *, socklen_t, const struct sockaddr *, socklen_t, + int); +typedef void (*slirp_cleanup_p_t)(Slirp *); +typedef void (*slirp_input_p_t)(Slirp *, const uint8_t *, int); +typedef Slirp *(*slirp_new_p_t)(const SlirpConfig *, const SlirpCb *, void *); +typedef void (*slirp_pollfds_fill_p_t)(Slirp *, uint32_t *timeout, + SlirpAddPollCb, void *); +typedef void (*slirp_pollfds_poll_p_t)(Slirp *, int, SlirpGetREventsCb, void *); + +/* Function pointer table, initialized by libslirp_init(). */ +static slirp_add_hostxfwd_p_t slirp_add_hostxfwd_p; +static slirp_cleanup_p_t slirp_cleanup_p; +static slirp_input_p_t slirp_input_p; +static slirp_new_p_t slirp_new_p; +static slirp_pollfds_fill_p_t slirp_pollfds_fill_p; +static slirp_pollfds_poll_p_t slirp_pollfds_poll_p; + +static int64_t +slirp_cb_clock_get_ns(void *param __unused) +{ + struct timespec ts; + int error; + + error = clock_gettime(CLOCK_MONOTONIC, &ts); + assert(error == 0); + return ((int64_t)(ts.tv_sec * 1000000000L + ts.tv_nsec)); +} + +static void +slirp_cb_notify(void *param) +{ + struct slirp_priv *priv; + + /* Wake up the poll thread. We assume that priv->mtx is held here. */ + priv = param; + (void)write(priv->wakeup[1], "M", 1); +} + +static void +slirp_cb_register_poll_fd(int fd, void *param __unused) +{ + const int one = 1; + + (void)setsockopt(fd, SOL_SOCKET, SO_NOSIGPIPE, &one, sizeof(int)); +} + +static ssize_t +slirp_cb_send_packet(const void *buf, size_t len, void *param) +{ + struct slirp_priv *priv; + ssize_t n; + + priv = param; + + assert(len <= SLIRP_MTU); + n = send(priv->sock, buf, len, MSG_EOR); + if (n < 0) { + warn("slirp_cb_send_packet: send"); + return (n); + } + assert((size_t)n == len); + + return (n); +} + +static void +slirp_cb_unregister_poll_fd(int fd __unused, void *opaque __unused) +{ +} + +/* Callbacks invoked from within libslirp. */ +static const struct SlirpCb slirp_cbs = { + .clock_get_ns = slirp_cb_clock_get_ns, + .notify = slirp_cb_notify, + .register_poll_fd = slirp_cb_register_poll_fd, + .send_packet = slirp_cb_send_packet, + .unregister_poll_fd = slirp_cb_unregister_poll_fd, +}; + +static int +slirpev2pollev(int events) +{ + int ret; + + ret = 0; + if (events & SLIRP_POLL_IN) + ret |= POLLIN; + if (events & SLIRP_POLL_OUT) + ret |= POLLOUT; + if (events & SLIRP_POLL_PRI) + ret |= POLLPRI; + if (events & SLIRP_POLL_ERR) + ret |= POLLERR; + if (events & SLIRP_POLL_HUP) + ret |= POLLHUP; + return (ret); +} + +static int +pollev2slirpev(int events) +{ + int ret; + + ret = 0; + if (events & POLLIN) + ret |= SLIRP_POLL_IN; + if (events & POLLOUT) + ret |= SLIRP_POLL_OUT; + if (events & POLLPRI) + ret |= SLIRP_POLL_PRI; + if (events & POLLERR) + ret |= SLIRP_POLL_ERR; + if (events & POLLHUP) + ret |= SLIRP_POLL_HUP; + return (ret); +} + +static int +slirp_addpoll(struct slirp_priv *priv, int fd, int events) +{ + struct pollfd *pollfd, *pollfds; + size_t i; + + for (i = priv->lastpollfd + 1; i < priv->npollfds; i++) + if (priv->pollfds[i].fd == -1) + break; + if (i == priv->npollfds) { + const size_t POLLFD_GROW = 4; + + priv->npollfds += POLLFD_GROW; + pollfds = realloc(priv->pollfds, + sizeof(*pollfds) * priv->npollfds); + if (pollfds == NULL) + return (-1); + for (i = priv->npollfds - POLLFD_GROW; i < priv->npollfds; i++) + pollfds[i].fd = -1; + priv->pollfds = pollfds; + + i = priv->npollfds - POLLFD_GROW; + } + pollfd = &priv->pollfds[i]; + pollfd->fd = fd; + pollfd->events = slirpev2pollev(events); + pollfd->revents = 0; + priv->lastpollfd = i; + + return ((int)i); +} + +static int +slirp_addpoll_cb(int fd, int events, void *param) +{ + struct slirp_priv *priv; + + priv = param; + return (slirp_addpoll(priv, fd, events)); +} + +static int +slirp_poll_revents(int idx, void *param) +{ + struct slirp_priv *priv; + struct pollfd *pollfd; + short revents; + + priv = param; + assert(idx >= 0); + assert((unsigned int)idx < priv->npollfds); + pollfd = &priv->pollfds[idx]; + assert(pollfd->fd != -1); + + /* The kernel may report POLLHUP even if we didn't ask for it. */ + revents = pollfd->revents; + if ((pollfd->events & POLLHUP) == 0) + revents &= ~POLLHUP; + return (pollev2slirpev(revents)); +} + +/* + * Main loop. Poll libslirp's descriptors plus a couple of our own. + */ +static void +slirp_pollfd_loop(struct slirp_priv *priv) +{ + struct pollfd *pollfds; + size_t npollfds; + uint32_t timeout; + int error; + + for (;;) { + int input, wakeup; + + for (size_t i = 0; i < priv->npollfds; i++) + priv->pollfds[i].fd = -1; + priv->lastpollfd = -1; + + /* Register for notifications from slirp_cb_notify(). */ + wakeup = slirp_addpoll(priv, priv->wakeup[0], POLLIN); + /* Register for input from our parent process. */ + input = slirp_addpoll(priv, priv->sock, POLLIN | POLLRDHUP); + + timeout = UINT32_MAX; + slirp_pollfds_fill_p(priv->slirp, &timeout, slirp_addpoll_cb, + priv); + + pollfds = priv->pollfds; + npollfds = priv->npollfds; + error = poll(pollfds, npollfds, timeout); + if (error == -1 && errno != EINTR) + err(1, "poll"); + slirp_pollfds_poll_p(priv->slirp, error == -1, + slirp_poll_revents, priv); + + /* + * If we were woken up by the notify callback, mask the + * interrupt. + */ + if ((pollfds[wakeup].revents & POLLIN) != 0) { + ssize_t n; + + do { + uint8_t b; + + n = read(priv->wakeup[0], &b, 1); + } while (n == 1); + if (n != -1 || errno != EAGAIN) + err(1, "read"); + } + + /* + * If new packets arrived from our parent, feed them to + * libslirp. + */ + if ((pollfds[input].revents & (POLLHUP | POLLRDHUP)) != 0) + errx(1, "parent process closed connection"); + if ((pollfds[input].revents & POLLIN) != 0) { + ssize_t n; + + do { + uint8_t buf[SLIRP_MTU]; + + n = recv(priv->sock, buf, sizeof(buf), + MSG_DONTWAIT); + if (n < 0) { + if (errno == EWOULDBLOCK) + break; + err(1, "recv"); + } + slirp_input_p(priv->slirp, buf, (int)n); + } while (n >= 0); + } + } +} + +static int +parse_addr(char *addr, struct sockaddr_in *sinp) +{ + char *port; + int error, porti; + + memset(sinp, 0, sizeof(*sinp)); + sinp->sin_family = AF_INET; + sinp->sin_len = sizeof(struct sockaddr_in); + + port = strchr(addr, ':'); + if (port == NULL) + return (EINVAL); + *port++ = '\0'; + + if (strlen(addr) > 0) { + error = inet_pton(AF_INET, addr, &sinp->sin_addr); + if (error != 1) + return (error == 0 ? EPFNOSUPPORT : errno); + } else { + sinp->sin_addr.s_addr = htonl(INADDR_ANY); + } + + porti = strlen(port) > 0 ? atoi(port) : 0; + if (porti < 0 || porti > UINT16_MAX) + return (EINVAL); + sinp->sin_port = htons(porti); + + return (0); +} + +static int +parse_hostfwd_rule(const char *descr, int *is_udp, struct sockaddr *hostaddr, + struct sockaddr *guestaddr) +{ + struct sockaddr_in *hostaddrp, *guestaddrp; + const char *proto; + char *p, *host, *guest; + int error; + + error = 0; + *is_udp = 0; + + p = strdup(descr); + if (p == NULL) + return (ENOMEM); + + host = strchr(p, ':'); + if (host == NULL) { + error = EINVAL; + goto out; + } + *host++ = '\0'; + + proto = p; + *is_udp = strcmp(proto, "udp") == 0; + + guest = strchr(host, '-'); + if (guest == NULL) { + error = EINVAL; + goto out; + } + *guest++ = '\0'; + + hostaddrp = (struct sockaddr_in *)(void *)hostaddr; + error = parse_addr(host, hostaddrp); + if (error != 0) + goto out; + + guestaddrp = (struct sockaddr_in *)(void *)guestaddr; + error = parse_addr(guest, guestaddrp); + if (error != 0) + goto out; + +out: + free(p); + return (error); +} + +static void +config_one_hostfwd(Slirp *slirp, const char *rule) +{ + struct sockaddr hostaddr, guestaddr; + int error, is_udp; + + error = parse_hostfwd_rule(rule, &is_udp, &hostaddr, &guestaddr); + if (error != 0) + errx(1, "unable to parse hostfwd rule '%s': %s", rule, + strerror(error)); + + error = slirp_add_hostxfwd_p(slirp, &hostaddr, hostaddr.sa_len, + &guestaddr, guestaddr.sa_len, is_udp ? SLIRP_HOSTFWD_UDP : 0); + if (error != 0) + errx(1, "Unable to add hostfwd rule '%s': %s", rule, + strerror(errno)); +} + +/* + * Drop privileges to the "nobody" user. Ideally we'd chroot to somewhere like + * /var/empty but libslirp might need to access /etc/resolv.conf. + */ +static void +drop_privs(void) +{ + struct passwd *pw; + + if (geteuid() != 0) + return; + + pw = getpwnam("nobody"); + if (pw == NULL) + err(1, "getpwnam(nobody) failed"); + if (initgroups(pw->pw_name, pw->pw_gid) != 0) + err(1, "initgroups"); + if (setgid(pw->pw_gid) != 0) + err(1, "setgid"); + if (setuid(pw->pw_uid) != 0) + err(1, "setuid"); +} + +static void +libslirp_init(void) +{ + void *handle; + + handle = dlopen("libslirp.so.0", RTLD_LAZY); + if (handle == NULL) + errx(1, "unable to open libslirp.so.0: %s", dlerror()); + +#define IMPORT_SYM(sym) do { \ + sym##_p = (sym##_p_t)dlsym(handle, #sym); \ + if (sym##_p == NULL) \ + errx(1, "failed to resolve %s", #sym); \ +} while (0) + IMPORT_SYM(slirp_add_hostxfwd); + IMPORT_SYM(slirp_cleanup); + IMPORT_SYM(slirp_input); + IMPORT_SYM(slirp_new); + IMPORT_SYM(slirp_pollfds_fill); + IMPORT_SYM(slirp_pollfds_poll); +#undef IMPORT_SYM +} + +static void +usage(void) +{ + fprintf(stderr, "Usage: slirp-helper -S <socket>\n"); + exit(1); +} + +int +main(int argc, char **argv) +{ + struct slirp_priv priv; + SlirpConfig slirpconfig; + Slirp *slirp; + nvlist_t *config; + const char *hostfwd, *vmname; + int ch, fd, sd; + bool restricted; + + sd = -1; + while ((ch = getopt(argc, argv, "S:")) != -1) { + switch (ch) { + case 'S': + sd = atoi(optarg); + if (fcntl(sd, F_GETFD) == -1) + err(1, "invalid socket %s", optarg); + break; + default: + usage(); + /* NOTREACHED */ + } + } + argc -= optind; + argv += optind; + + if (sd == -1) + usage(); + + /* + * Clean the fd space: point stdio to /dev/null and keep our socket. + */ + fd = open("/dev/null", O_RDWR); + if (fd == -1) + err(1, "open(/dev/null)"); + if (dup2(fd, STDIN_FILENO) == -1) + err(1, "dup2(stdin)"); + if (dup2(fd, STDOUT_FILENO) == -1) + err(1, "dup2(stdout)"); + if (dup2(fd, STDERR_FILENO) == -1) + err(1, "dup2(stderr)"); + if (dup2(sd, 3) == -1) + err(1, "dup2(slirp socket)"); + sd = 3; + closefrom(sd + 1); + + memset(&priv, 0, sizeof(priv)); + priv.sock = sd; + if (pipe2(priv.wakeup, O_CLOEXEC | O_NONBLOCK) != 0) + err(1, "pipe2"); + + /* + * Apply the configuration we received from bhyve. + */ + config = nvlist_recv(sd, 0); + if (config == NULL) + err(1, "nvlist_recv"); + vmname = get_config_value_node(config, "vmname"); + if (vmname != NULL) + setproctitle("%s", vmname); + restricted = !get_config_bool_node_default(config, "open", false); + + slirpconfig = (SlirpConfig){ + .version = 4, + .if_mtu = SLIRP_MTU, + .restricted = restricted, + .in_enabled = true, + .vnetwork.s_addr = htonl(0x0a000200), /* 10.0.2.0/24 */ + .vnetmask.s_addr = htonl(0xffffff00), /* 255.255.255.0 */ + .vdhcp_start.s_addr = htonl(0x0a00020f),/* 10.0.2.15 */ + .vhost.s_addr = htonl(0x0a000202), /* 10.0.2.2 */ + .vnameserver.s_addr = htonl(0x0a000203),/* 10.0.2.3 */ + .enable_emu = false, + }; + libslirp_init(); + slirp = slirp_new_p(&slirpconfig, &slirp_cbs, &priv); + + hostfwd = get_config_value_node(config, "hostfwd"); + if (hostfwd != NULL) { + char *rules, *tofree; + const char *rule; + + tofree = rules = strdup(hostfwd); + if (rules == NULL) + err(1, "strdup"); + while ((rule = strsep(&rules, ";")) != NULL) + config_one_hostfwd(slirp, rule); + free(tofree); + } + + priv.slirp = slirp; + + /* + * In restricted mode, we can enter a Capsicum sandbox without losing + * functionality. + */ + if (restricted && caph_enter() != 0) + err(1, "caph_enter"); + + /* + * Drop root privileges if we have them. + */ + drop_privs(); + + /* + * Enter our main loop. If bhyve goes away, we should observe a hangup + * on the socket and exit. + */ + slirp_pollfd_loop(&priv); + /* NOTREACHED */ + + return (1); +} |
