diff options
Diffstat (limited to 'stand/libsa/zfs')
-rw-r--r-- | stand/libsa/zfs/Makefile.inc | 138 | ||||
-rw-r--r-- | stand/libsa/zfs/devicename_stubs.c | 47 | ||||
-rw-r--r-- | stand/libsa/zfs/libzfs.h | 8 | ||||
-rw-r--r-- | stand/libsa/zfs/nvlist.c | 3 | ||||
-rw-r--r-- | stand/libsa/zfs/spl/sys/blake3.h | 17 | ||||
-rw-r--r-- | stand/libsa/zfs/spl/sys/zfs_context.h | 32 | ||||
-rw-r--r-- | stand/libsa/zfs/zfs.c | 201 | ||||
-rw-r--r-- | stand/libsa/zfs/zfsimpl.c | 448 | ||||
-rw-r--r-- | stand/libsa/zfs/zstd_shim.c | 45 |
9 files changed, 531 insertions, 408 deletions
diff --git a/stand/libsa/zfs/Makefile.inc b/stand/libsa/zfs/Makefile.inc index 2a06db0d2f5d..2e9d5679f71f 100644 --- a/stand/libsa/zfs/Makefile.inc +++ b/stand/libsa/zfs/Makefile.inc @@ -1,96 +1,84 @@ -# $FreeBSD$ - .PATH: ${ZFSSRC} .PATH: ${SYSDIR}/crypto/skein .PATH: ${ZFSOSSRC}/spl +.PATH: ${OZFS}/module/zstd .PATH: ${OZFS}/module/zstd/lib/common .PATH: ${OZFS}/module/zstd/lib/compress .PATH: ${OZFS}/module/zstd/lib/decompress -ZFSSRC= zfs.c nvlist.c skein.c skein_block.c list.c -ZFSSRC+= zstd_shim.c -ZFSSRC+= entropy_common.c error_private.c -ZFSSRC+= fse_compress.c fse_decompress.c hist.c -ZFSSRC+= huf_compress.c huf_decompress.c pool.c xxhash.c -ZFSSRC+= zstd_common.c zstd_compress.c zstd_compress_literals.c -ZFSSRC+= zstd_compress_sequences.c zstd_compress_superblock.c -ZFSSRC+= zstd_ddict.c zstd_decompress.c zstd_decompress_block.c -ZFSSRC+= zstd_double_fast.c zstd_fast.c zstd_lazy.c zstd_ldm.c -ZFSSRC+= zstd_opt.c +.PATH: ${OZFS}/module/icp/asm-aarch64/blake3 +.PATH: ${OZFS}/module/icp/algs/blake3 +ZFS_SRC= zfs.c nvlist.c skein.c skein_block.c list.c +ZFS_SRC+= zfs_zstd.c +ZFS_SRC+= blake3.c blake3_generic.c blake3_impl.c +ZSTD_SRC+= entropy_common.c error_private.c +ZSTD_SRC+= fse_decompress.c hist.c +ZSTD_SRC+= huf_decompress.c pool.c xxhash.c +ZSTD_SRC+= zstd_common.c +ZSTD_SRC+= zstd_ddict.c zstd_decompress.c zstd_decompress_block.c +ZSTD_SRC+= zstd_double_fast.c zstd_fast.c zstd_lazy.c zstd_ldm.c -CFLAGS+= -DHAS_ZSTD_ZFS -SRCS+= ${ZFSSRC} +SRCS+= ${ZFS_SRC} ${ZSTD_SRC} ${ZFS_SRC_AS} -CFLAGS+= -I${LDRSRC} -CFLAGS+= -I${SYSDIR}/cddl/boot/zfs -CFLAGS+= -I${SYSDIR}/crypto/skein +# +# Any file that needs the FreeBSD overrides that are in +# include/os/freebsd/spl/XXX needs to have these added to +# CFLAGS_EARLY.file.c. In general, we try to build out of the OpenZFS tree +# unaltered. 
There's a problem, though: since we're building for a +# standalone environment that's neither userland nor kernel, we sometimes need +# special code. That's handled by the 'nested' includes, where we either set up +# something just-so before we include the include/XXX file, or +# tweak something defined in that file. +# -ZFS_EARLY= -I${ZFSOSINC} \ +ZFS_EARLY= -I${ZFSSRC}/spl \ + -I${ZFSOSINC} \ -I${ZFSOSINC}/spl \ -I${ZFSOSINC}/zfs -.for i in ${ZFSSRC} -CFLAGS.$i+= -include ${ZFSOSINC}/spl/sys/ccompile.h +# +# For all files, though, we prepend the sys/ccompile.h file to the build which +# has a bunch of defines that are present in OpenSolaris / Illumos, but missing +# from FreeBSD. +# +.for i in ${ZFS_SRC} ${ZSTD_SRC} +CFLAGS.$i+= -include ${ZFSOSINC}/spl/sys/ccompile.h -Wformat -Wall -I${OZFS}/include \ + -DNEED_SOLARIS_BOOLEAN .endfor +CFLAGS_EARLY.blake3.c+= ${ZFS_EARLY} -DOMIT_SIMD +CFLAGS_EARLY.blake3_generic.c+= ${ZFS_EARLY} -DOMIT_SIMD +CFLAGS_EARLY.blake3_impl.c+= ${ZFS_EARLY} -DOMIT_SIMD CFLAGS_EARLY.list.c+= ${ZFS_EARLY} -CFLAGS_EARLY.zstd_shim.c+= ${ZFS_EARLY} +CFLAGS_EARLY.zfs_zstd.c+= ${ZFS_EARLY} +CFLAGS_EARLY.nvlist.c+= ${ZFS_EARLY} +CFLAGS_EARLY.zfs.c += ${ZFS_EARLY} -# Can't use the early flags because there's two conflicting definitions of boolean_t in -# the zfs code that need to be unified. -CFLAGS.nvlist.c+= -I${ZFSOSINC}/spl -CFLAGS.zfs.c+= -I${ZFSOSINC}/spl \ +# +# zfs.c is special: we need to define HAS_ZSTD_ZFS to get zfssubr.c to initialize zstd +# properly. We need to have the cddl boot compat directory in the include path for zfssubr.c +# to be found, and we need a couple of other include paths for skein and lz4. Finally we +# temporarily need LDRSRC to pick up part.h until libsa has a way to look into partitions +# or enumerate them... 
+# +CFLAGS.zfs.c+= -DHAS_ZSTD_ZFS \ + -I${SYSDIR}/cddl/boot/zfs \ + -I${LDRSRC} \ + -I${SYSDIR}/crypto/skein \ -I${SRCTOP}/sys/cddl/contrib/opensolaris/common/lz4 -CFLAGS.entropy_common.c= -U__BMI__ -CFLAGS.entropy_common.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.error_private.c= -U__BMI__ -CFLAGS.error_private.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.fse_compress.c= -U__BMI__ -CFLAGS.fse_compress.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.fse_decompress.c= -U__BMI__ -CFLAGS.fse_decompress.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.hist.c= -U__BMI__ -CFLAGS.hist.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.huf_compress.c= -U__BMI__ -CFLAGS.huf_compress.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.huf_decompress.c= -U__BMI__ -CFLAGS.huf_decompress.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.pool.c= -U__BMI__ -CFLAGS.pool.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.xxhash.c= -U__BMI__ -CFLAGS.xxhash.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_common.c= -U__BMI__ -CFLAGS.zstd_common.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_compress.c= -U__BMI__ -CFLAGS.zstd_compress.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_compress_literals.c= -U__BMI__ -CFLAGS.zstd_compress_literals.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_compress_sequences.c= -U__BMI__ -CFLAGS.zstd_compress_sequences.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_compress_superblock.c= -U__BMI__ -CFLAGS.zstd_compress_superblock.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_double_fast.c= -U__BMI__ -CFLAGS.zstd_double_fast.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_fast.c= -U__BMI__ -CFLAGS.zstd_fast.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_lazy.c= -U__BMI__ -CFLAGS.zstd_lazy.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_ldm.c= -U__BMI__ -CFLAGS.zstd_ldm.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_opt.c= -U__BMI__ -CFLAGS.zstd_opt.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_ddict.c= -U__BMI__ 
-CFLAGS.zstd_ddict.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_decompress.c= -U__BMI__ -CFLAGS.zstd_decompress.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_decompress_block.c= -U__BMI__ -CFLAGS.zstd_decompress_block.c+= ${NO_WBITWISE_INSTEAD_OF_LOGICAL} -CFLAGS.zstd_shim.c+= -DIN_BASE -DIN_LIBSA -I${OZFS}/include + +# +# ZSTD coding style has some issues, so suppress clang's warnings. Also, zstd's +# use of BMI instructions is broken in this environment, so avoid them. +# +.for i in ${ZSTD_SRC} +CFLAGS.$i+= -U__BMI__ ${NO_WBITWISE_INSTEAD_OF_LOGICAL} +.endfor + +CFLAGS.zfs_zstd.c+= -DIN_BASE -DIN_LIBSA + +CFLAGS.blake3_impl.c+= -I${OZFS}/module/icp/algs/blake3 -I${OZFS}/module/icp/include -DIN_LIBSA # Do not unroll skein loops, reduce code size CFLAGS.skein_block.c+= -DSKEIN_LOOP=111 -CFLAGS+= -I${SYSDIR}/contrib/openzfs/include -CFLAGS+= -I${SYSDIR}/contrib/openzfs/include/os/freebsd/zfs -CFLAGS.zfs.c+= -I${SYSDIR}/cddl/contrib/opensolaris/common/lz4 - -CFLAGS+= -Wformat -Wall +CWARNFLAGS.zfs.c+= ${NO_WDANGLING_POINTER} diff --git a/stand/libsa/zfs/devicename_stubs.c b/stand/libsa/zfs/devicename_stubs.c deleted file mode 100644 index 41bf907e0cf1..000000000000 --- a/stand/libsa/zfs/devicename_stubs.c +++ /dev/null @@ -1,47 +0,0 @@ -/*- - * Copyright (c) 2012 Andriy Gapon <avg@FreeBSD.org> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <stand.h> -#include "libzfs.h" - -__attribute__((weak)) -int -zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path) -{ - return (EINVAL); -} - -__attribute__((weak)) -char * -zfs_fmtdev(void *vdev) -{ - static char buf[128]; - - return (buf); -} diff --git a/stand/libsa/zfs/libzfs.h b/stand/libsa/zfs/libzfs.h index e8676c0d53b8..35af864e7e0b 100644 --- a/stand/libsa/zfs/libzfs.h +++ b/stand/libsa/zfs/libzfs.h @@ -22,8 +22,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * $FreeBSD$ */ #ifndef _BOOT_LIBZFS_H_ @@ -48,10 +46,8 @@ struct zfs_devdesc { uint64_t root_guid; }; -int zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, - const char **path); -char *zfs_fmtdev(void *vdev); -int zfs_probe_dev(const char *devname, uint64_t *pool_guid); +char *zfs_fmtdev(struct devdesc *); +int zfs_probe_dev(const char *devname, uint64_t *pool_guid, bool part_too); int zfs_list(const char *name); int zfs_get_bootonce(void *, const char *, char *, size_t); int zfs_get_bootenv(void *, nvlist_t **); diff --git a/stand/libsa/zfs/nvlist.c b/stand/libsa/zfs/nvlist.c index e5e0abecb274..6cb496a57b26 100644 --- a/stand/libsa/zfs/nvlist.c +++ b/stand/libsa/zfs/nvlist.c @@ -23,9 +23,6 @@ * SUCH DAMAGE. */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/endian.h> #include <sys/stdint.h> diff --git a/stand/libsa/zfs/spl/sys/blake3.h b/stand/libsa/zfs/spl/sys/blake3.h new file mode 100644 index 000000000000..aa2d3b7e55cc --- /dev/null +++ b/stand/libsa/zfs/spl/sys/blake3.h @@ -0,0 +1,17 @@ +/* + * Copyright 2022, Netflix, Inc + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +/* + * Gross and ugly hack to cope with upstream's sys/blake3.h not being standalone + * safe. + */ +#define _KERNEL + +#include_next <sys/blake3.h> + +#undef _KERNEL diff --git a/stand/libsa/zfs/spl/sys/zfs_context.h b/stand/libsa/zfs/spl/sys/zfs_context.h new file mode 100644 index 000000000000..9f12955dd05f --- /dev/null +++ b/stand/libsa/zfs/spl/sys/zfs_context.h @@ -0,0 +1,32 @@ +/* + * Copyright 2022, Netflix, Inc + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +/* TODO: openzfs/include/sys/uio_impl.h must not be included in stand */ +#ifndef _SYS_UIO_IMPL_H +#define _SYS_UIO_IMPL_H +#endif + +/* + * sys/atomic.h must be included after sys/sysmacros.h. The latter includes + * machine/atomic.h, which interferes. 
Sadly, upstream includes them in the + * wrong order, so we include it here to fix that. + */ +#include <sys/sysmacros.h> + +#include_next <sys/zfs_context.h> + +#define SYSCTL_HANDLER_ARGS void + +/* + * Not sure why I need these, but including the canonical stand.h fails because + * the normal string.h doesn't like all the other shenanigans in this environment. + */ +void *memcpy(void *dst, const void *src, size_t len); +void *memset(void *dest, int c, size_t len); +void *memmem(const void *big, size_t big_len, const void *little, + size_t little_len); diff --git a/stand/libsa/zfs/zfs.c b/stand/libsa/zfs/zfs.c index 633ef3b18784..70a102f6425d 100644 --- a/stand/libsa/zfs/zfs.c +++ b/stand/libsa/zfs/zfs.c @@ -22,13 +22,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $FreeBSD$ */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - /* * Stand-alone file reading package. 
*/ @@ -66,6 +61,9 @@ static void zfs_bootenv_initial(const char *envname, spa_t *spa, static void zfs_checkpoints_initial(spa_t *spa, const char *name, const char *dsname); +static int zfs_parsedev(struct devdesc **idev, const char *devspec, + const char **path); + struct devsw zfs_dev; struct fs_ops zfs_fsops = { @@ -108,7 +106,8 @@ struct zfs_be_entry { static int zfs_open(const char *upath, struct open_file *f) { - struct zfsmount *mount = (struct zfsmount *)f->f_devdata; + struct devdesc *dev = f->f_devdata; + struct zfsmount *mount = dev->d_opendata; struct file *fp; int rc; @@ -149,7 +148,8 @@ zfs_close(struct open_file *f) static int zfs_read(struct open_file *f, void *start, size_t size, size_t *resid /* out */) { - const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; + struct devdesc *dev = f->f_devdata; + const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa; struct file *fp = (struct file *)f->f_fsdata; struct stat sb; size_t n; @@ -213,7 +213,8 @@ zfs_seek(struct open_file *f, off_t offset, int where) static int zfs_stat(struct open_file *f, struct stat *sb) { - const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; + struct devdesc *dev = f->f_devdata; + const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa; struct file *fp = (struct file *)f->f_fsdata; return (zfs_dnode_stat(spa, &fp->f_dnode, sb)); @@ -222,7 +223,8 @@ zfs_stat(struct open_file *f, struct stat *sb) static int zfs_readdir(struct open_file *f, struct dirent *d) { - const spa_t *spa = ((struct zfsmount *)f->f_devdata)->spa; + struct devdesc *dev = f->f_devdata; + const spa_t *spa = ((struct zfsmount *)dev->d_opendata)->spa; struct file *fp = (struct file *)f->f_fsdata; mzap_ent_phys_t mze; struct stat sb; @@ -366,59 +368,77 @@ zfs_readdir(struct open_file *f, struct dirent *d) } } +static spa_t * +spa_find_by_dev(struct zfs_devdesc *dev) +{ + + if (dev->dd.d_dev->dv_type != DEVT_ZFS) + return (NULL); + + if (dev->pool_guid == 0) + return 
(STAILQ_FIRST(&zfs_pools)); + + return (spa_find_by_guid(dev->pool_guid)); +} + /* * if path is NULL, create mount structure, but do not add it to list. */ static int zfs_mount(const char *dev, const char *path, void **data) { - struct zfs_devdesc *zfsdev; + struct zfs_devdesc *zfsdev = NULL; spa_t *spa; - struct zfsmount *mnt; + struct zfsmount *mnt = NULL; int rv; errno = 0; - zfsdev = malloc(sizeof(*zfsdev)); - if (zfsdev == NULL) - return (errno); - - rv = zfs_parsedev(zfsdev, dev + 3, NULL); + rv = zfs_parsedev((struct devdesc **)&zfsdev, dev, NULL); if (rv != 0) { - free(zfsdev); return (rv); } spa = spa_find_by_dev(zfsdev); - if (spa == NULL) - return (ENXIO); + if (spa == NULL) { + rv = ENXIO; + goto err; + } mnt = calloc(1, sizeof(*mnt)); - if (mnt != NULL && path != NULL) + if (mnt == NULL) { + rv = ENOMEM; + goto err; + } + + if (path != NULL) { mnt->path = strdup(path); - rv = errno; + if (mnt->path == NULL) { + rv = ENOMEM; + goto err; + } + } - if (mnt != NULL) - rv = zfs_mount_impl(spa, zfsdev->root_guid, mnt); - free(zfsdev); + rv = zfs_mount_impl(spa, zfsdev->root_guid, mnt); - if (rv == 0 && mnt != NULL && mnt->objset.os_type != DMU_OST_ZFS) { + if (rv == 0 && mnt->objset.os_type != DMU_OST_ZFS) { printf("Unexpected object set type %ju\n", (uintmax_t)mnt->objset.os_type); rv = EIO; } - +err: if (rv != 0) { if (mnt != NULL) free(mnt->path); free(mnt); + free(zfsdev); return (rv); } - if (mnt != NULL) { - *data = mnt; - if (path != NULL) - STAILQ_INSERT_TAIL(&zfsmount, mnt, next); - } + *data = mnt; + if (path != NULL) + STAILQ_INSERT_TAIL(&zfsmount, mnt, next); + + free(zfsdev); return (rv); } @@ -773,35 +793,12 @@ zfs_probe_partition(void *arg, const char *partname, int zfs_get_bootenv(void *vdev, nvlist_t **benvp) { - struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; - nvlist_t *benv = NULL; - vdev_t *vd; spa_t *spa; - if (dev->dd.d_dev->dv_type != DEVT_ZFS) - return (ENOTSUP); - - if ((spa = spa_find_by_dev(dev)) == NULL) + if ((spa = 
spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL) return (ENXIO); - if (spa->spa_bootenv == NULL) { - STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, - v_childlink) { - benv = vdev_read_bootenv(vd); - - if (benv != NULL) - break; - } - spa->spa_bootenv = benv; - } else { - benv = spa->spa_bootenv; - } - - if (benv == NULL) - return (ENOENT); - - *benvp = benv; - return (0); + return (zfs_get_bootenv_spa(spa, benvp)); } /* @@ -810,22 +807,12 @@ zfs_get_bootenv(void *vdev, nvlist_t **benvp) int zfs_set_bootenv(void *vdev, nvlist_t *benv) { - struct zfs_devdesc *dev = (struct zfs_devdesc *)vdev; spa_t *spa; - vdev_t *vd; - if (dev->dd.d_dev->dv_type != DEVT_ZFS) - return (ENOTSUP); - - if ((spa = spa_find_by_dev(dev)) == NULL) + if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL) return (ENXIO); - STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { - vdev_write_bootenv(vd, benv); - } - - spa->spa_bootenv = benv; - return (0); + return (zfs_set_bootenv_spa(spa, benv)); } /* @@ -835,27 +822,12 @@ zfs_set_bootenv(void *vdev, nvlist_t *benv) int zfs_get_bootonce(void *vdev, const char *key, char *buf, size_t size) { - nvlist_t *benv; - char *result = NULL; - int result_size, rv; - - if ((rv = zfs_get_bootenv(vdev, &benv)) != 0) - return (rv); + spa_t *spa; - if ((rv = nvlist_find(benv, key, DATA_TYPE_STRING, NULL, - &result, &result_size)) == 0) { - if (result_size == 0) { - /* ignore empty string */ - rv = ENOENT; - } else { - size = MIN((size_t)result_size + 1, size); - strlcpy(buf, result, size); - } - (void) nvlist_remove(benv, key, DATA_TYPE_STRING); - (void) zfs_set_bootenv(vdev, benv); - } + if ((spa = spa_find_by_dev((struct zfs_devdesc *)vdev)) == NULL) + return (ENXIO); - return (rv); + return (zfs_get_bootonce_spa(spa, key, buf, size)); } /* @@ -1292,8 +1264,12 @@ zfs_nvstore_unset_impl(void *vdev, const char *name, bool unset_env) rv = zfs_set_bootenv(vdev, spa->spa_bootenv); } - if (unset_env) - 
env_discard(env_getenv(name)); + if (unset_env) { + struct env_var *ev = env_getenv(name); + + if (ev != NULL) + env_discard(ev); + } return (rv); } @@ -1490,7 +1466,7 @@ zfs_attach_nvstore(void *vdev) } int -zfs_probe_dev(const char *devname, uint64_t *pool_guid) +zfs_probe_dev(const char *devname, uint64_t *pool_guid, bool parts_too) { struct ptable *table; struct zfs_probe_args pa; @@ -1506,6 +1482,8 @@ zfs_probe_dev(const char *devname, uint64_t *pool_guid) ret = zfs_probe(pa.fd, pool_guid); if (ret == 0) return (0); + if (!parts_too) + return (ENXIO); /* Probe each partition */ ret = ioctl(pa.fd, DIOCGMEDIASIZE, &mediasz); @@ -1583,11 +1561,10 @@ zfs_dev_open(struct open_file *f, ...) rv = 0; /* This device is not set as currdev, mount us private copy. */ if (mount == NULL) - rv = zfs_mount(zfs_fmtdev(dev), NULL, (void **)&mount); + rv = zfs_mount(devformat(&dev->dd), NULL, (void **)&mount); if (rv == 0) { - f->f_devdata = mount; - free(dev); + dev->dd.d_opendata = mount; } return (rv); } @@ -1595,25 +1572,18 @@ zfs_dev_open(struct open_file *f, ...) static int zfs_dev_close(struct open_file *f) { + struct devdesc *dev; struct zfsmount *mnt, *mount; - mnt = f->f_devdata; + dev = f->f_devdata; + mnt = dev->d_opendata; STAILQ_FOREACH(mount, &zfsmount, next) { if (mnt->spa->spa_guid == mount->spa->spa_guid) break; } - /* - * devclose() will free f->f_devdata, but since we do have - * pointer to zfsmount structure in f->f_devdata, and - * zfs_unmount() will also free the zfsmount structure, - * we will get double free. To prevent double free, - * we must set f_devdata to NULL there. 
- */ - if (mount != NULL) - f->f_devdata = NULL; - + /* XXX */ return (0); } @@ -1633,11 +1603,13 @@ struct devsw zfs_dev = { .dv_close = zfs_dev_close, .dv_ioctl = noioctl, .dv_print = zfs_dev_print, - .dv_cleanup = NULL + .dv_cleanup = nullsys, + .dv_fmtdev = zfs_fmtdev, + .dv_parsedev = zfs_parsedev, }; -int -zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path) +static int +zfs_parsedev(struct devdesc **idev, const char *devspec, const char **path) { static char rootname[ZFS_MAXNAMELEN]; static char poolname[ZFS_MAXNAMELEN]; @@ -1646,8 +1618,9 @@ zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path) const char *np; const char *sep; int rv; + struct zfs_devdesc *dev; - np = devspec; + np = devspec + 3; /* Skip the leading 'zfs' */ if (*np != ':') return (EINVAL); np++; @@ -1670,18 +1643,24 @@ zfs_parsedev(struct zfs_devdesc *dev, const char *devspec, const char **path) spa = spa_find_by_name(poolname); if (!spa) return (ENXIO); + dev = malloc(sizeof(*dev)); + if (dev == NULL) + return (ENOMEM); dev->pool_guid = spa->spa_guid; rv = zfs_lookup_dataset(spa, rootname, &dev->root_guid); - if (rv != 0) + if (rv != 0) { + free(dev); return (rv); + } if (path != NULL) *path = (*end == '\0') ? end : end + 1; dev->dd.d_dev = &zfs_dev; + *idev = &dev->dd; return (0); } char * -zfs_fmtdev(void *vdev) +zfs_fmtdev(struct devdesc *vdev) { static char rootname[ZFS_MAXNAMELEN]; static char buf[2 * ZFS_MAXNAMELEN + 8]; @@ -1689,7 +1668,7 @@ zfs_fmtdev(void *vdev) spa_t *spa; buf[0] = '\0'; - if (dev->dd.d_dev->dv_type != DEVT_ZFS) + if (vdev->d_dev->dv_type != DEVT_ZFS) return (buf); /* Do we have any pools? */ diff --git a/stand/libsa/zfs/zfsimpl.c b/stand/libsa/zfs/zfsimpl.c index 865294dafed4..f15d9b016068 100644 --- a/stand/libsa/zfs/zfsimpl.c +++ b/stand/libsa/zfs/zfsimpl.c @@ -24,9 +24,6 @@ * SUCH DAMAGE. */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - /* * Stand-alone ZFS file reader. 
*/ @@ -110,36 +107,27 @@ typedef struct indirect_vsd { } indirect_vsd_t; /* - * List of all vdevs, chained through v_alllink. - */ -static vdev_list_t zfs_vdevs; - -/* - * List of ZFS features supported for read + * List of supported read-incompatible ZFS features. Do not add here features + * marked as ZFEATURE_FLAG_READONLY_COMPAT, they are irrelevant for read-only! */ static const char *features_for_read[] = { - "org.illumos:lz4_compress", - "com.delphix:hole_birth", - "com.delphix:extensible_dataset", + "com.datto:bookmark_v2", + "com.datto:encryption", + "com.delphix:bookmark_written", + "com.delphix:device_removal", "com.delphix:embedded_data", - "org.open-zfs:large_blocks", + "com.delphix:extensible_dataset", + "com.delphix:head_errlog", + "com.delphix:hole_birth", + "com.joyent:multi_vdev_crash_dump", + "com.klarasystems:vdev_zaps_v2", + "org.freebsd:zstd_compress", + "org.illumos:lz4_compress", "org.illumos:sha512", "org.illumos:skein", + "org.open-zfs:large_blocks", + "org.openzfs:blake3", "org.zfsonlinux:large_dnode", - "com.joyent:multi_vdev_crash_dump", - "com.delphix:spacemap_histogram", - "com.delphix:zpool_checkpoint", - "com.delphix:spacemap_v2", - "com.datto:encryption", - "com.datto:bookmark_v2", - "org.zfsonlinux:allocation_classes", - "com.datto:resilver_defer", - "com.delphix:device_removal", - "com.delphix:obsolete_counts", - "com.intel:allocation_classes", - "org.freebsd:zstd_compress", - "com.delphix:bookmark_written", - "com.delphix:head_errlog", NULL }; @@ -174,7 +162,6 @@ vdev_indirect_mapping_entry_phys_t * static void zfs_init(void) { - STAILQ_INIT(&zfs_vdevs); STAILQ_INIT(&zfs_pools); dnode_cache_buf = malloc(SPA_MAXBLOCKSIZE); @@ -446,7 +433,7 @@ vdev_indirect_mapping_entry(vdev_indirect_mapping_t *vim, uint64_t index) * * It's possible that the given offset will not be in the mapping table * (i.e. no mapping entries contain this offset), in which case, the - * return value value depends on the "next_if_missing" parameter. 
+ * return value depends on the "next_if_missing" parameter. * * If the offset is not found in the table and "next_if_missing" is * B_FALSE, then NULL will always be returned. The behavior is intended @@ -846,16 +833,27 @@ vdev_replacing_read(vdev_t *vdev, const blkptr_t *bp, void *buf, return (kid->v_read(kid, bp, buf, offset, bytes)); } +/* + * List of vdevs that were fully initialized from their own label, but later a + * newer label was found that obsoleted the stale label, freeing its + * configuration tree. We keep those vdevs around, since a new configuration + * may include them. + */ +static vdev_list_t orphans = STAILQ_HEAD_INITIALIZER(orphans); + static vdev_t * -vdev_find(uint64_t guid) +vdev_find(vdev_list_t *list, uint64_t guid) { - vdev_t *vdev; + vdev_t *vdev, *safe; - STAILQ_FOREACH(vdev, &zfs_vdevs, v_alllink) + STAILQ_FOREACH_SAFE(vdev, list, v_childlink, safe) { if (vdev->v_guid == guid) return (vdev); + if ((vdev = vdev_find(&vdev->v_children, guid)) != NULL) + return (vdev); + } - return (0); + return (NULL); } static vdev_t * @@ -864,6 +862,11 @@ vdev_create(uint64_t guid, vdev_read_t *_read) vdev_t *vdev; vdev_indirect_config_t *vic; + if ((vdev = vdev_find(&orphans, guid))) { + STAILQ_REMOVE(&orphans, vdev, vdev, v_childlink); + return (vdev); + } + vdev = calloc(1, sizeof(vdev_t)); if (vdev != NULL) { STAILQ_INIT(&vdev->v_children); @@ -878,7 +881,6 @@ vdev_create(uint64_t guid, vdev_read_t *_read) if (_read != NULL) { vic = &vdev->vdev_indirect_config; vic->vic_prev_indirect_vdev = UINT64_MAX; - STAILQ_INSERT_TAIL(&zfs_vdevs, vdev, v_alllink); } } @@ -1042,22 +1044,19 @@ vdev_init(uint64_t guid, const nvlist_t *nvlist, vdev_t **vdevp) * STAILQ_INSERT_AFTER. 
*/ static vdev_t * -vdev_find_previous(vdev_t *top_vdev, vdev_t *vdev) +vdev_find_previous(vdev_t *top_vdev, uint64_t id) { vdev_t *v, *previous; - if (STAILQ_EMPTY(&top_vdev->v_children)) - return (NULL); - previous = NULL; STAILQ_FOREACH(v, &top_vdev->v_children, v_childlink) { - if (v->v_id > vdev->v_id) + if (v->v_id > id) return (previous); - if (v->v_id == vdev->v_id) + if (v->v_id == id) return (v); - if (v->v_id < vdev->v_id) + if (v->v_id < id) previous = v; } return (previous); @@ -1079,7 +1078,7 @@ vdev_child_count(vdev_t *vdev) /* * Insert vdev into top_vdev children list. List is ordered by v_id. */ -static void +static vdev_t * vdev_insert(vdev_t *top_vdev, vdev_t *vdev) { vdev_t *previous; @@ -1092,7 +1091,7 @@ vdev_insert(vdev_t *top_vdev, vdev_t *vdev) * so we can use either STAILQ_INSERT_HEAD or STAILQ_INSERT_AFTER * as STAILQ does not have insert before. */ - previous = vdev_find_previous(top_vdev, vdev); + previous = vdev_find_previous(top_vdev, vdev->v_id); if (previous == NULL) { STAILQ_INSERT_HEAD(&top_vdev->v_children, vdev, v_childlink); @@ -1101,7 +1100,8 @@ vdev_insert(vdev_t *top_vdev, vdev_t *vdev) * This vdev was configured from label config, * do not insert duplicate. */ - return; + free(vdev); + return (previous); } else { STAILQ_INSERT_AFTER(&top_vdev->v_children, previous, vdev, v_childlink); @@ -1110,24 +1110,28 @@ vdev_insert(vdev_t *top_vdev, vdev_t *vdev) count = vdev_child_count(top_vdev); if (top_vdev->v_nchildren < count) top_vdev->v_nchildren = count; + return (vdev); } static int -vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const nvlist_t *nvlist) +vdev_from_nvlist(spa_t *spa, uint64_t top_guid, uint64_t label_guid, + uint64_t txg, const nvlist_t *nvlist) { vdev_t *top_vdev, *vdev; nvlist_t **kids = NULL; int rc, nkids; /* Get top vdev. 
*/ - top_vdev = vdev_find(top_guid); + top_vdev = vdev_find(&spa->spa_root_vdev->v_children, top_guid); if (top_vdev == NULL) { rc = vdev_init(top_guid, nvlist, &top_vdev); if (rc != 0) return (rc); top_vdev->v_spa = spa; top_vdev->v_top = top_vdev; - vdev_insert(spa->spa_root_vdev, top_vdev); + top_vdev->v_label = label_guid; + top_vdev->v_txg = txg; + (void )vdev_insert(spa->spa_root_vdev, top_vdev); } /* Add children if there are any. */ @@ -1148,7 +1152,7 @@ vdev_from_nvlist(spa_t *spa, uint64_t top_guid, const nvlist_t *nvlist) vdev->v_spa = spa; vdev->v_top = top_vdev; - vdev_insert(top_vdev, vdev); + vdev = vdev_insert(top_vdev, vdev); } } else { /* @@ -1167,28 +1171,6 @@ done: return (rc); } -static int -vdev_init_from_label(spa_t *spa, const nvlist_t *nvlist) -{ - uint64_t pool_guid, top_guid; - nvlist_t *vdevs; - int rc; - - if (nvlist_find(nvlist, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, - NULL, &pool_guid, NULL) || - nvlist_find(nvlist, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64, - NULL, &top_guid, NULL) || - nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, - NULL, &vdevs, NULL)) { - printf("ZFS: can't find vdev details\n"); - return (ENOENT); - } - - rc = vdev_from_nvlist(spa, top_guid, vdevs); - nvlist_destroy(vdevs); - return (rc); -} - static void vdev_set_state(vdev_t *vdev) { @@ -1235,14 +1217,14 @@ vdev_set_state(vdev_t *vdev) } static int -vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist) +vdev_update_from_nvlist(vdev_t *root, uint64_t top_guid, const nvlist_t *nvlist) { vdev_t *vdev; nvlist_t **kids = NULL; int rc, nkids; /* Update top vdev. 
*/ - vdev = vdev_find(top_guid); + vdev = vdev_find(&root->v_children, top_guid); if (vdev != NULL) vdev_set_initial_state(vdev, nvlist); @@ -1258,7 +1240,7 @@ vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist) if (rc != 0) break; - vdev = vdev_find(guid); + vdev = vdev_find(&root->v_children, guid); if (vdev != NULL) vdev_set_initial_state(vdev, kids[i]); } @@ -1274,6 +1256,19 @@ vdev_update_from_nvlist(uint64_t top_guid, const nvlist_t *nvlist) return (rc); } +static void +vdev_free(struct vdev *vdev) +{ + struct vdev *kid, *safe; + + STAILQ_FOREACH_SAFE(kid, &vdev->v_children, v_childlink, safe) + vdev_free(kid); + if (vdev->v_phys_read != NULL) + STAILQ_INSERT_HEAD(&orphans, vdev, v_childlink); + else + free(vdev); +} + static int vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist) { @@ -1317,14 +1312,16 @@ vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist) NULL, &guid, NULL); if (rc != 0) break; - vdev = vdev_find(guid); + vdev = vdev_find(&spa->spa_root_vdev->v_children, guid); /* * Top level vdev is missing, create it. + * XXXGL: how can this happen? 
*/ if (vdev == NULL) - rc = vdev_from_nvlist(spa, guid, kids[i]); + rc = vdev_from_nvlist(spa, guid, 0, 0, kids[i]); else - rc = vdev_update_from_nvlist(guid, kids[i]); + rc = vdev_update_from_nvlist(spa->spa_root_vdev, guid, + kids[i]); if (rc != 0) break; } @@ -1342,6 +1339,53 @@ vdev_init_from_nvlist(spa_t *spa, const nvlist_t *nvlist) return (rc); } +static bool +nvlist_find_child_guid(const nvlist_t *nvlist, uint64_t guid) +{ + nvlist_t **kids = NULL; + int nkids, i; + bool rv = false; + + if (nvlist_find(nvlist, ZPOOL_CONFIG_CHILDREN, DATA_TYPE_NVLIST_ARRAY, + &nkids, &kids, NULL) != 0) + nkids = 0; + + for (i = 0; i < nkids; i++) { + uint64_t kid_guid; + + if (nvlist_find(kids[i], ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, + NULL, &kid_guid, NULL) != 0) + break; + if (kid_guid == guid) + rv = true; + else + rv = nvlist_find_child_guid(kids[i], guid); + if (rv) + break; + } + + for (i = 0; i < nkids; i++) + nvlist_destroy(kids[i]); + free(kids); + + return (rv); +} + +static bool +nvlist_find_vdev_guid(const nvlist_t *nvlist, uint64_t guid) +{ + nvlist_t *vdevs; + bool rv; + + if (nvlist_find(nvlist, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, NULL, + &vdevs, NULL) != 0) + return (false); + rv = nvlist_find_child_guid(vdevs, guid); + nvlist_destroy(vdevs); + + return (rv); +} + static spa_t * spa_find_by_guid(uint64_t guid) { @@ -1367,19 +1411,6 @@ spa_find_by_name(const char *name) } static spa_t * -spa_find_by_dev(struct zfs_devdesc *dev) -{ - - if (dev->dd.d_dev->dv_type != DEVT_ZFS) - return (NULL); - - if (dev->pool_guid == 0) - return (STAILQ_FIRST(&zfs_pools)); - - return (spa_find_by_guid(dev->pool_guid)); -} - -static spa_t * spa_create(uint64_t guid, const char *name) { spa_t *spa; @@ -1399,7 +1430,7 @@ spa_create(uint64_t guid, const char *name) free(spa); return (NULL); } - spa->spa_root_vdev->v_name = strdup("root"); + spa->spa_root_vdev->v_name = spa->spa_name; STAILQ_INSERT_TAIL(&zfs_pools, spa, spa_link); return (spa); @@ -1701,14 +1732,14 @@ static 
int vdev_write_bootenv_impl(vdev_t *vdev, vdev_boot_envblock_t *be) { vdev_t *kid; - int rv = 0, rc; + int rv = 0, err; STAILQ_FOREACH(kid, &vdev->v_children, v_childlink) { if (kid->v_state != VDEV_STATE_HEALTHY) continue; - rc = vdev_write_bootenv_impl(kid, be); - if (rv == 0) - rv = rc; + err = vdev_write_bootenv_impl(kid, be); + if (err != 0) + rv = err; } /* @@ -1718,12 +1749,12 @@ vdev_write_bootenv_impl(vdev_t *vdev, vdev_boot_envblock_t *be) return (rv); for (int l = 0; l < VDEV_LABELS; l++) { - rc = vdev_label_write(vdev, l, be, + err = vdev_label_write(vdev, l, be, offsetof(vdev_label_t, vl_be)); - if (rc != 0) { + if (err != 0) { printf("failed to write bootenv to %s label %d: %d\n", - vdev->v_name ? vdev->v_name : "unknown", l, rc); - rv = rc; + vdev->v_name ? vdev->v_name : "unknown", l, err); + rv = err; } } return (rv); @@ -2023,11 +2054,10 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv, { vdev_t vtmp; spa_t *spa; - vdev_t *vdev; - nvlist_t *nvl; + vdev_t *vdev, *top; + nvlist_t *nvl, *vdevs; uint64_t val; - uint64_t guid, vdev_children; - uint64_t pool_txg, pool_guid; + uint64_t guid, pool_guid, top_guid, txg; const char *pool_name; int rc, namelen; @@ -2083,13 +2113,18 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv, } if (nvlist_find(nvl, ZPOOL_CONFIG_POOL_TXG, DATA_TYPE_UINT64, - NULL, &pool_txg, NULL) != 0 || + NULL, &txg, NULL) != 0 || + txg == 0 || + nvlist_find(nvl, ZPOOL_CONFIG_TOP_GUID, DATA_TYPE_UINT64, + NULL, &top_guid, NULL) != 0 || nvlist_find(nvl, ZPOOL_CONFIG_POOL_GUID, DATA_TYPE_UINT64, NULL, &pool_guid, NULL) != 0 || nvlist_find(nvl, ZPOOL_CONFIG_POOL_NAME, DATA_TYPE_STRING, - NULL, &pool_name, &namelen) != 0) { + NULL, &pool_name, &namelen) != 0 || + nvlist_find(nvl, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, + NULL, &guid, NULL) != 0) { /* - * Cache and spare devices end up here - just ignore + * Cache, spare and replaced devices end up here - just ignore * them. 
*/ nvlist_destroy(nvl); @@ -2103,8 +2138,6 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv, if (spa == NULL) { char *name; - nvlist_find(nvl, ZPOOL_CONFIG_VDEV_CHILDREN, - DATA_TYPE_UINT64, NULL, &vdev_children, NULL); name = malloc(namelen + 1); if (name == NULL) { nvlist_destroy(nvl); @@ -2118,10 +2151,47 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv, nvlist_destroy(nvl); return (ENOMEM); } - spa->spa_root_vdev->v_nchildren = vdev_children; } - if (pool_txg > spa->spa_txg) - spa->spa_txg = pool_txg; + + /* + * Check if configuration is already known. If configuration is known + * and txg numbers don't match, we got 2x2 scenarios here. First, is + * the label being read right now _newer_ than the one read before. + * Second, is the vdev that provided the stale label _present_ in the + * newer configuration. If neither is true, we completely ignore the + * label. + */ + STAILQ_FOREACH(top, &spa->spa_root_vdev->v_children, v_childlink) + if (top->v_guid == top_guid) { + bool newer, present; + + if (top->v_txg == txg) + break; + newer = (top->v_txg < txg); + present = newer ? + nvlist_find_vdev_guid(nvl, top->v_label) : + (vdev_find(&top->v_children, guid) != NULL); + printf("ZFS: pool %s vdev %s %s stale label from " + "0x%jx@0x%jx, %s 0x%jx@0x%jx\n", + spa->spa_name, top->v_name, + present ? "using" : "ignoring", + newer ? top->v_label : guid, + newer ? top->v_txg : txg, + present ? "referred by" : "using", + newer ? guid : top->v_label, + newer ? txg : top->v_txg); + if (newer) { + STAILQ_REMOVE(&spa->spa_root_vdev->v_children, + top, vdev, v_childlink); + vdev_free(top); + break; + } else if (present) { + break; + } else { + nvlist_destroy(nvl); + return (EIO); + } + } /* * Get the vdev tree and create our in-core copy of it. @@ -2129,19 +2199,22 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv, * be some kind of alias (overlapping slices, dangerously dedicated * disks etc). 
*/ - if (nvlist_find(nvl, ZPOOL_CONFIG_GUID, DATA_TYPE_UINT64, - NULL, &guid, NULL) != 0) { - nvlist_destroy(nvl); - return (EIO); - } - vdev = vdev_find(guid); + vdev = vdev_find(&spa->spa_root_vdev->v_children, guid); /* Has this vdev already been inited? */ if (vdev && vdev->v_phys_read) { nvlist_destroy(nvl); return (EIO); } - rc = vdev_init_from_label(spa, nvl); + if (nvlist_find(nvl, ZPOOL_CONFIG_VDEV_TREE, DATA_TYPE_NVLIST, NULL, + &vdevs, NULL)) { + printf("ZFS: can't find vdev details\n"); + nvlist_destroy(nvl); + return (ENOENT); + } + + rc = vdev_from_nvlist(spa, top_guid, guid, txg, vdevs); + nvlist_destroy(vdevs); nvlist_destroy(nvl); if (rc != 0) return (rc); @@ -2150,7 +2223,7 @@ vdev_probe(vdev_phys_read_t *_read, vdev_phys_write_t *_write, void *priv, * We should already have created an incomplete vdev for this * vdev. Find it and initialise it with our read proc. */ - vdev = vdev_find(guid); + vdev = vdev_find(&spa->spa_root_vdev->v_children, guid); if (vdev != NULL) { vdev->v_phys_read = _read; vdev->v_phys_write = _write; @@ -3067,11 +3140,12 @@ zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result) char name[256]; char component[256]; uint64_t dir_obj, parent_obj, child_dir_zapobj; - dnode_phys_t child_dir_zap, dataset, dir, parent; + dnode_phys_t child_dir_zap, snapnames_zap, dataset, dir, parent; dsl_dir_phys_t *dd; dsl_dataset_phys_t *ds; char *p; int len; + boolean_t issnap = B_FALSE; p = &name[sizeof(name) - 1]; *p = '\0'; @@ -3082,6 +3156,8 @@ zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result) } ds = (dsl_dataset_phys_t *)&dataset.dn_bonus; dir_obj = ds->ds_dir_obj; + if (ds->ds_snapnames_zapobj == 0) + issnap = B_TRUE; for (;;) { if (objset_get_dnode(spa, spa->spa_mos, dir_obj, &dir) != 0) @@ -3097,6 +3173,34 @@ zfs_rlookup(const spa_t *spa, uint64_t objnum, char *result) &parent) != 0) return (EIO); dd = (dsl_dir_phys_t *)&parent.dn_bonus; + if (issnap == B_TRUE) { + /* + * The dataset we are looking up is a snapshot + * 
the dir_obj is the parent already, we don't want + * the grandparent just yet. Reset to the parent. + */ + dd = (dsl_dir_phys_t *)&dir.dn_bonus; + /* Lookup the dataset to get the snapname ZAP */ + if (objset_get_dnode(spa, spa->spa_mos, + dd->dd_head_dataset_obj, &dataset)) + return (EIO); + ds = (dsl_dataset_phys_t *)&dataset.dn_bonus; + if (objset_get_dnode(spa, spa->spa_mos, + ds->ds_snapnames_zapobj, &snapnames_zap) != 0) + return (EIO); + /* Get the name of the snapshot */ + if (zap_rlookup(spa, &snapnames_zap, component, + objnum) != 0) + return (EIO); + len = strlen(component); + p -= len; + memcpy(p, component, len); + --p; + *p = '@'; + issnap = B_FALSE; + continue; + } + child_dir_zapobj = dd->dd_child_dir_zapobj; if (objset_get_dnode(spa, spa->spa_mos, child_dir_zapobj, &child_dir_zap) != 0) @@ -3126,9 +3230,11 @@ zfs_lookup_dataset(const spa_t *spa, const char *name, uint64_t *objnum) { char element[256]; uint64_t dir_obj, child_dir_zapobj; - dnode_phys_t child_dir_zap, dir; + dnode_phys_t child_dir_zap, snapnames_zap, dir, dataset; dsl_dir_phys_t *dd; + dsl_dataset_phys_t *ds; const char *p, *q; + boolean_t issnap = B_FALSE; if (objset_get_dnode(spa, spa->spa_mos, DMU_POOL_DIRECTORY_OBJECT, &dir)) @@ -3159,6 +3265,25 @@ zfs_lookup_dataset(const spa_t *spa, const char *name, uint64_t *objnum) p += strlen(p); } + if (issnap == B_TRUE) { + if (objset_get_dnode(spa, spa->spa_mos, + dd->dd_head_dataset_obj, &dataset)) + return (EIO); + ds = (dsl_dataset_phys_t *)&dataset.dn_bonus; + if (objset_get_dnode(spa, spa->spa_mos, + ds->ds_snapnames_zapobj, &snapnames_zap) != 0) + return (EIO); + /* Actual loop condition #2. 
*/ + if (zap_lookup(spa, &snapnames_zap, element, + sizeof (dir_obj), 1, &dir_obj) != 0) + return (ENOENT); + *objnum = dir_obj; + return (0); + } else if ((q = strchr(element, '@')) != NULL) { + issnap = B_TRUE; + element[q - element] = '\0'; + p = q + 1; + } child_dir_zapobj = dd->dd_child_dir_zapobj; if (objset_get_dnode(spa, spa->spa_mos, child_dir_zapobj, &child_dir_zap) != 0) @@ -3509,8 +3634,10 @@ zfs_spa_init(spa_t *spa) return (EIO); } rc = load_nvlist(spa, config_object, &nvlist); - if (rc != 0) + if (rc != 0) { + printf("ZFS: failed to load pool %s nvlist\n", spa->spa_name); return (rc); + } rc = zap_lookup(spa, &dir, DMU_POOL_ZPOOL_CHECKPOINT, sizeof(uint64_t), sizeof(checkpoint) / sizeof(uint64_t), @@ -3813,3 +3940,82 @@ done: free(entry); return (rc); } + +/* + * Return either a cached copy of the bootenv, or read each of the vdev children + * looking for the bootenv. Cache what's found and return the results. Returns 0 + * when benvp is filled in, and some errno when not. + */ +static int +zfs_get_bootenv_spa(spa_t *spa, nvlist_t **benvp) +{ + vdev_t *vd; + nvlist_t *benv = NULL; + + if (spa->spa_bootenv == NULL) { + STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, + v_childlink) { + benv = vdev_read_bootenv(vd); + + if (benv != NULL) + break; + } + spa->spa_bootenv = benv; + } + benv = spa->spa_bootenv; + + if (benv == NULL) + return (ENOENT); + + *benvp = benv; + return (0); +} + +/* + * Store nvlist to pool label bootenv area. Also updates cached pointer in spa. + */ +static int +zfs_set_bootenv_spa(spa_t *spa, nvlist_t *benv) +{ + vdev_t *vd; + + STAILQ_FOREACH(vd, &spa->spa_root_vdev->v_children, v_childlink) { + vdev_write_bootenv(vd, benv); + } + + spa->spa_bootenv = benv; + return (0); +} + +/* + * Get bootonce value by key. The bootonce <key, value> pair is removed from the + * bootenv nvlist and the remaining nvlist is committed back to disk. 
This process clears + * the bootonce flag since we've reached the point in the boot that we've 'used' + * the BE. For chained boot scenarios, we may reach this point multiple times (but + * only remove it and return 0 the first time). + */ +static int +zfs_get_bootonce_spa(spa_t *spa, const char *key, char *buf, size_t size) +{ + nvlist_t *benv; + char *result = NULL; + int result_size, rv; + + if ((rv = zfs_get_bootenv_spa(spa, &benv)) != 0) + return (rv); + + if ((rv = nvlist_find(benv, key, DATA_TYPE_STRING, NULL, + &result, &result_size)) == 0) { + if (result_size == 0) { + /* ignore empty string */ + rv = ENOENT; + } else if (buf != NULL) { + size = MIN((size_t)result_size + 1, size); + strlcpy(buf, result, size); + } + (void)nvlist_remove(benv, key, DATA_TYPE_STRING); + (void)zfs_set_bootenv_spa(spa, benv); + } + + return (rv); +} diff --git a/stand/libsa/zfs/zstd_shim.c b/stand/libsa/zfs/zstd_shim.c deleted file mode 100644 index b1ba4babcf36..000000000000 --- a/stand/libsa/zfs/zstd_shim.c +++ /dev/null @@ -1,45 +0,0 @@ -/*- - * Copyright (c) 2020 M. Warner Losh <imp@FreeBSD.org> - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -/* - * Small amount of shim code needed to get zfs_zstd.c to compile. These items - * here should all be defined in the SPL or as part of libsa somewhere, but - * aren't for reasons that haven't been tracked down yet. Ideally, they would - * all go away and we'd compile zfs_zstd.c directly. Based on an original by - * Matt Macey, but only the #include remains untouched from that. - */ - -#define ZFS_MODULE_PARAM_ARGS void -typedef int boolean_t; /* This one may be tough to get rid of */ - -/* TODO: openzfs/include/sys/uio_impl.h must not be included */ -#ifndef _SYS_UIO_IMPL_H -#define _SYS_UIO_IMPL_H -#endif - -#include <contrib/openzfs/module/zstd/zfs_zstd.c> |