diff options
Diffstat (limited to 'usr.sbin/makefs')
62 files changed, 6108 insertions, 724 deletions
diff --git a/usr.sbin/makefs/Makefile b/usr.sbin/makefs/Makefile index 3fea648f9383..47a41cfac813 100644 --- a/usr.sbin/makefs/Makefile +++ b/usr.sbin/makefs/Makefile @@ -1,5 +1,3 @@ -# $FreeBSD$ - SRCDIR:=${.PARSEDIR:tA} .include <src.opts.mk> @@ -17,7 +15,17 @@ SRCS= cd9660.c \ MAN= makefs.8 NO_WCAST_ALIGN= -CSTD= c11 + +.if ${MK_ZFS} != "no" +SRCS+= zfs.c +CFLAGS+=-I${SRCDIR}/zfs \ + -I${SRCTOP}/stand/libsa \ + -I${SRCTOP}/sys/cddl/boot + +CFLAGS+= -DHAVE_ZFS + +.include "${SRCDIR}/zfs/Makefile.inc" +.endif .include "${SRCDIR}/cd9660/Makefile.inc" .include "${SRCDIR}/ffs/Makefile.inc" @@ -39,4 +47,9 @@ LIBADD= netbsd util sbuf HAS_TESTS= SUBDIR.${MK_TESTS}+= tests +# cd9660_generate_path_table adds a global variable to an on-stack +# TAILQ which temporarily stores a pointer to the on-stack TAILQ head +# in the global +CWARNFLAGS.cd9660.c+= ${NO_WDANGLING_POINTER} + .include <bsd.prog.mk> diff --git a/usr.sbin/makefs/Makefile.depend b/usr.sbin/makefs/Makefile.depend index 00df4e89c233..b5ea35e70f2e 100644 --- a/usr.sbin/makefs/Makefile.depend +++ b/usr.sbin/makefs/Makefile.depend @@ -1,4 +1,3 @@ -# $FreeBSD$ # Autogenerated - do NOT edit! DIRDEPS = \ diff --git a/usr.sbin/makefs/Makefile.depend.host b/usr.sbin/makefs/Makefile.depend.host new file mode 100644 index 000000000000..525b90abb269 --- /dev/null +++ b/usr.sbin/makefs/Makefile.depend.host @@ -0,0 +1,11 @@ +# Autogenerated - do NOT edit! 
+ +DIRDEPS = \ + lib/libnetbsd \ + + +.include <dirdeps.mk> + +.if ${DEP_RELDIR} == ${_DEP_RELDIR} +# local dependencies - needed for -jN in clean tree +.endif diff --git a/usr.sbin/makefs/Makefile.depend.options b/usr.sbin/makefs/Makefile.depend.options new file mode 100644 index 000000000000..4dcc6c412671 --- /dev/null +++ b/usr.sbin/makefs/Makefile.depend.options @@ -0,0 +1,6 @@ +DIRDEPS_OPTIONS = host_egacy + +DIRDEPS.host_egacy.yes = tools/build + +.include <dirdeps-options.mk> + diff --git a/usr.sbin/makefs/cd9660.c b/usr.sbin/makefs/cd9660.c index a83ff753b069..687bfe46ac27 100644 --- a/usr.sbin/makefs/cd9660.c +++ b/usr.sbin/makefs/cd9660.c @@ -1,7 +1,7 @@ -/* $NetBSD: cd9660.c,v 1.32 2011/08/23 17:09:11 christos Exp $ */ +/* $NetBSD: cd9660.c,v 1.56 2019/10/18 04:09:02 msaitoh Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD AND BSD-4-Clause + * SPDX-License-Identifier: BSD-2-Clause AND BSD-4-Clause * * Copyright (c) 2005 Daniel Watt, Walter Deignan, Ryan Gabrys, Alan * Perez-Rathke and Ram Vedam. All rights reserved. 
@@ -97,9 +97,6 @@ * */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/queue.h> #include <ctype.h> @@ -110,12 +107,11 @@ __FBSDID("$FreeBSD$"); #include "makefs.h" #include "cd9660.h" #include "cd9660/iso9660_rrip.h" -#include "cd9660/cd9660_archimedes.h" static void cd9660_finalize_PVD(iso9660_disk *); static cd9660node *cd9660_allocate_cd9660node(void); static void cd9660_set_defaults(iso9660_disk *); -static int cd9660_arguments_set_string(const char *, const char *, int, +static int cd9660_arguments_set_string(const char *, const char *, size_t, char, char *); static void cd9660_populate_iso_dir_record( struct _iso_directory_record_cd9660 *, u_char, u_char, u_char, @@ -144,14 +140,11 @@ static void cd9660_convert_structure(iso9660_disk *, fsnode *, cd9660node *, int static void cd9660_free_structure(cd9660node *); static int cd9660_generate_path_table(iso9660_disk *); static int cd9660_level1_convert_filename(iso9660_disk *, const char *, char *, - int); + size_t, int); static int cd9660_level2_convert_filename(iso9660_disk *, const char *, char *, + size_t, int); +static int cd9660_convert_filename(iso9660_disk *, const char *, char *, size_t, int); -#if 0 -static int cd9660_joliet_convert_filename(iso9660_disk *, const char *, char *, - int); -#endif -static int cd9660_convert_filename(iso9660_disk *, const char *, char *, int); static void cd9660_populate_dot_records(iso9660_disk *, cd9660node *); static int64_t cd9660_compute_offsets(iso9660_disk *, cd9660node *, int64_t); #if 0 @@ -200,7 +193,6 @@ cd9660_set_defaults(iso9660_disk *diskStructure) /* Set up defaults in our own structure */ diskStructure->verbose_level = 0; diskStructure->keep_bad_images = 0; - diskStructure->follow_sym_links = 0; diskStructure->isoLevel = 2; diskStructure->rock_ridge_enabled = 0; @@ -208,7 +200,6 @@ cd9660_set_defaults(iso9660_disk *diskStructure) diskStructure->rock_ridge_move_count = 0; diskStructure->rr_moved_dir = 0; - 
diskStructure->archimedes_enabled = 0; diskStructure->chrp_boot = 0; diskStructure->include_padding_areas = 1; @@ -232,7 +223,8 @@ cd9660_set_defaults(iso9660_disk *diskStructure) memset(diskStructure->primaryDescriptor.abstract_file_id, 0x20,37); memset(diskStructure->primaryDescriptor.bibliographic_file_id, 0x20,37); - strcpy(diskStructure->primaryDescriptor.system_id, "FreeBSD"); + strlcpy(diskStructure->primaryDescriptor.system_id, "FreeBSD", + sizeof(diskStructure->primaryDescriptor.system_id)); /* Boot support: Initially disabled */ diskStructure->has_generic_bootimage = 0; @@ -270,10 +262,6 @@ cd9660_prep_opts(fsinfo_t *fsopts) OPT_NUM('v', "verbose", verbose_level, 0, 2, "Turns on verbose output"), - OPT_BOOL('h', "help", displayHelp, - "Show help message"), - OPT_BOOL('S', "follow-symlinks", follow_sym_links, - "Resolve symlinks in pathnames"), OPT_BOOL('R', "rockridge", rock_ridge_enabled, "Enable Rock-Ridge extensions"), OPT_BOOL('C', "chrp-boot", chrp_boot, @@ -292,8 +280,6 @@ cd9660_prep_opts(fsinfo_t *fsopts) "Omit trailing periods in filenames"), OPT_BOOL('\0', "allow-lowercase", allow_lowercase, "Allow lowercase characters in filenames"), - OPT_BOOL('\0', "archimedes", archimedes_enabled, - "Enable Archimedes structure"), OPT_BOOL('\0', "no-trailing-padding", include_padding_areas, "Include padding areas"), @@ -328,13 +314,14 @@ cd9660_cleanup_opts(fsinfo_t *fsopts) } static int -cd9660_arguments_set_string(const char *val, const char *fieldtitle, int length, - char testmode, char * dest) +cd9660_arguments_set_string(const char *val, const char *fieldtitle, + size_t length, char testmode, char *dest) { - int len, test; + size_t len; + int test; if (val == NULL) - warnx("error: The %s requires a string argument", fieldtitle); + warnx("error: '%s' requires a string argument", fieldtitle); else if ((len = strlen(val)) <= length) { if (testmode == 'd') test = cd9660_valid_d_chars(val); @@ -346,10 +333,10 @@ cd9660_arguments_set_string(const char *val, 
const char *fieldtitle, int length, cd9660_uppercase_characters(dest, len); return 1; } else - warnx("error: The %s must be composed of " - "%c-characters", fieldtitle, testmode); + warnx("error: '%s' must be composed of %c-characters", + fieldtitle, testmode); } else - warnx("error: The %s must be at most 32 characters long", + warnx("error: '%s' must be at most 32 characters long", fieldtitle); return 0; } @@ -475,7 +462,7 @@ cd9660_makefs(const char *image, const char *dir, fsnode *root, fsinfo_t *fsopts) { int64_t startoffset; - int numDirectories; + int ret, numDirectories; uint64_t pathTableSectors; int64_t firstAvailableSector; int64_t totalSpace; @@ -494,14 +481,6 @@ cd9660_makefs(const char *image, const char *dir, fsnode *root, assert(dir != NULL); assert(root != NULL); - if (diskStructure->displayHelp) { - /* - * Display help here - probably want to put it in - * a separate function - */ - return; - } - if (diskStructure->verbose_level > 0) printf("%s: image %s directory %s root %p\n", __func__, image, dir, root); @@ -518,7 +497,7 @@ cd9660_makefs(const char *image, const char *dir, fsnode *root, real_root->isoDirRecord = emalloc(sizeof(*real_root->isoDirRecord)); /* Leave filename blank for root */ memset(real_root->isoDirRecord->name, 0, - ISO_FILENAME_MAXLENGTH_WITH_PADDING); + sizeof(real_root->isoDirRecord->name)); real_root->level = 0; diskStructure->rootNode = real_root; @@ -546,10 +525,6 @@ cd9660_makefs(const char *image, const char *dir, fsnode *root, if (diskStructure->verbose_level > 0) printf("%s: done converting tree\n", __func__); - /* non-SUSP extensions */ - if (diskStructure->archimedes_enabled) - archimedes_convert_tree(diskStructure->rootNode); - /* Rock ridge / SUSP init pass */ if (diskStructure->rock_ridge_enabled) { cd9660_susp_initialize(diskStructure, diskStructure->rootNode, @@ -629,7 +604,7 @@ cd9660_makefs(const char *image, const char *dir, fsnode *root, if (diskStructure->include_padding_areas) diskStructure->totalSectors 
+= 150; - cd9660_write_image(diskStructure, image); + ret = cd9660_write_image(diskStructure, image); if (diskStructure->verbose_level > 1) { debug_print_volume_descriptor_information(diskStructure); @@ -641,7 +616,10 @@ cd9660_makefs(const char *image, const char *dir, fsnode *root, cd9660_free_structure(real_root); if (diskStructure->verbose_level > 0) - printf("%s: done\n", __func__); + printf("%s: done ret = %d\n", __func__, ret); + + if (ret == 0) /* cd9660_write_image() failed */ + exit(1); } /* Generic function pointer - implement later */ @@ -727,7 +705,10 @@ cd9660_populate_iso_dir_record(struct _iso_directory_record_cd9660 *record, u_char ext_attr_length, u_char flags, u_char name_len, const char * name) { + time_t tstamp = stampst.st_ino ? stampst.st_mtime : time(NULL); + record->ext_attr_length[0] = ext_attr_length; + cd9660_time_915(record->date, tstamp); record->flags[0] = ISO_FLAG_CLEAR | flags; record->file_unit_size[0] = 0; record->interleave[0] = 0; @@ -814,15 +795,14 @@ cd9660_fill_extended_attribute_record(cd9660node *node) static int cd9660_translate_node_common(iso9660_disk *diskStructure, cd9660node *newnode) { - time_t tstamp = stampst.st_ino ? 
stampst.st_mtime : time(NULL); u_char flag; - char temp[ISO_FILENAME_MAXLENGTH_WITH_PADDING]; + char temp[ISO_FILENAME_MAXLENGTH]; /* Now populate the isoDirRecord structure */ - memset(temp, 0, ISO_FILENAME_MAXLENGTH_WITH_PADDING); + memset(temp, 0, sizeof(temp)); (void)cd9660_convert_filename(diskStructure, newnode->node->name, - temp, !(S_ISDIR(newnode->node->type))); + temp, sizeof(temp), !(S_ISDIR(newnode->node->type))); flag = ISO_FLAG_CLEAR; if (S_ISDIR(newnode->node->type)) @@ -831,12 +811,6 @@ cd9660_translate_node_common(iso9660_disk *diskStructure, cd9660node *newnode) cd9660_populate_iso_dir_record(newnode->isoDirRecord, 0, flag, strlen(temp), temp); - /* Set the various dates */ - - /* If we want to use the current date and time */ - - cd9660_time_915(newnode->isoDirRecord->date, tstamp); - cd9660_bothendian_dword(newnode->fileDataLength, newnode->isoDirRecord->size); /* If the file is a link, we want to set the size to 0 */ @@ -886,7 +860,7 @@ cd9660_translate_node(iso9660_disk *diskStructure, fsnode *node, * @param const char * The first file name * @param const char * The second file name * @returns : -1 if first is less than second, 0 if they are the same, 1 if - * the second is greater than the first + * the second is greater than the first */ static int cd9660_compare_filename(const char *first, const char *second) @@ -1001,7 +975,7 @@ cd9660_sorted_child_insert(cd9660node *parent, cd9660node *cn_new) /* * Called After cd9660_sorted_child_insert - * handles file collisions by suffixing each filname with ~n + * handles file collisions by suffixing each filename with ~n * where n represents the files respective place in the ordering */ static int @@ -1070,7 +1044,7 @@ cd9660_rename_filename(iso9660_disk *diskStructure, cd9660node *iter, int num, else maxlength = ISO_FILENAME_MAXLENGTH_BEFORE_VERSION; - tmp = emalloc(ISO_FILENAME_MAXLENGTH_WITH_PADDING); + tmp = emalloc(ISO_FILENAME_MAXLENGTH); while (i < num && iter) { powers = 1; @@ -1131,7 
+1105,8 @@ cd9660_rename_filename(iso9660_disk *diskStructure, cd9660node *iter, int num, while (digits > 0) { digit = (int)(temp / powers); temp = temp - digit * powers; - sprintf(&tmp[numbts] , "%d", digit); + snprintf(&tmp[numbts], ISO_FILENAME_MAXLENGTH - numbts, + "%d", digit); digits--; numbts++; powers = powers / 10; @@ -1177,8 +1152,7 @@ cd9660_copy_filenames(iso9660_disk *diskStructure, cd9660node *node) TAILQ_FOREACH(cn, &node->cn_children, cn_next_child) { cd9660_copy_filenames(diskStructure, cn); - memcpy(cn->o_name, cn->isoDirRecord->name, - ISO_FILENAME_MAXLENGTH_WITH_PADDING); + memcpy(cn->o_name, cn->isoDirRecord->name, sizeof(cn->o_name)); } } @@ -1275,7 +1249,7 @@ cd9660_rrip_move_directory(iso9660_disk *diskStructure, cd9660node *dir) return NULL; diskStructure->rock_ridge_move_count++; - snprintf(newname, sizeof(newname), "%08i", + snprintf(newname, sizeof(newname), "%08u", diskStructure->rock_ridge_move_count); /* Point to old parent */ @@ -1301,7 +1275,7 @@ cd9660_rrip_move_directory(iso9660_disk *diskStructure, cd9660node *dir) /* TODO: Inherit permissions / ownership (basically the entire inode) */ /* Set the new name */ - memset(dir->isoDirRecord->name, 0, ISO_FILENAME_MAXLENGTH_WITH_PADDING); + memset(dir->isoDirRecord->name, 0, sizeof(dir->isoDirRecord->name)); strncpy(dir->isoDirRecord->name, newname, 8); dir->isoDirRecord->length[0] = 34 + 8; dir->isoDirRecord->name_len[0] = 8; @@ -1538,10 +1512,10 @@ cd9660_generate_path_table(iso9660_disk *diskStructure) TAILQ_INSERT_HEAD(&pt_head, n, ptq); /* Breadth-first traversal of file structure */ - while (pt_head.tqh_first != 0) { - n = pt_head.tqh_first; + while (!TAILQ_EMPTY(&pt_head)) { + n = TAILQ_FIRST(&pt_head); dirNode = n->node; - TAILQ_REMOVE(&pt_head, pt_head.tqh_first, ptq); + TAILQ_REMOVE(&pt_head, n, ptq); free(n); /* Update the size */ @@ -1582,18 +1556,13 @@ cd9660_compute_full_filename(cd9660node *node, char *buf) { int len; - len = CD9660MAXPATH + 1; + len = PATH_MAX; len = 
snprintf(buf, len, "%s/%s/%s", node->node->root, node->node->path, node->node->name); - if (len > CD9660MAXPATH) + if (len >= PATH_MAX) errx(EXIT_FAILURE, "Pathname too long."); } -/* NEW filename conversion method */ -typedef int(*cd9660_filename_conversion_functor)(iso9660_disk *, const char *, - char *, int); - - /* * TODO: These two functions are almost identical. * Some code cleanup is possible here @@ -1602,7 +1571,7 @@ typedef int(*cd9660_filename_conversion_functor)(iso9660_disk *, const char *, */ static int cd9660_level1_convert_filename(iso9660_disk *diskStructure, const char *oldname, - char *newname, int is_file) + char *newname, size_t newnamelen, int is_file) { /* * ISO 9660 : 10.1 @@ -1613,6 +1582,7 @@ cd9660_level1_convert_filename(iso9660_disk *diskStructure, const char *oldname, int namelen = 0; int extlen = 0; int found_ext = 0; + char *orignewname = newname; while (*oldname != '\0' && extlen < 3) { /* Handle period first, as it is special */ @@ -1626,11 +1596,6 @@ cd9660_level1_convert_filename(iso9660_disk *diskStructure, const char *oldname, found_ext = 1; } } else { - /* cut RISC OS file type off ISO name */ - if (diskStructure->archimedes_enabled && - *oldname == ',' && strlen(oldname) == 4) - break; - /* Enforce 12.3 / 8 */ if (namelen == 8 && !found_ext) break; @@ -1654,7 +1619,7 @@ cd9660_level1_convert_filename(iso9660_disk *diskStructure, const char *oldname, if (!found_ext && !diskStructure->omit_trailing_period) *newname++ = '.'; /* Add version */ - sprintf(newname, ";%i", 1); + snprintf(newname, newnamelen - (newname - orignewname), ";%i", 1); } return namelen + extlen + found_ext; } @@ -1662,7 +1627,7 @@ cd9660_level1_convert_filename(iso9660_disk *diskStructure, const char *oldname, /* XXX bounds checking! 
*/ static int cd9660_level2_convert_filename(iso9660_disk *diskStructure, const char *oldname, - char *newname, int is_file) + char *newname, size_t newnamelen, int is_file) { /* * ISO 9660 : 7.5.1 @@ -1673,13 +1638,15 @@ cd9660_level2_convert_filename(iso9660_disk *diskStructure, const char *oldname, * File version number (5 characters, 1-32767) * 1 <= Sum of File name and File name extension <= 30 */ + int maxlen = is_file ? 30 : 31; int namelen = 0; int extlen = 0; int found_ext = 0; + char *orignewname = newname; - while (*oldname != '\0' && namelen + extlen < 30) { + while (*oldname != '\0' && namelen + extlen < maxlen) { /* Handle period first, as it is special */ - if (*oldname == '.') { + if (*oldname == '.' && is_file) { if (found_ext) { if (diskStructure->allow_multidot) { *newname++ = '.'; @@ -1693,22 +1660,13 @@ cd9660_level2_convert_filename(iso9660_disk *diskStructure, const char *oldname, found_ext = 1; } } else { - /* cut RISC OS file type off ISO name */ - if (diskStructure->archimedes_enabled && - *oldname == ',' && strlen(oldname) == 4) - break; - - if (islower((unsigned char)*oldname)) + if (islower((unsigned char)*oldname)) *newname++ = toupper((unsigned char)*oldname); else if (isupper((unsigned char)*oldname) || isdigit((unsigned char)*oldname)) *newname++ = *oldname; - else if (diskStructure->allow_multidot && - *oldname == '.') { - *newname++ = '.'; - } else { + else *newname++ = '_'; - } if (found_ext) extlen++; @@ -1721,21 +1679,11 @@ cd9660_level2_convert_filename(iso9660_disk *diskStructure, const char *oldname, if (!found_ext && !diskStructure->omit_trailing_period) *newname++ = '.'; /* Add version */ - sprintf(newname, ";%i", 1); + snprintf(newname, newnamelen - (newname - orignewname), ";%i", 1); } return namelen + extlen + found_ext; } -#if 0 -static int -cd9660_joliet_convert_filename(iso9660_disk *diskStructure, const char *oldname, - char *newname, int is_file) -{ - /* TODO: implement later, move to cd9660_joliet.c ?? 
*/ -} -#endif - - /* * Convert a file name to ISO compliant file name * @param char * oldname The original filename @@ -1746,16 +1694,16 @@ cd9660_joliet_convert_filename(iso9660_disk *diskStructure, const char *oldname, */ static int cd9660_convert_filename(iso9660_disk *diskStructure, const char *oldname, - char *newname, int is_file) + char *newname, size_t newnamelen, int is_file) { assert(1 <= diskStructure->isoLevel && diskStructure->isoLevel <= 2); - /* NEW */ - cd9660_filename_conversion_functor conversion_function = NULL; if (diskStructure->isoLevel == 1) - conversion_function = &cd9660_level1_convert_filename; + return(cd9660_level1_convert_filename(diskStructure, + oldname, newname, newnamelen, is_file)); else if (diskStructure->isoLevel == 2) - conversion_function = &cd9660_level2_convert_filename; - return (*conversion_function)(diskStructure, oldname, newname, is_file); + return (cd9660_level2_convert_filename(diskStructure, + oldname, newname, newnamelen, is_file)); + abort(); } int @@ -1852,7 +1800,7 @@ cd9660_compute_offsets(iso9660_disk *diskStructure, cd9660node *node, cd9660_compute_record_size(diskStructure, child); if ((cd9660_compute_record_size(diskStructure, child) + current_sector_usage) >= - diskStructure->sectorSize) { + diskStructure->sectorSize) { current_sector_usage = 0; node->fileSectorsUsed++; } @@ -1966,7 +1914,7 @@ cd9660_create_virtual_entry(iso9660_disk *diskStructure, const char *name, temp->isoDirRecord = emalloc(sizeof(*temp->isoDirRecord)); cd9660_convert_filename(diskStructure, tfsnode->name, - temp->isoDirRecord->name, file); + temp->isoDirRecord->name, sizeof(temp->isoDirRecord->name), file); temp->node = tfsnode; temp->parent = parent; diff --git a/usr.sbin/makefs/cd9660.h b/usr.sbin/makefs/cd9660.h index 5f6525d3e94f..c6f0e6472af3 100644 --- a/usr.sbin/makefs/cd9660.h +++ b/usr.sbin/makefs/cd9660.h @@ -1,7 +1,7 @@ -/* $NetBSD: cd9660.h,v 1.17 2011/06/23 02:35:56 enami Exp $ */ +/* $NetBSD: cd9660.h,v 1.21 2015/12/24 
15:52:37 christos Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2005 Daniel Watt, Walter Deignan, Ryan Gabrys, Alan * Perez-Rathke and Ram Vedam. All rights reserved. @@ -32,8 +32,6 @@ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _MAKEFS_CD9660_H @@ -53,6 +51,7 @@ #include <sys/queue.h> #include <sys/param.h> #include <sys/endian.h> +#include <sys/tree.h> #include "makefs.h" #include "iso.h" @@ -65,33 +64,8 @@ #define INODE_WARNX(__x) #endif /* DEBUG */ -#define CD9660MAXPATH 4096 - -#define ISO_STRING_FILTER_NONE = 0x00 -#define ISO_STRING_FILTER_DCHARS = 0x01 -#define ISO_STRING_FILTER_ACHARS = 0x02 - -/* -Extended preferences type, in the spirit of what makefs gives us (only ints) -*/ -typedef struct { - const char *shortName; /* Short option */ - const char *name; /* option name */ - char *value; /* where to stuff the value */ - int minLength; /* minimum for value */ - int maxLength; /* maximum for value */ - const char *desc; /* option description */ - int filterFlags; -} string_option_t; - /******** STRUCTURES **********/ -/*Defaults*/ -#define ISO_DEFAULT_VOLUMEID "MAKEFS_CD9660_IMAGE" -#define ISO_DEFAULT_APPID "MAKEFS" -#define ISO_DEFAULT_PUBLISHER "MAKEFS" -#define ISO_DEFAULT_PREPARER "MAKEFS" - #define ISO_VOLUME_DESCRIPTOR_STANDARD_ID "CD001" #define ISO_VOLUME_DESCRIPTOR_BOOT 0 #define ISO_VOLUME_DESCRIPTOR_PVD 1 @@ -99,8 +73,7 @@ typedef struct { /*30 for name and extension, as well as version number and padding bit*/ #define ISO_FILENAME_MAXLENGTH_BEFORE_VERSION 30 -#define ISO_FILENAME_MAXLENGTH 36 -#define ISO_FILENAME_MAXLENGTH_WITH_PADDING 37 +#define ISO_FILENAME_MAXLENGTH 38 #define ISO_FLAG_CLEAR 0x00 #define ISO_FLAG_HIDDEN 0x01 @@ -145,7 +118,7 @@ typedef struct _iso_directory_record_cd9660 { u_char interleave [ISODCL (28, 28)]; 
/* 711 */ u_char volume_sequence_number [ISODCL (29, 32)]; /* 723 */ u_char name_len [ISODCL (33, 33)]; /* 711 */ - char name [ISO_FILENAME_MAXLENGTH_WITH_PADDING]; + char name [ISO_FILENAME_MAXLENGTH]; } iso_directory_record_cd9660; /* TODO: Lots of optimization of this structure */ @@ -181,7 +154,7 @@ typedef struct _cd9660node { int fileRecordSize;/*copy of a variable, int for quicker calculations*/ /* Old name, used for renaming - needs to be optimized but low priority */ - char o_name [ISO_FILENAME_MAXLENGTH_WITH_PADDING]; + char o_name [ISO_FILENAME_MAXLENGTH]; /***** SPACE RESERVED FOR EXTENSIONS *****/ /* For memory efficiency's sake - we should move this to a separate struct @@ -221,7 +194,7 @@ typedef struct _path_table_entry u_char extended_attribute_length[ISODCL (2, 2)]; u_char first_sector[ISODCL (3, 6)]; u_char parent_number[ISODCL (7, 8)]; - char name[ISO_FILENAME_MAXLENGTH_WITH_PADDING]; + char name[ISO_FILENAME_MAXLENGTH]; } path_table_entry; typedef struct _volume_descriptor @@ -231,6 +204,12 @@ typedef struct _volume_descriptor struct _volume_descriptor *next; } volume_descriptor; +struct inode_map_node { + RB_ENTRY(inode_map_node) entry; + uint64_t key; + uint64_t value; +}; + typedef struct _iso9660_disk { int sectorSize; struct iso_primary_descriptor primaryDescriptor; @@ -263,9 +242,7 @@ typedef struct _iso9660_disk { int include_padding_areas; - int follow_sym_links; int verbose_level; - int displayHelp; int keep_bad_images; /* SUSP options and variables */ @@ -276,10 +253,12 @@ typedef struct _iso9660_disk { int rock_ridge_enabled; /* Other Rock Ridge Variables */ char *rock_ridge_renamed_dir_name; - int rock_ridge_move_count; + unsigned rock_ridge_move_count; cd9660node *rr_moved_dir; - int archimedes_enabled; + uint64_t rr_inode_next; + RB_HEAD(inode_map_tree, inode_map_node) rr_inode_map; + int chrp_boot; /* Spec breaking options */ @@ -306,18 +285,20 @@ typedef struct _iso9660_disk { } iso9660_disk; +RB_PROTOTYPE(inode_map_tree, 
inode_map_node, entry, inode_map_node_cmp); + /************ FUNCTIONS **************/ int cd9660_valid_a_chars(const char *); int cd9660_valid_d_chars(const char *); -void cd9660_uppercase_characters(char *, int); +void cd9660_uppercase_characters(char *, size_t); /* ISO Data Types */ void cd9660_721(uint16_t, unsigned char *); void cd9660_731(uint32_t, unsigned char *); void cd9660_722(uint16_t, unsigned char *); void cd9660_732(uint32_t, unsigned char *); -void cd9660_bothendian_dword(uint32_t dw, unsigned char *); -void cd9660_bothendian_word(uint16_t dw, unsigned char *); +void cd9660_bothendian_dword(uint32_t dw, unsigned char *); +void cd9660_bothendian_word(uint16_t dw, unsigned char *); void cd9660_set_date(char *, time_t); void cd9660_time_8426(unsigned char *, time_t); void cd9660_time_915(unsigned char *, time_t); diff --git a/usr.sbin/makefs/cd9660/Makefile.inc b/usr.sbin/makefs/cd9660/Makefile.inc index b5012a323699..ec949f1413dc 100644 --- a/usr.sbin/makefs/cd9660/Makefile.inc +++ b/usr.sbin/makefs/cd9660/Makefile.inc @@ -1,9 +1,6 @@ -# $FreeBSD$ -# - .PATH: ${SRCDIR}/cd9660 CFLAGS+=-I${SRCTOP}/sys/fs/cd9660/ SRCS+= cd9660_strings.c cd9660_debug.c cd9660_eltorito.c \ - cd9660_write.c cd9660_conversion.c iso9660_rrip.c cd9660_archimedes.c + cd9660_write.c cd9660_conversion.c iso9660_rrip.c diff --git a/usr.sbin/makefs/cd9660/cd9660_archimedes.c b/usr.sbin/makefs/cd9660/cd9660_archimedes.c deleted file mode 100644 index cf53e0123d24..000000000000 --- a/usr.sbin/makefs/cd9660/cd9660_archimedes.c +++ /dev/null @@ -1,126 +0,0 @@ -/* $NetBSD: cd9660_archimedes.c,v 1.1 2009/01/10 22:06:29 bjh21 Exp $ */ - -/*- - * SPDX-License-Identifier: BSD-3-Clause - * - * Copyright (c) 1998, 2009 Ben Harris - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -/* - * cd9660_archimedes.c - support for RISC OS "ARCHIMEDES" extension - * - * RISC OS CDFS looks for a special block at the end of the System Use - * Field for each file. If present, this contains the RISC OS load - * and exec address (used to hold the file timestamp and type), the - * file attributes, and a flag indicating whether the first character - * of the filename should be replaced with '!' (since many special - * RISC OS filenames do). 
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <assert.h> -#include <stdint.h> -#include <stdio.h> -#include <string.h> -#include <util.h> - -#include "makefs.h" -#include "cd9660.h" -#include "cd9660_archimedes.h" - -/* - * Convert a Unix time_t (non-leap seconds since 1970-01-01) to a RISC - * OS time (non-leap(?) centiseconds since 1900-01-01(?)). - */ - -static u_int64_t -riscos_date(time_t unixtime) -{ - u_int64_t base; - - base = 31536000ULL * 70 + 86400 * 17; - return (((u_int64_t)unixtime) + base)*100; -} - -/* - * Add "ARCHIMEDES" metadata to a node if that seems appropriate. - * - * We touch regular files with names matching /,[0-9a-f]{3}$/ and - * directories matching /^!/. - */ -static void -archimedes_convert_node(cd9660node *node) -{ - struct ISO_ARCHIMEDES *arc; - size_t len; - int type = -1; - uint64_t stamp; - - if (node->su_tail_data != NULL) - /* Something else already has the tail. */ - return; - - len = strlen(node->node->name); - if (len < 1) return; - - if (len >= 4 && node->node->name[len-4] == ',') - /* XXX should support ,xxx and ,lxa */ - type = strtoul(node->node->name + len - 3, NULL, 16); - if (type == -1 && node->node->name[0] != '!') - return; - if (type == -1) type = 0; - - assert(sizeof(*arc) == 32); - arc = ecalloc(1, sizeof(*arc)); - - stamp = riscos_date(node->node->inode->st.st_mtime); - - memcpy(arc->magic, "ARCHIMEDES", 10); - cd9660_731(0xfff00000 | (type << 8) | (stamp >> 32), arc->loadaddr); - cd9660_731(stamp & 0x00ffffffffULL, arc->execaddr); - arc->ro_attr = RO_ACCESS_UR | RO_ACCESS_OR; - arc->cdfs_attr = node->node->name[0] == '!' ? CDFS_PLING : 0; - node->su_tail_data = (void *)arc; - node->su_tail_size = sizeof(*arc); -} - -/* - * Add "ARCHIMEDES" metadata to an entire tree recursively. - */ -void -archimedes_convert_tree(cd9660node *node) -{ - cd9660node *cn; - - assert(node != NULL); - - archimedes_convert_node(node); - - /* Recurse on children. 
*/ - TAILQ_FOREACH(cn, &node->cn_children, cn_next_child) - archimedes_convert_tree(cn); -} diff --git a/usr.sbin/makefs/cd9660/cd9660_archimedes.h b/usr.sbin/makefs/cd9660/cd9660_archimedes.h deleted file mode 100644 index 96e30336310b..000000000000 --- a/usr.sbin/makefs/cd9660/cd9660_archimedes.h +++ /dev/null @@ -1,52 +0,0 @@ -/* $NetBSD: cd9660_archimedes.h,v 1.1 2009/01/10 22:06:29 bjh21 Exp $ */ - -/*- - * SPDX-License-Identifier: BSD-3-Clause - * - * Copyright (c) 1998, 2009 Ben Harris - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR - * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES - * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. - * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */ -/* - * cd9660_archimedes.c - support for RISC OS "ARCHIMEDES" extension - * - * $FreeBSD$ - */ - -struct ISO_ARCHIMEDES { - char magic[10]; /* "ARCHIMEDES" */ - unsigned char loadaddr[4]; /* Load address, little-endian */ - unsigned char execaddr[4]; /* Exec address, little-endian */ - unsigned char ro_attr; /* RISC OS attributes */ -#define RO_ACCESS_UR 0x01 /* Owner read */ -#define RO_ACCESS_UW 0x02 /* Owner write */ -#define RO_ACCESS_L 0x04 /* Locked */ -#define RO_ACCESS_OR 0x10 /* Public read */ -#define RO_ACCESS_OW 0x20 /* Public write */ - unsigned char cdfs_attr; /* Extra attributes for CDFS */ -#define CDFS_PLING 0x01 /* Filename begins with '!' */ - char reserved[12]; -}; - -extern void archimedes_convert_tree(cd9660node *); diff --git a/usr.sbin/makefs/cd9660/cd9660_conversion.c b/usr.sbin/makefs/cd9660/cd9660_conversion.c index 57a1c62a25b7..4ccc87e22661 100644 --- a/usr.sbin/makefs/cd9660/cd9660_conversion.c +++ b/usr.sbin/makefs/cd9660/cd9660_conversion.c @@ -1,7 +1,7 @@ /* $NetBSD: cd9660_conversion.c,v 1.4 2007/03/14 14:11:17 christos Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2005 Daniel Watt, Walter Deignan, Ryan Gabrys, Alan * Perez-Rathke and Ram Vedam. All rights reserved. @@ -36,8 +36,6 @@ #include "cd9660.h" #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - static char cd9660_compute_gm_offset(time_t); #if 0 diff --git a/usr.sbin/makefs/cd9660/cd9660_debug.c b/usr.sbin/makefs/cd9660/cd9660_debug.c index b1e07fb85f0f..f49c5108f4c2 100644 --- a/usr.sbin/makefs/cd9660/cd9660_debug.c +++ b/usr.sbin/makefs/cd9660/cd9660_debug.c @@ -1,7 +1,7 @@ /* $NetBSD: cd9660_debug.c,v 1.11 2010/10/27 18:51:35 christos Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2005 Daniel Watt, Walter Deignan, Ryan Gabrys, Alan * Perez-Rathke and Ram Vedam. All rights reserved. @@ -34,8 +34,6 @@ * OF SUCH DAMAGE. 
*/ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/mount.h> diff --git a/usr.sbin/makefs/cd9660/cd9660_eltorito.c b/usr.sbin/makefs/cd9660/cd9660_eltorito.c index 3628a798db4c..dd5bf67b2b09 100644 --- a/usr.sbin/makefs/cd9660/cd9660_eltorito.c +++ b/usr.sbin/makefs/cd9660/cd9660_eltorito.c @@ -1,7 +1,7 @@ /* $NetBSD: cd9660_eltorito.c,v 1.23 2018/03/28 06:48:55 nonaka Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2005 Daniel Watt, Walter Deignan, Ryan Gabrys, Alan * Perez-Rathke and Ram Vedam. All rights reserved. @@ -39,7 +39,13 @@ #include <util.h> #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); +/* + * Partition Status Information from Apple Tech Note 1189 + */ +#define APPLE_PS_VALID 0x00000001 /* Entry is valid */ +#define APPLE_PS_ALLOCATED 0x00000002 /* Entry is allocated */ +#define APPLE_PS_READABLE 0x00000010 /* Entry is readable */ +#define APPLE_PS_WRITABLE 0x00000020 /* Entry is writable */ #ifdef DEBUG #define ELTORITO_DPRINTF(__x) printf __x @@ -366,6 +372,7 @@ cd9660_setup_boot(iso9660_disk *diskStructure, int first_sector) struct boot_catalog_entry *x86_head, *mac_head, *ppc_head, *efi_head, *valid_entry, *default_entry, *temp, *head, **headp, *next; struct cd9660_boot_image *tmp_disk; + uint8_t system; headp = NULL; x86_head = mac_head = ppc_head = efi_head = NULL; @@ -377,12 +384,19 @@ cd9660_setup_boot(iso9660_disk *diskStructure, int first_sector) /* Point to catalog: For now assume it consumes one sector */ ELTORITO_DPRINTF(("Boot catalog will go in sector %d\n", first_sector)); diskStructure->boot_catalog_sector = first_sector; - cd9660_bothendian_dword(first_sector, - diskStructure->boot_descriptor->boot_catalog_pointer); + cd9660_731(first_sector, + diskStructure->boot_descriptor->boot_catalog_pointer); + + /* + * Use system type of default image for validation entry. Fallback to + * X86 system type if not found. 
+ */ + system = default_boot_image != NULL ? default_boot_image->system : + ET_SYS_X86; /* Step 1: Generate boot catalog */ /* Step 1a: Validation entry */ - valid_entry = cd9660_boot_setup_validation_entry(ET_SYS_X86); + valid_entry = cd9660_boot_setup_validation_entry(system); if (valid_entry == NULL) return -1; @@ -540,7 +554,7 @@ cd9660_write_mbr_partition_entry(FILE *fd, int idx, off_t sector_start, if (fseeko(fd, (off_t)(idx) * 16 + 0x1be, SEEK_SET) == -1) err(1, "fseeko"); - + val = 0x80; /* Bootable */ fwrite(&val, sizeof(val), 1, fd); @@ -574,15 +588,8 @@ cd9660_write_apm_partition_entry(FILE *fd, int idx, int total_partitions, uint32_t apm32, part_status; uint16_t apm16; - /* See Apple Tech Note 1189 for the details about the pmPartStatus - * flags. - * Below the flags which are default: - * - IsValid 0x01 - * - IsAllocated 0x02 - * - IsReadable 0x10 - * - IsWritable 0x20 - */ - part_status = 0x01 | 0x02 | 0x10 | 0x20; + part_status = APPLE_PS_VALID | APPLE_PS_ALLOCATED | APPLE_PS_READABLE | + APPLE_PS_WRITABLE; if (fseeko(fd, (off_t)(idx + 1) * sector_size, SEEK_SET) == -1) err(1, "fseeko"); @@ -610,7 +617,7 @@ cd9660_write_apm_partition_entry(FILE *fd, int idx, int total_partitions, apm32 = 0; /* pmLgDataStart */ fwrite(&apm32, sizeof(apm32), 1, fd); - /* pmDataCnt */ + /* pmDataCnt */ apm32 = htobe32(nsectors); fwrite(&apm32, sizeof(apm32), 1, fd); /* pmPartStatus */ @@ -659,9 +666,9 @@ cd9660_write_boot(iso9660_disk *diskStructure, FILE *fd) } cd9660_copy_file(diskStructure, fd, t->sector, t->filename); - if (t->system == ET_SYS_MAC) + if (t->system == ET_SYS_MAC) apm_partitions++; - if (t->system == ET_SYS_PPC) + if (t->system == ET_SYS_PPC) mbr_partitions++; } diff --git a/usr.sbin/makefs/cd9660/cd9660_eltorito.h b/usr.sbin/makefs/cd9660/cd9660_eltorito.h index cbc3f8419e31..a9ad0901e1a7 100644 --- a/usr.sbin/makefs/cd9660/cd9660_eltorito.h +++ b/usr.sbin/makefs/cd9660/cd9660_eltorito.h @@ -1,7 +1,7 @@ /* $NetBSD: cd9660_eltorito.h,v 1.6 2017/01/24 
11:22:43 nonaka Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2005 Daniel Watt, Walter Deignan, Ryan Gabrys, Alan * Perez-Rathke and Ram Vedam. All rights reserved. @@ -32,8 +32,6 @@ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _CD9660_ELTORITO_H_ @@ -131,7 +129,7 @@ struct boot_catalog_entry { char entry_type; union { boot_catalog_validation_entry VE; - boot_catalog_initial_entry IE; + boot_catalog_initial_entry IE; boot_catalog_section_header SH; boot_catalog_section_entry SE; boot_catalog_section_entry_extension EX; @@ -144,7 +142,7 @@ struct boot_catalog_entry { struct cd9660_boot_image { char *filename; int size; - int sector; /* copied to LoadRBA */ + int sector; /* copied to LoadRBA */ int num_sectors; unsigned int loadSegment; u_char targetMode; diff --git a/usr.sbin/makefs/cd9660/cd9660_strings.c b/usr.sbin/makefs/cd9660/cd9660_strings.c index 12d7566e463d..b3111fca6cd1 100644 --- a/usr.sbin/makefs/cd9660/cd9660_strings.c +++ b/usr.sbin/makefs/cd9660/cd9660_strings.c @@ -1,7 +1,7 @@ /* $NetBSD: cd9660_strings.c,v 1.4 2007/01/16 17:32:05 hubertf Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2005 Daniel Watt, Walter Deignan, Ryan Gabrys, Alan * Perez-Rathke and Ram Vedam. All rights reserved. 
@@ -36,8 +36,6 @@ #include <sys/mount.h> -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); #include <sys/param.h> #include <ctype.h> @@ -46,9 +44,9 @@ __FBSDID("$FreeBSD$"); void -cd9660_uppercase_characters(char *str, int len) +cd9660_uppercase_characters(char *str, size_t len) { - int p; + size_t p; for (p = 0; p < len; p++) { if (islower((unsigned char)str[p]) ) diff --git a/usr.sbin/makefs/cd9660/cd9660_write.c b/usr.sbin/makefs/cd9660/cd9660_write.c index 71e884f792d9..828af11669c1 100644 --- a/usr.sbin/makefs/cd9660/cd9660_write.c +++ b/usr.sbin/makefs/cd9660/cd9660_write.c @@ -1,7 +1,7 @@ /* $NetBSD: cd9660_write.c,v 1.14 2011/01/04 09:48:21 wiz Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2005 Daniel Watt, Walter Deignan, Ryan Gabrys, Alan * Perez-Rathke and Ram Vedam. All rights reserved. @@ -38,8 +38,6 @@ #include "iso9660_rrip.h" #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <util.h> static int cd9660_write_volume_descriptors(iso9660_disk *, FILE *); @@ -273,7 +271,7 @@ cd9660_write_file(iso9660_disk *diskStructure, FILE *fd, cd9660node *writenode) /* Todo : clean up variables */ - temp_file_name = ecalloc(CD9660MAXPATH + 1, 1); + temp_file_name = ecalloc(PATH_MAX, 1); buf = emalloc(diskStructure->sectorSize); if ((writenode->level != 0) && !(writenode->node->type & S_IFDIR)) { @@ -316,7 +314,7 @@ cd9660_write_file(iso9660_disk *diskStructure, FILE *fd, cd9660node *writenode) /* * Now loop over children, writing out their directory * records - beware of sector boundaries - */ + */ TAILQ_FOREACH(temp, &writenode->cn_children, cn_next_child) { /* * Copy the temporary record and adjust its size @@ -429,7 +427,6 @@ cd9660_copy_file(iso9660_disk *diskStructure, FILE *fd, off_t start_sector, { FILE *rf; int bytes_read; - off_t sector = start_sector; int buf_size = diskStructure->sectorSize; char *buf; @@ -462,7 +459,6 @@ cd9660_copy_file(iso9660_disk *diskStructure, FILE 
*fd, off_t start_sector, (void)fclose(rf); return 0; } - sector++; } fclose(rf); diff --git a/usr.sbin/makefs/cd9660/iso9660_rrip.c b/usr.sbin/makefs/cd9660/iso9660_rrip.c index c5ef88d59de6..31c6e38a96fe 100644 --- a/usr.sbin/makefs/cd9660/iso9660_rrip.c +++ b/usr.sbin/makefs/cd9660/iso9660_rrip.c @@ -1,7 +1,7 @@ /* $NetBSD: iso9660_rrip.c,v 1.14 2014/05/30 13:14:47 martin Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2005 Daniel Watt, Walter Deignan, Ryan Gabrys, Alan * Perez-Rathke and Ram Vedam. All rights reserved. @@ -38,8 +38,6 @@ */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/queue.h> #include <sys/types.h> #include <stdio.h> @@ -49,7 +47,7 @@ __FBSDID("$FreeBSD$"); #include "iso9660_rrip.h" #include <util.h> -static void cd9660_rrip_initialize_inode(cd9660node *); +static void cd9660_rrip_initialize_inode(iso9660_disk *, cd9660node *); static int cd9660_susp_handle_continuation(iso9660_disk *, cd9660node *); static int cd9660_susp_handle_continuation_common(iso9660_disk *, cd9660node *, int); @@ -72,6 +70,11 @@ cd9660_susp_initialize(iso9660_disk *diskStructure, cd9660node *node, if (node->dot_dot_record != 0) TAILQ_INIT(&(node->dot_dot_record->head)); + if (diskStructure->rr_inode_next == 0) { + RB_INIT(&diskStructure->rr_inode_map); + diskStructure->rr_inode_next = 1; + } + /* SUSP specific entries here */ if ((r = cd9660_susp_initialize_node(diskStructure, node)) < 0) return r; @@ -103,6 +106,7 @@ int cd9660_susp_finalize(iso9660_disk *diskStructure, cd9660node *node) { cd9660node *temp; + struct inode_map_node *mapnode, *mapnodetmp; int r; assert(node != NULL); @@ -119,6 +123,16 @@ cd9660_susp_finalize(iso9660_disk *diskStructure, cd9660node *node) if ((r = cd9660_susp_finalize(diskStructure, temp)) < 0) return r; } + + if (diskStructure->rr_inode_next != 0) { + RB_FOREACH_SAFE(mapnode, inode_map_tree, + &(diskStructure->rr_inode_map), mapnodetmp) { + 
RB_REMOVE(inode_map_tree, + &(diskStructure->rr_inode_map), mapnode); + free(mapnode); + } + diskStructure->rr_inode_next = 0; + } return 1; } @@ -325,7 +339,7 @@ cd9660_susp_initialize_node(iso9660_disk *diskStructure, cd9660node *node) } static void -cd9660_rrip_initialize_inode(cd9660node *node) +cd9660_rrip_initialize_inode(iso9660_disk *diskStructure, cd9660node *node) { struct ISO_SUSP_ATTRIBUTES *attr; @@ -339,7 +353,7 @@ cd9660_rrip_initialize_inode(cd9660node *node) /* PX - POSIX attributes */ attr = cd9660node_susp_create_node(SUSP_TYPE_RRIP, SUSP_ENTRY_RRIP_PX, "PX", SUSP_LOC_ENTRY); - cd9660node_rrip_px(attr, node->node); + cd9660node_rrip_px(diskStructure, attr, node->node); TAILQ_INSERT_TAIL(&node->head, attr, rr_ll); @@ -392,7 +406,14 @@ cd9660_rrip_initialize_node(iso9660_disk *diskStructure, cd9660node *node, /* PX - POSIX attributes */ current = cd9660node_susp_create_node(SUSP_TYPE_RRIP, SUSP_ENTRY_RRIP_PX, "PX", SUSP_LOC_ENTRY); - cd9660node_rrip_px(current, parent->node); + cd9660node_rrip_px(diskStructure, current, + parent->node); + TAILQ_INSERT_TAIL(&node->head, current, rr_ll); + + /* TF - timestamp */ + current = cd9660node_susp_create_node(SUSP_TYPE_RRIP, + SUSP_ENTRY_RRIP_TF, "TF", SUSP_LOC_ENTRY); + cd9660node_rrip_tf(current, parent->node); TAILQ_INSERT_TAIL(&node->head, current, rr_ll); } } else if (node->type & CD9660_TYPE_DOTDOT) { @@ -401,7 +422,14 @@ cd9660_rrip_initialize_node(iso9660_disk *diskStructure, cd9660node *node, /* PX - POSIX attributes */ current = cd9660node_susp_create_node(SUSP_TYPE_RRIP, SUSP_ENTRY_RRIP_PX, "PX", SUSP_LOC_ENTRY); - cd9660node_rrip_px(current, grandparent->node); + cd9660node_rrip_px(diskStructure, current, + grandparent->node); + TAILQ_INSERT_TAIL(&node->head, current, rr_ll); + + /* TF - timestamp */ + current = cd9660node_susp_create_node(SUSP_TYPE_RRIP, + SUSP_ENTRY_RRIP_TF, "TF", SUSP_LOC_ENTRY); + cd9660node_rrip_tf(current, grandparent->node); TAILQ_INSERT_TAIL(&node->head, current, rr_ll); 
} /* Handle PL */ @@ -412,28 +440,14 @@ cd9660_rrip_initialize_node(iso9660_disk *diskStructure, cd9660node *node, TAILQ_INSERT_TAIL(&node->head, current, rr_ll); } } else { - cd9660_rrip_initialize_inode(node); + cd9660_rrip_initialize_inode(diskStructure, node); - /* - * Not every node needs a NM set - only if the name is - * actually different. IE: If a file is TEST -> TEST, - * no NM. test -> TEST, need a NM - * - * The rr_moved_dir needs to be assigned a NM record as well. - */ if (node == diskStructure->rr_moved_dir) { cd9660_rrip_add_NM(node, RRIP_DEFAULT_MOVE_DIR_NAME); - } - else if ((node->node != NULL) && - ((strlen(node->node->name) != - (uint8_t)node->isoDirRecord->name_len[0]) || - (memcmp(node->node->name,node->isoDirRecord->name, - (uint8_t)node->isoDirRecord->name_len[0]) != 0))) { + } else if (node->node != NULL) { cd9660_rrip_NM(node); } - - /* Rock ridge directory relocation code here. */ /* First handle the CL for the placeholder file. */ @@ -634,8 +648,45 @@ cd9660_createSL(cd9660node *node) } } +static int +inode_map_node_cmp(struct inode_map_node *a, struct inode_map_node *b) +{ + if (a->key < b->key) + return (-1); + if (a->key > b->key) + return (1); + return (0); +} + +RB_GENERATE(inode_map_tree, inode_map_node, entry, inode_map_node_cmp); + +static uint64_t +inode_map(iso9660_disk *diskStructure, uint64_t in) +{ + struct inode_map_node lookup = { .key = in }; + struct inode_map_node *node; + + /* + * Always assign an inode number if src inode unset. mtree mode leaves + * src inode unset for files with st_nlink == 1. 
+ */ + if (in != 0) { + node = RB_FIND(inode_map_tree, &(diskStructure->rr_inode_map), + &lookup); + if (node != NULL) + return (node->value); + } + + node = emalloc(sizeof(struct inode_map_node)); + node->key = in; + node->value = diskStructure->rr_inode_next++; + RB_INSERT(inode_map_tree, &(diskStructure->rr_inode_map), node); + return (node->value); +} + int -cd9660node_rrip_px(struct ISO_SUSP_ATTRIBUTES *v, fsnode *pxinfo) +cd9660node_rrip_px(iso9660_disk *diskStructure, struct ISO_SUSP_ATTRIBUTES *v, + fsnode *pxinfo) { v->attr.rr_entry.PX.h.length[0] = 44; v->attr.rr_entry.PX.h.version[0] = 1; @@ -647,8 +698,8 @@ cd9660node_rrip_px(struct ISO_SUSP_ATTRIBUTES *v, fsnode *pxinfo) v->attr.rr_entry.PX.uid); cd9660_bothendian_dword(pxinfo->inode->st.st_gid, v->attr.rr_entry.PX.gid); - cd9660_bothendian_dword(pxinfo->inode->st.st_ino, - v->attr.rr_entry.PX.serial); + cd9660_bothendian_dword(inode_map(diskStructure, + pxinfo->inode->st.st_ino), v->attr.rr_entry.PX.serial); return 1; } @@ -699,11 +750,11 @@ cd9660node_rrip_tf(struct ISO_SUSP_ATTRIBUTES *p, fsnode *_node) */ cd9660_time_915(p->attr.rr_entry.TF.timestamp, - _node->inode->st.st_atime); + _node->inode->st.st_mtime); p->attr.rr_entry.TF.h.length[0] += 7; cd9660_time_915(p->attr.rr_entry.TF.timestamp + 7, - _node->inode->st.st_mtime); + _node->inode->st.st_atime); p->attr.rr_entry.TF.h.length[0] += 7; cd9660_time_915(p->attr.rr_entry.TF.timestamp + 14, @@ -756,7 +807,7 @@ cd9660_rrip_add_NM(cd9660node *node, const char *name) struct ISO_SUSP_ATTRIBUTES *r; /* - * Each NM record has 254 byes to work with. This means that + * Each NM record has 254 bytes to work with. This means that * the name data itself only has 249 bytes to work with. So, a * name with 251 characters would require two nm records. 
*/ diff --git a/usr.sbin/makefs/cd9660/iso9660_rrip.h b/usr.sbin/makefs/cd9660/iso9660_rrip.h index 0c7b89ec860b..4c738d27ba45 100644 --- a/usr.sbin/makefs/cd9660/iso9660_rrip.h +++ b/usr.sbin/makefs/cd9660/iso9660_rrip.h @@ -1,7 +1,7 @@ /* $NetBSD: iso9660_rrip.h,v 1.5 2009/01/10 22:06:29 bjh21 Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2005 Daniel Watt, Walter Deignan, Ryan Gabrys, Alan * Perez-Rathke and Ram Vedam. All rights reserved. @@ -32,8 +32,6 @@ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY * OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef __ISO9660_RRIP_H__ #define __ISO9660_RRIP_H__ @@ -52,17 +50,17 @@ #define PX_LENGTH 0x2C #define PN_LENGTH 0x14 -#define TF_CREATION 0x00 -#define TF_MODIFY 0x01 -#define TF_ACCESS 0x02 -#define TF_ATTRIBUTES 0x04 -#define TF_BACKUP 0x08 -#define TF_EXPIRATION 0x10 -#define TF_EFFECTIVE 0x20 -#define TF_LONGFORM 0x40 -#define NM_CONTINUE 0x80 -#define NM_CURRENT 0x100 -#define NM_PARENT 0x200 +#define TF_CREATION 0x01 +#define TF_MODIFY 0x02 +#define TF_ACCESS 0x04 +#define TF_ATTRIBUTES 0x08 +#define TF_BACKUP 0x10 +#define TF_EXPIRATION 0x20 +#define TF_EFFECTIVE 0x40 +#define TF_LONG_FORM 0x80 +#define NM_CONTINUE 0x01 +#define NM_CURRENT 0x02 +#define NM_PARENT 0x04 #define SUSP_LOC_ENTRY 0x01 @@ -209,7 +207,7 @@ struct ISO_SUSP_ATTRIBUTES { char type_of[2]; char last_in_suf; /* last entry in the System Use Field? */ /* Dan's addons - will merge later. 
This allows use of a switch */ - char susp_type; /* SUSP or RRIP */ + char susp_type; /* SUSP or RRIP */ char entry_type; /* Record type */ char write_location; TAILQ_ENTRY(ISO_SUSP_ATTRIBUTES) rr_ll; @@ -226,7 +224,7 @@ int cd9660_susp_finalize_node(iso9660_disk *, cd9660node *); int cd9660_rrip_finalize_node(cd9660node *); /* POSIX File attribute */ -int cd9660node_rrip_px(struct ISO_SUSP_ATTRIBUTES *, fsnode *); +int cd9660node_rrip_px(iso9660_disk *, struct ISO_SUSP_ATTRIBUTES *, fsnode *); /* Device number */ int cd9660node_rrip_pn(struct ISO_SUSP_ATTRIBUTES *, fsnode *); diff --git a/usr.sbin/makefs/ffs.c b/usr.sbin/makefs/ffs.c index 81101b6aa6bf..c0fcadf11fba 100644 --- a/usr.sbin/makefs/ffs.c +++ b/usr.sbin/makefs/ffs.c @@ -63,13 +63,9 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * @(#)ffs_alloc.c 8.19 (Berkeley) 7/13/95 */ #include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #if HAVE_NBTOOL_CONFIG_H #include "nbtool_config.h" #endif @@ -110,7 +106,7 @@ __FBSDID("$FreeBSD$"); #undef DIP #define DIP(dp, field) \ ((ffs_opts->version == 1) ? \ - (dp)->ffs1_din.di_##field : (dp)->ffs2_din.di_##field) + (dp)->dp1.di_##field : (dp)->dp2.di_##field) /* * Various file system defaults (cribbed from newfs(8)). @@ -342,6 +338,9 @@ ffs_validate(const char *dir, fsnode *root, fsinfo_t *fsopts) /* set FFS defaults */ if (fsopts->sectorsize == -1) fsopts->sectorsize = DFL_SECSIZE; + if (fsopts->sectorsize != DFL_SECSIZE) + warnx("sectorsize %d may produce nonfunctional image", + fsopts->sectorsize); if (ffs_opts->fsize == -1) ffs_opts->fsize = MAX(DFL_FRAGSIZE, fsopts->sectorsize); if (ffs_opts->bsize == -1) @@ -680,15 +679,14 @@ ffs_build_dinode1(struct ufs1_dinode *dinp, dirbuf_t *dbufp, fsnode *cur, { size_t slen; void *membuf; - struct stat *st = stampst.st_ino != 0 ? 
&stampst : &cur->inode->st; + struct stat *st; + st = &cur->inode->st; memset(dinp, 0, sizeof(*dinp)); dinp->di_mode = cur->inode->st.st_mode; dinp->di_nlink = cur->inode->nlink; dinp->di_size = cur->inode->st.st_size; -#if HAVE_STRUCT_STAT_ST_FLAGS - dinp->di_flags = cur->inode->st.st_flags; -#endif + dinp->di_flags = FSINODE_ST_FLAGS(*cur->inode); dinp->di_gen = random(); dinp->di_uid = cur->inode->st.st_uid; dinp->di_gid = cur->inode->st.st_gid; @@ -728,15 +726,14 @@ ffs_build_dinode2(struct ufs2_dinode *dinp, dirbuf_t *dbufp, fsnode *cur, { size_t slen; void *membuf; - struct stat *st = stampst.st_ino != 0 ? &stampst : &cur->inode->st; + struct stat *st; + st = &cur->inode->st; memset(dinp, 0, sizeof(*dinp)); dinp->di_mode = cur->inode->st.st_mode; dinp->di_nlink = cur->inode->nlink; dinp->di_size = cur->inode->st.st_size; -#if HAVE_STRUCT_STAT_ST_FLAGS - dinp->di_flags = cur->inode->st.st_flags; -#endif + dinp->di_flags = FSINODE_ST_FLAGS(*cur->inode); dinp->di_gen = random(); dinp->di_uid = cur->inode->st.st_uid; dinp->di_gid = cur->inode->st.st_gid; @@ -854,10 +851,10 @@ ffs_populate_dir(const char *dir, fsnode *root, fsinfo_t *fsopts) /* build on-disk inode */ if (ffs_opts->version == 1) - membuf = ffs_build_dinode1(&din.ffs1_din, &dirbuf, cur, + membuf = ffs_build_dinode1(&din.dp1, &dirbuf, cur, root, fsopts); else - membuf = ffs_build_dinode2(&din.ffs2_din, &dirbuf, cur, + membuf = ffs_build_dinode2(&din.dp2, &dirbuf, cur, root, fsopts); if (debug & DEBUG_FS_POPULATE_NODE) { @@ -907,7 +904,7 @@ ffs_populate_dir(const char *dir, fsnode *root, fsinfo_t *fsopts) static void ffs_write_file(union dinode *din, uint32_t ino, void *buf, fsinfo_t *fsopts) { - int isfile, ffd; + int isfile, ffd; char *fbuf, *p; off_t bufleft, chunk, offset; ssize_t nread; @@ -943,18 +940,18 @@ ffs_write_file(union dinode *din, uint32_t ino, void *buf, fsinfo_t *fsopts) in.i_number = ino; in.i_size = DIP(din, size); if (ffs_opts->version == 1) - memcpy(&in.i_din.ffs1_din, 
&din->ffs1_din, - sizeof(in.i_din.ffs1_din)); + memcpy(&in.i_din.dp1, &din->dp1, + sizeof(in.i_din.dp1)); else - memcpy(&in.i_din.ffs2_din, &din->ffs2_din, - sizeof(in.i_din.ffs2_din)); + memcpy(&in.i_din.dp2, &din->dp2, + sizeof(in.i_din.dp2)); if (DIP(din, size) == 0) goto write_inode_and_leave; /* mmm, cheating */ if (isfile) { fbuf = emalloc(ffs_opts->bsize); - if ((ffd = open((char *)buf, O_RDONLY, 0444)) == -1) { + if ((ffd = open((char *)buf, O_RDONLY)) == -1) { err(EXIT_FAILURE, "Can't open `%s' for reading", (char *)buf); } } else { @@ -1002,7 +999,6 @@ ffs_write_file(union dinode *din, uint32_t ino, void *buf, fsinfo_t *fsopts) errno = bwrite(bp); if (errno != 0) goto bad_ffs_write_file; - brelse(bp); if (!isfile) p += chunk; } @@ -1060,7 +1056,7 @@ ffs_make_dirbuf(dirbuf_t *dbuf, const char *name, fsnode *node, int needswap) reclen = DIRSIZ_SWAP(0, &de, needswap); de.d_reclen = ufs_rw16(reclen, needswap); - dp = (struct direct *)(dbuf->buf + dbuf->cur); + dp = dbuf->buf == NULL ? 
NULL : (struct direct *)(dbuf->buf + dbuf->cur); llen = 0; if (dp != NULL) llen = DIRSIZ_SWAP(0, dp, needswap); @@ -1097,7 +1093,7 @@ ffs_make_dirbuf(dirbuf_t *dbuf, const char *name, fsnode *node, int needswap) static void ffs_write_inode(union dinode *dp, uint32_t ino, const fsinfo_t *fsopts) { - char *buf; + char *buf; struct ufs1_dinode *dp1; struct ufs2_dinode *dp2, *dip; struct cg *cgp; @@ -1178,16 +1174,16 @@ ffs_write_inode(union dinode *dp, uint32_t ino, const fsinfo_t *fsopts) ffs_rdfs(d, fs->fs_bsize, buf, fsopts); if (fsopts->needswap) { if (ffs_opts->version == 1) - ffs_dinode1_swap(&dp->ffs1_din, + ffs_dinode1_swap(&dp->dp1, &dp1[ino_to_fsbo(fs, ino)]); else - ffs_dinode2_swap(&dp->ffs2_din, + ffs_dinode2_swap(&dp->dp2, &dp2[ino_to_fsbo(fs, ino)]); } else { if (ffs_opts->version == 1) - dp1[ino_to_fsbo(fs, ino)] = dp->ffs1_din; + dp1[ino_to_fsbo(fs, ino)] = dp->dp1; else - dp2[ino_to_fsbo(fs, ino)] = dp->ffs2_din; + dp2[ino_to_fsbo(fs, ino)] = dp->dp2; } ffs_wtfs(d, fs->fs_bsize, buf, fsopts); free(buf); diff --git a/usr.sbin/makefs/ffs.h b/usr.sbin/makefs/ffs.h index e1dda429ff26..834c5f4424f2 100644 --- a/usr.sbin/makefs/ffs.h +++ b/usr.sbin/makefs/ffs.h @@ -35,8 +35,6 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. 
- * - * $FreeBSD$ */ #ifndef _FFS_H diff --git a/usr.sbin/makefs/ffs/Makefile.inc b/usr.sbin/makefs/ffs/Makefile.inc index a1e839fd7ab6..0db1750ab282 100644 --- a/usr.sbin/makefs/ffs/Makefile.inc +++ b/usr.sbin/makefs/ffs/Makefile.inc @@ -1,9 +1,8 @@ -# $FreeBSD$ -# - .PATH: ${SRCDIR}/ffs ${SRCTOP}/sys/ufs/ffs SRCS+= ffs_alloc.c ffs_balloc.c ffs_bswap.c ffs_subr.c ufs_bmap.c SRCS+= buf.c mkfs.c # Reach-over source from sys/ufs/ffs SRCS+= ffs_tables.c + +CWARNFLAGS.ffs_balloc.c+= -Wno-sign-compare diff --git a/usr.sbin/makefs/ffs/buf.c b/usr.sbin/makefs/ffs/buf.c index 13f3099c4491..5fdb517208f9 100644 --- a/usr.sbin/makefs/ffs/buf.c +++ b/usr.sbin/makefs/ffs/buf.c @@ -37,9 +37,6 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/time.h> @@ -78,7 +75,7 @@ bread(struct m_vnode *vp, daddr_t blkno, int size, struct ucred *u1 __unused, if (lseek((*bpp)->b_fs->fd, offset, SEEK_SET) == -1) err(1, "%s: lseek %lld (%lld)", __func__, (long long)(*bpp)->b_blkno, (long long)offset); - rv = read((*bpp)->b_fs->fd, (*bpp)->b_data, (*bpp)->b_bcount); + rv = read((*bpp)->b_fs->fd, (*bpp)->b_data, (size_t)(*bpp)->b_bcount); if (debug & DEBUG_BUF_BREAD) printf("%s: read %ld (%lld) returned %d\n", __func__, (*bpp)->b_bcount, (long long)offset, (int)rv); @@ -127,26 +124,31 @@ bwrite(struct m_buf *bp) { off_t offset; ssize_t rv; + size_t bytes; + int e; fsinfo_t *fs = bp->b_fs; assert (bp != NULL); offset = (off_t)bp->b_blkno * fs->sectorsize + fs->offset; + bytes = (size_t)bp->b_bcount; if (debug & DEBUG_BUF_BWRITE) - printf("bwrite: blkno %lld offset %lld bcount %ld\n", - (long long)bp->b_blkno, (long long) offset, - bp->b_bcount); - if (lseek(bp->b_fs->fd, offset, SEEK_SET) == -1) + printf("%s: blkno %lld offset %lld bcount %zu\n", __func__, + (long long)bp->b_blkno, (long long) offset, bytes); + if (lseek(bp->b_fs->fd, offset, SEEK_SET) == -1) { + brelse(bp); return (errno); - rv = write(bp->b_fs->fd, 
bp->b_data, bp->b_bcount); + } + rv = write(bp->b_fs->fd, bp->b_data, bytes); + e = errno; if (debug & DEBUG_BUF_BWRITE) - printf("bwrite: write %ld (offset %lld) returned %lld\n", + printf("%s: write %ld (offset %lld) returned %lld\n", __func__, bp->b_bcount, (long long)offset, (long long)rv); - if (rv == bp->b_bcount) + brelse(bp); + if (rv == (ssize_t)bytes) return (0); - else if (rv == -1) /* write error */ - return (errno); - else /* short write ? */ - return (EAGAIN); + if (rv == -1) /* write error */ + return (e); + return (EAGAIN); } void @@ -163,13 +165,13 @@ bcleanup(void) if (TAILQ_EMPTY(&buftail)) return; - printf("bcleanup: unflushed buffers:\n"); + printf("%s: unflushed buffers:\n", __func__); TAILQ_FOREACH(bp, &buftail, b_tailq) { printf("\tlblkno %10lld blkno %10lld count %6ld bufsize %6ld\n", (long long)bp->b_lblkno, (long long)bp->b_blkno, bp->b_bcount, bp->b_bufsize); } - printf("bcleanup: done\n"); + printf("%s: done\n", __func__); } struct m_buf * @@ -181,12 +183,13 @@ getblk(struct m_vnode *vp, daddr_t blkno, int size, int u1 __unused, void *n; if (debug & DEBUG_BUF_GETBLK) - printf("getblk: blkno %lld size %d\n", (long long)blkno, size); + printf("%s: blkno %lld size %d\n", __func__, (long long)blkno, + size); bp = NULL; if (!buftailinitted) { if (debug & DEBUG_BUF_GETBLK) - printf("getblk: initialising tailq\n"); + printf("%s: initialising tailq\n", __func__); TAILQ_INIT(&buftail); buftailinitted = 1; } else { @@ -206,8 +209,8 @@ getblk(struct m_vnode *vp, daddr_t blkno, int size, int u1 __unused, } bp->b_bcount = size; if (bp->b_data == NULL || bp->b_bcount > bp->b_bufsize) { - n = erealloc(bp->b_data, size); - memset(n, 0, size); + n = erealloc(bp->b_data, (size_t)size); + memset(n, 0, (size_t)size); bp->b_data = n; bp->b_bufsize = size; } diff --git a/usr.sbin/makefs/ffs/buf.h b/usr.sbin/makefs/ffs/buf.h index 31196b8b2fbe..dfe7edb3e784 100644 --- a/usr.sbin/makefs/ffs/buf.h +++ b/usr.sbin/makefs/ffs/buf.h @@ -35,8 +35,6 @@ * CONTRACT, 
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _FFS_BUF_H diff --git a/usr.sbin/makefs/ffs/ffs_alloc.c b/usr.sbin/makefs/ffs/ffs_alloc.c index ff0e72c56af0..c5aae97928b5 100644 --- a/usr.sbin/makefs/ffs/ffs_alloc.c +++ b/usr.sbin/makefs/ffs/ffs_alloc.c @@ -39,13 +39,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * @(#)ffs_alloc.c 8.19 (Berkeley) 7/13/95 */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/time.h> @@ -305,7 +300,6 @@ ffs_alloccg(struct inode *ip, int cg, daddr_t bpref, int size) error = bread((void *)ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NULL, &bp); if (error) { - brelse(bp); return (0); } cgp = (struct cg *)bp->b_data; @@ -449,7 +443,6 @@ ffs_blkfree(struct inode *ip, daddr_t bno, long size) error = bread((void *)ip->i_devvp, fsbtodb(fs, cgtod(fs, cg)), (int)fs->fs_cgsize, NULL, &bp); if (error) { - brelse(bp); return; } cgp = (struct cg *)bp->b_data; diff --git a/usr.sbin/makefs/ffs/ffs_balloc.c b/usr.sbin/makefs/ffs/ffs_balloc.c index 275ec4c04471..969a779d0ae8 100644 --- a/usr.sbin/makefs/ffs/ffs_balloc.c +++ b/usr.sbin/makefs/ffs/ffs_balloc.c @@ -30,13 +30,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/time.h> diff --git a/usr.sbin/makefs/ffs/ffs_bswap.c b/usr.sbin/makefs/ffs/ffs_bswap.c index 43fa60cb0373..50498cb4f259 100644 --- a/usr.sbin/makefs/ffs/ffs_bswap.c +++ b/usr.sbin/makefs/ffs/ffs_bswap.c @@ -1,7 +1,7 @@ /* $NetBSD: ffs_bswap.c,v 1.28 2004/05/25 14:54:59 hannken Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1998 Manuel Bouyer. * @@ -27,9 +27,6 @@ * */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #if defined(_KERNEL) #include <sys/systm.h> diff --git a/usr.sbin/makefs/ffs/ffs_extern.h b/usr.sbin/makefs/ffs/ffs_extern.h index 12ba0b77989c..ac0dafaff860 100644 --- a/usr.sbin/makefs/ffs/ffs_extern.h +++ b/usr.sbin/makefs/ffs/ffs_extern.h @@ -30,9 +30,6 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * @(#)ffs_extern.h 8.6 (Berkeley) 3/30/95 - * $FreeBSD$ */ #include "ffs/buf.h" diff --git a/usr.sbin/makefs/ffs/ffs_subr.c b/usr.sbin/makefs/ffs/ffs_subr.c index 53e5b97ada48..3f5b2297389b 100644 --- a/usr.sbin/makefs/ffs/ffs_subr.c +++ b/usr.sbin/makefs/ffs/ffs_subr.c @@ -29,13 +29,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * @(#)ffs_subr.c 8.5 (Berkeley) 3/21/95 */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/types.h> diff --git a/usr.sbin/makefs/ffs/mkfs.c b/usr.sbin/makefs/ffs/mkfs.c index 0f8b040d6997..81e3da5725c8 100644 --- a/usr.sbin/makefs/ffs/mkfs.c +++ b/usr.sbin/makefs/ffs/mkfs.c @@ -40,9 +40,6 @@ * SUCH DAMAGE. 
*/ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/time.h> #include <sys/resource.h> @@ -80,6 +77,23 @@ static int count_digits(int); #define UMASK 0755 #define POWEROF2(num) (((num) & ((num) - 1)) == 0) +/* + * The definition of "struct cg" used to contain an extra field at the end + * to represent the variable-length data that followed the fixed structure. + * This had the effect of artificially limiting the number of blocks that + * newfs would put in a CG, since newfs thought that the fixed-size header + * was bigger than it really was. When we started validating that the CG + * header data actually fit into one fs block, the placeholder field caused + * a problem because it caused struct cg to be a different size depending on + * platform. The placeholder field was later removed, but this caused a + * backward compatibility problem with older binaries that still thought + * struct cg was larger, and a new file system could fail validation if + * viewed by the older binaries. To avoid this compatibility problem, we + * now artificially reduce the amount of space that the variable-length data + * can use such that new file systems will pass validation by older binaries. 
+ */ +#define CGSIZEFUDGE 8 + static union { struct fs fs; char pad[SBLOCKSIZE]; @@ -347,7 +361,8 @@ ffs_mkfs(const char *fsys, const fsinfo_t *fsopts, time_t tstamp) sblock.fs_fpg = minfpg; sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), INOPB(&sblock)); - if (CGSIZE(&sblock) < (unsigned long)sblock.fs_bsize) + if (CGSIZE(&sblock) < (unsigned long)sblock.fs_bsize - + CGSIZEFUDGE) break; density -= sblock.fs_fsize; } @@ -366,9 +381,11 @@ ffs_mkfs(const char *fsys, const fsinfo_t *fsopts, time_t tstamp) INOPB(&sblock)); if (sblock.fs_size / sblock.fs_fpg < 1) break; - if (CGSIZE(&sblock) < (unsigned long)sblock.fs_bsize) + if (CGSIZE(&sblock) < (unsigned long)sblock.fs_bsize - + CGSIZEFUDGE) continue; - if (CGSIZE(&sblock) == (unsigned long)sblock.fs_bsize) + if (CGSIZE(&sblock) == (unsigned long)sblock.fs_bsize - + CGSIZEFUDGE) break; sblock.fs_fpg -= sblock.fs_frag; sblock.fs_ipg = roundup(howmany(sblock.fs_fpg, fragsperinode), @@ -529,8 +546,9 @@ ffs_mkfs(const char *fsys, const fsinfo_t *fsopts, time_t tstamp) initcg(cylno, tstamp, fsopts); if (cylno % nprintcols == 0) printf("\n"); - printf(" %*lld,", printcolwidth, - (long long)fsbtodb(&sblock, cgsblock(&sblock, cylno))); + printf(" %*lld%s", printcolwidth, + (long long)fsbtodb(&sblock, cgsblock(&sblock, cylno)), + cylno == sblock.fs_ncg - 1 ? "" : ","); fflush(stdout); } printf("\n"); @@ -561,13 +579,21 @@ ffs_write_superblock(struct fs *fs, const fsinfo_t *fsopts) { int size, blks, i, saveflag; uint32_t cylno; - void *space; + void *info, *space; char *wrbuf; saveflag = fs->fs_flags & FS_INTERNAL; fs->fs_flags &= ~FS_INTERNAL; - memcpy(writebuf, &sblock, sbsize); + /* + * Write out the superblock. Blank out the summary info field, as it's + * a random pointer that would make the resulting image unreproducible. 
+ */ + info = fs->fs_si; + fs->fs_si = NULL; + memcpy(writebuf, fs, sbsize); + fs->fs_si = info; + if (fsopts->needswap) ffs_sb_swap(fs, (struct fs*)writebuf); ffs_wtfs(fs->fs_sblockloc / sectorsize, sbsize, writebuf, fsopts); @@ -633,7 +659,7 @@ initcg(uint32_t cylno, time_t utime, const fsinfo_t *fsopts) acg.cg_ndblk = dmax - cbase; if (sblock.fs_contigsumsize > 0) acg.cg_nclusterblks = acg.cg_ndblk >> sblock.fs_fragshift; - start = &acg.cg_space[0] - (u_char *)(&acg.cg_firstfield); + start = sizeof(acg); if (Oflag == 2) { acg.cg_iusedoff = start; } else { diff --git a/usr.sbin/makefs/ffs/newfs_extern.h b/usr.sbin/makefs/ffs/newfs_extern.h index 636c86b356f5..82a6337a720a 100644 --- a/usr.sbin/makefs/ffs/newfs_extern.h +++ b/usr.sbin/makefs/ffs/newfs_extern.h @@ -2,7 +2,7 @@ /* From: NetBSD: extern.h,v 1.3 2000/12/01 12:03:27 simonb Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1997 Christos Zoulas. All rights reserved. * @@ -25,8 +25,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ /* prototypes */ diff --git a/usr.sbin/makefs/ffs/ufs_bmap.c b/usr.sbin/makefs/ffs/ufs_bmap.c index 196693587da0..1dc644349d3a 100644 --- a/usr.sbin/makefs/ffs/ufs_bmap.c +++ b/usr.sbin/makefs/ffs/ufs_bmap.c @@ -35,13 +35,8 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
- * - * @(#)ufs_bmap.c 8.8 (Berkeley) 8/11/95 */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/time.h> diff --git a/usr.sbin/makefs/ffs/ufs_bswap.h b/usr.sbin/makefs/ffs/ufs_bswap.h index 49b96990ccb2..36f9b88a5686 100644 --- a/usr.sbin/makefs/ffs/ufs_bswap.h +++ b/usr.sbin/makefs/ffs/ufs_bswap.h @@ -1,7 +1,7 @@ /* $NetBSD: ufs_bswap.h,v 1.13 2003/10/05 17:48:50 bouyer Exp $ */ /*- - * SPDX-License-Identifier: BSD-2-Clause-NetBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 1998 Manuel Bouyer. * @@ -24,8 +24,6 @@ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _UFS_UFS_BSWAP_H_ diff --git a/usr.sbin/makefs/ffs/ufs_inode.h b/usr.sbin/makefs/ffs/ufs_inode.h index 2b30b801b36e..c960caea5c1e 100644 --- a/usr.sbin/makefs/ffs/ufs_inode.h +++ b/usr.sbin/makefs/ffs/ufs_inode.h @@ -35,63 +35,55 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * @(#)inode.h 8.9 (Berkeley) 5/14/95 - * $FreeBSD$ */ -union dinode { - struct ufs1_dinode ffs1_din; - struct ufs2_dinode ffs2_din; -}; - struct inode { - ino_t i_number; /* The identity of the inode. */ + ino_t i_number; /* The identity of the inode. 
*/ struct vnode *i_devvp; /* vnode pointer (contains fsopts) */ struct fs *i_fs; /* File system */ union dinode i_din; uint64_t i_size; }; -#define i_ffs1_atime i_din.ffs1_din.di_atime -#define i_ffs1_atimensec i_din.ffs1_din.di_atimensec -#define i_ffs1_blocks i_din.ffs1_din.di_blocks -#define i_ffs1_ctime i_din.ffs1_din.di_ctime -#define i_ffs1_ctimensec i_din.ffs1_din.di_ctimensec -#define i_ffs1_db i_din.ffs1_din.di_db -#define i_ffs1_flags i_din.ffs1_din.di_flags -#define i_ffs1_gen i_din.ffs1_din.di_gen -#define i_ffs11_gid i_din.ffs1_din.di_gid -#define i_ffs1_ib i_din.ffs1_din.di_ib -#define i_ffs1_mode i_din.ffs1_din.di_mode -#define i_ffs1_mtime i_din.ffs1_din.di_mtime -#define i_ffs1_mtimensec i_din.ffs1_din.di_mtimensec -#define i_ffs1_nlink i_din.ffs1_din.di_nlink -#define i_ffs1_rdev i_din.ffs1_din.di_rdev -#define i_ffs1_shortlink i_din.ffs1_din.di_shortlink -#define i_ffs1_size i_din.ffs1_din.di_size -#define i_ffs1_uid i_din.ffs1_din.di_uid +#define i_ffs1_atime i_din.dp1.di_atime +#define i_ffs1_atimensec i_din.dp1.di_atimensec +#define i_ffs1_blocks i_din.dp1.di_blocks +#define i_ffs1_ctime i_din.dp1.di_ctime +#define i_ffs1_ctimensec i_din.dp1.di_ctimensec +#define i_ffs1_db i_din.dp1.di_db +#define i_ffs1_flags i_din.dp1.di_flags +#define i_ffs1_gen i_din.dp1.di_gen +#define i_ffs11_gid i_din.dp1.di_gid +#define i_ffs1_ib i_din.dp1.di_ib +#define i_ffs1_mode i_din.dp1.di_mode +#define i_ffs1_mtime i_din.dp1.di_mtime +#define i_ffs1_mtimensec i_din.dp1.di_mtimensec +#define i_ffs1_nlink i_din.dp1.di_nlink +#define i_ffs1_rdev i_din.dp1.di_rdev +#define i_ffs1_shortlink i_din.dp1.di_shortlink +#define i_ffs1_size i_din.dp1.di_size +#define i_ffs1_uid i_din.dp1.di_uid -#define i_ffs2_atime i_din.ffs2_din.di_atime -#define i_ffs2_atimensec i_din.ffs2_din.di_atimensec -#define i_ffs2_blocks i_din.ffs2_din.di_blocks -#define i_ffs2_ctime i_din.ffs2_din.di_ctime -#define i_ffs2_ctimensec i_din.ffs2_din.di_ctimensec -#define i_ffs2_birthtime 
i_din.ffs2_din.di_birthtime -#define i_ffs2_birthnsec i_din.ffs2_din.di_birthnsec -#define i_ffs2_db i_din.ffs2_din.di_db -#define i_ffs2_flags i_din.ffs2_din.di_flags -#define i_ffs2_gen i_din.ffs2_din.di_gen -#define i_ffs21_gid i_din.ffs2_din.di_gid -#define i_ffs2_ib i_din.ffs2_din.di_ib -#define i_ffs2_mode i_din.ffs2_din.di_mode -#define i_ffs2_mtime i_din.ffs2_din.di_mtime -#define i_ffs2_mtimensec i_din.ffs2_din.di_mtimensec -#define i_ffs2_nlink i_din.ffs2_din.di_nlink -#define i_ffs2_rdev i_din.ffs2_din.di_rdev -#define i_ffs2_shortlink i_din.ffs2_din.di_shortlink -#define i_ffs2_size i_din.ffs2_din.di_size -#define i_ffs2_uid i_din.ffs2_din.di_uid +#define i_ffs2_atime i_din.dp2.di_atime +#define i_ffs2_atimensec i_din.dp2.di_atimensec +#define i_ffs2_blocks i_din.dp2.di_blocks +#define i_ffs2_ctime i_din.dp2.di_ctime +#define i_ffs2_ctimensec i_din.dp2.di_ctimensec +#define i_ffs2_birthtime i_din.dp2.di_birthtime +#define i_ffs2_birthnsec i_din.dp2.di_birthnsec +#define i_ffs2_db i_din.dp2.di_db +#define i_ffs2_flags i_din.dp2.di_flags +#define i_ffs2_gen i_din.dp2.di_gen +#define i_ffs21_gid i_din.dp2.di_gid +#define i_ffs2_ib i_din.dp2.di_ib +#define i_ffs2_mode i_din.dp2.di_mode +#define i_ffs2_mtime i_din.dp2.di_mtime +#define i_ffs2_mtimensec i_din.dp2.di_mtimensec +#define i_ffs2_nlink i_din.dp2.di_nlink +#define i_ffs2_rdev i_din.dp2.di_rdev +#define i_ffs2_shortlink i_din.dp2.di_shortlink +#define i_ffs2_size i_din.dp2.di_size +#define i_ffs2_uid i_din.dp2.di_uid #undef DIP #define DIP(ip, field) \ diff --git a/usr.sbin/makefs/makefs.8 b/usr.sbin/makefs/makefs.8 index fdf8d532b69f..d20f69d87559 100644 --- a/usr.sbin/makefs/makefs.8 +++ b/usr.sbin/makefs/makefs.8 @@ -33,9 +33,7 @@ .\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE .\" POSSIBILITY OF SUCH DAMAGE. 
.\" -.\" $FreeBSD$ -.\" -.Dd September 17, 2020 +.Dd July 19, 2025 .Dt MAKEFS 8 .Os .Sh NAME @@ -111,11 +109,18 @@ suffix may be provided to indicate that indicates a percentage of the calculated image size. .It Fl D Treat duplicate paths in an mtree manifest as warnings not error. +If this flag is specified more than once, warnings about duplicate paths +are not printed at all. .It Fl d Ar debug-mask Enable various levels of debugging, depending upon which bits are set in .Ar debug-mask . -XXX: document these +This option is intended for source debugging. +.Ar debug-mask +is a bit map defined in the header file +.Ar makefs.h . +See the source for usage, and look for defines starting with +.Ar DEBUG_ . .It Fl F Ar mtree-specfile .Em This is almost certainly not the option you are looking for. To create an image from a list of files in an mtree format manifest, @@ -208,7 +213,9 @@ is a comma separated list of options. Valid file system specific options are detailed below. .It Fl p Deprecated. -See the +Create a sparse file for +.Sy ffs . +This is the same as the preferred .Fl Z flag. .It Fl R Ar roundup-size @@ -251,9 +258,13 @@ can be a .Pa pathname , where the timestamps are derived from that file, or an integer value interpreted as the number of seconds from the Epoch. -Note that timestamps specified in an +Timestamps in a .Xr mtree 5 -spec file, override the default timestamp. +specfile (specified with +.Fl F ) +are used even if a default timestamp is specified. +However, the timestamps in an mtree manifest are ignored +if a default timestamp is specified. .It Fl t Ar fs-type Create an .Ar fs-type @@ -266,6 +277,8 @@ BSD fast file system (default). ISO 9660 file system. .It Sy msdos FAT12, FAT16, or FAT32 file system. +.It Sy zfs +ZFS pool containing one or more file systems. .El .It Fl x Exclude file system nodes not explicitly listed in the specfile. @@ -365,12 +378,6 @@ version id. Allow multiple dots in a filename. 
.It Sy applicationid Application ID of the image. -.It Sy archimedes -Use the -.Ql ARCHIMEDES -extension to encode -.Tn RISC OS -metadata. .It Sy bootimagedir Boot image directory. This option is not implemented. @@ -494,10 +501,127 @@ Volume ID. .It Cm volume_label Volume Label. .El +.Ss zfs-specific options +The image created by +.Nm +contains a ZFS pool with a single vdev of type +.Ql disk . +The root dataset is always created implicitly and contains the entire input +directory tree unless additional datasets are specified using the options +described below. +.Pp +To keep images reproducible, the pool GUID and other random identifiers will +always be the same across runs of +.Nm . +This means that when a pool is first imported, its GUID should be reset +using +.Xr zpool-reguid 8 +to avoid conflicting with other pools also generated by +.Nm ; +otherwise, it will not be possible to import other pools. +This can be configured from +.Pa /etc/rc.conf +using the +.Va zpool_reguid +variable. +.Pp +The arguments consist of a keyword, an equal sign +.Pq Ql = , +and a value. +The following keywords are supported: +.Pp +.Bl -tag -width omit-trailing-period -offset indent -compact +.It ashift +The base-2 logarithm of the minimum block size. +Typical values are 9 (512B blocks) and 12 (4KB blocks). +The default value is 12. +.It bootfs +The name of the bootable dataset for the pool. +Specifying this option causes the +.Ql bootfs +property to be set in the created pool. +.It mssize +The size of metaslabs in the created pool. +By default, +.Nm +allocates large (up to 512MB) metaslabs with the expectation that +the image will be auto-expanded upon first use. +This option allows the default heuristic to be overridden. +.It verify-txgs +Prompt OpenZFS to verify pool metadata during import. +This is disabled by default as it may significantly increase import times. +.It poolguid +Use the specified 64-bit integer as the pool GUID. 
+If this option is not specified, the pool GUID will be random but fixed +across multiple identical invocations of +.Nm . +This option is useful for testing but not required for reproducibility. +.It poolname +The name of the ZFS pool. +This option must be specified. +.It rootpath +An implicit path prefix added to dataset mountpoints. +By default it is +.Pa /<poolname> . +For creating bootable pools, the +.Va rootpath +should be set to +.Pa / . +At least one dataset must have a mountpoint equal to +.Va rootpath . +.It fs +Create an additional dataset. +This option may be specified multiple times. +The argument value must be of the form +.Ar <dataset>[;<prop1=v1>[;<prop2=v2>[;...]]] , +where +.Ar dataset +is the name of the dataset and must belong to the pool's namespace. +For example, with a pool name of +.Ql test +all dataset names must be prefixed by +.Ql test/ . +A dataset must exist at each level of the pool's namespace. +For example, to create +.Ql test/foo/bar , +.Ql test/foo +must be created as well. +.Pp +The dataset mountpoints determine how the datasets are populated with +files from the staged directory tree. +Conceptually, all datasets are mounted before any are populated with files. +The root of the staged directory tree is mapped to +.Va rootpath . +.Pp +Dataset properties, as described in +.Xr zfsprops 7 , +may be specified following the dataset name. +The following properties may be set for a dataset: +.Pp +.Bl -tag -compact -offset indent +.It atime +.It canmount +.It compression +.It exec +.It mountpoint +.It setuid +.El +Note that +.Nm +does not implement compression of files included in the image, +regardless of the value of the +.Dv compression +property. 
+.El .Sh SEE ALSO .Xr mtree 5 , +.Xr rc.conf 5 , +.Xr zfsconcepts 7 , +.Xr zfsprops 7 , +.Xr zpoolprops 7 , .Xr mtree 8 , -.Xr newfs 8 +.Xr newfs 8 , +.Xr zpool-reguid 8 .Sh HISTORY The .Nm @@ -518,4 +642,6 @@ and first appeared in .An Ram Vedam (cd9660 support), .An Christos Zoulas -(msdos support). +(msdos support), +.An Mark Johnston +(zfs support). diff --git a/usr.sbin/makefs/makefs.c b/usr.sbin/makefs/makefs.c index 888a2b3edea7..46e513e22b25 100644 --- a/usr.sbin/makefs/makefs.c +++ b/usr.sbin/makefs/makefs.c @@ -37,15 +37,13 @@ * POSSIBILITY OF SUCH DAMAGE. */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/types.h> #include <sys/stat.h> #include <assert.h> #include <ctype.h> #include <errno.h> #include <limits.h> +#include <locale.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -77,6 +75,9 @@ static fstype_t fstypes[] = { ENTRY(cd9660), ENTRY(ffs), ENTRY(msdos), +#ifdef HAVE_ZFS + ENTRY(zfs), +#endif { .type = NULL }, }; @@ -97,12 +98,19 @@ main(int argc, char *argv[]) fstype_t *fstype; fsinfo_t fsoptions; fsnode *root; - int ch, i, len; + int ch, i, len; const char *subtree; const char *specfile; setprogname(argv[0]); + /* + * Set the locale for collation, so that directory entry sorting is + * consistent. 
+ */ + if (setlocale(LC_COLLATE, "C") == NULL) + err(1, "setlocale"); + debug = 0; if ((fstype = get_fstype(DEFAULT_FSTYPE)) == NULL) errx(1, "Unknown default fs type `%s'.", DEFAULT_FSTYPE); @@ -164,7 +172,7 @@ main(int argc, char *argv[]) break; case 'D': - dupsok = 1; + dupsok++; break; case 'd': @@ -270,7 +278,6 @@ main(int argc, char *argv[]) fsoptions.sparse = 1; break; - case '?': default: usage(fstype, &fsoptions); /* NOTREACHED */ @@ -429,6 +436,22 @@ set_option_var(const option_t *options, const char *var, const char *val, return -1; } +void +set_tstamp(fsnode *cur) +{ + cur->inode->st.st_atime = stampst.st_atime; + cur->inode->st.st_mtime = stampst.st_mtime; + cur->inode->st.st_ctime = stampst.st_ctime; +#if HAVE_STRUCT_STAT_ST_MTIMENSEC + cur->inode->st.st_atimensec = stampst.st_atimensec; + cur->inode->st.st_mtimensec = stampst.st_mtimensec; + cur->inode->st.st_ctimensec = stampst.st_ctimensec; +#endif +#if HAVE_STRUCT_STAT_BIRTHTIME + cur->inode->st.st_birthtime = stampst.st_birthtime; + cur->inode->st.st_birthtimensec = stampst.st_birthtimensec; +#endif +} static fstype_t * get_fstype(const char *type) @@ -471,7 +494,7 @@ get_tstamp(const char *b, struct stat *st) } st->st_ino = 1; -#ifdef HAVE_STRUCT_STAT_BIRTHTIME +#if HAVE_STRUCT_STAT_BIRTHTIME st->st_birthtime = #endif st->st_mtime = st->st_ctime = st->st_atime = when; diff --git a/usr.sbin/makefs/makefs.h b/usr.sbin/makefs/makefs.h index 68dc0362dd21..3cd56a036670 100644 --- a/usr.sbin/makefs/makefs.h +++ b/usr.sbin/makefs/makefs.h @@ -35,13 +35,17 @@ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. - * - * $FreeBSD$ */ #ifndef _MAKEFS_H #define _MAKEFS_H +#if HAVE_NBTOOL_CONFIG_H +#include "nbtool_config.h" +#else +#define HAVE_STRUCT_STAT_ST_FLAGS 1 +#endif + #include <sys/stat.h> #include <err.h> @@ -58,7 +62,7 @@ * * name "." 
"bin" "netbsd" * type S_IFDIR S_IFDIR S_IFREG - * next > > NULL + * next > > NULL * parent NULL NULL NULL * child NULL v * @@ -78,15 +82,26 @@ enum fi_flags { FI_SIZED = 1<<0, /* inode sized */ FI_ALLOCATED = 1<<1, /* fsinode->ino allocated */ FI_WRITTEN = 1<<2, /* inode written */ + FI_ROOT = 1<<3, /* root of a ZFS dataset */ }; typedef struct { uint32_t ino; /* inode number used on target fs */ uint32_t nlink; /* number of links to this entry */ enum fi_flags flags; /* flags used by fs specific code */ + void *param; /* for use by individual fs impls */ struct stat st; /* stat entry */ +#if !HAVE_STRUCT_STAT_ST_FLAGS + uint32_t st_flags; /* stand-in for st.st_flags */ +#endif } fsinode; +#if HAVE_STRUCT_STAT_ST_FLAGS +#define FSINODE_ST_FLAGS(inode) (inode).st.st_flags +#else +#define FSINODE_ST_FLAGS(inode) (inode).st_flags +#endif + typedef struct _fsnode { struct _fsnode *parent; /* parent (NULL if root) */ struct _fsnode *child; /* child (if type == S_IFDIR) */ @@ -173,6 +188,7 @@ fsnode * read_mtree(const char *, fsnode *); int set_option(const option_t *, const char *, char *, size_t); int set_option_var(const option_t *, const char *, const char *, char *, size_t); +void set_tstamp(fsnode *); fsnode * walk_dir(const char *, const char *, fsnode *, fsnode *); void free_fsnodes(fsnode *); option_t * copy_opts(const option_t *); @@ -186,6 +202,9 @@ void fs ## _makefs(const char *, const char *, fsnode *, fsinfo_t *) DECLARE_FUN(cd9660); DECLARE_FUN(ffs); DECLARE_FUN(msdos); +#ifdef HAVE_ZFS +DECLARE_FUN(zfs); +#endif extern u_int debug; extern int dupsok; diff --git a/usr.sbin/makefs/msdos.c b/usr.sbin/makefs/msdos.c index ec38c1d207dd..3707481a1c47 100644 --- a/usr.sbin/makefs/msdos.c +++ b/usr.sbin/makefs/msdos.c @@ -32,11 +32,6 @@ #include "nbtool_config.h" #endif -#include <sys/cdefs.h> -#if defined(__RCSID) && !defined(__lint) -__FBSDID("$FreeBSD$"); -#endif /* !__lint */ - #include <sys/param.h> #if !HAVE_NBTOOL_CONFIG_H diff --git 
a/usr.sbin/makefs/msdos.h b/usr.sbin/makefs/msdos.h index ea78e49648a3..376713051d52 100644 --- a/usr.sbin/makefs/msdos.h +++ b/usr.sbin/makefs/msdos.h @@ -1,4 +1,3 @@ -/* $FreeBSD$ */ /* $NetBSD: msdos.h,v 1.3 2015/10/16 16:40:02 christos Exp $ */ /*- diff --git a/usr.sbin/makefs/msdos/Makefile.inc b/usr.sbin/makefs/msdos/Makefile.inc index fa3890d8393b..cfa9e0e114c2 100644 --- a/usr.sbin/makefs/msdos/Makefile.inc +++ b/usr.sbin/makefs/msdos/Makefile.inc @@ -1,12 +1,9 @@ -# $FreeBSD$ -# - MSDOS= ${SRCTOP}/sys/fs/msdosfs MSDOS_NEWFS= ${SRCTOP}/sbin/newfs_msdos .PATH: ${SRCDIR}/msdos ${MSDOS} ${MSDOS_NEWFS} -CFLAGS+= -DMAKEFS -I${MSDOS} -I${MSDOS_NEWFS} +CFLAGS+= -DMAKEFS -D_WANT_MSDOSFS_INTERNALS -I${MSDOS} -I${MSDOS_NEWFS} SRCS+= mkfs_msdos.c SRCS+= msdosfs_conv.c msdosfs_denode.c msdosfs_fat.c msdosfs_lookup.c diff --git a/usr.sbin/makefs/msdos/direntry.h b/usr.sbin/makefs/msdos/direntry.h index 9cc601d5e96c..9d6c65dfcc7d 100644 --- a/usr.sbin/makefs/msdos/direntry.h +++ b/usr.sbin/makefs/msdos/direntry.h @@ -1,4 +1,3 @@ -/* $FreeBSD$ */ /* $NetBSD: direntry.h,v 1.14 1997/11/17 15:36:32 ws Exp $ */ /*- diff --git a/usr.sbin/makefs/msdos/msdosfs_conv.c b/usr.sbin/makefs/msdos/msdosfs_conv.c index b53656d3b439..cacaa4a49a2c 100644 --- a/usr.sbin/makefs/msdos/msdosfs_conv.c +++ b/usr.sbin/makefs/msdos/msdosfs_conv.c @@ -47,9 +47,6 @@ * October 1992 */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/endian.h> diff --git a/usr.sbin/makefs/msdos/msdosfs_denode.c b/usr.sbin/makefs/msdos/msdosfs_denode.c index ff706ad99563..88e90ab87c7e 100644 --- a/usr.sbin/makefs/msdos/msdosfs_denode.c +++ b/usr.sbin/makefs/msdos/msdosfs_denode.c @@ -49,9 +49,6 @@ * October 1992 */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/errno.h> diff --git a/usr.sbin/makefs/msdos/msdosfs_fat.c b/usr.sbin/makefs/msdos/msdosfs_fat.c index 0081e2d2d38c..16e2ce44084d 100644 --- a/usr.sbin/makefs/msdos/msdosfs_fat.c +++ 
b/usr.sbin/makefs/msdos/msdosfs_fat.c @@ -1,4 +1,3 @@ -/* $FreeBSD$ */ /* $NetBSD: msdosfs_fat.c,v 1.28 1997/11/17 15:36:49 ws Exp $ */ /*- @@ -135,7 +134,7 @@ pcbmap(struct denode *dep, u_long findcn, daddr_t *bnp, u_long *cnp, int *sp) int error; u_long i; u_long cn; - u_long prevcn = 0; /* XXX: prevcn could be used unititialized */ + u_long prevcn = 0; /* XXX: prevcn could be used uninitialized */ u_long byteoffset; u_long bn; u_long bo; @@ -246,7 +245,7 @@ pcbmap(struct denode *dep, u_long findcn, daddr_t *bnp, u_long *cnp, int *sp) return (0); } -hiteof:; +hiteof: if (cnp) *cnp = i; if (bp) @@ -668,7 +667,7 @@ chainlength(struct msdosfsmount *pmp, u_long start, u_long count) } /* - * Allocate contigous free clusters. + * Allocate contiguous free clusters. * * pmp - mount point. * start - start of cluster chain. @@ -737,7 +736,7 @@ clusteralloc1(struct msdosfsmount *pmp, u_long start, u_long count, { u_long idx; u_long len, newst, foundl, cn, l; - u_long foundcn = 0; /* XXX: foundcn could be used unititialized */ + u_long foundcn = 0; /* XXX: foundcn could be used uninitialized */ u_int map; MSDOSFS_DPRINTF(("clusteralloc(): find %lu clusters\n", count)); diff --git a/usr.sbin/makefs/msdos/msdosfs_lookup.c b/usr.sbin/makefs/msdos/msdosfs_lookup.c index fb2f4deceaf2..80bab768959d 100644 --- a/usr.sbin/makefs/msdos/msdosfs_lookup.c +++ b/usr.sbin/makefs/msdos/msdosfs_lookup.c @@ -1,4 +1,3 @@ -/* $FreeBSD$ */ /* $NetBSD: msdosfs_lookup.c,v 1.37 1997/11/17 15:36:54 ws Exp $ */ /*- @@ -125,7 +124,6 @@ createde(struct denode *dep, struct denode *ddep, struct denode **depp, diroffset &= pmp->pm_crbomask; if ((error = bread((void *)pmp->pm_devvp, bn, blsize, NOCRED, &bp)) != 0) { - brelse(bp); return error; } ndep = bptoep(pmp, bp, ddep->de_fndoffset); @@ -157,7 +155,6 @@ createde(struct denode *dep, struct denode *ddep, struct denode **depp, error = bread((void *)pmp->pm_devvp, bn, blsize, NOCRED, &bp); if (error) { - brelse(bp); return error; } ndep = bptoep(pmp, bp, 
ddep->de_fndoffset); @@ -213,7 +210,6 @@ m_readep(struct msdosfsmount *pmp, u_long dirclust, u_long diroffset, bn = detobn(pmp, dirclust, diroffset); if ((error = bread((void *)pmp->pm_devvp, bn, blsize, NOCRED, bpp)) != 0) { - brelse(*bpp); *bpp = NULL; return (error); } @@ -274,7 +270,6 @@ uniqdosname(struct denode *dep, struct componentname *cnp, u_char *cp) error = bread((void *)pmp->pm_devvp, bn, blsize, NOCRED, &bp); if (error) { - brelse(bp); return error; } for (dentp = (struct direntry *)bp->b_data; diff --git a/usr.sbin/makefs/msdos/msdosfs_vfsops.c b/usr.sbin/makefs/msdos/msdosfs_vfsops.c index d543adc2bbcd..8b5eac4a4b66 100644 --- a/usr.sbin/makefs/msdos/msdosfs_vfsops.c +++ b/usr.sbin/makefs/msdos/msdosfs_vfsops.c @@ -49,8 +49,6 @@ #include <sys/cdefs.h> /* $NetBSD: msdosfs_vfsops.c,v 1.10 2016/01/30 09:59:27 mlelstv Exp $ */ -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/mount.h> diff --git a/usr.sbin/makefs/msdos/msdosfs_vnops.c b/usr.sbin/makefs/msdos/msdosfs_vnops.c index 5bc9b495c586..b104f419a86a 100644 --- a/usr.sbin/makefs/msdos/msdosfs_vnops.c +++ b/usr.sbin/makefs/msdos/msdosfs_vnops.c @@ -49,9 +49,6 @@ * October 1992 */ -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/errno.h> #include <sys/mman.h> @@ -97,10 +94,8 @@ static void unix2fattime(const struct timespec *tsp, uint16_t *ddp, static void msdosfs_times(struct denode *dep, const struct stat *st) { - if (stampst.st_ino) - st = &stampst; -#ifdef HAVE_STRUCT_STAT_BIRTHTIME +#if HAVE_STRUCT_STAT_BIRTHTIME unix2fattime(&st->st_birthtim, &dep->de_CDate, &dep->de_CTime); #else unix2fattime(&st->st_ctim, &dep->de_CDate, &dep->de_CTime); @@ -116,7 +111,7 @@ unix2fattime(const struct timespec *tsp, uint16_t *ddp, uint16_t *dtp) struct tm lt = {0}; t1 = tsp->tv_sec; - localtime_r(&t1, <); + gmtime_r(&t1, <); unsigned long fat_time = ((lt.tm_year - 80) << 25) | ((lt.tm_mon + 1) << 21) | diff --git a/usr.sbin/makefs/mtree.c 
b/usr.sbin/makefs/mtree.c index 4272299ce135..4f3c3f85dcc3 100644 --- a/usr.sbin/makefs/mtree.c +++ b/usr.sbin/makefs/mtree.c @@ -1,5 +1,5 @@ /*- - * SPDX-License-Identifier: BSD-2-Clause-FreeBSD + * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2011 Marcel Moolenaar * All rights reserved. @@ -29,9 +29,6 @@ #include "nbtool_config.h" #endif -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/queue.h> #include <sys/sbuf.h> @@ -536,13 +533,11 @@ read_mtree_keywords(FILE *fp, fsnode *node) break; } flset = flclr = 0; -#if HAVE_STRUCT_STAT_ST_FLAGS if (!strtofflags(&value, &flset, &flclr)) { - st->st_flags &= ~flclr; - st->st_flags |= flset; + FSINODE_ST_FLAGS(*node->inode) &= ~flclr; + FSINODE_ST_FLAGS(*node->inode) |= flset; } else error = errno; -#endif } else error = ENOSYS; break; @@ -636,6 +631,9 @@ read_mtree_keywords(FILE *fp, fsnode *node) } /* Ignore. */ } else if (strcmp(keyword, "time") == 0) { + /* Ignore if a default timestamp is present. */ + if (stampst.st_ino != 0) + break; if (value == NULL) { error = ENOATTR; break; @@ -725,7 +723,9 @@ read_mtree_keywords(FILE *fp, fsnode *node) return (error); st->st_mode = (st->st_mode & ~S_IFMT) | node->type; - + /* Store default timestamp, if present. */ + if (stampst.st_ino != 0) + set_tstamp(node); /* Nothing more to do for the global defaults. */ if (node->name == NULL) return (0); @@ -742,7 +742,10 @@ read_mtree_keywords(FILE *fp, fsnode *node) type = S_IFREG; } else if (node->type != 0) { type = node->type; - if (type == S_IFREG) { + if (type == S_IFLNK && node->symlink == NULL) { + mtree_error("%s: link type requires link keyword", node->name); + return (0); + } else if (type == S_IFREG) { /* the named path is the default contents */ node->contents = mtree_file_path(node); } @@ -836,7 +839,7 @@ read_mtree_spec1(FILE *fp, bool def, const char *name) * not the '.' node of the parent directory, but the directory * node within the parent to which the child relates. 
However, * going up a directory means we need to find the '.' node to - * which the directoy node is linked. This we can do via the + * which the directory node is linked. This we can do via the * first * pointer, because '.' is always the first entry in a * directory. */ @@ -894,11 +897,11 @@ read_mtree_spec1(FILE *fp, bool def, const char *name) if (strcmp(name, node->name) == 0) { if (def == true) { - if (!dupsok) + if (dupsok == 0) mtree_error( "duplicate definition of %s", name); - else + else if (dupsok == 1) mtree_warning( "duplicate definition of %s", name); @@ -1016,7 +1019,7 @@ read_mtree_spec(FILE *fp) } } - /* Ignore absolute specfications that end with a slash. */ + /* Ignore absolute specifications that end with a slash. */ if (!error && pathspec[0] != '\0') error = read_mtree_spec1(fp, true, pathspec); @@ -1053,8 +1056,16 @@ read_mtree(const char *fname, fsnode *node) mtree_global.inode = &mtree_global_inode; mtree_global_inode.nlink = 1; mtree_global_inode.st.st_nlink = 1; - mtree_global_inode.st.st_atime = mtree_global_inode.st.st_ctime = - mtree_global_inode.st.st_mtime = time(NULL); + if (stampst.st_ino != 0) { + set_tstamp(&mtree_global); + } else { +#if HAVE_STRUCT_STAT_BIRTHTIME + mtree_global_inode.st.st_birthtime = +#endif + mtree_global_inode.st.st_atime = + mtree_global_inode.st.st_ctime = + mtree_global_inode.st.st_mtime = time(NULL); + } errors = warnings = 0; setgroupent(1); diff --git a/usr.sbin/makefs/tests/Makefile b/usr.sbin/makefs/tests/Makefile index 85e4b233aea7..748bafa06211 100644 --- a/usr.sbin/makefs/tests/Makefile +++ b/usr.sbin/makefs/tests/Makefile @@ -1,7 +1,13 @@ -# $FreeBSD$ +.include <src.opts.mk> ATF_TESTS_SH+= makefs_cd9660_tests +TEST_METADATA.makefs_cd9660_tests+= required_files="/sbin/mount_cd9660" ATF_TESTS_SH+= makefs_ffs_tests +ATF_TESTS_SH+= makefs_msdos_tests +TEST_METADATA.makefs_msdos_tests+= required_files="/sbin/mount_msdosfs" +.if ${MK_ZFS} != "no" +ATF_TESTS_SH+= makefs_zfs_tests +.endif BINDIR= 
${TESTSDIR} @@ -9,8 +15,6 @@ BINDIR= ${TESTSDIR} SCRIPTS+= makefs_tests_common.sh SCRIPTSNAME_makefs_tests_common.sh= makefs_tests_common.sh -TEST_METADATA.makefs_cd9660_tests+= required_files="/sbin/mount_cd9660" - .for t in ${ATF_TESTS_SH} TEST_METADATA.$t+= required_user="root" .endfor diff --git a/usr.sbin/makefs/tests/Makefile.depend b/usr.sbin/makefs/tests/Makefile.depend index f80275d86ab1..11aba52f82cf 100644 --- a/usr.sbin/makefs/tests/Makefile.depend +++ b/usr.sbin/makefs/tests/Makefile.depend @@ -1,4 +1,3 @@ -# $FreeBSD$ # Autogenerated - do NOT edit! DIRDEPS = \ diff --git a/usr.sbin/makefs/tests/makefs_cd9660_tests.sh b/usr.sbin/makefs/tests/makefs_cd9660_tests.sh index 8a3ac1684032..e058dfc57b7b 100644 --- a/usr.sbin/makefs/tests/makefs_cd9660_tests.sh +++ b/usr.sbin/makefs/tests/makefs_cd9660_tests.sh @@ -23,9 +23,6 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# -# $FreeBSD$ -# # A note on specs: # - A copy of the ISO-9660 spec can be found here: @@ -54,8 +51,8 @@ common_cleanup() check_base_iso9660_image_contents() { # Symlinks are treated like files when rockridge support isn't - # specified - check_image_contents "$@" -X c + # specified, and directories cannot contain a '.'. 
+ check_image_contents "$@" -X c -X .g -X _g atf_check -e empty -o empty -s exit:0 test -L $TEST_INPUTS_DIR/c atf_check -e empty -o empty -s exit:0 test -f $TEST_MOUNT_DIR/c @@ -377,6 +374,114 @@ o_flag_rockridge_dev_nodes_cleanup() common_cleanup } +atf_test_case T_flag_dir cleanup +T_flag_dir_body() +{ + timestamp=1742574909 + check_cd9660_support + create_test_dirs + + mkdir -p $TEST_INPUTS_DIR/dir1 + atf_check -e empty -o empty -s exit:0 \ + $MAKEFS -T $timestamp -o rockridge $TEST_IMAGE $TEST_INPUTS_DIR + + mount_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + atf_check_equal $st_atime $timestamp + atf_check_equal $st_mtime $timestamp + atf_check_equal $st_ctime $timestamp +} + +T_flag_dir_cleanup() +{ + common_cleanup +} + +atf_test_case T_flag_F_flag cleanup +T_flag_F_flag_body() +{ + atf_expect_fail "-F doesn't take precedence over -T" + timestamp_F=1742574909 + timestamp_T=1742574910 + create_test_dirs + mkdir -p $TEST_INPUTS_DIR/dir1 + + atf_check -e empty -o save:$TEST_SPEC_FILE -s exit:0 \ + mtree -c -k "type,time" -p $TEST_INPUTS_DIR + change_mtree_timestamp $TEST_SPEC_FILE $timestamp_F + atf_check -e empty -o not-empty -s exit:0 \ + $MAKEFS -F $TEST_SPEC_FILE -T $timestamp_T -o rockridge $TEST_IMAGE $TEST_INPUTS_DIR + + mount_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + atf_check_equal $st_atime $timestamp_F + atf_check_equal $st_mtime $timestamp_F + atf_check_equal $st_ctime $timestamp_F +} + +T_flag_F_flag_cleanup() +{ + common_cleanup +} + +atf_test_case T_flag_mtree cleanup +T_flag_mtree_body() +{ + timestamp=1742574909 + create_test_dirs + mkdir -p $TEST_INPUTS_DIR/dir1 + + atf_check -e empty -o save:$TEST_SPEC_FILE -s exit:0 \ + mtree -c -k "type" -p $TEST_INPUTS_DIR + atf_check -e empty -o empty -s exit:0 \ + $MAKEFS -T $timestamp -o rockridge $TEST_IMAGE $TEST_SPEC_FILE + + check_cd9660_support + mount_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + atf_check_equal $st_atime $timestamp + atf_check_equal $st_mtime $timestamp + atf_check_equal 
$st_ctime $timestamp +} + +T_flag_mtree_cleanup() +{ + common_cleanup +} + +atf_test_case duplicate_names cleanup +duplicate_names_head() +{ + atf_set "descr" "Ensure shortened directory names are unique (PR283238)" +} +duplicate_names_body() +{ + check_cd9660_support + create_test_dirs + + # Create three directories which are identical in the first 31 characters. + dir_prefix="this_directory_name_is_31_chars" + mkdir -p $TEST_INPUTS_DIR/${dir_prefix}1 + mkdir -p $TEST_INPUTS_DIR/${dir_prefix}2 + mkdir -p $TEST_INPUTS_DIR/${dir_prefix}3 + + atf_check -e empty -o empty -s exit:0 \ + $MAKEFS -o rockridge $TEST_IMAGE $TEST_INPUTS_DIR + + # Disable Rock Ridge extensions to read the plain ISO Level 2 names. + mount_image -r + + # The specific way the short names are made unique is not important. + # We verify only that there are three unique names and that the unique + # part is at the end of the name. + atf_check_equal $(ls -1 $TEST_MOUNT_DIR | sort | uniq | wc -l) 3 + atf_check_equal $(ls -1 $TEST_MOUNT_DIR | cut -c -29 | sort | uniq | wc -l) 1 +} +duplicate_names_cleanup() +{ + common_cleanup +} + atf_init_test_cases() { atf_add_test_case D_flag @@ -395,4 +500,9 @@ atf_init_test_cases() atf_add_test_case o_flag_publisher atf_add_test_case o_flag_rockridge atf_add_test_case o_flag_rockridge_dev_nodes + atf_add_test_case T_flag_dir + atf_add_test_case T_flag_F_flag + atf_add_test_case T_flag_mtree + + atf_add_test_case duplicate_names } diff --git a/usr.sbin/makefs/tests/makefs_ffs_tests.sh b/usr.sbin/makefs/tests/makefs_ffs_tests.sh index 1a415cb5f518..f828f632b06e 100644 --- a/usr.sbin/makefs/tests/makefs_ffs_tests.sh +++ b/usr.sbin/makefs/tests/makefs_ffs_tests.sh @@ -23,9 +23,6 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# -# $FreeBSD$ -# MAKEFS="makefs -t ffs" MOUNT="mount" @@ -244,6 +241,80 @@ o_flag_version_2_cleanup() common_cleanup } + +atf_test_case T_flag_dir cleanup +T_flag_dir_body() +{ + timestamp=1742574909 + create_test_dirs + + mkdir -p $TEST_INPUTS_DIR/dir1 + atf_check -e empty -o not-empty -s exit:0 \ + $MAKEFS -M 1m -T $timestamp $TEST_IMAGE $TEST_INPUTS_DIR + + mount_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + atf_check_equal $st_atime $timestamp + atf_check_equal $st_mtime $timestamp + atf_check_equal $st_ctime $timestamp +} + +T_flag_dir_cleanup() +{ + common_cleanup +} + +atf_test_case T_flag_F_flag cleanup +T_flag_F_flag_body() +{ + atf_expect_fail "-F doesn't take precedence over -T" + timestamp_F=1742574909 + timestamp_T=1742574910 + create_test_dirs + mkdir -p $TEST_INPUTS_DIR/dir1 + + atf_check -e empty -o save:$TEST_SPEC_FILE -s exit:0 \ + mtree -c -k "type,time" -p $TEST_INPUTS_DIR + change_mtree_timestamp $TEST_SPEC_FILE $timestamp_F + atf_check -e empty -o not-empty -s exit:0 \ + $MAKEFS -F $TEST_SPEC_FILE -T $timestamp_T -M 1m $TEST_IMAGE $TEST_INPUTS_DIR + + mount_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + atf_check_equal $st_atime $timestamp_F + atf_check_equal $st_mtime $timestamp_F + atf_check_equal $st_ctime $timestamp_F +} + +T_flag_F_flag_cleanup() +{ + common_cleanup +} + +atf_test_case T_flag_mtree cleanup +T_flag_mtree_body() +{ + timestamp=1742574909 + create_test_dirs + mkdir -p $TEST_INPUTS_DIR/dir1 + + atf_check -e empty -o save:$TEST_SPEC_FILE -s exit:0 \ + mtree -c -k "type" -p $TEST_INPUTS_DIR + atf_check -e empty -o not-empty -s exit:0 \ + $MAKEFS -M 1m -T $timestamp $TEST_IMAGE $TEST_SPEC_FILE + + mount_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + atf_check_equal $st_atime $timestamp + atf_check_equal $st_mtime $timestamp + atf_check_equal $st_ctime $timestamp +} + +T_flag_mtree_cleanup() +{ + common_cleanup +} + atf_init_test_cases() { @@ -258,4 +329,7 @@ atf_init_test_cases() atf_add_test_case o_flag_version_1 
atf_add_test_case o_flag_version_2 + atf_add_test_case T_flag_dir + atf_add_test_case T_flag_F_flag + atf_add_test_case T_flag_mtree } diff --git a/usr.sbin/makefs/tests/makefs_msdos_tests.sh b/usr.sbin/makefs/tests/makefs_msdos_tests.sh new file mode 100644 index 000000000000..fb94429b477b --- /dev/null +++ b/usr.sbin/makefs/tests/makefs_msdos_tests.sh @@ -0,0 +1,136 @@ +#- +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2025 The FreeBSD Foundation +# +# This software was developed by Klara, Inc. +# under sponsorship from the FreeBSD Foundation. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# + +MAKEFS="makefs -t msdos" +MOUNT="mount_msdosfs" +. "$(dirname "$0")/makefs_tests_common.sh" + +common_cleanup() +{ + if ! 
test_md_device=$(cat $TEST_MD_DEVICE_FILE); then + echo "$TEST_MD_DEVICE_FILE could not be opened; has an md(4) device been attached?" + return + fi + + umount -f /dev/$test_md_device || : + mdconfig -d -u $test_md_device || : +} + +check_msdosfs_support() +{ + kldstat -m msdosfs || \ + atf_skip "Requires msdosfs filesystem support to be present in the kernel" +} + +atf_test_case T_flag_dir cleanup +T_flag_dir_body() +{ + atf_expect_fail \ + "The msdos backend saves the wrong timestamp value" \ + "(possibly due to the 2s resolution for FAT timestamp)" + timestamp=1742574909 + check_msdosfs_support + + create_test_dirs + mkdir -p $TEST_INPUTS_DIR/dir1 + atf_check -e empty -o not-empty -s exit:0 \ + $MAKEFS -T $timestamp -s 1m $TEST_IMAGE $TEST_INPUTS_DIR + + mount_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + atf_check_equal $st_atime $timestamp + atf_check_equal $st_mtime $timestamp + atf_check_equal $st_ctime $timestamp +} + +T_flag_dir_cleanup() +{ + common_cleanup +} + +atf_test_case T_flag_F_flag cleanup +T_flag_F_flag_body() +{ + atf_expect_fail "-F doesn't take precedence over -T" + timestamp_F=1742574909 + timestamp_T=1742574910 + create_test_dirs + mkdir -p $TEST_INPUTS_DIR/dir1 + + atf_check -e empty -o save:$TEST_SPEC_FILE -s exit:0 \ + mtree -c -k "type,time" -p $TEST_INPUTS_DIR + change_mtree_timestamp $TEST_SPEC_FILE $timestamp_F + atf_check -e empty -o not-empty -s exit:0 \ + $MAKEFS -F $TEST_SPEC_FILE -T $timestamp_T -s 1m $TEST_IMAGE $TEST_INPUTS_DIR + + mount_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + atf_check_equal $st_atime $timestamp_F + atf_check_equal $st_mtime $timestamp_F + atf_check_equal $st_ctime $timestamp_F +} + +T_flag_F_flag_cleanup() +{ + common_cleanup +} + +atf_test_case T_flag_mtree cleanup +T_flag_mtree_body() +{ + timestamp=1742574908 # Even value, timestamp precision is 2s. 
+ check_msdosfs_support + + create_test_dirs + mkdir -p $TEST_INPUTS_DIR/dir1 + atf_check -e empty -o save:$TEST_SPEC_FILE -s exit:0 \ + mtree -c -k "type" -p $TEST_INPUTS_DIR + atf_check -e empty -o not-empty -s exit:0 \ + $MAKEFS -T $timestamp -s 1m $TEST_IMAGE $TEST_SPEC_FILE + + mount_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + # FAT directory entries don't have an access time, just a date. + #atf_check_equal $st_atime $timestamp + atf_check_equal $st_mtime $timestamp + atf_check_equal $st_ctime $timestamp +} + +T_flag_mtree_cleanup() +{ + common_cleanup +} + +atf_init_test_cases() +{ + atf_add_test_case T_flag_dir + atf_add_test_case T_flag_F_flag + atf_add_test_case T_flag_mtree +} diff --git a/usr.sbin/makefs/tests/makefs_tests_common.sh b/usr.sbin/makefs/tests/makefs_tests_common.sh index 5eb4ee5bf9f5..edb79bc811e1 100644 --- a/usr.sbin/makefs/tests/makefs_tests_common.sh +++ b/usr.sbin/makefs/tests/makefs_tests_common.sh @@ -23,9 +23,6 @@ # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-# -# $FreeBSD$ -# KB=1024 : ${TMPDIR=/tmp} @@ -141,6 +138,13 @@ mount_image() atf_check -e empty -o save:$TEST_MD_DEVICE_FILE -s exit:0 \ mdconfig -a -f $TEST_IMAGE atf_check -e empty -o empty -s exit:0 \ - $MOUNT /dev/$(cat $TEST_MD_DEVICE_FILE) $TEST_MOUNT_DIR + $MOUNT ${1} /dev/$(cat $TEST_MD_DEVICE_FILE) $TEST_MOUNT_DIR } +change_mtree_timestamp() +{ + filename="$1" + timestamp="$2" + + sed -i "" "s/time=.*$/time=${timestamp}.0/g" "$filename" +} diff --git a/usr.sbin/makefs/tests/makefs_zfs_tests.sh b/usr.sbin/makefs/tests/makefs_zfs_tests.sh new file mode 100644 index 000000000000..2fafce85b347 --- /dev/null +++ b/usr.sbin/makefs/tests/makefs_zfs_tests.sh @@ -0,0 +1,1060 @@ +#- +# SPDX-License-Identifier: BSD-2-Clause +# +# Copyright (c) 2022-2023 The FreeBSD Foundation +# +# This software was developed by Mark Johnston under sponsorship from +# the FreeBSD Foundation. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# + +MAKEFS="makefs -t zfs -o verify-txgs=true -o poolguid=$$" +ZFS_POOL_NAME="makefstest$$" +TEST_ZFS_POOL_NAME="$TMPDIR/poolname" + +. "$(dirname "$0")/makefs_tests_common.sh" + +common_cleanup() +{ + local pool md + + # Try to force a TXG, this can help catch bugs by triggering a panic. + sync + + pool=$(cat $TEST_ZFS_POOL_NAME) + if zpool list "$pool" >/dev/null; then + zpool destroy "$pool" + fi + + md=$(cat $TEST_MD_DEVICE_FILE) + if [ -c /dev/"$md" ]; then + mdconfig -d -u "$md" + fi +} + +import_image() +{ + atf_check -e empty -o save:$TEST_MD_DEVICE_FILE -s exit:0 \ + mdconfig -a -f $TEST_IMAGE + atf_check -o ignore -e empty -s exit:0 \ + zdb -e -p /dev/$(cat $TEST_MD_DEVICE_FILE) -mmm -ddddd $ZFS_POOL_NAME + atf_check zpool import -R $TEST_MOUNT_DIR $ZFS_POOL_NAME + echo "$ZFS_POOL_NAME" > $TEST_ZFS_POOL_NAME +} + +# +# Test autoexpansion of the vdev. +# +# The pool is initially 10GB, so we get 10GB minus one metaslab's worth of +# usable space for data. Then the pool is expanded to 50GB, and the amount of +# usable space is 50GB minus one metaslab. 
+# +atf_test_case autoexpand cleanup +autoexpand_body() +{ + local mssize poolsize poolsize1 newpoolsize + + create_test_inputs + + mssize=$((128 * 1024 * 1024)) + poolsize=$((10 * 1024 * 1024 * 1024)) + atf_check $MAKEFS -s $poolsize -o mssize=$mssize -o rootpath=/ \ + -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + newpoolsize=$((50 * 1024 * 1024 * 1024)) + truncate -s $newpoolsize $TEST_IMAGE + + import_image + + check_image_contents + + poolsize1=$(zpool list -Hp -o size $ZFS_POOL_NAME) + atf_check [ $((poolsize1 + $mssize)) -eq $poolsize ] + + atf_check zpool online -e $ZFS_POOL_NAME /dev/$(cat $TEST_MD_DEVICE_FILE) + + check_image_contents + + poolsize1=$(zpool list -Hp -o size $ZFS_POOL_NAME) + atf_check [ $((poolsize1 + $mssize)) -eq $newpoolsize ] +} +autoexpand_cleanup() +{ + common_cleanup +} + +# +# Test with some default layout defined by the common code. +# +atf_test_case basic cleanup +basic_body() +{ + create_test_inputs + + atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents +} +basic_cleanup() +{ + common_cleanup +} + +# +# Try configuring various compression algorithms. +# +atf_test_case compression cleanup +compression_body() +{ + create_test_inputs + + cd $TEST_INPUTS_DIR + mkdir dir + mkdir dir2 + cd - + + for alg in off on lzjb gzip gzip-1 gzip-2 gzip-3 gzip-4 \ + gzip-5 gzip-6 gzip-7 gzip-8 gzip-9 zle lz4 zstd; do + atf_check $MAKEFS -s 1g -o rootpath=/ \ + -o poolname=$ZFS_POOL_NAME \ + -o fs=${ZFS_POOL_NAME}\;compression=$alg \ + -o fs=${ZFS_POOL_NAME}/dir \ + -o fs=${ZFS_POOL_NAME}/dir2\;compression=off \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents + + if [ $alg = gzip-6 ]; then + # ZFS reports gzip-6 as just gzip since it uses + # a default compression level of 6. + alg=gzip + fi + # The "dir" dataset's compression algorithm should be + # inherited from the root dataset. 
+ atf_check -o inline:$alg\\n -e empty -s exit:0 \ + zfs get -H -o value compression ${ZFS_POOL_NAME} + atf_check -o inline:$alg\\n -e empty -s exit:0 \ + zfs get -H -o value compression ${ZFS_POOL_NAME}/dir + atf_check -o inline:off\\n -e empty -s exit:0 \ + zfs get -H -o value compression ${ZFS_POOL_NAME}/dir2 + + atf_check -e ignore dd if=/dev/random \ + of=${TEST_MOUNT_DIR}/dir/random bs=1M count=10 + atf_check -e ignore dd if=/dev/zero \ + of=${TEST_MOUNT_DIR}/dir/zero bs=1M count=10 + atf_check -e ignore dd if=/dev/zero \ + of=${TEST_MOUNT_DIR}/dir2/zero bs=1M count=10 + + # Export and reimport to ensure that everything is + # flushed to disk. + atf_check zpool export ${ZFS_POOL_NAME} + atf_check -o ignore -e empty -s exit:0 \ + zdb -e -p /dev/$(cat $TEST_MD_DEVICE_FILE) -mmm -ddddd \ + $ZFS_POOL_NAME + atf_check zpool import -R $TEST_MOUNT_DIR $ZFS_POOL_NAME + + if [ $alg = off ]; then + # If compression is off, the files should be the + # same size as the input. + atf_check -o match:"^11[[:space:]]+${TEST_MOUNT_DIR}/dir/random" \ + du -m ${TEST_MOUNT_DIR}/dir/random + atf_check -o match:"^11[[:space:]]+${TEST_MOUNT_DIR}/dir/zero" \ + du -m ${TEST_MOUNT_DIR}/dir/zero + atf_check -o match:"^11[[:space:]]+${TEST_MOUNT_DIR}/dir2/zero" \ + du -m ${TEST_MOUNT_DIR}/dir2/zero + else + # If compression is on, the dir/zero file ought + # to be smaller. + atf_check -o match:"^1[[:space:]]+${TEST_MOUNT_DIR}/dir/zero" \ + du -m ${TEST_MOUNT_DIR}/dir/zero + atf_check -o match:"^11[[:space:]]+${TEST_MOUNT_DIR}/dir/random" \ + du -m ${TEST_MOUNT_DIR}/dir/random + atf_check -o match:"^11[[:space:]]+${TEST_MOUNT_DIR}/dir2/zero" \ + du -m ${TEST_MOUNT_DIR}/dir2/zero + fi + + atf_check zpool destroy ${ZFS_POOL_NAME} + atf_check rm -f ${TEST_ZFS_POOL_NAME} + atf_check mdconfig -d -u $(cat ${TEST_MD_DEVICE_FILE}) + atf_check rm -f ${TEST_MD_DEVICE_FILE} + done +} +compression_cleanup() +{ + common_cleanup +} + +# +# Try destroying a dataset that was created by makefs. 
+# +atf_test_case dataset_removal cleanup +dataset_removal_body() +{ + create_test_dirs + + cd $TEST_INPUTS_DIR + mkdir dir + cd - + + atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + -o fs=${ZFS_POOL_NAME}/dir \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents + + atf_check zfs destroy ${ZFS_POOL_NAME}/dir +} +dataset_removal_cleanup() +{ + common_cleanup +} + +# +# Make sure that we can handle some special file types. Anything other than +# a regular file, symlink or directory is ignored. +# +atf_test_case devfs cleanup +devfs_body() +{ + atf_check mkdir dev + atf_check mount -t devfs none ./dev + + atf_check -e match:"skipping unhandled" $MAKEFS -s 1g -o rootpath=/ \ + -o poolname=$ZFS_POOL_NAME $TEST_IMAGE ./dev + + import_image +} +devfs_cleanup() +{ + common_cleanup + umount -f ./dev +} + +# +# Make sure that we can create and remove an empty directory. +# +atf_test_case empty_dir cleanup +empty_dir_body() +{ + create_test_dirs + + cd $TEST_INPUTS_DIR + mkdir dir + cd - + + atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents + + atf_check rmdir ${TEST_MOUNT_DIR}/dir +} +empty_dir_cleanup() +{ + common_cleanup +} + +atf_test_case empty_fs cleanup +empty_fs_body() +{ + create_test_dirs + + atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents +} +empty_fs_cleanup() +{ + common_cleanup +} + +atf_test_case file_extend cleanup +file_extend_body() +{ + local i start + + create_test_dirs + + # Create a file slightly longer than the maximum block size. 
+ start=132 + dd if=/dev/random of=${TEST_INPUTS_DIR}/foo bs=1k count=$start + md5 -q ${TEST_INPUTS_DIR}/foo > foo.md5 + + atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents + + i=0 + while [ $i -lt 1000 ]; do + dd if=/dev/random of=${TEST_MOUNT_DIR}/foo bs=1k count=1 \ + seek=$(($i + $start)) conv=notrunc + # Make sure that the first $start blocks are unmodified. + dd if=${TEST_MOUNT_DIR}/foo bs=1k count=$start of=foo.copy + atf_check -o file:foo.md5 md5 -q foo.copy + i=$(($i + 1)) + done +} +file_extend_cleanup() +{ + common_cleanup +} + +atf_test_case file_sizes cleanup +file_sizes_body() +{ + local i + + create_test_dirs + cd $TEST_INPUTS_DIR + + i=1 + while [ $i -lt $((1 << 20)) ]; do + truncate -s $i ${i}.1 + truncate -s $(($i - 1)) ${i}.2 + truncate -s $(($i + 1)) ${i}.3 + i=$(($i << 1)) + done + + cd - + + # XXXMJ this creates sparse files, make sure makefs doesn't + # preserve the sparseness. 
+ # XXXMJ need to test with larger files (at least 128MB for L2 indirs) + atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents +} +file_sizes_cleanup() +{ + common_cleanup +} + +atf_test_case hard_links cleanup +hard_links_body() +{ + local f + + create_test_dirs + cd $TEST_INPUTS_DIR + + mkdir dir + echo "hello" > 1 + ln 1 2 + ln 1 dir/1 + + echo "goodbye" > dir/a + ln dir/a dir/b + ln dir/a a + + cd - + + atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents + + stat -f '%i' ${TEST_MOUNT_DIR}/1 > ./ino + stat -f '%l' ${TEST_MOUNT_DIR}/1 > ./nlink + for f in 1 2 dir/1; do + atf_check -o file:./nlink -e empty -s exit:0 \ + stat -f '%l' ${TEST_MOUNT_DIR}/${f} + atf_check -o file:./ino -e empty -s exit:0 \ + stat -f '%i' ${TEST_MOUNT_DIR}/${f} + atf_check cmp -s ${TEST_INPUTS_DIR}/1 ${TEST_MOUNT_DIR}/${f} + done + + stat -f '%i' ${TEST_MOUNT_DIR}/dir/a > ./ino + stat -f '%l' ${TEST_MOUNT_DIR}/dir/a > ./nlink + for f in dir/a dir/b a; do + atf_check -o file:./nlink -e empty -s exit:0 \ + stat -f '%l' ${TEST_MOUNT_DIR}/${f} + atf_check -o file:./ino -e empty -s exit:0 \ + stat -f '%i' ${TEST_MOUNT_DIR}/${f} + atf_check cmp -s ${TEST_INPUTS_DIR}/dir/a ${TEST_MOUNT_DIR}/${f} + done +} +hard_links_cleanup() +{ + common_cleanup +} + +# Allocate enough dnodes from an object set that the meta dnode needs to use +# indirect blocks. +atf_test_case indirect_dnode_array cleanup +indirect_dnode_array_body() +{ + local count i + + # How many dnodes do we need to allocate? Well, the data block size + # for meta dnodes is always 16KB, so with a dnode size of 512B we get + # 32 dnodes per direct block. The maximum indirect block size is 128KB + # and that can fit 1024 block pointers, so we need at least 32 * 1024 + # files to force the use of two levels of indirection. 
+ # + # Unfortunately that number of files makes the test run quite slowly, + # so we settle for a single indirect block for now... + count=$(jot -r 1 32 1024) + + create_test_dirs + cd $TEST_INPUTS_DIR + for i in $(seq 1 $count); do + touch $i + done + cd - + + atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents +} +indirect_dnode_array_cleanup() +{ + common_cleanup +} + +# +# Create some files with long names, so as to test fat ZAP handling. +# +atf_test_case long_file_name cleanup +long_file_name_body() +{ + local dir i + + create_test_dirs + cd $TEST_INPUTS_DIR + + # micro ZAP keys can be at most 50 bytes. + for i in $(seq 1 60); do + touch $(jot -s '' $i 1 1) + done + dir=$(jot -s '' 61 1 1) + mkdir $dir + for i in $(seq 1 60); do + touch ${dir}/$(jot -s '' $i 1 1) + done + + cd - + + atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents + + # Add a directory entry in the hope that OpenZFS might catch a bug + # in makefs' fat ZAP encoding. + touch ${TEST_MOUNT_DIR}/foo +} +long_file_name_cleanup() +{ + common_cleanup +} + +# +# Exercise handling of multiple datasets. +# +atf_test_case multi_dataset_1 cleanup +multi_dataset_1_body() +{ + create_test_dirs + cd $TEST_INPUTS_DIR + + mkdir dir1 + echo a > dir1/a + mkdir dir2 + echo b > dir2/b + + cd - + + atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + -o fs=${ZFS_POOL_NAME}/dir1 -o fs=${ZFS_POOL_NAME}/dir2 \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents + + # Make sure that we have three datasets with the expected mount points. 
+ atf_check -o inline:${ZFS_POOL_NAME}\\n -e empty -s exit:0 \ + zfs list -H -o name ${ZFS_POOL_NAME} + atf_check -o inline:${TEST_MOUNT_DIR}\\n -e empty -s exit:0 \ + zfs list -H -o mountpoint ${ZFS_POOL_NAME} + + atf_check -o inline:${ZFS_POOL_NAME}/dir1\\n -e empty -s exit:0 \ + zfs list -H -o name ${ZFS_POOL_NAME}/dir1 + atf_check -o inline:${TEST_MOUNT_DIR}/dir1\\n -e empty -s exit:0 \ + zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1 + + atf_check -o inline:${ZFS_POOL_NAME}/dir2\\n -e empty -s exit:0 \ + zfs list -H -o name ${ZFS_POOL_NAME}/dir2 + atf_check -o inline:${TEST_MOUNT_DIR}/dir2\\n -e empty -s exit:0 \ + zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir2 +} +multi_dataset_1_cleanup() +{ + common_cleanup +} + +# +# Create a pool with two datasets, where the root dataset is mounted below +# the child dataset. +# +atf_test_case multi_dataset_2 cleanup +multi_dataset_2_body() +{ + create_test_dirs + cd $TEST_INPUTS_DIR + + mkdir dir1 + echo a > dir1/a + mkdir dir2 + echo b > dir2/b + + cd - + + atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + -o fs=${ZFS_POOL_NAME}/dir1\;mountpoint=/ \ + -o fs=${ZFS_POOL_NAME}\;mountpoint=/dir1 \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents +} +multi_dataset_2_cleanup() +{ + common_cleanup +} + +# +# Create a dataset with a non-existent mount point. +# +atf_test_case multi_dataset_3 cleanup +multi_dataset_3_body() +{ + create_test_dirs + cd $TEST_INPUTS_DIR + + mkdir dir1 + echo a > dir1/a + + cd - + + atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + -o fs=${ZFS_POOL_NAME}/dir1 \ + -o fs=${ZFS_POOL_NAME}/dir2 \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + atf_check -o inline:${TEST_MOUNT_DIR}/dir2\\n -e empty -s exit:0 \ + zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir2 + + # Mounting dir2 should have created a directory called dir2. Go + # back and create it in the staging tree before comparing. 
+ atf_check mkdir ${TEST_INPUTS_DIR}/dir2 + + check_image_contents +} +multi_dataset_3_cleanup() +{ + common_cleanup +} + +# +# Create an unmounted dataset. +# +atf_test_case multi_dataset_4 cleanup +multi_dataset_4_body() +{ + create_test_dirs + cd $TEST_INPUTS_DIR + + mkdir dir1 + echo a > dir1/a + + cd - + + atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + -o fs=${ZFS_POOL_NAME}/dir1\;canmount=noauto\;mountpoint=none \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + atf_check -o inline:none\\n -e empty -s exit:0 \ + zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1 + + check_image_contents + + atf_check zfs set mountpoint=/dir1 ${ZFS_POOL_NAME}/dir1 + atf_check zfs mount ${ZFS_POOL_NAME}/dir1 + atf_check -o inline:${TEST_MOUNT_DIR}/dir1\\n -e empty -s exit:0 \ + zfs list -H -o mountpoint ${ZFS_POOL_NAME}/dir1 + + # dir1/a should be part of the root dataset, not dir1, so it must no + # longer be visible once the dir1 dataset is mounted over that path. + atf_check -s not-exit:0 -e not-empty stat ${TEST_MOUNT_DIR}/dir1/a +} +multi_dataset_4_cleanup() +{ + common_cleanup +} + +# +# Validate handling of multiple staging directories. 
+# +atf_test_case multi_staging_1 cleanup +multi_staging_1_body() +{ + local tmpdir + + create_test_dirs + cd $TEST_INPUTS_DIR + + mkdir dir1 + echo a > a + echo a > dir1/a + echo z > z + + cd - + + tmpdir=$(mktemp -d) + cd $tmpdir + + mkdir dir2 dir2/dir3 + echo b > dir2/b + echo c > dir2/dir3/c + ln -s dir2/dir3c s + + cd - + + atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE ${TEST_INPUTS_DIR} $tmpdir + + import_image + + check_image_contents -d $tmpdir +} +multi_staging_1_cleanup() +{ + common_cleanup +} + +atf_test_case multi_staging_2 cleanup +multi_staging_2_body() +{ + local tmpdir + + create_test_dirs + cd $TEST_INPUTS_DIR + + mkdir dir + echo a > dir/foo + echo b > dir/bar + + cd - + + tmpdir=$(mktemp -d) + cd $tmpdir + + mkdir dir + echo c > dir/baz + + cd - + + atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE ${TEST_INPUTS_DIR} $tmpdir + + import_image + + # check_image_contents can't easily handle merged directories, so + # just check that the merged directory contains the files we expect. + atf_check -o not-empty stat ${TEST_MOUNT_DIR}/dir/foo + atf_check -o not-empty stat ${TEST_MOUNT_DIR}/dir/bar + atf_check -o not-empty stat ${TEST_MOUNT_DIR}/dir/baz + + if [ "$(ls ${TEST_MOUNT_DIR}/dir | wc -l)" -ne 3 ]; then + atf_fail "Expected 3 files in ${TEST_MOUNT_DIR}/dir" + fi +} +multi_staging_2_cleanup() +{ + common_cleanup +} + +# +# Rudimentary test to verify that two ZFS images created using the same +# parameters and input hierarchy are byte-identical. In particular, makefs(1) +# does not preserve file access times. 
+# +atf_test_case reproducible cleanup +reproducible_body() +{ + create_test_inputs + + atf_check $MAKEFS -s 512m -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + ${TEST_IMAGE}.1 $TEST_INPUTS_DIR + + atf_check $MAKEFS -s 512m -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + ${TEST_IMAGE}.2 $TEST_INPUTS_DIR + + # XXX-MJ cmp(1) is really slow + atf_check cmp ${TEST_IMAGE}.1 ${TEST_IMAGE}.2 +} +reproducible_cleanup() +{ +} + +# +# Verify that we can take a snapshot of a generated dataset. +# +atf_test_case snapshot cleanup +snapshot_body() +{ + create_test_dirs + cd $TEST_INPUTS_DIR + + mkdir dir + echo "hello" > dir/hello + echo "goodbye" > goodbye + + cd - + + atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + atf_check zfs snapshot ${ZFS_POOL_NAME}@1 +} +snapshot_cleanup() +{ + common_cleanup +} + +# +# Check handling of symbolic links. +# +atf_test_case soft_links cleanup +soft_links_body() +{ + create_test_dirs + cd $TEST_INPUTS_DIR + + mkdir dir + ln -s a a + ln -s dir/../a a + ln -s dir/b b + echo 'c' > dir + ln -s dir/c c + # XXX-MJ overflows bonus buffer ln -s $(jot -s '' 320 1 1) 1 + + cd - + + atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents +} +soft_links_cleanup() +{ + common_cleanup +} + +# +# Verify that we can set properties on the root dataset. 
+# +atf_test_case root_props cleanup +root_props_body() +{ + create_test_inputs + + atf_check $MAKEFS -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + -o fs=${ZFS_POOL_NAME}\;atime=off\;setuid=off \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents + + atf_check -o inline:off\\n -e empty -s exit:0 \ + zfs get -H -o value atime $ZFS_POOL_NAME + atf_check -o inline:local\\n -e empty -s exit:0 \ + zfs get -H -o source atime $ZFS_POOL_NAME + atf_check -o inline:off\\n -e empty -s exit:0 \ + zfs get -H -o value setuid $ZFS_POOL_NAME + atf_check -o inline:local\\n -e empty -s exit:0 \ + zfs get -H -o source setuid $ZFS_POOL_NAME +} +root_props_cleanup() +{ + common_cleanup +} + +# +# Verify that usedds and usedchild props are set properly. +# +atf_test_case used_space_props cleanup +used_space_props_body() +{ + local used usedds usedchild + local rootmb childmb totalmb fudge + local status + + create_test_dirs + cd $TEST_INPUTS_DIR + mkdir dir + + rootmb=17 + childmb=39 + totalmb=$(($rootmb + $childmb)) + fudge=$((2 * 1024 * 1024)) + + atf_check -e ignore dd if=/dev/random of=foo bs=1M count=$rootmb + atf_check -e ignore dd if=/dev/random of=dir/bar bs=1M count=$childmb + + cd - + + atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + -o fs=${ZFS_POOL_NAME}/dir \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + # Make sure that each dataset's space usage is no more than 2MB larger + # than their files. This number is magic and might need to change + # someday. 
+ usedds=$(zfs list -o usedds -Hp ${ZFS_POOL_NAME}) + atf_check test $usedds -gt $(($rootmb * 1024 * 1024)) -a \ + $usedds -le $(($rootmb * 1024 * 1024 + $fudge)) + usedds=$(zfs list -o usedds -Hp ${ZFS_POOL_NAME}/dir) + atf_check test $usedds -gt $(($childmb * 1024 * 1024)) -a \ + $usedds -le $(($childmb * 1024 * 1024 + $fudge)) + + # Make sure that the usedchild property value makes sense: the parent's + # value corresponds to the size of the child, and the child has no + # children. + usedchild=$(zfs list -o usedchild -Hp ${ZFS_POOL_NAME}) + atf_check test $usedchild -gt $(($childmb * 1024 * 1024)) -a \ + $usedchild -le $(($childmb * 1024 * 1024 + $fudge)) + atf_check -o inline:'0\n' \ + zfs list -Hp -o usedchild ${ZFS_POOL_NAME}/dir + + # Make sure that the used property value makes sense: the parent's + # value is the sum of the two sizes, and the child's value is the + # same as its usedds value, which has already been checked. + used=$(zfs list -o used -Hp ${ZFS_POOL_NAME}) + atf_check test $used -gt $(($totalmb * 1024 * 1024)) -a \ + $used -le $(($totalmb * 1024 * 1024 + 2 * $fudge)) + used=$(zfs list -o used -Hp ${ZFS_POOL_NAME}/dir) + atf_check -o inline:$used'\n' \ + zfs list -Hp -o usedds ${ZFS_POOL_NAME}/dir + + # Neither dataset has any snapshots. + atf_check -o inline:'0\n' zfs list -Hp -o usedsnap ${ZFS_POOL_NAME} + atf_check -o inline:'0\n' zfs list -Hp -o usedsnap ${ZFS_POOL_NAME}/dir +} +used_space_props_cleanup() +{ + common_cleanup +} + +# Verify that file permissions are set properly. Make sure that non-executable +# files can't be executed. 
+atf_test_case perms cleanup +perms_body() +{ + local mode + + create_test_dirs + cd $TEST_INPUTS_DIR + + for mode in $(seq 0 511); do + mode=$(printf "%04o\n" $mode) + echo 'echo a' > $mode + atf_check chmod $mode $mode + done + + cd - + + atf_check $MAKEFS -s 1g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + + check_image_contents + + for mode in $(seq 0 511); do + mode=$(printf "%04o\n" $mode) + if [ $(($mode & 0111)) -eq 0 ]; then + atf_check -s not-exit:0 -e match:"Permission denied" \ + ${TEST_INPUTS_DIR}/$mode + fi + if [ $(($mode & 0001)) -eq 0 ]; then + atf_check -s not-exit:0 -e match:"Permission denied" \ + su -m tests -c ${TEST_INPUTS_DIR}/$mode + fi + done + +} +perms_cleanup() +{ + common_cleanup +} + +# +# Verify that -T timestamps are honored. +# +atf_test_case T_flag_dir cleanup +T_flag_dir_body() +{ + timestamp=1742574909 + create_test_dirs + mkdir -p $TEST_INPUTS_DIR/dir1 + + atf_check $MAKEFS -T $timestamp -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_INPUTS_DIR + + import_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + atf_check_equal $st_atime $timestamp + atf_check_equal $st_mtime $timestamp + atf_check_equal $st_ctime $timestamp +} + +T_flag_dir_cleanup() +{ + common_cleanup +} + +atf_test_case T_flag_F_flag cleanup +T_flag_F_flag_body() +{ + atf_expect_fail "-F doesn't take precedence over -T" + timestamp_F=1742574909 + timestamp_T=1742574910 + create_test_dirs + mkdir -p $TEST_INPUTS_DIR/dir1 + + atf_check -e empty -o save:$TEST_SPEC_FILE -s exit:0 \ + mtree -c -k "type,time" -p $TEST_INPUTS_DIR + change_mtree_timestamp $TEST_SPEC_FILE $timestamp_F + atf_check -e empty -o not-empty -s exit:0 \ + $MAKEFS -F $TEST_SPEC_FILE -T $timestamp_T -s 10g -o rootpath=/ \ + -o poolname=$ZFS_POOL_NAME $TEST_IMAGE $TEST_INPUTS_DIR + + mount_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + atf_check_equal $st_atime $timestamp_F + atf_check_equal $st_mtime $timestamp_F + atf_check_equal 
$st_ctime $timestamp_F +} + +T_flag_F_flag_cleanup() +{ + common_cleanup +} + +atf_test_case T_flag_mtree cleanup +T_flag_mtree_body() +{ + timestamp=1742574909 + create_test_dirs + mkdir -p $TEST_INPUTS_DIR/dir1 + + atf_check -e empty -o save:$TEST_SPEC_FILE -s exit:0 \ + mtree -c -k "type" -p $TEST_INPUTS_DIR + atf_check $MAKEFS -T $timestamp -s 10g -o rootpath=/ -o poolname=$ZFS_POOL_NAME \ + $TEST_IMAGE $TEST_SPEC_FILE + + import_image + eval $(stat -s $TEST_MOUNT_DIR/dir1) + atf_check_equal $st_atime $timestamp + atf_check_equal $st_mtime $timestamp + atf_check_equal $st_ctime $timestamp +} + +T_flag_mtree_cleanup() +{ + common_cleanup +} + +atf_init_test_cases() +{ + atf_add_test_case autoexpand + atf_add_test_case basic + atf_add_test_case compression + atf_add_test_case dataset_removal + atf_add_test_case devfs + atf_add_test_case empty_dir + atf_add_test_case empty_fs + atf_add_test_case file_extend + atf_add_test_case file_sizes + atf_add_test_case hard_links + atf_add_test_case indirect_dnode_array + atf_add_test_case long_file_name + atf_add_test_case multi_dataset_1 + atf_add_test_case multi_dataset_2 + atf_add_test_case multi_dataset_3 + atf_add_test_case multi_dataset_4 + atf_add_test_case multi_staging_1 + atf_add_test_case multi_staging_2 + atf_add_test_case reproducible + atf_add_test_case snapshot + atf_add_test_case soft_links + atf_add_test_case root_props + atf_add_test_case used_space_props + atf_add_test_case perms + atf_add_test_case T_flag_dir + atf_add_test_case T_flag_F_flag + atf_add_test_case T_flag_mtree + + # XXXMJ tests: + # - test with different ashifts (at least, 9 and 12), different image sizes + # - create datasets in imported pool +} diff --git a/usr.sbin/makefs/walk.c b/usr.sbin/makefs/walk.c index 79b68d1d3e19..65ba3f41fe02 100644 --- a/usr.sbin/makefs/walk.c +++ b/usr.sbin/makefs/walk.c @@ -37,10 +37,6 @@ * POSSIBILITY OF SUCH DAMAGE. 
*/ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - #include <sys/param.h> #include <sys/stat.h> #include <sys/time.h> @@ -63,6 +59,50 @@ static void apply_specentry(const char *, NODE *, fsnode *); static fsnode *create_fsnode(const char *, const char *, const char *, struct stat *); +static int +cmp(const void *_a, const void *_b) +{ + const fsnode * const *a = _a; + const fsnode * const *b = _b; + + assert(strcmp((*a)->name, (*b)->name) != 0); + if (strcmp((*a)->name, ".") == 0) + return (-1); + if (strcmp((*b)->name, ".") == 0) + return (1); + return (strcoll((*a)->name, (*b)->name)); +} + +/* + * Sort the entries rather than relying on the order given by readdir(3), + * which might not be reproducible. + */ +static fsnode * +sort_dir(fsnode *list) +{ + fsnode **array; + fsnode *cur; + size_t nitems, i; + + nitems = 0; + for (cur = list; cur != NULL; cur = cur->next) + nitems++; + assert(nitems > 0); + + array = malloc(nitems * sizeof(fsnode *)); + if (array == NULL) + err(1, "malloc"); + for (i = 0, cur = list; cur != NULL; i++, cur = cur->next) + array[i] = cur; + qsort(array, nitems, sizeof(fsnode *), cmp); + for (i = 0; i < nitems; i++) { + array[i]->first = array[0]; + array[i]->next = i == nitems - 1 ? 
NULL : array[i + 1]; + } + cur = array[0]; + free(array); + return (cur); +} /* * walk_dir -- @@ -75,7 +115,7 @@ static fsnode *create_fsnode(const char *, const char *, const char *, fsnode * walk_dir(const char *root, const char *dir, fsnode *parent, fsnode *join) { - fsnode *first, *cur, *prev, *last; + fsnode *first, *cur; DIR *dirp; struct dirent *dent; char path[MAXPATHLEN + 1]; @@ -99,10 +139,8 @@ walk_dir(const char *root, const char *dir, fsnode *parent, fsnode *join) first = cur = join; while (cur->next != NULL) cur = cur->next; - prev = cur; } else - first = prev = NULL; - last = prev; + first = NULL; while ((dent = readdir(dirp)) != NULL) { name = dent->d_name; dot = 0; @@ -140,10 +178,6 @@ walk_dir(const char *root, const char *dir, fsnode *parent, fsnode *join) for (;;) { if (cur == NULL || strcmp(cur->name, name) == 0) break; - if (cur == last) { - cur = NULL; - break; - } cur = cur->next; } if (cur != NULL) { @@ -164,24 +198,11 @@ walk_dir(const char *root, const char *dir, fsnode *parent, fsnode *join) cur = create_fsnode(root, dir, name, &stbuf); cur->parent = parent; - if (dot) { - /* ensure "." is at the start of the list */ - cur->next = first; - first = cur; - if (! prev) - prev = cur; - cur->first = first; - } else { /* not "." 
*/ - if (prev) - prev->next = cur; - prev = cur; - if (!first) - first = cur; - cur->first = first; - if (S_ISDIR(cur->type)) { - cur->child = walk_dir(root, rp, cur, NULL); - continue; - } + cur->next = first; + first = cur; + if (!dot && S_ISDIR(cur->type)) { + cur->child = walk_dir(root, rp, cur, NULL); + continue; } if (stbuf.st_nlink > 1) { fsinode *curino; @@ -208,13 +229,9 @@ walk_dir(const char *root, const char *dir, fsnode *parent, fsnode *join) cur->symlink = estrdup(slink); } } - assert(first != NULL); - if (join == NULL) - for (cur = first->next; cur != NULL; cur = cur->next) - cur->first = first; if (closedir(dirp) == -1) err(1, "Can't closedir `%s/%s'", root, dir); - return (first); + return (sort_dir(first)); } static fsnode * @@ -231,20 +248,8 @@ create_fsnode(const char *root, const char *path, const char *name, cur->type = stbuf->st_mode & S_IFMT; cur->inode->nlink = 1; cur->inode->st = *stbuf; - if (stampst.st_ino) { - cur->inode->st.st_atime = stampst.st_atime; - cur->inode->st.st_mtime = stampst.st_mtime; - cur->inode->st.st_ctime = stampst.st_ctime; -#if HAVE_STRUCT_STAT_ST_MTIMENSEC - cur->inode->st.st_atimensec = stampst.st_atimensec; - cur->inode->st.st_mtimensec = stampst.st_mtimensec; - cur->inode->st.st_ctimensec = stampst.st_ctimensec; -#endif -#if HAVE_STRUCT_STAT_BIRTHTIME - cur->inode->st.st_birthtime = stampst.st_birthtime; - cur->inode->st.st_birthtimensec = stampst.st_birthtimensec; -#endif - } + if (stampst.st_ino != 0) + set_tstamp(cur); return (cur); } @@ -534,14 +539,12 @@ apply_specentry(const char *dir, NODE *specnode, fsnode *dirnode) dirnode->inode->st.st_uid, specnode->st_uid); dirnode->inode->st.st_uid = specnode->st_uid; } -#if HAVE_STRUCT_STAT_ST_FLAGS if (specnode->flags & F_FLAGS) { ASEPRINT("flags", "%#lX", - (unsigned long)dirnode->inode->st.st_flags, + (unsigned long)FSINODE_ST_FLAGS(*dirnode->inode), (unsigned long)specnode->st_flags); - dirnode->inode->st.st_flags = specnode->st_flags; + 
FSINODE_ST_FLAGS(*dirnode->inode) = specnode->st_flags; } -#endif /* if (specnode->flags & F_DEV) { ASEPRINT("rdev", "%#llx", (unsigned long long)dirnode->inode->st.st_rdev, @@ -607,8 +610,6 @@ inode_type(mode_t mode) return ("symlink"); if (S_ISDIR(mode)) return ("dir"); - if (S_ISLNK(mode)) - return ("link"); if (S_ISFIFO(mode)) return ("fifo"); if (S_ISSOCK(mode)) diff --git a/usr.sbin/makefs/zfs.c b/usr.sbin/makefs/zfs.c new file mode 100644 index 000000000000..8d50c450541b --- /dev/null +++ b/usr.sbin/makefs/zfs.c @@ -0,0 +1,808 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 The FreeBSD Foundation + * + * This software was developed by Mark Johnston under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/errno.h> +#include <sys/queue.h> + +#include <assert.h> +#include <ctype.h> +#include <fcntl.h> +#include <stdalign.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <util.h> + +#include "makefs.h" +#include "zfs.h" + +#define VDEV_LABEL_SPACE \ + ((off_t)(VDEV_LABEL_START_SIZE + VDEV_LABEL_END_SIZE)) +_Static_assert(VDEV_LABEL_SPACE <= MINDEVSIZE, ""); + +#define MINMSSIZE ((off_t)1 << 24) /* 16MB */ +#define DFLTMSSIZE ((off_t)1 << 29) /* 512MB */ +#define MAXMSSIZE ((off_t)1 << 34) /* 16GB */ + +#define INDIR_LEVELS 6 +/* Indirect blocks are always 128KB. 
*/ +#define BLKPTR_PER_INDIR (MAXBLOCKSIZE / sizeof(blkptr_t)) + +struct dnode_cursor { + char inddir[INDIR_LEVELS][MAXBLOCKSIZE]; + off_t indloc; + off_t indspace; + dnode_phys_t *dnode; + off_t dataoff; + off_t datablksz; +}; + +void +zfs_prep_opts(fsinfo_t *fsopts) +{ + zfs_opt_t *zfs; + size_t align; + + align = alignof(uint64_t); + zfs = aligned_alloc(align, roundup2(sizeof(*zfs), align)); + if (zfs == NULL) + err(1, "aligned_alloc"); + memset(zfs, 0, sizeof(*zfs)); + + const option_t zfs_options[] = { + { '\0', "bootfs", &zfs->bootfs, OPT_STRPTR, + 0, 0, "Bootable dataset" }, + { '\0', "mssize", &zfs->mssize, OPT_INT64, + MINMSSIZE, MAXMSSIZE, "Metaslab size" }, + { '\0', "poolguid", &zfs->poolguid, OPT_INT64, + 0, INT64_MAX, "ZFS pool GUID" }, + { '\0', "poolname", &zfs->poolname, OPT_STRPTR, + 0, 0, "ZFS pool name" }, + { '\0', "rootpath", &zfs->rootpath, OPT_STRPTR, + 0, 0, "Prefix for all dataset mount points" }, + { '\0', "ashift", &zfs->ashift, OPT_INT32, + MINBLOCKSHIFT, MAXBLOCKSHIFT, "ZFS pool ashift" }, + { '\0', "verify-txgs", &zfs->verify_txgs, OPT_BOOL, + 0, 0, "Make OpenZFS verify data upon import" }, + { '\0', "nowarn", &zfs->nowarn, OPT_BOOL, + 0, 0, "Provided for backwards compatibility, ignored" }, + { .name = NULL } + }; + + STAILQ_INIT(&zfs->datasetdescs); + + fsopts->fs_specific = zfs; + fsopts->fs_options = copy_opts(zfs_options); +} + +int +zfs_parse_opts(const char *option, fsinfo_t *fsopts) +{ + zfs_opt_t *zfs; + struct dataset_desc *dsdesc; + char buf[BUFSIZ], *opt, *val; + int rv; + + zfs = fsopts->fs_specific; + + opt = val = estrdup(option); + opt = strsep(&val, "="); + if (strcmp(opt, "fs") == 0) { + if (val == NULL) + errx(1, "invalid filesystem parameters `%s'", option); + + /* + * Dataset descriptions will be parsed later, in dsl_init(). + * Just stash them away for now. 
+ */ + dsdesc = ecalloc(1, sizeof(*dsdesc)); + dsdesc->params = estrdup(val); + free(opt); + STAILQ_INSERT_TAIL(&zfs->datasetdescs, dsdesc, next); + return (1); + } + free(opt); + + rv = set_option(fsopts->fs_options, option, buf, sizeof(buf)); + return (rv == -1 ? 0 : 1); +} + +static void +zfs_size_vdev(fsinfo_t *fsopts) +{ + zfs_opt_t *zfs; + off_t asize, mssize, vdevsize, vdevsize1; + + zfs = fsopts->fs_specific; + + assert(fsopts->maxsize != 0); + assert(zfs->ashift != 0); + + /* + * Figure out how big the vdev should be. + */ + vdevsize = rounddown2(fsopts->maxsize, 1 << zfs->ashift); + if (vdevsize < MINDEVSIZE) + errx(1, "maximum image size is too small"); + if (vdevsize < fsopts->minsize || vdevsize > fsopts->maxsize) { + errx(1, "image size bounds must be multiples of %d", + 1 << zfs->ashift); + } + asize = vdevsize - VDEV_LABEL_SPACE; + + /* + * Size metaslabs according to the following heuristic: + * - provide at least 8 metaslabs, + * - without using a metaslab size larger than 512MB. + * This approximates what OpenZFS does without being complicated. In + * practice we expect pools to be expanded upon first use, and OpenZFS + * does not resize metaslabs in that case, so there is no right answer + * here. In general we want to provide large metaslabs even if the + * image size is small, and 512MB is a reasonable size for pools up to + * several hundred gigabytes. + * + * The user may override this heuristic using the "-o mssize" option. + */ + mssize = zfs->mssize; + if (mssize == 0) { + mssize = MAX(MIN(asize / 8, DFLTMSSIZE), MINMSSIZE); + if (!powerof2(mssize)) + mssize = 1l << (flsll(mssize) - 1); + } + if (!powerof2(mssize)) + errx(1, "metaslab size must be a power of 2"); + + /* + * If we have some slop left over, try to cover it by resizing the vdev, + * subject to the maxsize and minsize parameters. 
+ */ + if (asize % mssize != 0) { + vdevsize1 = rounddown2(asize, mssize) + VDEV_LABEL_SPACE; + if (vdevsize1 < fsopts->minsize) + vdevsize1 = roundup2(asize, mssize) + VDEV_LABEL_SPACE; + if (vdevsize1 <= fsopts->maxsize) + vdevsize = vdevsize1; + } + asize = vdevsize - VDEV_LABEL_SPACE; + + zfs->asize = asize; + zfs->vdevsize = vdevsize; + zfs->mssize = mssize; + zfs->msshift = flsll(mssize) - 1; + zfs->mscount = asize / mssize; +} + +/* + * Validate options and set some default values. + */ +static void +zfs_check_opts(fsinfo_t *fsopts) +{ + zfs_opt_t *zfs; + + zfs = fsopts->fs_specific; + + if (fsopts->offset != 0) + errx(1, "unhandled offset option"); + if (fsopts->maxsize == 0) + errx(1, "an image size must be specified"); + + if (zfs->poolname == NULL) + errx(1, "a pool name must be specified"); + if (!isalpha(zfs->poolname[0])) + errx(1, "the pool name must begin with a letter"); + for (size_t i = 0, len = strlen(zfs->poolname); i < len; i++) { + if (!isalnum(zfs->poolname[i]) && zfs->poolname[i] != '_') + errx(1, "invalid character '%c' in pool name", + zfs->poolname[i]); + } + if (strcmp(zfs->poolname, "mirror") == 0 || + strcmp(zfs->poolname, "raidz") == 0 || + strcmp(zfs->poolname, "draid") == 0) { + errx(1, "pool name '%s' is reserved and cannot be used", + zfs->poolname); + } + + if (zfs->rootpath == NULL) + easprintf(&zfs->rootpath, "/%s", zfs->poolname); + if (zfs->rootpath[0] != '/') + errx(1, "mountpoint `%s' must be absolute", zfs->rootpath); + + if (zfs->ashift == 0) + zfs->ashift = 12; + + zfs_size_vdev(fsopts); +} + +void +zfs_cleanup_opts(fsinfo_t *fsopts) +{ + struct dataset_desc *d, *tmp; + zfs_opt_t *zfs; + + zfs = fsopts->fs_specific; + free(zfs->rootpath); + free(zfs->bootfs); + free(__DECONST(void *, zfs->poolname)); + STAILQ_FOREACH_SAFE(d, &zfs->datasetdescs, next, tmp) { + free(d->params); + free(d); + } + free(zfs); + free(fsopts->fs_options); +} + +static size_t +nvlist_size(const nvlist_t *nvl) +{ + return (sizeof(nvl->nv_header) 
+ nvl->nv_size); +} + +static void +nvlist_copy(const nvlist_t *nvl, char *buf, size_t sz) +{ + assert(sz >= nvlist_size(nvl)); + + memcpy(buf, &nvl->nv_header, sizeof(nvl->nv_header)); + memcpy(buf + sizeof(nvl->nv_header), nvl->nv_data, nvl->nv_size); +} + +/* + * Avoid returning a GUID of 0, just to avoid the possibility that something + * will interpret that as meaning that the GUID is uninitialized. + */ +uint64_t +randomguid(void) +{ + uint64_t ret; + + do { + ret = ((uint64_t)random() << 32) | random(); + } while (ret == 0); + + return (ret); +} + +static nvlist_t * +pool_config_nvcreate(zfs_opt_t *zfs) +{ + nvlist_t *featuresnv, *poolnv; + + poolnv = nvlist_create(NV_UNIQUE_NAME); + nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_TXG, TXG); + nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VERSION, SPA_VERSION); + nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_STATE, POOL_STATE_EXPORTED); + nvlist_add_string(poolnv, ZPOOL_CONFIG_POOL_NAME, zfs->poolname); + nvlist_add_uint64(poolnv, ZPOOL_CONFIG_POOL_GUID, zfs->poolguid); + nvlist_add_uint64(poolnv, ZPOOL_CONFIG_TOP_GUID, zfs->vdevguid); + nvlist_add_uint64(poolnv, ZPOOL_CONFIG_GUID, zfs->vdevguid); + nvlist_add_uint64(poolnv, ZPOOL_CONFIG_VDEV_CHILDREN, 1); + + featuresnv = nvlist_create(NV_UNIQUE_NAME); + nvlist_add_nvlist(poolnv, ZPOOL_CONFIG_FEATURES_FOR_READ, featuresnv); + nvlist_destroy(featuresnv); + + return (poolnv); +} + +static nvlist_t * +pool_disk_vdev_config_nvcreate(zfs_opt_t *zfs) +{ + nvlist_t *diskvdevnv; + + assert(zfs->objarrid != 0); + + diskvdevnv = nvlist_create(NV_UNIQUE_NAME); + nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_DISK); + nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASHIFT, zfs->ashift); + nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ASIZE, zfs->asize); + nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_GUID, zfs->vdevguid); + nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_ID, 0); + nvlist_add_string(diskvdevnv, ZPOOL_CONFIG_PATH, "/dev/null"); + nvlist_add_uint64(diskvdevnv, 
ZPOOL_CONFIG_WHOLE_DISK, 1); + nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG); + nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_ARRAY, + zfs->objarrid); + nvlist_add_uint64(diskvdevnv, ZPOOL_CONFIG_METASLAB_SHIFT, + zfs->msshift); + + return (diskvdevnv); +} + +static nvlist_t * +pool_root_vdev_config_nvcreate(zfs_opt_t *zfs) +{ + nvlist_t *diskvdevnv, *rootvdevnv; + + diskvdevnv = pool_disk_vdev_config_nvcreate(zfs); + rootvdevnv = nvlist_create(NV_UNIQUE_NAME); + + nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_ID, 0); + nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_GUID, zfs->poolguid); + nvlist_add_string(rootvdevnv, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT); + nvlist_add_uint64(rootvdevnv, ZPOOL_CONFIG_CREATE_TXG, TXG); + nvlist_add_nvlist_array(rootvdevnv, ZPOOL_CONFIG_CHILDREN, &diskvdevnv, + 1); + nvlist_destroy(diskvdevnv); + + return (rootvdevnv); +} + +/* + * Create the pool's "config" object, which contains an nvlist describing pool + * parameters and the vdev topology. It is similar but not identical to the + * nvlist stored in vdev labels. The main difference is that vdev labels do not + * describe the full vdev tree and in particular do not contain the "root" + * meta-vdev. 
+ */ +static void +pool_init_objdir_config(zfs_opt_t *zfs, zfs_zap_t *objdir) +{ + dnode_phys_t *dnode; + nvlist_t *poolconfig, *vdevconfig; + void *configbuf; + uint64_t dnid; + off_t configloc, configblksz; + int error; + + dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_PACKED_NVLIST, + DMU_OT_PACKED_NVLIST_SIZE, sizeof(uint64_t), &dnid); + + poolconfig = pool_config_nvcreate(zfs); + + vdevconfig = pool_root_vdev_config_nvcreate(zfs); + nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig); + nvlist_destroy(vdevconfig); + + error = nvlist_export(poolconfig); + if (error != 0) + errc(1, error, "nvlist_export"); + + configblksz = nvlist_size(poolconfig); + configloc = objset_space_alloc(zfs, zfs->mos, &configblksz); + configbuf = ecalloc(1, configblksz); + nvlist_copy(poolconfig, configbuf, configblksz); + + vdev_pwrite_dnode_data(zfs, dnode, configbuf, configblksz, configloc); + + dnode->dn_datablkszsec = configblksz >> MINBLOCKSHIFT; + dnode->dn_flags = DNODE_FLAG_USED_BYTES; + *(uint64_t *)DN_BONUS(dnode) = nvlist_size(poolconfig); + + zap_add_uint64(objdir, DMU_POOL_CONFIG, dnid); + + nvlist_destroy(poolconfig); + free(configbuf); +} + +/* + * Add objects block pointer list objects, used for deferred frees. We don't do + * anything with them, but they need to be present or OpenZFS will refuse to + * import the pool. + */ +static void +pool_init_objdir_bplists(zfs_opt_t *zfs __unused, zfs_zap_t *objdir) +{ + uint64_t dnid; + + (void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR, + BPOBJ_SIZE_V2, &dnid); + zap_add_uint64(objdir, DMU_POOL_FREE_BPOBJ, dnid); + + (void)objset_dnode_bonus_alloc(zfs->mos, DMU_OT_BPOBJ, DMU_OT_BPOBJ_HDR, + BPOBJ_SIZE_V2, &dnid); + zap_add_uint64(objdir, DMU_POOL_SYNC_BPLIST, dnid); +} + +/* + * Add required feature metadata objects. We don't know anything about ZFS + * features, so the objects are just empty ZAPs. 
+ */ +static void +pool_init_objdir_feature_maps(zfs_opt_t *zfs, zfs_zap_t *objdir) +{ + dnode_phys_t *dnode; + uint64_t dnid; + + dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); + zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_READ, dnid); + zap_write(zfs, zap_alloc(zfs->mos, dnode)); + + dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); + zap_add_uint64(objdir, DMU_POOL_FEATURES_FOR_WRITE, dnid); + zap_write(zfs, zap_alloc(zfs->mos, dnode)); + + dnode = objset_dnode_alloc(zfs->mos, DMU_OTN_ZAP_METADATA, &dnid); + zap_add_uint64(objdir, DMU_POOL_FEATURE_DESCRIPTIONS, dnid); + zap_write(zfs, zap_alloc(zfs->mos, dnode)); +} + +static void +pool_init_objdir_dsl(zfs_opt_t *zfs, zfs_zap_t *objdir) +{ + zap_add_uint64(objdir, DMU_POOL_ROOT_DATASET, + dsl_dir_id(zfs->rootdsldir)); +} + +static void +pool_init_objdir_poolprops(zfs_opt_t *zfs, zfs_zap_t *objdir) +{ + dnode_phys_t *dnode; + uint64_t id; + + dnode = objset_dnode_alloc(zfs->mos, DMU_OT_POOL_PROPS, &id); + zap_add_uint64(objdir, DMU_POOL_PROPS, id); + + zfs->poolprops = zap_alloc(zfs->mos, dnode); +} + +/* + * Initialize the MOS object directory, the root of virtually all of the pool's + * data and metadata. + */ +static void +pool_init_objdir(zfs_opt_t *zfs) +{ + zfs_zap_t *zap; + dnode_phys_t *objdir; + + objdir = objset_dnode_lookup(zfs->mos, DMU_POOL_DIRECTORY_OBJECT); + + zap = zap_alloc(zfs->mos, objdir); + pool_init_objdir_config(zfs, zap); + pool_init_objdir_bplists(zfs, zap); + pool_init_objdir_feature_maps(zfs, zap); + pool_init_objdir_dsl(zfs, zap); + pool_init_objdir_poolprops(zfs, zap); + zap_write(zfs, zap); +} + +/* + * Initialize the meta-object set (MOS) and immediately write out several + * special objects whose contents are already finalized, including the object + * directory. 
+ * + * Once the MOS is finalized, it'll look roughly like this: + * + * object directory (ZAP) + * |-> vdev config object (nvlist) + * |-> features for read + * |-> features for write + * |-> feature descriptions + * |-> sync bplist + * |-> free bplist + * |-> pool properties + * L-> root DSL directory + * |-> DSL child directory (ZAP) + * | |-> $MOS (DSL dir) + * | | |-> child map + * | | L-> props (ZAP) + * | |-> $FREE (DSL dir) + * | | |-> child map + * | | L-> props (ZAP) + * | |-> $ORIGIN (DSL dir) + * | | |-> child map + * | | |-> dataset + * | | | L-> deadlist + * | | |-> snapshot + * | | | |-> deadlist + * | | | L-> snapshot names + * | | |-> props (ZAP) + * | | L-> clones (ZAP) + * | |-> dataset 1 (DSL dir) + * | | |-> DSL dataset + * | | | |-> snapshot names + * | | | L-> deadlist + * | | |-> child map + * | | | L-> ... + * | | L-> props + * | |-> dataset 2 + * | | L-> ... + * | |-> ... + * | L-> dataset n + * |-> DSL root dataset + * | |-> snapshot names + * | L-> deadlist + * L-> props (ZAP) + * space map object array + * |-> space map 1 + * |-> space map 2 + * |-> ... + * L-> space map n (zfs->mscount) + * + * The space map object array is pointed to by the "msarray" property in the + * pool configuration. + */ +static void +pool_init(zfs_opt_t *zfs) +{ + uint64_t dnid; + + if (zfs->poolguid == 0) + zfs->poolguid = randomguid(); + zfs->vdevguid = randomguid(); + + zfs->mos = objset_alloc(zfs, DMU_OST_META); + + (void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_DIRECTORY, &dnid); + assert(dnid == DMU_POOL_DIRECTORY_OBJECT); + + (void)objset_dnode_alloc(zfs->mos, DMU_OT_OBJECT_ARRAY, &zfs->objarrid); + + dsl_init(zfs); + + pool_init_objdir(zfs); +} + +static void +pool_labels_write(zfs_opt_t *zfs) +{ + uberblock_t *ub; + vdev_label_t *label; + nvlist_t *poolconfig, *vdevconfig; + int error; + + label = ecalloc(1, sizeof(*label)); + + /* + * Assemble the vdev configuration and store it in the label. 
+ */ + poolconfig = pool_config_nvcreate(zfs); + vdevconfig = pool_disk_vdev_config_nvcreate(zfs); + nvlist_add_nvlist(poolconfig, ZPOOL_CONFIG_VDEV_TREE, vdevconfig); + nvlist_destroy(vdevconfig); + + error = nvlist_export(poolconfig); + if (error != 0) + errc(1, error, "nvlist_export"); + nvlist_copy(poolconfig, label->vl_vdev_phys.vp_nvlist, + sizeof(label->vl_vdev_phys.vp_nvlist)); + nvlist_destroy(poolconfig); + + /* + * Fill out the uberblock. Just make each one the same. The embedded + * checksum is calculated in vdev_label_write(). + */ + for (size_t uoff = 0; uoff < sizeof(label->vl_uberblock); + uoff += (1 << zfs->ashift)) { + ub = (uberblock_t *)(&label->vl_uberblock[0] + uoff); + ub->ub_magic = UBERBLOCK_MAGIC; + ub->ub_version = SPA_VERSION; + + /* + * Upon import, OpenZFS will perform metadata verification of + * the last TXG by default. If all data is written in the same + * TXG, it'll all get verified, which can be painfully slow in + * some cases, e.g., initial boot in a cloud environment with + * slow storage. So, fabricate additional TXGs to avoid this + * overhead, unless the user requests otherwise. + */ + ub->ub_txg = TXG; + if (!zfs->verify_txgs) + ub->ub_txg += TXG_SIZE; + ub->ub_guid_sum = zfs->poolguid + zfs->vdevguid; + ub->ub_timestamp = 0; + + ub->ub_software_version = SPA_VERSION; + ub->ub_mmp_magic = MMP_MAGIC; + ub->ub_mmp_delay = 0; + ub->ub_mmp_config = 0; + ub->ub_checkpoint_txg = 0; + objset_root_blkptr_copy(zfs->mos, &ub->ub_rootbp); + } + + /* + * Write out four copies of the label: two at the beginning of the vdev + * and two at the end. 
+ */ + for (int i = 0; i < VDEV_LABELS; i++) + vdev_label_write(zfs, i, label); + + free(label); +} + +static void +pool_fini(zfs_opt_t *zfs) +{ + zap_write(zfs, zfs->poolprops); + dsl_write(zfs); + objset_write(zfs, zfs->mos); + pool_labels_write(zfs); +} + +struct dnode_cursor * +dnode_cursor_init(zfs_opt_t *zfs, zfs_objset_t *os, dnode_phys_t *dnode, + off_t size, off_t blksz) +{ + struct dnode_cursor *c; + uint64_t nbppindir, indlevel, ndatablks, nindblks; + + assert(dnode->dn_nblkptr == 1); + assert(blksz <= MAXBLOCKSIZE); + + if (blksz == 0) { + /* Must be between 1<<ashift and 128KB. */ + blksz = MIN(MAXBLOCKSIZE, MAX(1 << zfs->ashift, + powerof2(size) ? size : (1l << flsll(size)))); + } + assert(powerof2(blksz)); + + /* + * Do we need indirect blocks? Figure out how many levels are needed + * (indlevel == 1 means no indirect blocks) and how much space is needed + * (it has to be allocated up-front to break the dependency cycle + * described in objset_write()). + */ + ndatablks = size == 0 ? 0 : howmany(size, blksz); + nindblks = 0; + for (indlevel = 1, nbppindir = 1; ndatablks > nbppindir; indlevel++) { + nbppindir *= BLKPTR_PER_INDIR; + nindblks += howmany(ndatablks, indlevel * nbppindir); + } + assert(indlevel < INDIR_LEVELS); + + dnode->dn_nlevels = (uint8_t)indlevel; + dnode->dn_maxblkid = ndatablks > 0 ? 
ndatablks - 1 : 0; + dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT; + + c = ecalloc(1, sizeof(*c)); + if (nindblks > 0) { + c->indspace = nindblks * MAXBLOCKSIZE; + c->indloc = objset_space_alloc(zfs, os, &c->indspace); + } + c->dnode = dnode; + c->dataoff = 0; + c->datablksz = blksz; + + return (c); +} + +static void +_dnode_cursor_flush(zfs_opt_t *zfs, struct dnode_cursor *c, unsigned int levels) +{ + blkptr_t *bp, *pbp; + void *buf; + uint64_t fill; + off_t blkid, blksz, loc; + + assert(levels > 0); + assert(levels <= c->dnode->dn_nlevels - 1U); + + blksz = MAXBLOCKSIZE; + blkid = (c->dataoff / c->datablksz) / BLKPTR_PER_INDIR; + for (unsigned int level = 1; level <= levels; level++) { + buf = c->inddir[level - 1]; + + if (level == c->dnode->dn_nlevels - 1U) { + pbp = &c->dnode->dn_blkptr[0]; + } else { + uint64_t iblkid; + + iblkid = blkid & (BLKPTR_PER_INDIR - 1); + pbp = (blkptr_t *) + &c->inddir[level][iblkid * sizeof(blkptr_t)]; + } + + /* + * Space for indirect blocks is allocated up-front; see the + * comment in objset_write(). + */ + loc = c->indloc; + c->indloc += blksz; + assert(c->indspace >= blksz); + c->indspace -= blksz; + + bp = buf; + fill = 0; + for (size_t i = 0; i < BLKPTR_PER_INDIR; i++) + fill += BP_GET_FILL(&bp[i]); + + vdev_pwrite_dnode_indir(zfs, c->dnode, level, fill, buf, blksz, + loc, pbp); + memset(buf, 0, MAXBLOCKSIZE); + + blkid /= BLKPTR_PER_INDIR; + } +} + +blkptr_t * +dnode_cursor_next(zfs_opt_t *zfs, struct dnode_cursor *c, off_t off) +{ + off_t blkid, l1id; + unsigned int levels; + + if (c->dnode->dn_nlevels == 1) { + assert(off < MAXBLOCKSIZE); + return (&c->dnode->dn_blkptr[0]); + } + + assert(off % c->datablksz == 0); + + /* Do we need to flush any full indirect blocks? 
*/ + if (off > 0) { + blkid = off / c->datablksz; + for (levels = 0; levels < c->dnode->dn_nlevels - 1U; levels++) { + if (blkid % BLKPTR_PER_INDIR != 0) + break; + blkid /= BLKPTR_PER_INDIR; + } + if (levels > 0) + _dnode_cursor_flush(zfs, c, levels); + } + + c->dataoff = off; + l1id = (off / c->datablksz) & (BLKPTR_PER_INDIR - 1); + return ((blkptr_t *)&c->inddir[0][l1id * sizeof(blkptr_t)]); +} + +void +dnode_cursor_finish(zfs_opt_t *zfs, struct dnode_cursor *c) +{ + unsigned int levels; + + assert(c->dnode->dn_nlevels > 0); + levels = c->dnode->dn_nlevels - 1; + if (levels > 0) + _dnode_cursor_flush(zfs, c, levels); + assert(c->indspace == 0); + free(c); +} + +void +zfs_makefs(const char *image, const char *dir, fsnode *root, fsinfo_t *fsopts) +{ + zfs_opt_t *zfs; + int dirfd; + + zfs = fsopts->fs_specific; + + /* + * Use a fixed seed to provide reproducible pseudo-random numbers for + * on-disk structures when needed (e.g., GUIDs, ZAP hash salts). + */ + srandom(1729); + + zfs_check_opts(fsopts); + + dirfd = open(dir, O_DIRECTORY | O_RDONLY); + if (dirfd < 0) + err(1, "open(%s)", dir); + + vdev_init(zfs, image); + pool_init(zfs); + fs_build(zfs, dirfd, root); + pool_fini(zfs); + vdev_fini(zfs); +} diff --git a/usr.sbin/makefs/zfs/Makefile.inc b/usr.sbin/makefs/zfs/Makefile.inc new file mode 100644 index 000000000000..78561813e229 --- /dev/null +++ b/usr.sbin/makefs/zfs/Makefile.inc @@ -0,0 +1,13 @@ +.PATH: ${SRCDIR}/zfs +.PATH: ${SRCTOP}/stand/libsa/zfs + +SRCS+= dsl.c \ + fs.c \ + objset.c \ + vdev.c \ + zap.c + +SRCS+= nvlist.c + +CFLAGS.nvlist.c+= -I${SRCTOP}/stand/libsa -Wno-cast-qual +CWARNFLAGS.zap.c+= -Wno-sign-compare diff --git a/usr.sbin/makefs/zfs/dsl.c b/usr.sbin/makefs/zfs/dsl.c new file mode 100644 index 000000000000..1977521d7f92 --- /dev/null +++ b/usr.sbin/makefs/zfs/dsl.c @@ -0,0 +1,661 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 The FreeBSD Foundation + * + * This software was developed by Mark Johnston under 
sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include <util.h> + +#include "makefs.h" +#include "zfs.h" + +typedef struct zfs_dsl_dataset { + zfs_objset_t *os; /* referenced objset, may be null */ + dsl_dataset_phys_t *phys; /* on-disk representation */ + uint64_t dsid; /* DSL dataset dnode */ + + struct zfs_dsl_dir *dir; /* containing parent */ +} zfs_dsl_dataset_t; + +typedef STAILQ_HEAD(zfs_dsl_dir_list, zfs_dsl_dir) zfs_dsl_dir_list_t; + +typedef struct zfs_dsl_dir { + char *fullname; /* full dataset name */ + char *name; /* basename(fullname) */ + dsl_dir_phys_t *phys; /* on-disk representation */ + nvlist_t *propsnv; /* properties saved in propszap */ + + zfs_dsl_dataset_t *headds; /* principal dataset, may be null */ + + uint64_t dirid; /* DSL directory dnode */ + zfs_zap_t *propszap; /* dataset properties */ + zfs_zap_t *childzap; /* child directories */ + + /* DSL directory tree linkage. */ + struct zfs_dsl_dir *parent; + zfs_dsl_dir_list_t children; + STAILQ_ENTRY(zfs_dsl_dir) next; +} zfs_dsl_dir_t; + +static zfs_dsl_dir_t *dsl_dir_alloc(zfs_opt_t *zfs, const char *name); +static zfs_dsl_dataset_t *dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir); + +static int +nvlist_find_string(nvlist_t *nvl, const char *key, char **retp) +{ + char *str; + int error, len; + + error = nvlist_find(nvl, key, DATA_TYPE_STRING, NULL, &str, &len); + if (error == 0) { + *retp = ecalloc(1, len + 1); + memcpy(*retp, str, len); + } + return (error); +} + +static int +nvlist_find_uint64(nvlist_t *nvl, const char *key, uint64_t *retp) +{ + return (nvlist_find(nvl, key, DATA_TYPE_UINT64, NULL, retp, NULL)); +} + +/* + * Return an allocated string containing the head dataset's mountpoint, + * including the root path prefix. + * + * If the dataset has a mountpoint property, it is returned. Otherwise we have + * to follow ZFS' inheritance rules. 
+ */
+char *
+dsl_dir_get_mountpoint(zfs_opt_t *zfs, zfs_dsl_dir_t *dir)
+{
+	zfs_dsl_dir_t *pdir;
+	char *mountpoint;
+
+	/*
+	 * Returns a heap-allocated string that the caller must free, or NULL
+	 * when the effective mountpoint (the directory's own or an inherited
+	 * one) is "none".
+	 */
+	if (nvlist_find_string(dir->propsnv, "mountpoint", &mountpoint) == 0) {
+		if (strcmp(mountpoint, "none") == 0) {
+			/* nvlist_find_string() returned an allocated copy. */
+			free(mountpoint);
+			return (NULL);
+		}
+	} else {
+		/*
+		 * If we don't have a mountpoint, it's inherited from one of our
+		 * ancestors. Walk up the hierarchy until we find it, building
+		 * up our mountpoint along the way. The mountpoint property is
+		 * always set for the root dataset.
+		 */
+		for (pdir = dir->parent, mountpoint = estrdup(dir->name);;
+		    pdir = pdir->parent) {
+			char *origmountpoint, *tmp;
+
+			origmountpoint = mountpoint;
+
+			if (nvlist_find_string(pdir->propsnv, "mountpoint",
+			    &tmp) == 0) {
+				/*
+				 * "none" is inherited as-is: a descendant of
+				 * an unmounted dataset has no mountpoint
+				 * either, so don't build "none/<name>".
+				 */
+				if (strcmp(tmp, "none") == 0) {
+					free(tmp);
+					free(origmountpoint);
+					return (NULL);
+				}
+
+				/* Avoid a doubled '/' when tmp ends in one. */
+				(void)easprintf(&mountpoint, "%s%s%s", tmp,
+				    tmp[strlen(tmp) - 1] == '/' ? "" : "/",
+				    origmountpoint);
+				free(tmp);
+				free(origmountpoint);
+				break;
+			}
+
+			/* No property here: prepend this name and keep going. */
+			(void)easprintf(&mountpoint, "%s/%s", pdir->name,
+			    origmountpoint);
+			free(origmountpoint);
+		}
+	}
+	assert(mountpoint[0] == '/');
+	assert(strstr(mountpoint, zfs->rootpath) == mountpoint);
+
+	return (mountpoint);
+}
+
+/*
+ * Look up the directory's own "canmount" property and store it in *canmountp.
+ * Returns the result of the nvlist lookup: 0 if the property is set on this
+ * directory, non-zero otherwise.  No inheritance is applied.
+ */
+int
+dsl_dir_get_canmount(zfs_dsl_dir_t *dir, uint64_t *canmountp)
+{
+	return (nvlist_find_uint64(dir->propsnv, "canmount", canmountp));
+}
+
+/*
+ * Handle dataset properties that we know about; stash them into an nvlist to be
+ * written later to the properties ZAP object.
+ *
+ * If the set of properties we handle grows too much, we should probably explore
+ * using libzfs to manage them.
+ */ +static void +dsl_dir_set_prop(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, const char *key, + const char *val) +{ + nvlist_t *nvl; + + nvl = dir->propsnv; + if (val == NULL || val[0] == '\0') + errx(1, "missing value for property `%s'", key); + if (nvpair_find(nvl, key) != NULL) + errx(1, "property `%s' already set", key); + + if (strcmp(key, "mountpoint") == 0) { + if (strcmp(val, "none") != 0) { + if (val[0] != '/') + errx(1, "mountpoint `%s' is not absolute", val); + if (strcmp(val, zfs->rootpath) != 0 && + strcmp(zfs->rootpath, "/") != 0 && + (strstr(val, zfs->rootpath) != val || + val[strlen(zfs->rootpath)] != '/')) { + errx(1, "mountpoint `%s' is not prefixed by " + "the root path `%s'", val, zfs->rootpath); + } + } + (void)nvlist_add_string(nvl, key, val); + } else if (strcmp(key, "atime") == 0 || strcmp(key, "exec") == 0 || + strcmp(key, "setuid") == 0) { + if (strcmp(val, "on") == 0) + (void)nvlist_add_uint64(nvl, key, 1); + else if (strcmp(val, "off") == 0) + (void)nvlist_add_uint64(nvl, key, 0); + else + errx(1, "invalid value `%s' for %s", val, key); + } else if (strcmp(key, "canmount") == 0) { + if (strcmp(val, "noauto") == 0) + (void)nvlist_add_uint64(nvl, key, 2); + else if (strcmp(val, "on") == 0) + (void)nvlist_add_uint64(nvl, key, 1); + else if (strcmp(val, "off") == 0) + (void)nvlist_add_uint64(nvl, key, 0); + else + errx(1, "invalid value `%s' for %s", val, key); + } else if (strcmp(key, "compression") == 0) { + size_t i; + + const struct zfs_compression_algorithm { + const char *name; + enum zio_compress alg; + } compression_algorithms[] = { + { "off", ZIO_COMPRESS_OFF }, + { "on", ZIO_COMPRESS_ON }, + { "lzjb", ZIO_COMPRESS_LZJB }, + { "gzip", ZIO_COMPRESS_GZIP_6 }, + { "gzip-1", ZIO_COMPRESS_GZIP_1 }, + { "gzip-2", ZIO_COMPRESS_GZIP_2 }, + { "gzip-3", ZIO_COMPRESS_GZIP_3 }, + { "gzip-4", ZIO_COMPRESS_GZIP_4 }, + { "gzip-5", ZIO_COMPRESS_GZIP_5 }, + { "gzip-6", ZIO_COMPRESS_GZIP_6 }, + { "gzip-7", ZIO_COMPRESS_GZIP_7 }, + { "gzip-8", 
ZIO_COMPRESS_GZIP_8 }, + { "gzip-9", ZIO_COMPRESS_GZIP_9 }, + { "zle", ZIO_COMPRESS_ZLE }, + { "lz4", ZIO_COMPRESS_LZ4 }, + { "zstd", ZIO_COMPRESS_ZSTD }, + }; + for (i = 0; i < nitems(compression_algorithms); i++) { + if (strcmp(val, compression_algorithms[i].name) == 0) { + nvlist_add_uint64(nvl, key, + compression_algorithms[i].alg); + break; + } + } + if (i == nitems(compression_algorithms)) + errx(1, "invalid compression algorithm `%s'", val); + } else { + errx(1, "unknown property `%s'", key); + } +} + +static zfs_dsl_dir_t * +dsl_metadir_alloc(zfs_opt_t *zfs, const char *name) +{ + zfs_dsl_dir_t *dir; + char *path; + + (void)easprintf(&path, "%s/%s", zfs->poolname, name); + dir = dsl_dir_alloc(zfs, path); + free(path); + return (dir); +} + +static void +dsl_origindir_init(zfs_opt_t *zfs) +{ + dnode_phys_t *clones; + uint64_t clonesid; + + zfs->origindsldir = dsl_metadir_alloc(zfs, "$ORIGIN"); + zfs->originds = dsl_dataset_alloc(zfs, zfs->origindsldir); + zfs->snapds = dsl_dataset_alloc(zfs, zfs->origindsldir); + + clones = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_CLONES, &clonesid); + zfs->cloneszap = zap_alloc(zfs->mos, clones); + zfs->origindsldir->phys->dd_clones = clonesid; +} + +void +dsl_init(zfs_opt_t *zfs) +{ + zfs_dsl_dir_t *dir; + struct dataset_desc *d; + const char *dspropdelim; + + dspropdelim = ";"; + + zfs->rootdsldir = dsl_dir_alloc(zfs, NULL); + + zfs->rootds = dsl_dataset_alloc(zfs, zfs->rootdsldir); + zfs->rootdsldir->headds = zfs->rootds; + + zfs->mosdsldir = dsl_metadir_alloc(zfs, "$MOS"); + zfs->freedsldir = dsl_metadir_alloc(zfs, "$FREE"); + dsl_origindir_init(zfs); + + /* + * Go through the list of user-specified datasets and create DSL objects + * for them. 
+ */ + STAILQ_FOREACH(d, &zfs->datasetdescs, next) { + char *dsname, *next, *params, *param, *nextparam; + + params = d->params; + dsname = strsep(¶ms, dspropdelim); + + if (strcmp(dsname, zfs->poolname) == 0) { + /* + * This is the root dataset; it's already created, so + * we're just setting options. + */ + dir = zfs->rootdsldir; + } else { + /* + * This dataset must be a child of the root dataset. + */ + if (strstr(dsname, zfs->poolname) != dsname || + (next = strchr(dsname, '/')) == NULL || + (size_t)(next - dsname) != strlen(zfs->poolname)) { + errx(1, "dataset `%s' must be a child of `%s'", + dsname, zfs->poolname); + } + dir = dsl_dir_alloc(zfs, dsname); + dir->headds = dsl_dataset_alloc(zfs, dir); + } + + for (nextparam = param = params; nextparam != NULL;) { + char *key, *val; + + param = strsep(&nextparam, dspropdelim); + + key = val = param; + key = strsep(&val, "="); + dsl_dir_set_prop(zfs, dir, key, val); + } + } + + /* + * Set the root dataset's mount point and compression strategy if the + * user didn't override the defaults. + */ + if (nvpair_find(zfs->rootdsldir->propsnv, "compression") == NULL) { + (void)nvlist_add_uint64(zfs->rootdsldir->propsnv, + "compression", ZIO_COMPRESS_OFF); + } + if (nvpair_find(zfs->rootdsldir->propsnv, "mountpoint") == NULL) { + (void)nvlist_add_string(zfs->rootdsldir->propsnv, "mountpoint", + zfs->rootpath); + } +} + +uint64_t +dsl_dir_id(zfs_dsl_dir_t *dir) +{ + return (dir->dirid); +} + +uint64_t +dsl_dir_dataset_id(zfs_dsl_dir_t *dir) +{ + return (dir->headds->dsid); +} + +static void +dsl_dir_foreach_post(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, + void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) +{ + zfs_dsl_dir_t *cdsldir; + + STAILQ_FOREACH(cdsldir, &dsldir->children, next) { + dsl_dir_foreach_post(zfs, cdsldir, cb, arg); + } + cb(zfs, dsldir, arg); +} + +/* + * Used when the caller doesn't care about the order one way or another. 
+ */ +void +dsl_dir_foreach(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, + void (*cb)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *arg) +{ + dsl_dir_foreach_post(zfs, dsldir, cb, arg); +} + +const char * +dsl_dir_fullname(const zfs_dsl_dir_t *dir) +{ + return (dir->fullname); +} + +/* + * Create a DSL directory, which is effectively an entry in the ZFS namespace. + * We always create a root DSL directory, whose name is the pool's name, and + * several metadata directories. + * + * Each directory has two ZAP objects, one pointing to child directories, and + * one for properties (which are inherited by children unless overridden). + * Directories typically reference a DSL dataset, the "head dataset", which + * points to an object set. + */ +static zfs_dsl_dir_t * +dsl_dir_alloc(zfs_opt_t *zfs, const char *name) +{ + zfs_dsl_dir_list_t l, *lp; + zfs_dsl_dir_t *dir, *parent; + dnode_phys_t *dnode; + char *dirname, *nextdir, *origname; + uint64_t childid, propsid; + + dir = ecalloc(1, sizeof(*dir)); + + dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DIR, + DMU_OT_DSL_DIR, sizeof(dsl_dir_phys_t), &dir->dirid); + dir->phys = (dsl_dir_phys_t *)DN_BONUS(dnode); + + dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_PROPS, &propsid); + dir->propszap = zap_alloc(zfs->mos, dnode); + + dnode = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DIR_CHILD_MAP, + &childid); + dir->childzap = zap_alloc(zfs->mos, dnode); + + dir->propsnv = nvlist_create(NV_UNIQUE_NAME); + STAILQ_INIT(&dir->children); + + dir->phys->dd_child_dir_zapobj = childid; + dir->phys->dd_props_zapobj = propsid; + + if (name == NULL) { + /* + * This is the root DSL directory. + */ + dir->name = estrdup(zfs->poolname); + dir->fullname = estrdup(zfs->poolname); + dir->parent = NULL; + dir->phys->dd_parent_obj = 0; + + assert(zfs->rootdsldir == NULL); + zfs->rootdsldir = dir; + return (dir); + } + + /* + * Insert the new directory into the hierarchy. 
Currently this must be + * done in order, e.g., when creating pool/a/b, pool/a must already + * exist. + */ + STAILQ_INIT(&l); + STAILQ_INSERT_HEAD(&l, zfs->rootdsldir, next); + origname = dirname = nextdir = estrdup(name); + parent = NULL; + for (lp = &l;; lp = &parent->children) { + dirname = strsep(&nextdir, "/"); + if (nextdir == NULL) + break; + + STAILQ_FOREACH(parent, lp, next) { + if (strcmp(parent->name, dirname) == 0) + break; + } + if (parent == NULL) { + errx(1, "no parent at `%s' for filesystem `%s'", + dirname, name); + } + } + + dir->fullname = estrdup(name); + dir->name = estrdup(dirname); + free(origname); + STAILQ_INSERT_TAIL(lp, dir, next); + zap_add_uint64(parent->childzap, dir->name, dir->dirid); + + dir->parent = parent; + dir->phys->dd_parent_obj = parent->dirid; + return (dir); +} + +static void +dsl_dir_size_add(zfs_dsl_dir_t *dir, uint64_t bytes) +{ + dir->phys->dd_used_bytes += bytes; + dir->phys->dd_compressed_bytes += bytes; + dir->phys->dd_uncompressed_bytes += bytes; +} + +/* + * See dsl_dir_root_finalize(). + */ +void +dsl_dir_root_finalize(zfs_opt_t *zfs, uint64_t bytes) +{ + dsl_dir_size_add(zfs->mosdsldir, bytes); + zfs->mosdsldir->phys->dd_used_breakdown[DD_USED_HEAD] += bytes; + + dsl_dir_size_add(zfs->rootdsldir, bytes); + zfs->rootdsldir->phys->dd_used_breakdown[DD_USED_CHILD] += bytes; +} + +/* + * Convert dataset properties into entries in the DSL directory's properties + * ZAP. 
+ */ +static void +dsl_dir_finalize_props(zfs_dsl_dir_t *dir) +{ + for (nvp_header_t *nvh = NULL; + (nvh = nvlist_next_nvpair(dir->propsnv, nvh)) != NULL;) { + nv_string_t *nvname; + nv_pair_data_t *nvdata; + char *name; + + nvname = (nv_string_t *)(nvh + 1); + nvdata = (nv_pair_data_t *)(&nvname->nv_data[0] + + NV_ALIGN4(nvname->nv_size)); + + name = nvstring_get(nvname); + switch (nvdata->nv_type) { + case DATA_TYPE_UINT64: { + uint64_t val; + + memcpy(&val, &nvdata->nv_data[0], sizeof(uint64_t)); + zap_add_uint64(dir->propszap, name, val); + break; + } + case DATA_TYPE_STRING: { + nv_string_t *nvstr; + char *val; + + nvstr = (nv_string_t *)&nvdata->nv_data[0]; + val = nvstring_get(nvstr); + zap_add_string(dir->propszap, name, val); + free(val); + break; + } + default: + assert(0); + } + free(name); + } +} + +static void +dsl_dir_finalize(zfs_opt_t *zfs, zfs_dsl_dir_t *dir, void *arg __unused) +{ + zfs_dsl_dir_t *cdir; + dnode_phys_t *snapnames; + zfs_dsl_dataset_t *headds; + zfs_objset_t *os; + uint64_t bytes, childbytes, snapnamesid; + + dsl_dir_finalize_props(dir); + zap_write(zfs, dir->propszap); + zap_write(zfs, dir->childzap); + + headds = dir->headds; + if (headds == NULL) + return; + os = headds->os; + if (os == NULL) + return; + + snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, + &snapnamesid); + zap_write(zfs, zap_alloc(zfs->mos, snapnames)); + + dir->phys->dd_head_dataset_obj = headds->dsid; + dir->phys->dd_clone_parent_obj = zfs->snapds->dsid; + headds->phys->ds_prev_snap_obj = zfs->snapds->dsid; + headds->phys->ds_snapnames_zapobj = snapnamesid; + objset_root_blkptr_copy(os, &headds->phys->ds_bp); + + zfs->snapds->phys->ds_num_children++; + zap_add_uint64_self(zfs->cloneszap, headds->dsid); + + bytes = objset_space(os); + headds->phys->ds_used_bytes = bytes; + headds->phys->ds_uncompressed_bytes = bytes; + headds->phys->ds_compressed_bytes = bytes; + + childbytes = 0; + STAILQ_FOREACH(cdir, &dir->children, next) { + /* + * The root 
directory needs a special case: the amount of + * space used for the MOS isn't known until everything else is + * finalized, so it can't be accounted in the MOS directory's + * parent until then, at which point dsl_dir_root_finalize() is + * called. + */ + if (dir == zfs->rootdsldir && cdir == zfs->mosdsldir) + continue; + childbytes += cdir->phys->dd_used_bytes; + } + dsl_dir_size_add(dir, bytes + childbytes); + + dir->phys->dd_flags |= DD_FLAG_USED_BREAKDOWN; + dir->phys->dd_used_breakdown[DD_USED_HEAD] = bytes; + dir->phys->dd_used_breakdown[DD_USED_CHILD] = childbytes; +} + +void +dsl_write(zfs_opt_t *zfs) +{ + zfs_zap_t *snapnameszap; + dnode_phys_t *snapnames; + uint64_t snapmapid; + + /* + * Perform accounting, starting from the leaves of the DSL directory + * tree. Accounting for $MOS is done later, once we've finished + * allocating space. + */ + dsl_dir_foreach_post(zfs, zfs->rootdsldir, dsl_dir_finalize, NULL); + + snapnames = objset_dnode_alloc(zfs->mos, DMU_OT_DSL_DS_SNAP_MAP, + &snapmapid); + snapnameszap = zap_alloc(zfs->mos, snapnames); + zap_add_uint64(snapnameszap, "$ORIGIN", zfs->snapds->dsid); + zap_write(zfs, snapnameszap); + + zfs->origindsldir->phys->dd_head_dataset_obj = zfs->originds->dsid; + zfs->originds->phys->ds_prev_snap_obj = zfs->snapds->dsid; + zfs->originds->phys->ds_snapnames_zapobj = snapmapid; + + zfs->snapds->phys->ds_next_snap_obj = zfs->originds->dsid; + assert(zfs->snapds->phys->ds_num_children > 0); + zfs->snapds->phys->ds_num_children++; + + zap_write(zfs, zfs->cloneszap); + + /* XXX-MJ dirs and datasets are leaked */ +} + +void +dsl_dir_dataset_write(zfs_opt_t *zfs, zfs_objset_t *os, zfs_dsl_dir_t *dir) +{ + dir->headds->os = os; + objset_write(zfs, os); +} + +bool +dsl_dir_has_dataset(zfs_dsl_dir_t *dir) +{ + return (dir->headds != NULL); +} + +bool +dsl_dir_dataset_has_objset(zfs_dsl_dir_t *dir) +{ + return (dsl_dir_has_dataset(dir) && dir->headds->os != NULL); +} + +static zfs_dsl_dataset_t * 
+dsl_dataset_alloc(zfs_opt_t *zfs, zfs_dsl_dir_t *dir) +{ + zfs_dsl_dataset_t *ds; + dnode_phys_t *dnode; + uint64_t deadlistid; + + ds = ecalloc(1, sizeof(*ds)); + + dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DSL_DATASET, + DMU_OT_DSL_DATASET, sizeof(dsl_dataset_phys_t), &ds->dsid); + ds->phys = (dsl_dataset_phys_t *)DN_BONUS(dnode); + + dnode = objset_dnode_bonus_alloc(zfs->mos, DMU_OT_DEADLIST, + DMU_OT_DEADLIST_HDR, sizeof(dsl_deadlist_phys_t), &deadlistid); + zap_write(zfs, zap_alloc(zfs->mos, dnode)); + + ds->phys->ds_dir_obj = dir->dirid; + ds->phys->ds_deadlist_obj = deadlistid; + ds->phys->ds_creation_txg = TXG - 1; + if (ds != zfs->snapds) + ds->phys->ds_prev_snap_txg = TXG - 1; + ds->phys->ds_guid = randomguid(); + ds->dir = dir; + + return (ds); +} diff --git a/usr.sbin/makefs/zfs/fs.c b/usr.sbin/makefs/zfs/fs.c new file mode 100644 index 000000000000..75f6e30e1500 --- /dev/null +++ b/usr.sbin/makefs/zfs/fs.c @@ -0,0 +1,1120 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 The FreeBSD Foundation + * + * This software was developed by Mark Johnston under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/stat.h> + +#include <assert.h> +#include <dirent.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <util.h> + +#include "makefs.h" +#include "zfs.h" + +typedef struct { + const char *name; + unsigned int id; + uint16_t size; + sa_bswap_type_t bs; +} zfs_sattr_t; + +typedef struct zfs_fs { + zfs_objset_t *os; + + /* Offset table for system attributes, indexed by a zpl_attr_t. */ + uint16_t *saoffs; + size_t sacnt; + const zfs_sattr_t *satab; +} zfs_fs_t; + +/* + * The order of the attributes doesn't matter, this is simply the one hard-coded + * by OpenZFS, based on a zdb dump of the SA_REGISTRY table. + */ +typedef enum zpl_attr { + ZPL_ATIME, + ZPL_MTIME, + ZPL_CTIME, + ZPL_CRTIME, + ZPL_GEN, + ZPL_MODE, + ZPL_SIZE, + ZPL_PARENT, + ZPL_LINKS, + ZPL_XATTR, + ZPL_RDEV, + ZPL_FLAGS, + ZPL_UID, + ZPL_GID, + ZPL_PAD, + ZPL_ZNODE_ACL, + ZPL_DACL_COUNT, + ZPL_SYMLINK, + ZPL_SCANSTAMP, + ZPL_DACL_ACES, + ZPL_DXATTR, + ZPL_PROJID, +} zpl_attr_t; + +/* + * This table must be kept in sync with zpl_attr_layout[] and zpl_attr_t. 
+ */ +static const zfs_sattr_t zpl_attrs[] = { +#define _ZPL_ATTR(n, s, b) { .name = #n, .id = n, .size = s, .bs = b } + _ZPL_ATTR(ZPL_ATIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_MTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_CTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_CRTIME, sizeof(uint64_t) * 2, SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_GEN, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_MODE, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_SIZE, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_PARENT, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_LINKS, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_XATTR, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_RDEV, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_FLAGS, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_UID, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_GID, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_PAD, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_ZNODE_ACL, 88, SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_DACL_COUNT, sizeof(uint64_t), SA_UINT64_ARRAY), + _ZPL_ATTR(ZPL_SYMLINK, 0, SA_UINT8_ARRAY), + _ZPL_ATTR(ZPL_SCANSTAMP, sizeof(uint64_t) * 4, SA_UINT8_ARRAY), + _ZPL_ATTR(ZPL_DACL_ACES, 0, SA_ACL), + _ZPL_ATTR(ZPL_DXATTR, 0, SA_UINT8_ARRAY), + _ZPL_ATTR(ZPL_PROJID, sizeof(uint64_t), SA_UINT64_ARRAY), +#undef ZPL_ATTR +}; + +/* + * This layout matches that of a filesystem created using OpenZFS on FreeBSD. + * It need not match in general, but FreeBSD's loader doesn't bother parsing the + * layout and just hard-codes attribute offsets. + */ +static const sa_attr_type_t zpl_attr_layout[] = { + ZPL_MODE, + ZPL_SIZE, + ZPL_GEN, + ZPL_UID, + ZPL_GID, + ZPL_PARENT, + ZPL_FLAGS, + ZPL_ATIME, + ZPL_MTIME, + ZPL_CTIME, + ZPL_CRTIME, + ZPL_LINKS, + ZPL_DACL_COUNT, + ZPL_DACL_ACES, + ZPL_SYMLINK, +}; + +/* + * Keys for the ZPL attribute tables in the SA layout ZAP. 
The first two + * indices are reserved for legacy attribute encoding. + */ +#define SA_LAYOUT_INDEX_DEFAULT 2 +#define SA_LAYOUT_INDEX_SYMLINK 3 + +struct fs_populate_dir { + SLIST_ENTRY(fs_populate_dir) next; + int dirfd; + uint64_t objid; + zfs_zap_t *zap; +}; + +struct fs_populate_arg { + zfs_opt_t *zfs; + zfs_fs_t *fs; /* owning filesystem */ + uint64_t rootdirid; /* root directory dnode ID */ + int rootdirfd; /* root directory fd */ + SLIST_HEAD(, fs_populate_dir) dirs; /* stack of directories */ +}; + +static void fs_build_one(zfs_opt_t *, zfs_dsl_dir_t *, fsnode *, int); + +static void +eclose(int fd) +{ + if (close(fd) != 0) + err(1, "close"); +} + +static bool +fsnode_isroot(const fsnode *cur) +{ + return (strcmp(cur->name, ".") == 0); +} + +static bool +fsnode_valid(const fsnode *cur) +{ + return (cur->type == S_IFREG || cur->type == S_IFDIR || + cur->type == S_IFLNK); +} + +/* + * Visit each node in a directory hierarchy, in pre-order depth-first order. + */ +static void +fsnode_foreach(fsnode *root, int (*cb)(fsnode *, void *), void *arg) +{ + assert(root->type == S_IFDIR); + + for (fsnode *cur = root; cur != NULL; cur = cur->next) { + if (!fsnode_valid(cur)) { + warnx("skipping unhandled %s %s/%s", + inode_type(cur->type), cur->path, cur->name); + continue; + } + if (cb(cur, arg) == 0) + continue; + if (cur->type == S_IFDIR && cur->child != NULL) + fsnode_foreach(cur->child, cb, arg); + } +} + +static void +fs_populate_dirent(struct fs_populate_arg *arg, fsnode *cur, uint64_t dnid) +{ + struct fs_populate_dir *dir; + uint64_t type; + + switch (cur->type) { + case S_IFREG: + type = DT_REG; + break; + case S_IFDIR: + type = DT_DIR; + break; + case S_IFLNK: + type = DT_LNK; + break; + default: + assert(0); + } + + dir = SLIST_FIRST(&arg->dirs); + zap_add_uint64(dir->zap, cur->name, ZFS_DIRENT_MAKE(type, dnid)); +} + +static void +fs_populate_attr(zfs_fs_t *fs, char *attrbuf, const void *val, uint16_t ind, + size_t *szp) +{ + assert(ind < fs->sacnt); + 
assert(fs->saoffs[ind] != 0xffff); + + memcpy(attrbuf + fs->saoffs[ind], val, fs->satab[ind].size); + *szp += fs->satab[ind].size; +} + +static void +fs_populate_varszattr(zfs_fs_t *fs, char *attrbuf, const void *val, + size_t valsz, size_t varoff, uint16_t ind, size_t *szp) +{ + assert(ind < fs->sacnt); + assert(fs->saoffs[ind] != 0xffff); + assert(fs->satab[ind].size == 0); + + memcpy(attrbuf + fs->saoffs[ind] + varoff, val, valsz); + *szp += valsz; +} + +/* + * Derive the relative fd/path combo needed to access a file. Ideally we'd + * always be able to use relative lookups (i.e., use the *at() system calls), + * since they require less path translation and are more amenable to sandboxing, + * but the handling of multiple staging directories makes that difficult. To + * make matters worse, we have no choice but to use relative lookups when + * dealing with an mtree manifest, so both mechanisms are implemented. + */ +static void +fs_populate_path(const fsnode *cur, struct fs_populate_arg *arg, + char *path, size_t sz, int *dirfdp) +{ + if (cur->contents != NULL) { + size_t n; + + *dirfdp = AT_FDCWD; + n = strlcpy(path, cur->contents, sz); + assert(n < sz); + } else if (cur->root == NULL) { + size_t n; + + *dirfdp = SLIST_FIRST(&arg->dirs)->dirfd; + n = strlcpy(path, cur->name, sz); + assert(n < sz); + } else { + int n; + + *dirfdp = AT_FDCWD; + n = snprintf(path, sz, "%s/%s/%s", + cur->root, cur->path, cur->name); + assert(n >= 0); + assert((size_t)n < sz); + } +} + +static int +fs_open(const fsnode *cur, struct fs_populate_arg *arg, int flags) +{ + char path[PATH_MAX]; + int fd; + + fs_populate_path(cur, arg, path, sizeof(path), &fd); + + fd = openat(fd, path, flags); + if (fd < 0) + err(1, "openat(%s)", path); + return (fd); +} + +static int +fs_open_can_fail(const fsnode *cur, struct fs_populate_arg *arg, int flags) +{ + int fd; + char path[PATH_MAX]; + + fs_populate_path(cur, arg, path, sizeof(path), &fd); + + return (openat(fd, path, flags)); +} + +static 
void +fs_readlink(const fsnode *cur, struct fs_populate_arg *arg, + char *buf, size_t bufsz) +{ + char path[PATH_MAX]; + int fd; + + if (cur->symlink != NULL) { + size_t n; + + n = strlcpy(buf, cur->symlink, bufsz); + assert(n < bufsz); + } else { + ssize_t n; + + fs_populate_path(cur, arg, path, sizeof(path), &fd); + + n = readlinkat(fd, path, buf, bufsz - 1); + if (n == -1) + err(1, "readlinkat(%s)", cur->name); + buf[n] = '\0'; + } +} + +static void +fs_populate_time(zfs_fs_t *fs, char *attrbuf, struct timespec *ts, + uint16_t ind, size_t *szp) +{ + uint64_t timebuf[2]; + + assert(ind < fs->sacnt); + assert(fs->saoffs[ind] != 0xffff); + assert(fs->satab[ind].size == sizeof(timebuf)); + + timebuf[0] = ts->tv_sec; + timebuf[1] = ts->tv_nsec; + fs_populate_attr(fs, attrbuf, timebuf, ind, szp); +} + +static void +fs_populate_sattrs(struct fs_populate_arg *arg, const fsnode *cur, + dnode_phys_t *dnode) +{ + char target[PATH_MAX]; + zfs_fs_t *fs; + zfs_ace_hdr_t aces[3]; + struct stat *sb; + sa_hdr_phys_t *sahdr; + uint64_t daclcount, flags, gen, gid, links, mode, parent, objsize, uid; + char *attrbuf; + size_t bonussz, hdrsz; + int layout; + + assert(dnode->dn_bonustype == DMU_OT_SA); + assert(dnode->dn_nblkptr == 1); + + fs = arg->fs; + sb = &cur->inode->st; + + switch (cur->type) { + case S_IFREG: + layout = SA_LAYOUT_INDEX_DEFAULT; + links = cur->inode->nlink; + objsize = sb->st_size; + parent = SLIST_FIRST(&arg->dirs)->objid; + break; + case S_IFDIR: + layout = SA_LAYOUT_INDEX_DEFAULT; + links = 1; /* .. */ + objsize = 1; /* .. */ + + if ((cur->inode->flags & FI_ROOT) == 0 ) { + /* + * The size of a ZPL directory is the number of entries + * (including "." and ".."), and the link count is the + * number of entries which are directories + * (including "." and ".."). + */ + for (fsnode *c = + fsnode_isroot(cur) ? 
cur->next : cur->child; + c != NULL; c = c->next) { + switch (c->type) { + case S_IFDIR: + links++; + /* FALLTHROUGH */ + case S_IFREG: + case S_IFLNK: + objsize++; + break; + } + } + } else { + /* + * Root directory children do belong to + * different dataset and this directory is + * empty in the current objset. + */ + links++; /* . */ + objsize++; /* . */ + } + + /* The root directory is its own parent. */ + parent = SLIST_EMPTY(&arg->dirs) ? + arg->rootdirid : SLIST_FIRST(&arg->dirs)->objid; + break; + case S_IFLNK: + fs_readlink(cur, arg, target, sizeof(target)); + + layout = SA_LAYOUT_INDEX_SYMLINK; + links = 1; + objsize = strlen(target); + parent = SLIST_FIRST(&arg->dirs)->objid; + break; + default: + assert(0); + } + + daclcount = nitems(aces); + flags = ZFS_ACL_TRIVIAL | ZFS_ACL_AUTO_INHERIT | ZFS_ARCHIVE | + ZFS_AV_MODIFIED; + gen = 1; + gid = sb->st_gid; + mode = sb->st_mode; + uid = sb->st_uid; + + memset(aces, 0, sizeof(aces)); + aces[0].z_flags = ACE_OWNER; + aces[0].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; + aces[0].z_access_mask = ACE_WRITE_ATTRIBUTES | ACE_WRITE_OWNER | + ACE_WRITE_ACL | ACE_WRITE_NAMED_ATTRS | ACE_READ_ACL | + ACE_READ_ATTRIBUTES | ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; + if ((mode & S_IRUSR) != 0) + aces[0].z_access_mask |= ACE_READ_DATA; + if ((mode & S_IWUSR) != 0) + aces[0].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; + if ((mode & S_IXUSR) != 0) + aces[0].z_access_mask |= ACE_EXECUTE; + + aces[1].z_flags = ACE_GROUP | ACE_IDENTIFIER_GROUP; + aces[1].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; + aces[1].z_access_mask = ACE_READ_ACL | ACE_READ_ATTRIBUTES | + ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; + if ((mode & S_IRGRP) != 0) + aces[1].z_access_mask |= ACE_READ_DATA; + if ((mode & S_IWGRP) != 0) + aces[1].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; + if ((mode & S_IXGRP) != 0) + aces[1].z_access_mask |= ACE_EXECUTE; + + aces[2].z_flags = ACE_EVERYONE; + aces[2].z_type = ACE_ACCESS_ALLOWED_ACE_TYPE; + aces[2].z_access_mask 
= ACE_READ_ACL | ACE_READ_ATTRIBUTES | + ACE_READ_NAMED_ATTRS | ACE_SYNCHRONIZE; + if ((mode & S_IROTH) != 0) + aces[2].z_access_mask |= ACE_READ_DATA; + if ((mode & S_IWOTH) != 0) + aces[2].z_access_mask |= ACE_WRITE_DATA | ACE_APPEND_DATA; + if ((mode & S_IXOTH) != 0) + aces[2].z_access_mask |= ACE_EXECUTE; + + switch (layout) { + case SA_LAYOUT_INDEX_DEFAULT: + /* At most one variable-length attribute. */ + hdrsz = sizeof(uint64_t); + break; + case SA_LAYOUT_INDEX_SYMLINK: + /* At most five variable-length attributes. */ + hdrsz = sizeof(uint64_t) * 2; + break; + default: + assert(0); + } + + sahdr = (sa_hdr_phys_t *)DN_BONUS(dnode); + sahdr->sa_magic = SA_MAGIC; + SA_HDR_LAYOUT_INFO_ENCODE(sahdr->sa_layout_info, layout, hdrsz); + + bonussz = SA_HDR_SIZE(sahdr); + attrbuf = (char *)sahdr + SA_HDR_SIZE(sahdr); + + fs_populate_attr(fs, attrbuf, &daclcount, ZPL_DACL_COUNT, &bonussz); + fs_populate_attr(fs, attrbuf, &flags, ZPL_FLAGS, &bonussz); + fs_populate_attr(fs, attrbuf, &gen, ZPL_GEN, &bonussz); + fs_populate_attr(fs, attrbuf, &gid, ZPL_GID, &bonussz); + fs_populate_attr(fs, attrbuf, &links, ZPL_LINKS, &bonussz); + fs_populate_attr(fs, attrbuf, &mode, ZPL_MODE, &bonussz); + fs_populate_attr(fs, attrbuf, &parent, ZPL_PARENT, &bonussz); + fs_populate_attr(fs, attrbuf, &objsize, ZPL_SIZE, &bonussz); + fs_populate_attr(fs, attrbuf, &uid, ZPL_UID, &bonussz); + + /* + * We deliberately set atime = mtime here to ensure that images are + * reproducible. 
+ */ + fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_ATIME, &bonussz); + fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CTIME, &bonussz); + fs_populate_time(fs, attrbuf, &sb->st_mtim, ZPL_MTIME, &bonussz); +#ifdef __linux__ + /* Linux has no st_birthtim; approximate with st_ctim */ + fs_populate_time(fs, attrbuf, &sb->st_ctim, ZPL_CRTIME, &bonussz); +#else + fs_populate_time(fs, attrbuf, &sb->st_birthtim, ZPL_CRTIME, &bonussz); +#endif + + fs_populate_varszattr(fs, attrbuf, aces, sizeof(aces), 0, + ZPL_DACL_ACES, &bonussz); + sahdr->sa_lengths[0] = sizeof(aces); + + if (cur->type == S_IFLNK) { + assert(layout == SA_LAYOUT_INDEX_SYMLINK); + /* Need to use a spill block pointer if the target is long. */ + assert(bonussz + objsize <= DN_OLD_MAX_BONUSLEN); + fs_populate_varszattr(fs, attrbuf, target, objsize, + sahdr->sa_lengths[0], ZPL_SYMLINK, &bonussz); + sahdr->sa_lengths[1] = (uint16_t)objsize; + } + + dnode->dn_bonuslen = bonussz; +} + +static void +fs_populate_file(fsnode *cur, struct fs_populate_arg *arg) +{ + struct dnode_cursor *c; + dnode_phys_t *dnode; + zfs_opt_t *zfs; + char *buf; + uint64_t dnid; + ssize_t n; + size_t bufsz; + off_t nbytes, reqbytes, size; + int fd; + + assert(cur->type == S_IFREG); + assert((cur->inode->flags & FI_ROOT) == 0); + + zfs = arg->zfs; + + assert(cur->inode->ino != 0); + if ((cur->inode->flags & FI_ALLOCATED) != 0) { + /* + * This is a hard link of an existing file. 
+ * + * XXX-MJ need to check whether it crosses datasets, add a test + * case for that + */ + fs_populate_dirent(arg, cur, cur->inode->ino); + return; + } + + dnode = objset_dnode_bonus_alloc(arg->fs->os, + DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid); + cur->inode->ino = dnid; + cur->inode->flags |= FI_ALLOCATED; + + fd = fs_open(cur, arg, O_RDONLY); + + buf = zfs->filebuf; + bufsz = sizeof(zfs->filebuf); + size = cur->inode->st.st_size; + c = dnode_cursor_init(zfs, arg->fs->os, dnode, size, 0); + for (off_t foff = 0; foff < size; foff += nbytes) { + off_t loc, sofar; + + /* + * Fill up our buffer, handling partial reads. + */ + sofar = 0; + nbytes = MIN(size - foff, (off_t)bufsz); + do { + n = read(fd, buf + sofar, nbytes); + if (n < 0) + err(1, "reading from '%s'", cur->name); + if (n == 0) + errx(1, "unexpected EOF reading '%s'", + cur->name); + sofar += n; + } while (sofar < nbytes); + + if (nbytes < (off_t)bufsz) + memset(buf + nbytes, 0, bufsz - nbytes); + + reqbytes = foff == 0 ? nbytes : MAXBLOCKSIZE; + loc = objset_space_alloc(zfs, arg->fs->os, &reqbytes); + vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, buf, reqbytes, loc, + dnode_cursor_next(zfs, c, foff)); + } + eclose(fd); + dnode_cursor_finish(zfs, c); + + fs_populate_sattrs(arg, cur, dnode); + fs_populate_dirent(arg, cur, dnid); +} + +static void +fs_populate_dir(fsnode *cur, struct fs_populate_arg *arg) +{ + dnode_phys_t *dnode; + zfs_objset_t *os; + uint64_t dnid; + int dirfd; + + assert(cur->type == S_IFDIR); + assert((cur->inode->flags & FI_ALLOCATED) == 0); + + os = arg->fs->os; + + dnode = objset_dnode_bonus_alloc(os, DMU_OT_DIRECTORY_CONTENTS, + DMU_OT_SA, 0, &dnid); + + /* + * Add an entry to the parent directory and open this directory. + */ + if (!SLIST_EMPTY(&arg->dirs)) { + fs_populate_dirent(arg, cur, dnid); + /* + * We only need the directory fd if we're finding files in + * it. If it's just there for other directories or + * files using contents= we don't need to succeed here. 
+ */ + dirfd = fs_open_can_fail(cur, arg, O_DIRECTORY | O_RDONLY); + } else { + arg->rootdirid = dnid; + dirfd = arg->rootdirfd; + arg->rootdirfd = -1; + } + + /* + * Set ZPL attributes. + */ + fs_populate_sattrs(arg, cur, dnode); + + /* + * If this is a root directory, then its children belong to a different + * dataset and this directory remains empty in the current objset. + */ + if ((cur->inode->flags & FI_ROOT) == 0) { + struct fs_populate_dir *dir; + + dir = ecalloc(1, sizeof(*dir)); + dir->dirfd = dirfd; + dir->objid = dnid; + dir->zap = zap_alloc(os, dnode); + SLIST_INSERT_HEAD(&arg->dirs, dir, next); + } else { + zap_write(arg->zfs, zap_alloc(os, dnode)); + fs_build_one(arg->zfs, cur->inode->param, cur->child, dirfd); + } +} + +static void +fs_populate_symlink(fsnode *cur, struct fs_populate_arg *arg) +{ + dnode_phys_t *dnode; + uint64_t dnid; + + assert(cur->type == S_IFLNK); + assert((cur->inode->flags & (FI_ALLOCATED | FI_ROOT)) == 0); + + dnode = objset_dnode_bonus_alloc(arg->fs->os, + DMU_OT_PLAIN_FILE_CONTENTS, DMU_OT_SA, 0, &dnid); + + fs_populate_dirent(arg, cur, dnid); + + fs_populate_sattrs(arg, cur, dnode); +} + +static fsnode * +fsnode_next(fsnode *cur) +{ + for (cur = cur->next; cur != NULL; cur = cur->next) { + if (fsnode_valid(cur)) + return (cur); + } + return (NULL); +} + +static int +fs_foreach_populate(fsnode *cur, void *_arg) +{ + struct fs_populate_arg *arg; + struct fs_populate_dir *dir; + int ret; + + arg = _arg; + switch (cur->type) { + case S_IFREG: + fs_populate_file(cur, arg); + break; + case S_IFDIR: + if (fsnode_isroot(cur)) + break; + fs_populate_dir(cur, arg); + break; + case S_IFLNK: + fs_populate_symlink(cur, arg); + break; + default: + assert(0); + } + + ret = (cur->inode->flags & FI_ROOT) != 0 ? 0 : 1; + + if (fsnode_next(cur) == NULL && + (cur->child == NULL || (cur->inode->flags & FI_ROOT) != 0)) { + /* + * We reached a terminal node in a subtree. Walk back up and + * write out directories. 
We're done once we hit the root of a + * dataset or find a level where we're not on the edge of the + * tree. + */ + do { + dir = SLIST_FIRST(&arg->dirs); + SLIST_REMOVE_HEAD(&arg->dirs, next); + zap_write(arg->zfs, dir->zap); + if (dir->dirfd != -1) + eclose(dir->dirfd); + free(dir); + cur = cur->parent; + } while (cur != NULL && fsnode_next(cur) == NULL && + (cur->inode->flags & FI_ROOT) == 0); + } + + return (ret); +} + +static void +fs_add_zpl_attr_layout(zfs_zap_t *zap, unsigned int index, + const sa_attr_type_t layout[], size_t sacnt) +{ + char ti[16]; + + assert(sizeof(layout[0]) == 2); + + (void)snprintf(ti, sizeof(ti), "%u", index); + zap_add(zap, ti, sizeof(sa_attr_type_t), sacnt, + (const uint8_t *)layout); +} + +/* + * Initialize system attribute tables. + * + * There are two elements to this. First, we write the zpl_attrs[] and + * zpl_attr_layout[] tables to disk. Then we create a lookup table which + * allows us to set file attributes quickly. + */ +static uint64_t +fs_set_zpl_attrs(zfs_opt_t *zfs, zfs_fs_t *fs) +{ + zfs_zap_t *sazap, *salzap, *sarzap; + zfs_objset_t *os; + dnode_phys_t *saobj, *salobj, *sarobj; + uint64_t saobjid, salobjid, sarobjid; + uint16_t offset; + + os = fs->os; + + /* + * The on-disk tables are stored in two ZAP objects, the registry object + * and the layout object. Individual attributes are described by + * entries in the registry object; for example, the value for the + * "ZPL_SIZE" key gives the size and encoding of the ZPL_SIZE attribute. + * The attributes of a file are ordered according to one of the layouts + * defined in the layout object. The master node object is simply used + * to locate the registry and layout objects. 
+ */ + saobj = objset_dnode_alloc(os, DMU_OT_SA_MASTER_NODE, &saobjid); + salobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_LAYOUTS, &salobjid); + sarobj = objset_dnode_alloc(os, DMU_OT_SA_ATTR_REGISTRATION, &sarobjid); + + sarzap = zap_alloc(os, sarobj); + for (size_t i = 0; i < nitems(zpl_attrs); i++) { + const zfs_sattr_t *sa; + uint64_t attr; + + attr = 0; + sa = &zpl_attrs[i]; + SA_ATTR_ENCODE(attr, (uint64_t)i, sa->size, sa->bs); + zap_add_uint64(sarzap, sa->name, attr); + } + zap_write(zfs, sarzap); + + /* + * Layouts are arrays of indices into the registry. We define two + * layouts for use by the ZPL, one for non-symlinks and one for + * symlinks. They are identical except that the symlink layout includes + * ZPL_SYMLINK as its final attribute. + */ + salzap = zap_alloc(os, salobj); + assert(zpl_attr_layout[nitems(zpl_attr_layout) - 1] == ZPL_SYMLINK); + fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_DEFAULT, + zpl_attr_layout, nitems(zpl_attr_layout) - 1); + fs_add_zpl_attr_layout(salzap, SA_LAYOUT_INDEX_SYMLINK, + zpl_attr_layout, nitems(zpl_attr_layout)); + zap_write(zfs, salzap); + + sazap = zap_alloc(os, saobj); + zap_add_uint64(sazap, SA_LAYOUTS, salobjid); + zap_add_uint64(sazap, SA_REGISTRY, sarobjid); + zap_write(zfs, sazap); + + /* Sanity check. */ + for (size_t i = 0; i < nitems(zpl_attrs); i++) + assert(i == zpl_attrs[i].id); + + /* + * Build the offset table used when setting file attributes. File + * attributes are stored in the object's bonus buffer; this table + * provides the buffer offset of attributes referenced by the layout + * table. 
+ */ + fs->sacnt = nitems(zpl_attrs); + fs->saoffs = ecalloc(fs->sacnt, sizeof(*fs->saoffs)); + for (size_t i = 0; i < fs->sacnt; i++) + fs->saoffs[i] = 0xffff; + offset = 0; + for (size_t i = 0; i < nitems(zpl_attr_layout); i++) { + uint16_t size; + + assert(zpl_attr_layout[i] < fs->sacnt); + + fs->saoffs[zpl_attr_layout[i]] = offset; + size = zpl_attrs[zpl_attr_layout[i]].size; + offset += size; + } + fs->satab = zpl_attrs; + + return (saobjid); +} + +static void +fs_layout_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg) +{ + char *mountpoint, *origmountpoint, *name, *next; + fsnode *cur, *root; + uint64_t canmount; + + if (!dsl_dir_has_dataset(dsldir)) + return; + + if (dsl_dir_get_canmount(dsldir, &canmount) == 0 && canmount == 0) + return; + mountpoint = dsl_dir_get_mountpoint(zfs, dsldir); + if (mountpoint == NULL) + return; + + /* + * If we were asked to specify a bootfs, set it here. + */ + if (zfs->bootfs != NULL && strcmp(zfs->bootfs, + dsl_dir_fullname(dsldir)) == 0) { + zap_add_uint64(zfs->poolprops, "bootfs", + dsl_dir_dataset_id(dsldir)); + } + + origmountpoint = mountpoint; + + /* + * Figure out which fsnode corresponds to our mountpoint. + */ + root = arg; + cur = root; + if (strcmp(mountpoint, zfs->rootpath) != 0) { + mountpoint += strlen(zfs->rootpath); + + /* + * Look up the directory in the staged tree. For example, if + * the dataset's mount point is /foo/bar/baz, we'll search the + * root directory for "foo", search "foo" for "bar", and so on. + * Each intermediate name must refer to a directory; the final + * component need not exist. 
+ */ + cur = root; + for (next = name = mountpoint; next != NULL;) { + for (; *next == '/'; next++) + ; + name = strsep(&next, "/"); + + for (; cur != NULL && strcmp(cur->name, name) != 0; + cur = cur->next) + ; + if (cur == NULL) { + if (next == NULL) + break; + errx(1, "missing mountpoint directory for `%s'", + dsl_dir_fullname(dsldir)); + } + if (cur->type != S_IFDIR) { + errx(1, + "mountpoint for `%s' is not a directory", + dsl_dir_fullname(dsldir)); + } + if (next != NULL) + cur = cur->child; + } + } + + if (cur != NULL) { + assert(cur->type == S_IFDIR); + + /* + * Multiple datasets shouldn't share a mountpoint. It's + * technically allowed, but it's not clear what makefs should do + * in that case. + */ + assert((cur->inode->flags & FI_ROOT) == 0); + if (cur != root) + cur->inode->flags |= FI_ROOT; + assert(cur->inode->param == NULL); + cur->inode->param = dsldir; + } + + free(origmountpoint); +} + +static int +fs_foreach_mark(fsnode *cur, void *arg) +{ + uint64_t *countp; + + countp = arg; + if (cur->type == S_IFDIR && fsnode_isroot(cur)) + return (1); + + if (cur->inode->ino == 0) { + cur->inode->ino = ++(*countp); + cur->inode->nlink = 1; + } else { + cur->inode->nlink++; + } + + return ((cur->inode->flags & FI_ROOT) != 0 ? 0 : 1); +} + +/* + * Create a filesystem dataset. More specifically: + * - create an object set for the dataset, + * - add required metadata (SA tables, property definitions, etc.) to that + * object set, + * - optionally populate the object set with file objects, using "root" as the + * root directory. + * + * "dirfd" is a directory descriptor for the directory referenced by "root". It + * is closed before returning. 
+ */ +static void +fs_build_one(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, fsnode *root, int dirfd) +{ + struct fs_populate_arg arg; + zfs_fs_t fs; + zfs_zap_t *masterzap; + zfs_objset_t *os; + dnode_phys_t *deleteq, *masterobj; + uint64_t deleteqid, dnodecount, moid, rootdirid, saobjid; + bool fakedroot; + + /* + * This dataset's mountpoint doesn't exist in the staging tree, or the + * dataset doesn't have a mountpoint at all. In either case we still + * need a root directory. Fake up a root fsnode to handle this case. + */ + fakedroot = root == NULL; + if (fakedroot) { + struct stat *stp; + + assert(dirfd == -1); + + root = ecalloc(1, sizeof(*root)); + root->inode = ecalloc(1, sizeof(*root->inode)); + root->name = estrdup("."); + root->type = S_IFDIR; + + stp = &root->inode->st; + stp->st_uid = 0; + stp->st_gid = 0; + stp->st_mode = S_IFDIR | 0755; + } + assert(root->type == S_IFDIR); + assert(fsnode_isroot(root)); + + /* + * Initialize the object set for this dataset. + */ + os = objset_alloc(zfs, DMU_OST_ZFS); + masterobj = objset_dnode_alloc(os, DMU_OT_MASTER_NODE, &moid); + assert(moid == MASTER_NODE_OBJ); + + memset(&fs, 0, sizeof(fs)); + fs.os = os; + + /* + * Create the ZAP SA layout now since filesystem object dnodes will + * refer to those attributes. + */ + saobjid = fs_set_zpl_attrs(zfs, &fs); + + /* + * Make a pass over the staged directory to detect hard links and assign + * virtual dnode numbers. + */ + dnodecount = 1; /* root directory */ + fsnode_foreach(root, fs_foreach_mark, &dnodecount); + + /* + * Make a second pass to populate the dataset with files from the + * staged directory. Most of our runtime is spent here. + */ + arg.rootdirfd = dirfd; + arg.zfs = zfs; + arg.fs = &fs; + SLIST_INIT(&arg.dirs); + fs_populate_dir(root, &arg); + assert(!SLIST_EMPTY(&arg.dirs)); + fsnode_foreach(root, fs_foreach_populate, &arg); + assert(SLIST_EMPTY(&arg.dirs)); + rootdirid = arg.rootdirid; + + /* + * Create an empty delete queue. 
We don't do anything with it, but + * OpenZFS will refuse to mount filesystems that don't have one. + */ + deleteq = objset_dnode_alloc(os, DMU_OT_UNLINKED_SET, &deleteqid); + zap_write(zfs, zap_alloc(os, deleteq)); + + /* + * Populate and write the master node object. This is a ZAP object + * containing various dataset properties and the object IDs of the root + * directory and delete queue. + */ + masterzap = zap_alloc(os, masterobj); + zap_add_uint64(masterzap, ZFS_ROOT_OBJ, rootdirid); + zap_add_uint64(masterzap, ZFS_UNLINKED_SET, deleteqid); + zap_add_uint64(masterzap, ZFS_SA_ATTRS, saobjid); + zap_add_uint64(masterzap, ZPL_VERSION_OBJ, 5 /* ZPL_VERSION_SA */); + zap_add_uint64(masterzap, "normalization", 0 /* off */); + zap_add_uint64(masterzap, "utf8only", 0 /* off */); + zap_add_uint64(masterzap, "casesensitivity", 0 /* case sensitive */); + zap_add_uint64(masterzap, "acltype", 2 /* NFSv4 */); + zap_write(zfs, masterzap); + + /* + * All finished with this object set, we may as well write it now. + * The DSL layer will sum up the bytes consumed by each dataset using + * information stored in the object set, so it can't be freed just yet. + */ + dsl_dir_dataset_write(zfs, os, dsldir); + + if (fakedroot) { + free(root->inode); + free(root->name); + free(root); + } + free(fs.saoffs); +} + +/* + * Create an object set for each DSL directory which has a dataset and doesn't + * already have an object set. + */ +static void +fs_build_unmounted(zfs_opt_t *zfs, zfs_dsl_dir_t *dsldir, void *arg __unused) +{ + if (dsl_dir_has_dataset(dsldir) && !dsl_dir_dataset_has_objset(dsldir)) + fs_build_one(zfs, dsldir, NULL, -1); +} + +/* + * Create our datasets and populate them with files. + */ +void +fs_build(zfs_opt_t *zfs, int dirfd, fsnode *root) +{ + /* + * Run through our datasets and find the root fsnode for each one. Each + * root fsnode is flagged so that we can figure out which dataset it + * belongs to. 
+ */ + dsl_dir_foreach(zfs, zfs->rootdsldir, fs_layout_one, root); + + /* + * Did we find our boot filesystem? + */ + if (zfs->bootfs != NULL && !zap_entry_exists(zfs->poolprops, "bootfs")) + errx(1, "no mounted dataset matches bootfs property `%s'", + zfs->bootfs); + + /* + * Traverse the file hierarchy starting from the root fsnode. One + * dataset, not necessarily the root dataset, must "own" the root + * directory by having its mountpoint be equal to the root path. + * + * As roots of other datasets are encountered during the traversal, + * fs_build_one() recursively creates the corresponding object sets and + * populates them. Once this function has returned, all datasets will + * have been fully populated. + */ + fs_build_one(zfs, root->inode->param, root, dirfd); + + /* + * Now create object sets for datasets whose mountpoints weren't found + * in the staging directory, either because there is no mountpoint, or + * because the mountpoint doesn't correspond to an existing directory. + */ + dsl_dir_foreach(zfs, zfs->rootdsldir, fs_build_unmounted, NULL); +} diff --git a/usr.sbin/makefs/zfs/objset.c b/usr.sbin/makefs/zfs/objset.c new file mode 100644 index 000000000000..f47953ac4339 --- /dev/null +++ b/usr.sbin/makefs/zfs/objset.c @@ -0,0 +1,263 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 The FreeBSD Foundation + * + * This software was developed by Mark Johnston under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#include <util.h> + +#include "zfs.h" + +#define DNODES_PER_CHUNK (MAXBLOCKSIZE / sizeof(dnode_phys_t)) + +struct objset_dnode_chunk { + dnode_phys_t buf[DNODES_PER_CHUNK]; + unsigned int nextfree; + STAILQ_ENTRY(objset_dnode_chunk) next; +}; + +typedef struct zfs_objset { + /* Physical object set. */ + objset_phys_t *phys; + off_t osloc; + off_t osblksz; + blkptr_t osbp; /* set in objset_write() */ + + /* Accounting. */ + off_t space; /* bytes allocated to this objset */ + + /* dnode allocator. 
*/ + uint64_t dnodecount; + STAILQ_HEAD(, objset_dnode_chunk) dnodechunks; +} zfs_objset_t; + +static void +dnode_init(dnode_phys_t *dnode, uint8_t type, uint8_t bonustype, + uint16_t bonuslen) +{ + dnode->dn_indblkshift = MAXBLOCKSHIFT; + dnode->dn_type = type; + dnode->dn_bonustype = bonustype; + dnode->dn_bonuslen = bonuslen; + dnode->dn_checksum = ZIO_CHECKSUM_FLETCHER_4; + dnode->dn_nlevels = 1; + dnode->dn_nblkptr = 1; + dnode->dn_flags = DNODE_FLAG_USED_BYTES; +} + +zfs_objset_t * +objset_alloc(zfs_opt_t *zfs, uint64_t type) +{ + struct objset_dnode_chunk *chunk; + zfs_objset_t *os; + + os = ecalloc(1, sizeof(*os)); + os->osblksz = sizeof(objset_phys_t); + os->osloc = objset_space_alloc(zfs, os, &os->osblksz); + + /* + * Object ID zero is always reserved for the meta dnode, which is + * embedded in the objset itself. + */ + STAILQ_INIT(&os->dnodechunks); + chunk = ecalloc(1, sizeof(*chunk)); + chunk->nextfree = 1; + STAILQ_INSERT_HEAD(&os->dnodechunks, chunk, next); + os->dnodecount = 1; + + os->phys = ecalloc(1, os->osblksz); + os->phys->os_type = type; + + dnode_init(&os->phys->os_meta_dnode, DMU_OT_DNODE, DMU_OT_NONE, 0); + os->phys->os_meta_dnode.dn_datablkszsec = + DNODE_BLOCK_SIZE >> MINBLOCKSHIFT; + + return (os); +} + +/* + * Write the dnode array and physical object set to disk. + */ +static void +_objset_write(zfs_opt_t *zfs, zfs_objset_t *os, struct dnode_cursor *c, + off_t loc) +{ + struct objset_dnode_chunk *chunk, *tmp; + unsigned int total; + + /* + * Write out the dnode array, i.e., the meta-dnode. For some reason its + * data blocks must be 16KB in size no matter how large the array is. 
+ */ + total = 0; + STAILQ_FOREACH_SAFE(chunk, &os->dnodechunks, next, tmp) { + unsigned int i; + + assert(chunk->nextfree > 0); + assert(chunk->nextfree <= os->dnodecount); + assert(chunk->nextfree <= DNODES_PER_CHUNK); + + for (i = 0; i < chunk->nextfree; i += DNODES_PER_BLOCK) { + blkptr_t *bp; + uint64_t fill; + + if (chunk->nextfree - i < DNODES_PER_BLOCK) + fill = DNODES_PER_BLOCK - (chunk->nextfree - i); + else + fill = 0; + bp = dnode_cursor_next(zfs, c, + (total + i) * sizeof(dnode_phys_t)); + vdev_pwrite_dnode_indir(zfs, &os->phys->os_meta_dnode, + 0, fill, chunk->buf + i, DNODE_BLOCK_SIZE, loc, bp); + loc += DNODE_BLOCK_SIZE; + } + total += i; + + free(chunk); + } + dnode_cursor_finish(zfs, c); + STAILQ_INIT(&os->dnodechunks); + + /* + * Write the object set itself. The saved block pointer will be copied + * into the referencing DSL dataset or the uberblocks. + */ + vdev_pwrite_data(zfs, DMU_OT_OBJSET, ZIO_CHECKSUM_FLETCHER_4, 0, + os->dnodecount - 1, os->phys, os->osblksz, os->osloc, &os->osbp); +} + +void +objset_write(zfs_opt_t *zfs, zfs_objset_t *os) +{ + struct dnode_cursor *c; + off_t dnodeloc, dnodesz; + uint64_t dnodecount; + + /* + * There is a chicken-and-egg problem here when writing the MOS: we + * cannot write space maps before we're finished allocating space from + * the vdev, and we can't write the MOS without having allocated space + * for indirect dnode blocks. Thus, rather than lazily allocating + * indirect blocks for the meta-dnode (which would be simpler), they are + * allocated up-front and before writing space maps. 
+ */ + dnodecount = os->dnodecount; + if (os == zfs->mos) + dnodecount += zfs->mscount; + dnodesz = dnodecount * sizeof(dnode_phys_t); + c = dnode_cursor_init(zfs, os, &os->phys->os_meta_dnode, dnodesz, + DNODE_BLOCK_SIZE); + dnodesz = roundup2(dnodesz, DNODE_BLOCK_SIZE); + dnodeloc = objset_space_alloc(zfs, os, &dnodesz); + + if (os == zfs->mos) { + vdev_spacemap_write(zfs); + + /* + * We've finished allocating space, account for it in $MOS and + * in the parent directory. + */ + dsl_dir_root_finalize(zfs, os->space); + } + _objset_write(zfs, os, c, dnodeloc); +} + +dnode_phys_t * +objset_dnode_bonus_alloc(zfs_objset_t *os, uint8_t type, uint8_t bonustype, + uint16_t bonuslen, uint64_t *idp) +{ + struct objset_dnode_chunk *chunk; + dnode_phys_t *dnode; + + assert(bonuslen <= DN_OLD_MAX_BONUSLEN); + assert(!STAILQ_EMPTY(&os->dnodechunks)); + + chunk = STAILQ_LAST(&os->dnodechunks, objset_dnode_chunk, next); + if (chunk->nextfree == DNODES_PER_CHUNK) { + chunk = ecalloc(1, sizeof(*chunk)); + STAILQ_INSERT_TAIL(&os->dnodechunks, chunk, next); + } + *idp = os->dnodecount++; + dnode = &chunk->buf[chunk->nextfree++]; + dnode_init(dnode, type, bonustype, bonuslen); + dnode->dn_datablkszsec = os->osblksz >> MINBLOCKSHIFT; + return (dnode); +} + +dnode_phys_t * +objset_dnode_alloc(zfs_objset_t *os, uint8_t type, uint64_t *idp) +{ + return (objset_dnode_bonus_alloc(os, type, DMU_OT_NONE, 0, idp)); +} + +/* + * Look up a physical dnode by ID. This is not used often so a linear search is + * fine. 
+ */ +dnode_phys_t * +objset_dnode_lookup(zfs_objset_t *os, uint64_t id) +{ + struct objset_dnode_chunk *chunk; + + assert(id > 0); + assert(id < os->dnodecount); + + STAILQ_FOREACH(chunk, &os->dnodechunks, next) { + if (id < DNODES_PER_CHUNK) + return (&chunk->buf[id]); + id -= DNODES_PER_CHUNK; + } + assert(0); + return (NULL); +} + +off_t +objset_space_alloc(zfs_opt_t *zfs, zfs_objset_t *os, off_t *lenp) +{ + off_t loc; + + loc = vdev_space_alloc(zfs, lenp); + os->space += *lenp; + return (loc); +} + +uint64_t +objset_space(const zfs_objset_t *os) +{ + return (os->space); +} + +void +objset_root_blkptr_copy(const zfs_objset_t *os, blkptr_t *bp) +{ + memcpy(bp, &os->osbp, sizeof(blkptr_t)); +} diff --git a/usr.sbin/makefs/zfs/vdev.c b/usr.sbin/makefs/zfs/vdev.c new file mode 100644 index 000000000000..afcce402cb13 --- /dev/null +++ b/usr.sbin/makefs/zfs/vdev.c @@ -0,0 +1,437 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 The FreeBSD Foundation + * + * This software was developed by Mark Johnston under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <assert.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <util.h> + +#include "zfs.h" + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-function" +#include "zfs/fletcher.c" +#include "zfs/sha256.c" +#pragma GCC diagnostic pop + +static void +blkptr_set(blkptr_t *bp, off_t off, off_t size, uint8_t dntype, uint8_t level, + uint64_t fill, enum zio_checksum cksumt, zio_cksum_t *cksum) +{ + dva_t *dva; + + assert(powerof2(size)); + + BP_ZERO(bp); + BP_SET_LSIZE(bp, size); + BP_SET_PSIZE(bp, size); + BP_SET_CHECKSUM(bp, cksumt); + BP_SET_COMPRESS(bp, ZIO_COMPRESS_OFF); + BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER); + BP_SET_BIRTH(bp, TXG, TXG); + BP_SET_LEVEL(bp, level); + BP_SET_FILL(bp, fill); + BP_SET_TYPE(bp, dntype); + + dva = BP_IDENTITY(bp); + DVA_SET_VDEV(dva, 0); + DVA_SET_OFFSET(dva, off); + DVA_SET_ASIZE(dva, size); + memcpy(&bp->blk_cksum, cksum, sizeof(*cksum)); +} + +/* + * Write a block of data to the vdev. The offset is always relative to the end + * of the second leading vdev label. + * + * Consumers should generally use the helpers below, which provide block + * pointers and update dnode accounting, rather than calling this function + * directly. 
+ */ +static void +vdev_pwrite(const zfs_opt_t *zfs, const void *buf, size_t len, off_t off) +{ + ssize_t n; + + assert(off >= 0 && off < zfs->asize); + assert(powerof2(len)); + assert((off_t)len > 0 && off + (off_t)len > off && + off + (off_t)len < zfs->asize); + if (zfs->spacemap != NULL) { + /* + * Verify that the blocks being written were in fact allocated. + * + * The space map isn't available once the on-disk space map is + * finalized, so this check doesn't quite catch everything. + */ + assert(bit_ntest(zfs->spacemap, off >> zfs->ashift, + (off + len - 1) >> zfs->ashift, 1)); + } + + off += VDEV_LABEL_START_SIZE; + for (size_t sofar = 0; sofar < len; sofar += n) { + n = pwrite(zfs->fd, (const char *)buf + sofar, len - sofar, + off + sofar); + if (n < 0) + err(1, "pwrite"); + assert(n > 0); + } +} + +void +vdev_pwrite_data(zfs_opt_t *zfs, uint8_t datatype, uint8_t cksumtype, + uint8_t level, uint64_t fill, const void *data, off_t sz, off_t loc, + blkptr_t *bp) +{ + zio_cksum_t cksum; + + assert(cksumtype == ZIO_CHECKSUM_FLETCHER_4); + + fletcher_4_native(data, sz, NULL, &cksum); + blkptr_set(bp, loc, sz, datatype, level, fill, cksumtype, &cksum); + vdev_pwrite(zfs, data, sz, loc); +} + +void +vdev_pwrite_dnode_indir(zfs_opt_t *zfs, dnode_phys_t *dnode, uint8_t level, + uint64_t fill, const void *data, off_t sz, off_t loc, blkptr_t *bp) +{ + vdev_pwrite_data(zfs, dnode->dn_type, dnode->dn_checksum, level, fill, + data, sz, loc, bp); + + assert((dnode->dn_flags & DNODE_FLAG_USED_BYTES) != 0); + dnode->dn_used += sz; +} + +void +vdev_pwrite_dnode_data(zfs_opt_t *zfs, dnode_phys_t *dnode, const void *data, + off_t sz, off_t loc) +{ + vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, data, sz, loc, + &dnode->dn_blkptr[0]); +} + +static void +vdev_label_set_checksum(void *buf, off_t off, off_t size) +{ + zio_cksum_t cksum; + zio_eck_t *eck; + + assert(size > 0 && (size_t)size >= sizeof(zio_eck_t)); + + eck = (zio_eck_t *)((char *)buf + size) - 1; + eck->zec_magic = 
ZEC_MAGIC; + ZIO_SET_CHECKSUM(&eck->zec_cksum, off, 0, 0, 0); + zio_checksum_SHA256(buf, size, NULL, &cksum); + eck->zec_cksum = cksum; +} + +/* + * Set embedded checksums and write the label at the specified index. + */ +void +vdev_label_write(zfs_opt_t *zfs, int ind, const vdev_label_t *labelp) +{ + vdev_label_t *label; + ssize_t n; + off_t blksz, loff; + + assert(ind >= 0 && ind < VDEV_LABELS); + + /* + * Make a copy since we have to modify the label to set checksums. + */ + label = ecalloc(1, sizeof(*label)); + memcpy(label, labelp, sizeof(*label)); + + if (ind < 2) + loff = ind * sizeof(*label); + else + loff = zfs->vdevsize - (VDEV_LABELS - ind) * sizeof(*label); + + /* + * Set the verifier checksum for the boot block. We don't use it, but + * the FreeBSD loader reads it and will complain if the checksum isn't + * valid. + */ + vdev_label_set_checksum(&label->vl_be, + loff + __offsetof(vdev_label_t, vl_be), sizeof(label->vl_be)); + + /* + * Set the verifier checksum for the label. + */ + vdev_label_set_checksum(&label->vl_vdev_phys, + loff + __offsetof(vdev_label_t, vl_vdev_phys), + sizeof(label->vl_vdev_phys)); + + /* + * Set the verifier checksum for the uberblocks. There is one uberblock + * per sector; for example, with an ashift of 12 we end up with + * 128KB/4KB=32 copies of the uberblock in the ring. + */ + blksz = 1 << zfs->ashift; + assert(sizeof(label->vl_uberblock) % blksz == 0); + for (size_t roff = 0; roff < sizeof(label->vl_uberblock); + roff += blksz) { + vdev_label_set_checksum(&label->vl_uberblock[0] + roff, + loff + __offsetof(vdev_label_t, vl_uberblock) + roff, + blksz); + } + + n = pwrite(zfs->fd, label, sizeof(*label), loff); + if (n < 0) + err(1, "writing vdev label"); + assert(n == sizeof(*label)); + + free(label); +} + +/* + * Find a chunk of contiguous free space of length *lenp, according to the + * following rules: + * 1. 
If the length is less than or equal to 128KB, the returned run's length + * will be the smallest power of 2 equal to or larger than the length. + * 2. If the length is larger than 128KB, the returned run's length will be + * the smallest multiple of 128KB that is equal to or larger than the + * length. + * 3. The returned run's offset will be aligned to its length, up to 128KB. + * + * XXX-MJ the third rule isn't actually required, so this can just be a dumb + * bump allocator. Maybe there's some benefit to keeping large blocks aligned, + * so let's keep it for now and hope we don't get too much fragmentation. + * Alternately we could try to allocate all blocks of a certain size from the + * same metaslab. + */ +off_t +vdev_space_alloc(zfs_opt_t *zfs, off_t *lenp) +{ + off_t len; + int align, loc, minblksz, nbits; + + minblksz = 1 << zfs->ashift; + len = roundup2(*lenp, minblksz); + + assert(len != 0); + assert(len / minblksz <= INT_MAX); + + if (len < MAXBLOCKSIZE) { + if ((len & (len - 1)) != 0) + len = (off_t)1 << flsll(len); + align = len / minblksz; + } else { + len = roundup2(len, MAXBLOCKSIZE); + align = MAXBLOCKSIZE / minblksz; + } + + for (loc = 0, nbits = len / minblksz;; loc = roundup2(loc, align)) { + bit_ffc_area_at(zfs->spacemap, loc, zfs->spacemapbits, nbits, + &loc); + if (loc == -1) { + errx(1, "failed to find %ju bytes of space", + (uintmax_t)len); + } + if ((loc & (align - 1)) == 0) + break; + } + assert(loc + nbits > loc); + bit_nset(zfs->spacemap, loc, loc + nbits - 1); + *lenp = len; + + return ((off_t)loc << zfs->ashift); +} + +static void +vdev_spacemap_init(zfs_opt_t *zfs) +{ + uint64_t nbits; + + assert(powerof2(zfs->mssize)); + + nbits = rounddown2(zfs->asize, zfs->mssize) >> zfs->ashift; + if (nbits > INT_MAX) { + /* + * With the smallest block size of 512B, the limit on the image + * size is 1TB (INT_MAX blocks of 512 bytes). That should be + * enough for anyone. 
+ */ + errx(1, "image size is too large"); + } + zfs->spacemapbits = (int)nbits; + zfs->spacemap = bit_alloc(zfs->spacemapbits); + if (zfs->spacemap == NULL) + err(1, "bitstring allocation failed"); +} + +void +vdev_spacemap_write(zfs_opt_t *zfs) +{ + dnode_phys_t *objarr; + bitstr_t *spacemap; + uint64_t *objarrblk; + off_t smblksz, objarrblksz, objarrloc; + + struct { + dnode_phys_t *dnode; + uint64_t dnid; + off_t loc; + } *sma; + + objarrblksz = sizeof(uint64_t) * zfs->mscount; + assert(objarrblksz <= MAXBLOCKSIZE); + objarrloc = objset_space_alloc(zfs, zfs->mos, &objarrblksz); + objarrblk = ecalloc(1, objarrblksz); + + objarr = objset_dnode_lookup(zfs->mos, zfs->objarrid); + objarr->dn_datablkszsec = objarrblksz >> MINBLOCKSHIFT; + + /* + * Use the smallest block size for space maps. The space allocation + * algorithm should aim to minimize the number of holes. + */ + smblksz = 1 << zfs->ashift; + + /* + * First allocate dnodes and space for all of our space maps. No more + * space can be allocated from the vdev after this point. + */ + sma = ecalloc(zfs->mscount, sizeof(*sma)); + for (uint64_t i = 0; i < zfs->mscount; i++) { + sma[i].dnode = objset_dnode_bonus_alloc(zfs->mos, + DMU_OT_SPACE_MAP, DMU_OT_SPACE_MAP_HEADER, + sizeof(space_map_phys_t), &sma[i].dnid); + sma[i].loc = objset_space_alloc(zfs, zfs->mos, &smblksz); + } + spacemap = zfs->spacemap; + zfs->spacemap = NULL; + + /* + * Now that the set of allocated space is finalized, populate each space + * map and write it to the vdev. + */ + for (uint64_t i = 0; i < zfs->mscount; i++) { + space_map_phys_t *sm; + uint64_t alloc, length, *smblk; + int shift, startb, endb, srunb, erunb; + + /* + * We only allocate a single block for this space map, but + * OpenZFS assumes that a space map object with sufficient bonus + * space supports histograms. 
+ */ + sma[i].dnode->dn_nblkptr = 3; + sma[i].dnode->dn_datablkszsec = smblksz >> MINBLOCKSHIFT; + + smblk = ecalloc(1, smblksz); + + alloc = length = 0; + shift = zfs->msshift - zfs->ashift; + for (srunb = startb = i * (1 << shift), + endb = (i + 1) * (1 << shift); + srunb < endb; srunb = erunb) { + uint64_t runlen, runoff; + + /* Find a run of allocated space. */ + bit_ffs_at(spacemap, srunb, zfs->spacemapbits, &srunb); + if (srunb == -1 || srunb >= endb) + break; + + bit_ffc_at(spacemap, srunb, zfs->spacemapbits, &erunb); + if (erunb == -1 || erunb > endb) + erunb = endb; + + /* + * The space represented by [srunb, erunb) has been + * allocated. Add a record to the space map to indicate + * this. Run offsets are relative to the beginning of + * the metaslab. + */ + runlen = erunb - srunb; + runoff = srunb - startb; + + assert(length * sizeof(uint64_t) < (uint64_t)smblksz); + smblk[length] = SM_PREFIX_ENCODE(SM2_PREFIX) | + SM2_RUN_ENCODE(runlen) | SM2_VDEV_ENCODE(0); + smblk[length + 1] = SM2_TYPE_ENCODE(SM_ALLOC) | + SM2_OFFSET_ENCODE(runoff); + + alloc += runlen << zfs->ashift; + length += 2; + } + + sm = DN_BONUS(sma[i].dnode); + sm->smp_length = length * sizeof(uint64_t); + sm->smp_alloc = alloc; + + vdev_pwrite_dnode_data(zfs, sma[i].dnode, smblk, smblksz, + sma[i].loc); + free(smblk); + + /* Record this space map in the space map object array. */ + objarrblk[i] = sma[i].dnid; + } + + /* + * All of the space maps are written, now write the object array. 
+ */ + vdev_pwrite_dnode_data(zfs, objarr, objarrblk, objarrblksz, objarrloc); + free(objarrblk); + + assert(zfs->spacemap == NULL); + free(spacemap); + free(sma); +} + +void +vdev_init(zfs_opt_t *zfs, const char *image) +{ + assert(zfs->ashift >= MINBLOCKSHIFT); + + zfs->fd = open(image, O_RDWR | O_CREAT | O_TRUNC, 0644); + if (zfs->fd == -1) + err(1, "Can't open `%s' for writing", image); + if (ftruncate(zfs->fd, zfs->vdevsize) != 0) + err(1, "Failed to extend image file `%s'", image); + + vdev_spacemap_init(zfs); +} + +void +vdev_fini(zfs_opt_t *zfs) +{ + assert(zfs->spacemap == NULL); + + if (zfs->fd != -1) { + if (close(zfs->fd) != 0) + err(1, "close"); + zfs->fd = -1; + } +} diff --git a/usr.sbin/makefs/zfs/zap.c b/usr.sbin/makefs/zfs/zap.c new file mode 100644 index 000000000000..316d1446cecf --- /dev/null +++ b/usr.sbin/makefs/zfs/zap.c @@ -0,0 +1,567 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 The FreeBSD Foundation + * + * This software was developed by Mark Johnston under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/param.h> +#include <sys/endian.h> + +#include <assert.h> +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include <util.h> + +#include "makefs.h" +#include "zfs.h" + +typedef struct zfs_zap_entry { + char *name; /* entry key, private copy */ + uint64_t hash; /* key hash */ + union { + uint8_t *valp; + uint16_t *val16p; + uint32_t *val32p; + uint64_t *val64p; + }; /* entry value, an integer array */ + uint64_t val64; /* embedded value for a common case */ + size_t intsz; /* array element size; 1, 2, 4 or 8 */ + size_t intcnt; /* array size */ + STAILQ_ENTRY(zfs_zap_entry) next; +} zfs_zap_entry_t; + +struct zfs_zap { + STAILQ_HEAD(, zfs_zap_entry) kvps; + uint64_t hashsalt; /* key hash input */ + unsigned long kvpcnt; /* number of key-value pairs */ + unsigned long chunks; /* count of chunks needed for fat ZAP */ + bool micro; /* can this be a micro ZAP? 
*/ + + dnode_phys_t *dnode; /* backpointer */ + zfs_objset_t *os; /* backpointer */ +}; + +static uint16_t +zap_entry_chunks(zfs_zap_entry_t *ent) +{ + return (1 + howmany(strlen(ent->name) + 1, ZAP_LEAF_ARRAY_BYTES) + + howmany(ent->intsz * ent->intcnt, ZAP_LEAF_ARRAY_BYTES)); +} + +static uint64_t +zap_hash(uint64_t salt, const char *name) +{ + static uint64_t crc64_table[256]; + const uint64_t crc64_poly = 0xC96C5795D7870F42UL; + const uint8_t *cp; + uint64_t crc; + uint8_t c; + + assert(salt != 0); + if (crc64_table[128] == 0) { + for (int i = 0; i < 256; i++) { + uint64_t *t; + + t = crc64_table + i; + *t = i; + for (int j = 8; j > 0; j--) + *t = (*t >> 1) ^ (-(*t & 1) & crc64_poly); + } + } + assert(crc64_table[128] == crc64_poly); + + for (cp = (const uint8_t *)name, crc = salt; (c = *cp) != '\0'; cp++) + crc = (crc >> 8) ^ crc64_table[(crc ^ c) & 0xFF]; + + /* + * Only use 28 bits, since we need 4 bits in the cookie for the + * collision differentiator. We MUST use the high bits, since + * those are the ones that we first pay attention to when + * choosing the bucket. 
+ */ + crc &= ~((1ULL << (64 - ZAP_HASHBITS)) - 1); + + return (crc); +} + +zfs_zap_t * +zap_alloc(zfs_objset_t *os, dnode_phys_t *dnode) +{ + zfs_zap_t *zap; + + zap = ecalloc(1, sizeof(*zap)); + STAILQ_INIT(&zap->kvps); + zap->hashsalt = ((uint64_t)random() << 32) | random(); + zap->micro = true; + zap->kvpcnt = 0; + zap->chunks = 0; + zap->dnode = dnode; + zap->os = os; + return (zap); +} + +void +zap_add(zfs_zap_t *zap, const char *name, size_t intsz, size_t intcnt, + const uint8_t *val) +{ + zfs_zap_entry_t *ent; + + assert(intsz == 1 || intsz == 2 || intsz == 4 || intsz == 8); + assert(strlen(name) + 1 <= ZAP_MAXNAMELEN); + assert(intcnt <= ZAP_MAXVALUELEN && intcnt * intsz <= ZAP_MAXVALUELEN); + + ent = ecalloc(1, sizeof(*ent)); + ent->name = estrdup(name); + ent->hash = zap_hash(zap->hashsalt, ent->name); + ent->intsz = intsz; + ent->intcnt = intcnt; + if (intsz == sizeof(uint64_t) && intcnt == 1) { + /* + * Micro-optimization to elide a memory allocation in that most + * common case where this is a directory entry. 
+ */ + ent->val64p = &ent->val64; + } else { + ent->valp = ecalloc(intcnt, intsz); + } + memcpy(ent->valp, val, intcnt * intsz); + zap->kvpcnt++; + zap->chunks += zap_entry_chunks(ent); + STAILQ_INSERT_TAIL(&zap->kvps, ent, next); + + if (zap->micro && (intcnt != 1 || intsz != sizeof(uint64_t) || + strlen(name) + 1 > MZAP_NAME_LEN || zap->kvpcnt > MZAP_ENT_MAX)) + zap->micro = false; +} + +void +zap_add_uint64(zfs_zap_t *zap, const char *name, uint64_t val) +{ + zap_add(zap, name, sizeof(uint64_t), 1, (uint8_t *)&val); +} + +void +zap_add_uint64_self(zfs_zap_t *zap, uint64_t val) +{ + char name[32]; + + (void)snprintf(name, sizeof(name), "%jx", (uintmax_t)val); + zap_add(zap, name, sizeof(uint64_t), 1, (uint8_t *)&val); +} + +void +zap_add_string(zfs_zap_t *zap, const char *name, const char *val) +{ + zap_add(zap, name, 1, strlen(val) + 1, (const uint8_t *)val); +} + +bool +zap_entry_exists(zfs_zap_t *zap, const char *name) +{ + zfs_zap_entry_t *ent; + + STAILQ_FOREACH(ent, &zap->kvps, next) { + if (strcmp(ent->name, name) == 0) + return (true); + } + return (false); +} + +static void +zap_micro_write(zfs_opt_t *zfs, zfs_zap_t *zap) +{ + dnode_phys_t *dnode; + zfs_zap_entry_t *ent; + mzap_phys_t *mzap; + mzap_ent_phys_t *ment; + off_t bytes, loc; + uint16_t cd; + + _Static_assert(MZAP_ENT_MAX <= UINT16_MAX, + "micro ZAP collision differentiator must fit in 16 bits"); + + memset(zfs->filebuf, 0, sizeof(zfs->filebuf)); + mzap = (mzap_phys_t *)&zfs->filebuf[0]; + mzap->mz_block_type = ZBT_MICRO; + mzap->mz_salt = zap->hashsalt; + mzap->mz_normflags = 0; + + bytes = sizeof(*mzap) + (zap->kvpcnt - 1) * sizeof(*ment); + assert(bytes <= (off_t)MZAP_MAX_BLKSZ); + + cd = 0; + ment = &mzap->mz_chunk[0]; + STAILQ_FOREACH(ent, &zap->kvps, next) { + memcpy(&ment->mze_value, ent->valp, ent->intsz * ent->intcnt); + ment->mze_cd = cd++; + (void)strlcpy(ment->mze_name, ent->name, + sizeof(ment->mze_name)); + ment++; + } + + loc = objset_space_alloc(zfs, zap->os, &bytes); + + dnode 
= zap->dnode; + dnode->dn_maxblkid = 0; + dnode->dn_datablkszsec = bytes >> MINBLOCKSHIFT; + + vdev_pwrite_dnode_data(zfs, dnode, zfs->filebuf, bytes, loc); +} + +/* + * Write some data to the fat ZAP leaf chunk starting at index "li". + * + * Note that individual integers in the value may be split among consecutive + * leaves. + */ +static void +zap_fat_write_array_chunk(zap_leaf_t *l, uint16_t li, size_t sz, + const uint8_t *val) +{ + struct zap_leaf_array *la; + + assert(sz <= ZAP_MAXVALUELEN); + assert(sz > 0); + + for (uint16_t n, resid = sz; resid > 0; resid -= n, val += n, li++) { + n = MIN(resid, ZAP_LEAF_ARRAY_BYTES); + + la = &ZAP_LEAF_CHUNK(l, li).l_array; + assert(la->la_type == ZAP_CHUNK_FREE); + la->la_type = ZAP_CHUNK_ARRAY; + memcpy(la->la_array, val, n); + la->la_next = li + 1; + } + la->la_next = 0xffff; +} + +/* + * Find the shortest hash prefix length which lets us distribute keys without + * overflowing a leaf block. This is not (space) optimal, but is simple, and + * directories large enough to overflow a single 128KB leaf block are uncommon. + */ +static unsigned int +zap_fat_write_prefixlen(zfs_zap_t *zap, zap_leaf_t *l) +{ + zfs_zap_entry_t *ent; + unsigned int prefixlen; + + if (zap->chunks <= ZAP_LEAF_NUMCHUNKS(l)) { + /* + * All chunks will fit in a single leaf block. + */ + return (0); + } + + for (prefixlen = 1; prefixlen < (unsigned int)l->l_bs; prefixlen++) { + uint32_t *leafchunks; + + leafchunks = ecalloc(1u << prefixlen, sizeof(*leafchunks)); + STAILQ_FOREACH(ent, &zap->kvps, next) { + uint64_t li; + uint16_t chunks; + + li = ZAP_HASH_IDX(ent->hash, prefixlen); + + chunks = zap_entry_chunks(ent); + if (ZAP_LEAF_NUMCHUNKS(l) - leafchunks[li] < chunks) { + /* + * Not enough space, grow the prefix and retry. + */ + break; + } + leafchunks[li] += chunks; + } + free(leafchunks); + + if (ent == NULL) { + /* + * Everything fits, we're done. + */ + break; + } + } + + /* + * If this fails, then we need to expand the pointer table. 
For now + * this situation is unhandled since it is hard to trigger. + */ + assert(prefixlen < (unsigned int)l->l_bs); + + return (prefixlen); +} + +/* + * Initialize a fat ZAP leaf block. + */ +static void +zap_fat_write_leaf_init(zap_leaf_t *l, uint64_t prefix, int prefixlen) +{ + zap_leaf_phys_t *leaf; + + leaf = l->l_phys; + + leaf->l_hdr.lh_block_type = ZBT_LEAF; + leaf->l_hdr.lh_magic = ZAP_LEAF_MAGIC; + leaf->l_hdr.lh_nfree = ZAP_LEAF_NUMCHUNKS(l); + leaf->l_hdr.lh_prefix = prefix; + leaf->l_hdr.lh_prefix_len = prefixlen; + + /* Initialize the leaf hash table. */ + assert(leaf->l_hdr.lh_nfree < 0xffff); + memset(leaf->l_hash, 0xff, + ZAP_LEAF_HASH_NUMENTRIES(l) * sizeof(*leaf->l_hash)); + + /* Initialize the leaf chunks. */ + for (uint16_t i = 0; i < ZAP_LEAF_NUMCHUNKS(l); i++) { + struct zap_leaf_free *lf; + + lf = &ZAP_LEAF_CHUNK(l, i).l_free; + lf->lf_type = ZAP_CHUNK_FREE; + if (i + 1 == ZAP_LEAF_NUMCHUNKS(l)) + lf->lf_next = 0xffff; + else + lf->lf_next = i + 1; + } +} + +static void +zap_fat_write(zfs_opt_t *zfs, zfs_zap_t *zap) +{ + struct dnode_cursor *c; + zap_leaf_t l; + zap_phys_t *zaphdr; + struct zap_table_phys *zt; + zfs_zap_entry_t *ent; + dnode_phys_t *dnode; + uint8_t *leafblks; + uint64_t lblkcnt, *ptrhasht; + off_t loc, blksz; + size_t blkshift; + unsigned int prefixlen; + int ptrcnt; + + /* + * For simplicity, always use the largest block size. This should be ok + * since most directories will be micro ZAPs, but it's space inefficient + * for small ZAPs and might need to be revisited. + */ + blkshift = MAXBLOCKSHIFT; + blksz = (off_t)1 << blkshift; + + /* + * Embedded pointer tables give up to 8192 entries. This ought to be + * enough for anything except massive directories. 
+ */ + ptrcnt = (blksz / 2) / sizeof(uint64_t); + + memset(zfs->filebuf, 0, sizeof(zfs->filebuf)); + zaphdr = (zap_phys_t *)&zfs->filebuf[0]; + zaphdr->zap_block_type = ZBT_HEADER; + zaphdr->zap_magic = ZAP_MAGIC; + zaphdr->zap_num_entries = zap->kvpcnt; + zaphdr->zap_salt = zap->hashsalt; + + l.l_bs = blkshift; + l.l_phys = NULL; + + zt = &zaphdr->zap_ptrtbl; + zt->zt_blk = 0; + zt->zt_numblks = 0; + zt->zt_shift = flsll(ptrcnt) - 1; + zt->zt_nextblk = 0; + zt->zt_blks_copied = 0; + + /* + * How many leaf blocks do we need? Initialize them and update the + * header. + */ + prefixlen = zap_fat_write_prefixlen(zap, &l); + lblkcnt = (uint64_t)1 << prefixlen; + leafblks = ecalloc(lblkcnt, blksz); + for (unsigned int li = 0; li < lblkcnt; li++) { + l.l_phys = (zap_leaf_phys_t *)(leafblks + li * blksz); + zap_fat_write_leaf_init(&l, li, prefixlen); + } + zaphdr->zap_num_leafs = lblkcnt; + zaphdr->zap_freeblk = lblkcnt + 1; + + /* + * For each entry, figure out which leaf block it belongs to based on + * the upper bits of its hash, allocate chunks from that leaf, and fill + * them out. + */ + ptrhasht = (uint64_t *)(&zfs->filebuf[0] + blksz / 2); + STAILQ_FOREACH(ent, &zap->kvps, next) { + struct zap_leaf_entry *le; + uint16_t *lptr; + uint64_t hi, li; + uint16_t namelen, nchunks, nnamechunks, nvalchunks; + + hi = ZAP_HASH_IDX(ent->hash, zt->zt_shift); + li = ZAP_HASH_IDX(ent->hash, prefixlen); + assert(ptrhasht[hi] == 0 || ptrhasht[hi] == li + 1); + ptrhasht[hi] = li + 1; + l.l_phys = (zap_leaf_phys_t *)(leafblks + li * blksz); + + namelen = strlen(ent->name) + 1; + + /* + * How many leaf chunks do we need for this entry? + */ + nnamechunks = howmany(namelen, ZAP_LEAF_ARRAY_BYTES); + nvalchunks = howmany(ent->intcnt, + ZAP_LEAF_ARRAY_BYTES / ent->intsz); + nchunks = 1 + nnamechunks + nvalchunks; + + /* + * Allocate a run of free leaf chunks for this entry, + * potentially extending a hash chain. 
+ */ + assert(l.l_phys->l_hdr.lh_nfree >= nchunks); + l.l_phys->l_hdr.lh_nfree -= nchunks; + l.l_phys->l_hdr.lh_nentries++; + lptr = ZAP_LEAF_HASH_ENTPTR(&l, ent->hash); + while (*lptr != 0xffff) { + assert(*lptr < ZAP_LEAF_NUMCHUNKS(&l)); + le = ZAP_LEAF_ENTRY(&l, *lptr); + assert(le->le_type == ZAP_CHUNK_ENTRY); + le->le_cd++; + lptr = &le->le_next; + } + *lptr = l.l_phys->l_hdr.lh_freelist; + l.l_phys->l_hdr.lh_freelist += nchunks; + assert(l.l_phys->l_hdr.lh_freelist <= + ZAP_LEAF_NUMCHUNKS(&l)); + if (l.l_phys->l_hdr.lh_freelist == + ZAP_LEAF_NUMCHUNKS(&l)) + l.l_phys->l_hdr.lh_freelist = 0xffff; + + /* + * Integer values must be stored in big-endian format. + */ + switch (ent->intsz) { + case 1: + break; + case 2: + for (uint16_t *v = ent->val16p; + v - ent->val16p < (ptrdiff_t)ent->intcnt; + v++) + *v = htobe16(*v); + break; + case 4: + for (uint32_t *v = ent->val32p; + v - ent->val32p < (ptrdiff_t)ent->intcnt; + v++) + *v = htobe32(*v); + break; + case 8: + for (uint64_t *v = ent->val64p; + v - ent->val64p < (ptrdiff_t)ent->intcnt; + v++) + *v = htobe64(*v); + break; + default: + assert(0); + } + + /* + * Finally, write out the leaf chunks for this entry. + */ + le = ZAP_LEAF_ENTRY(&l, *lptr); + assert(le->le_type == ZAP_CHUNK_FREE); + le->le_type = ZAP_CHUNK_ENTRY; + le->le_next = 0xffff; + le->le_name_chunk = *lptr + 1; + le->le_name_numints = namelen; + le->le_value_chunk = *lptr + 1 + nnamechunks; + le->le_value_intlen = ent->intsz; + le->le_value_numints = ent->intcnt; + le->le_hash = ent->hash; + zap_fat_write_array_chunk(&l, *lptr + 1, namelen, + (uint8_t *)ent->name); + zap_fat_write_array_chunk(&l, *lptr + 1 + nnamechunks, + ent->intcnt * ent->intsz, ent->valp); + } + + /* + * Initialize unused slots of the pointer table. + */ + for (int i = 0; i < ptrcnt; i++) + if (ptrhasht[i] == 0) + ptrhasht[i] = (i >> (zt->zt_shift - prefixlen)) + 1; + + /* + * Write the whole thing to disk. 
+ */ + dnode = zap->dnode; + dnode->dn_datablkszsec = blksz >> MINBLOCKSHIFT; + dnode->dn_maxblkid = lblkcnt + 1; + + c = dnode_cursor_init(zfs, zap->os, zap->dnode, + (lblkcnt + 1) * blksz, blksz); + + loc = objset_space_alloc(zfs, zap->os, &blksz); + vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, zfs->filebuf, blksz, loc, + dnode_cursor_next(zfs, c, 0)); + + for (uint64_t i = 0; i < lblkcnt; i++) { + loc = objset_space_alloc(zfs, zap->os, &blksz); + vdev_pwrite_dnode_indir(zfs, dnode, 0, 1, leafblks + i * blksz, + blksz, loc, dnode_cursor_next(zfs, c, (i + 1) * blksz)); + } + + dnode_cursor_finish(zfs, c); + + free(leafblks); +} + +void +zap_write(zfs_opt_t *zfs, zfs_zap_t *zap) +{ + zfs_zap_entry_t *ent; + + if (zap->micro) { + zap_micro_write(zfs, zap); + } else { + assert(!STAILQ_EMPTY(&zap->kvps)); + assert(zap->kvpcnt > 0); + zap_fat_write(zfs, zap); + } + + while ((ent = STAILQ_FIRST(&zap->kvps)) != NULL) { + STAILQ_REMOVE_HEAD(&zap->kvps, next); + if (ent->val64p != &ent->val64) + free(ent->valp); + free(ent->name); + free(ent); + } + free(zap); +} diff --git a/usr.sbin/makefs/zfs/zfs.h b/usr.sbin/makefs/zfs/zfs.h new file mode 100644 index 000000000000..33694e2bdbee --- /dev/null +++ b/usr.sbin/makefs/zfs/zfs.h @@ -0,0 +1,176 @@ +/*- + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (c) 2022 The FreeBSD Foundation + * + * This software was developed by Mark Johnston under sponsorship from + * the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#ifndef _MAKEFS_ZFS_H_ +#define _MAKEFS_ZFS_H_ + +#include <sys/types.h> +#include <sys/endian.h> +#include <sys/queue.h> + +#include <bitstring.h> +#include <stdalign.h> +#include <stdbool.h> + +#include "makefs.h" + +#include "zfs/nvlist.h" +#define ASSERT assert +#include "zfs/zfsimpl.h" + +#define MAXBLOCKSHIFT 17 /* 128KB */ +#define MAXBLOCKSIZE ((off_t)(1 << MAXBLOCKSHIFT)) +_Static_assert(MAXBLOCKSIZE == SPA_OLDMAXBLOCKSIZE, ""); +#define MINBLOCKSHIFT 9 /* 512B */ +#define MINBLOCKSIZE ((off_t)(1 << MINBLOCKSHIFT)) +_Static_assert(MINBLOCKSIZE == SPA_MINBLOCKSIZE, ""); +#define MINDEVSIZE ((off_t)SPA_MINDEVSIZE) + +/* All data was written in this transaction group. */ +#define TXG 4 +#define TXG_SIZE 4 + +typedef struct zfs_dsl_dataset zfs_dsl_dataset_t; +typedef struct zfs_dsl_dir zfs_dsl_dir_t; +typedef struct zfs_objset zfs_objset_t; +typedef struct zfs_zap zfs_zap_t; + +struct dataset_desc { + char *params; + STAILQ_ENTRY(dataset_desc) next; +}; + +typedef struct { + /* + * Block buffer, needs to be aligned for various on-disk structures, + * ZAPs, etc.. + */ + char filebuf[MAXBLOCKSIZE] __aligned(alignof(uint64_t)); + + bool nowarn; /* ignored */ + + /* Pool parameters. 
*/ + const char *poolname; + char *rootpath; /* implicit mount point prefix */ + char *bootfs; /* bootable dataset, pool property */ + int ashift; /* vdev block size */ + uint64_t mssize; /* metaslab size */ + STAILQ_HEAD(, dataset_desc) datasetdescs; /* non-root dataset descrs */ + bool verify_txgs; /* verify data upon import */ + + /* Pool state. */ + uint64_t poolguid; /* pool and root vdev GUID */ + zfs_zap_t *poolprops; + + /* MOS state. */ + zfs_objset_t *mos; /* meta object set */ + uint64_t objarrid; /* space map object array */ + + /* DSL state. */ + zfs_dsl_dir_t *rootdsldir; /* root DSL directory */ + zfs_dsl_dataset_t *rootds; + zfs_dsl_dir_t *origindsldir; /* $ORIGIN */ + zfs_dsl_dataset_t *originds; + zfs_dsl_dataset_t *snapds; + zfs_zap_t *cloneszap; + zfs_dsl_dir_t *freedsldir; /* $FREE */ + zfs_dsl_dir_t *mosdsldir; /* $MOS */ + + /* vdev state. */ + int fd; /* vdev disk fd */ + uint64_t vdevguid; /* disk vdev GUID */ + off_t vdevsize; /* vdev size, including labels */ + off_t asize; /* vdev size, excluding labels */ + bitstr_t *spacemap; /* space allocation tracking */ + int spacemapbits; /* one bit per ashift-sized block */ + uint64_t msshift; /* log2(metaslab size) */ + uint64_t mscount; /* number of metaslabs for this vdev */ +} zfs_opt_t; + +/* dsl.c */ +void dsl_init(zfs_opt_t *); +const char *dsl_dir_fullname(const zfs_dsl_dir_t *); +uint64_t dsl_dir_id(zfs_dsl_dir_t *); +uint64_t dsl_dir_dataset_id(zfs_dsl_dir_t *); +void dsl_dir_foreach(zfs_opt_t *, zfs_dsl_dir_t *, + void (*)(zfs_opt_t *, zfs_dsl_dir_t *, void *), void *); +int dsl_dir_get_canmount(zfs_dsl_dir_t *, uint64_t *); +char *dsl_dir_get_mountpoint(zfs_opt_t *, zfs_dsl_dir_t *); +bool dsl_dir_has_dataset(zfs_dsl_dir_t *); +bool dsl_dir_dataset_has_objset(zfs_dsl_dir_t *); +void dsl_dir_dataset_write(zfs_opt_t *, zfs_objset_t *, zfs_dsl_dir_t *); +void dsl_dir_root_finalize(zfs_opt_t *, uint64_t); +void dsl_write(zfs_opt_t *); + +/* fs.c */ +void fs_build(zfs_opt_t *, int, fsnode 
*); + +/* objset.c */ +zfs_objset_t *objset_alloc(zfs_opt_t *zfs, uint64_t type); +off_t objset_space_alloc(zfs_opt_t *, zfs_objset_t *, off_t *); +dnode_phys_t *objset_dnode_alloc(zfs_objset_t *, uint8_t, uint64_t *); +dnode_phys_t *objset_dnode_bonus_alloc(zfs_objset_t *, uint8_t, uint8_t, + uint16_t, uint64_t *); +dnode_phys_t *objset_dnode_lookup(zfs_objset_t *, uint64_t); +void objset_root_blkptr_copy(const zfs_objset_t *, blkptr_t *); +uint64_t objset_space(const zfs_objset_t *); +void objset_write(zfs_opt_t *zfs, zfs_objset_t *os); + +/* vdev.c */ +void vdev_init(zfs_opt_t *, const char *); +off_t vdev_space_alloc(zfs_opt_t *zfs, off_t *lenp); +void vdev_pwrite_data(zfs_opt_t *zfs, uint8_t datatype, uint8_t cksumtype, + uint8_t level, uint64_t fill, const void *data, off_t sz, off_t loc, + blkptr_t *bp); +void vdev_pwrite_dnode_indir(zfs_opt_t *zfs, dnode_phys_t *dnode, uint8_t level, + uint64_t fill, const void *data, off_t sz, off_t loc, blkptr_t *bp); +void vdev_pwrite_dnode_data(zfs_opt_t *zfs, dnode_phys_t *dnode, const void *data, + off_t sz, off_t loc); +void vdev_label_write(zfs_opt_t *zfs, int ind, const vdev_label_t *labelp); +void vdev_spacemap_write(zfs_opt_t *); +void vdev_fini(zfs_opt_t *zfs); + +/* zap.c */ +zfs_zap_t *zap_alloc(zfs_objset_t *, dnode_phys_t *); +void zap_add(zfs_zap_t *, const char *, size_t, size_t, const uint8_t *); +void zap_add_uint64(zfs_zap_t *, const char *, uint64_t); +void zap_add_uint64_self(zfs_zap_t *, uint64_t); +void zap_add_string(zfs_zap_t *, const char *, const char *); +bool zap_entry_exists(zfs_zap_t *, const char *); +void zap_write(zfs_opt_t *, zfs_zap_t *); + +/* zfs.c */ +struct dnode_cursor *dnode_cursor_init(zfs_opt_t *, zfs_objset_t *, + dnode_phys_t *, off_t, off_t); +blkptr_t *dnode_cursor_next(zfs_opt_t *, struct dnode_cursor *, off_t); +void dnode_cursor_finish(zfs_opt_t *, struct dnode_cursor *); +uint64_t randomguid(void); + +#endif /* !_MAKEFS_ZFS_H_ */ |